From c0f7e509ccdf8610753221c86c2794b842eafd76 Mon Sep 17 00:00:00 2001 From: Joseph Rahmeh Date: Tue, 4 Jun 2019 07:57:48 -0700 Subject: [PATCH] SweRV 1.1 --- configs/README.md | 14 + configs/snapshots/default/common_defines.vh | 170 ++ configs/snapshots/default/defines.h | 132 + configs/snapshots/default/pd_defines.vh | 11 + configs/snapshots/default/perl_configs.pl | 566 ++++ .../default/pic_ctrl_verilator_unroll.sv | 173 ++ configs/snapshots/default/pic_map_auto.h | 31 + configs/snapshots/default/whisper.json | 395 +++ configs/swerv.config | 1706 +++++++++++ design/dbg/dbg.sv | 626 ++++ design/dec/cdecode | 254 ++ design/dec/csrdecode | 229 ++ design/dec/dec.sv | 579 ++++ design/dec/dec_decode_ctl.sv | 2660 +++++++++++++++++ design/dec/dec_gpr_ctl.sv | 113 + design/dec/dec_ib_ctl.sv | 463 +++ design/dec/dec_tlu_ctl.sv | 2570 ++++++++++++++++ design/dec/dec_trigger.sv | 57 + design/dec/decode | 322 ++ design/dma_ctrl.sv | 697 +++++ design/dmi/dmi_jtag_to_core_sync.v | 65 + design/dmi/dmi_wrapper.v | 91 + design/dmi/rvjtag_tap.sv | 223 ++ design/exu/exu.sv | 839 ++++++ design/exu/exu_alu_ctl.sv | 275 ++ design/exu/exu_div_ctl.sv | 315 ++ design/exu/exu_mul_ctl.sv | 118 + design/flist.questa | 52 + design/ifu/ifu.sv | 406 +++ design/ifu/ifu_aln_ctl.sv | 1245 ++++++++ design/ifu/ifu_bp_ctl.sv | 1777 +++++++++++ design/ifu/ifu_compress_ctl.sv | 379 +++ design/ifu/ifu_ic_mem.sv | 559 ++++ design/ifu/ifu_iccm_mem.sv | 143 + design/ifu/ifu_ifc_ctl.sv | 284 ++ design/ifu/ifu_mem_ctl.sv | 1521 ++++++++++ design/include/build.h | 65 + design/include/global.h | 51 + design/include/swerv_types.sv | 335 +++ design/lib/ahb_to_axi4.sv | 281 ++ design/lib/axi4_to_ahb.sv | 472 +++ design/lib/beh_lib.sv | 452 +++ design/lib/mem_lib.sv | 1025 +++++++ design/lsu/lsu.sv | 389 +++ design/lsu/lsu_addrcheck.sv | 183 ++ design/lsu/lsu_bus_buffer.sv | 923 ++++++ design/lsu/lsu_bus_intf.sv | 419 +++ design/lsu/lsu_clkdomain.sv | 210 ++ design/lsu/lsu_dccm_ctl.sv | 197 ++ 
design/lsu/lsu_dccm_mem.sv | 127 + design/lsu/lsu_ecc.sv | 185 ++ design/lsu/lsu_lsc_ctl.sv | 329 ++ design/lsu/lsu_stbuf.sv | 399 +++ design/lsu/lsu_trigger.sv | 61 + design/mem.sv | 132 + design/pic_ctrl.sv | 521 ++++ design/swerv.sv | 1351 +++++++++ design/swerv_wrapper.sv | 409 +++ testbench/ahb_sif.sv | 166 + testbench/asm/hello_world.s | 67 + testbench/asm/hello_world2.s | 71 + testbench/flist.spyglass | 42 + testbench/flist.vcs | 42 + testbench/flist.verilator | 42 + testbench/flist.vlog | 42 + testbench/hex/data.hex | 7 + testbench/hex/program.hex | 6 + testbench/input.tcl | 4 + testbench/link.ld | 12 + testbench/tb_top.sv | 415 +++ testbench/test_tb_top.cpp | 83 + tools/JSON.pm | 2267 ++++++++++++++ tools/Makefile | 129 + tools/addassign | 46 + tools/coredecode | 198 ++ tools/picmap | 59 + tools/smalldiv | 121 + tools/unrollforverilator | 169 ++ 78 files changed, 32564 insertions(+) create mode 100644 configs/README.md create mode 100644 configs/snapshots/default/common_defines.vh create mode 100644 configs/snapshots/default/defines.h create mode 100644 configs/snapshots/default/pd_defines.vh create mode 100644 configs/snapshots/default/perl_configs.pl create mode 100644 configs/snapshots/default/pic_ctrl_verilator_unroll.sv create mode 100644 configs/snapshots/default/pic_map_auto.h create mode 100644 configs/snapshots/default/whisper.json create mode 100755 configs/swerv.config create mode 100644 design/dbg/dbg.sv create mode 100644 design/dec/cdecode create mode 100644 design/dec/csrdecode create mode 100644 design/dec/dec.sv create mode 100644 design/dec/dec_decode_ctl.sv create mode 100644 design/dec/dec_gpr_ctl.sv create mode 100644 design/dec/dec_ib_ctl.sv create mode 100644 design/dec/dec_tlu_ctl.sv create mode 100644 design/dec/dec_trigger.sv create mode 100644 design/dec/decode create mode 100644 design/dma_ctrl.sv create mode 100644 design/dmi/dmi_jtag_to_core_sync.v create mode 100644 design/dmi/dmi_wrapper.v create mode 100644 
design/dmi/rvjtag_tap.sv create mode 100644 design/exu/exu.sv create mode 100644 design/exu/exu_alu_ctl.sv create mode 100644 design/exu/exu_div_ctl.sv create mode 100644 design/exu/exu_mul_ctl.sv create mode 100644 design/flist.questa create mode 100644 design/ifu/ifu.sv create mode 100644 design/ifu/ifu_aln_ctl.sv create mode 100644 design/ifu/ifu_bp_ctl.sv create mode 100644 design/ifu/ifu_compress_ctl.sv create mode 100644 design/ifu/ifu_ic_mem.sv create mode 100644 design/ifu/ifu_iccm_mem.sv create mode 100644 design/ifu/ifu_ifc_ctl.sv create mode 100644 design/ifu/ifu_mem_ctl.sv create mode 100644 design/include/build.h create mode 100644 design/include/global.h create mode 100644 design/include/swerv_types.sv create mode 100644 design/lib/ahb_to_axi4.sv create mode 100644 design/lib/axi4_to_ahb.sv create mode 100644 design/lib/beh_lib.sv create mode 100644 design/lib/mem_lib.sv create mode 100644 design/lsu/lsu.sv create mode 100644 design/lsu/lsu_addrcheck.sv create mode 100644 design/lsu/lsu_bus_buffer.sv create mode 100644 design/lsu/lsu_bus_intf.sv create mode 100644 design/lsu/lsu_clkdomain.sv create mode 100644 design/lsu/lsu_dccm_ctl.sv create mode 100644 design/lsu/lsu_dccm_mem.sv create mode 100644 design/lsu/lsu_ecc.sv create mode 100644 design/lsu/lsu_lsc_ctl.sv create mode 100644 design/lsu/lsu_stbuf.sv create mode 100644 design/lsu/lsu_trigger.sv create mode 100644 design/mem.sv create mode 100644 design/pic_ctrl.sv create mode 100644 design/swerv.sv create mode 100644 design/swerv_wrapper.sv create mode 100644 testbench/ahb_sif.sv create mode 100644 testbench/asm/hello_world.s create mode 100644 testbench/asm/hello_world2.s create mode 100644 testbench/flist.spyglass create mode 100644 testbench/flist.vcs create mode 100644 testbench/flist.verilator create mode 100644 testbench/flist.vlog create mode 100755 testbench/hex/data.hex create mode 100644 testbench/hex/program.hex create mode 100644 testbench/input.tcl create mode 100644 
testbench/link.ld create mode 100644 testbench/tb_top.sv create mode 100644 testbench/test_tb_top.cpp create mode 100644 tools/JSON.pm create mode 100755 tools/Makefile create mode 100755 tools/addassign create mode 100755 tools/coredecode create mode 100755 tools/picmap create mode 100755 tools/smalldiv create mode 100755 tools/unrollforverilator diff --git a/configs/README.md b/configs/README.md new file mode 100644 index 0000000..7496944 --- /dev/null +++ b/configs/README.md @@ -0,0 +1,14 @@ +# SweRV RISC-V core from Western Digital + +## Configuration + +### Contents +Name | Description +---------------------- | ------------------------------ +swerv.config | Configuration script for SweRV + + +This script will generate a consistent set of `defines/#defines needed for the design and testbench. +A perl hash (*perl_configs.pl*) and a JSON format for SweRV-iss are also generated. + +While the defines files may be modified by hand, it is recommended that this script be used to generate a consistent set. 
diff --git a/configs/snapshots/default/common_defines.vh b/configs/snapshots/default/common_defines.vh new file mode 100644 index 0000000..34dec8f --- /dev/null +++ b/configs/snapshots/default/common_defines.vh @@ -0,0 +1,170 @@ +// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE +// This is an automatically generated file by joseph.rahmeh on Tue Jun 4 07:50:46 PDT 2019 +// +// cmd: swerv -snapshot=default -ahb_lite +// +`define RV_INST_ACCESS_MASK5 'hffffffff +`define RV_DATA_ACCESS_ENABLE4 1'h0 +`define RV_INST_ACCESS_ENABLE3 1'h0 +`define RV_INST_ACCESS_ENABLE0 1'h0 +`define RV_INST_ACCESS_MASK3 'hffffffff +`define RV_DATA_ACCESS_ENABLE5 1'h0 +`define RV_DATA_ACCESS_MASK5 'hffffffff +`define RV_DATA_ACCESS_ADDR3 'h00000000 +`define RV_INST_ACCESS_ENABLE7 1'h0 +`define RV_DATA_ACCESS_ADDR6 'h00000000 +`define RV_INST_ACCESS_MASK7 'hffffffff +`define RV_INST_ACCESS_ENABLE6 1'h0 +`define RV_INST_ACCESS_ENABLE5 1'h0 +`define RV_DATA_ACCESS_ADDR4 'h00000000 +`define RV_DATA_ACCESS_ADDR7 'h00000000 +`define RV_DATA_ACCESS_MASK3 'hffffffff +`define RV_INST_ACCESS_MASK4 'hffffffff +`define RV_DATA_ACCESS_ADDR1 'h00000000 +`define RV_INST_ACCESS_ADDR4 'h00000000 +`define RV_INST_ACCESS_ADDR3 'h00000000 +`define RV_DATA_ACCESS_ENABLE1 1'h0 +`define RV_DATA_ACCESS_ADDR0 'h00000000 +`define RV_DATA_ACCESS_MASK0 'hffffffff +`define RV_DATA_ACCESS_MASK6 'hffffffff +`define RV_INST_ACCESS_ADDR7 'h00000000 +`define RV_INST_ACCESS_MASK0 'hffffffff +`define RV_DATA_ACCESS_ADDR5 'h00000000 +`define RV_DATA_ACCESS_ADDR2 'h00000000 +`define RV_DATA_ACCESS_MASK4 'hffffffff +`define RV_DATA_ACCESS_MASK1 'hffffffff +`define RV_INST_ACCESS_ADDR0 'h00000000 +`define RV_INST_ACCESS_ADDR2 'h00000000 +`define RV_DATA_ACCESS_ENABLE0 1'h0 +`define RV_DATA_ACCESS_ENABLE2 1'h0 +`define RV_DATA_ACCESS_ENABLE7 1'h0 +`define RV_INST_ACCESS_ENABLE4 1'h0 +`define RV_DATA_ACCESS_MASK7 'hffffffff +`define RV_INST_ACCESS_ADDR5 'h00000000 +`define RV_INST_ACCESS_ENABLE1 
1'h0 +`define RV_DATA_ACCESS_MASK2 'hffffffff +`define RV_INST_ACCESS_MASK6 'hffffffff +`define RV_DATA_ACCESS_ENABLE3 1'h0 +`define RV_INST_ACCESS_ADDR6 'h00000000 +`define RV_INST_ACCESS_MASK2 'hffffffff +`define RV_INST_ACCESS_ENABLE2 1'h0 +`define RV_DATA_ACCESS_ENABLE6 1'h0 +`define RV_INST_ACCESS_ADDR1 'h00000000 +`define RV_INST_ACCESS_MASK1 'hffffffff +`define RV_DEC_INSTBUF_DEPTH 4 +`define RV_DMA_BUF_DEPTH 4 +`define RV_LSU_NUM_NBLOAD 8 +`define RV_LSU_STBUF_DEPTH 8 +`define RV_LSU_NUM_NBLOAD_WIDTH 3 +`define RV_IFU_BUS_TAG 3 +`define RV_LSU_BUS_TAG 4 +`define RV_SB_BUS_TAG 1 +`define RV_DMA_BUS_TAG 1 +`define RV_DCCM_WIDTH_BITS 2 +`define RV_DCCM_REGION 4'hf +`define RV_DCCM_RESERVED 'h1000 +`define RV_DCCM_SIZE 64 +`define RV_DCCM_DATA_WIDTH 32 +`define RV_DCCM_NUM_BANKS_8 +`define RV_DCCM_FDATA_WIDTH 39 +`define RV_DCCM_BYTE_WIDTH 4 +`define RV_DCCM_DATA_CELL ram_2048x39 +`define RV_DCCM_ENABLE 1 +`define RV_DCCM_BITS 16 +`define RV_DCCM_OFFSET 28'h40000 +`define RV_DCCM_ECC_WIDTH 7 +`define RV_DCCM_SIZE_64 +`define RV_DCCM_ROWS 2048 +`define RV_DCCM_BANK_BITS 3 +`define RV_DCCM_NUM_BANKS 8 +`define RV_DCCM_INDEX_BITS 11 +`define RV_LSU_SB_BITS 16 +`define RV_DCCM_EADR 32'hf004ffff +`define RV_DCCM_SADR 32'hf0040000 +`define RV_RESET_VEC 'h80000000 +`define RV_RET_STACK_SIZE 4 +`define RV_XLEN 32 +`define RV_TARGET default +`define RV_BTB_BTAG_FOLD 1 +`define RV_BTB_INDEX3_HI 9 +`define RV_BTB_INDEX1_LO 4 +`define RV_BTB_ADDR_HI 5 +`define RV_BTB_ADDR_LO 4 +`define RV_BTB_INDEX1_HI 5 +`define RV_BTB_INDEX2_HI 7 +`define RV_BTB_INDEX2_LO 6 +`define RV_BTB_ARRAY_DEPTH 4 +`define RV_BTB_BTAG_SIZE 9 +`define RV_BTB_SIZE 32 +`define RV_BTB_INDEX3_LO 8 +`define RV_ICCM_NUM_BANKS 8 +`define RV_ICCM_BITS 19 +`define RV_ICCM_BANK_BITS 3 +`define RV_ICCM_ROWS 16384 +`define RV_ICCM_OFFSET 10'he000000 +`define RV_ICCM_REGION 4'he +`define RV_ICCM_SADR 32'hee000000 +`define RV_ICCM_RESERVED 'h1000 +`define RV_ICCM_DATA_CELL ram_16384x39 +`define RV_ICCM_INDEX_BITS 
14 +`define RV_ICCM_NUM_BANKS_8 +`define RV_ICCM_SIZE 512 +`define RV_ICCM_EADR 32'hee07ffff +`define RV_ICCM_SIZE_512 +`define RV_ICACHE_SIZE 16 +`define RV_ICACHE_TAG_HIGH 12 +`define RV_ICACHE_IC_ROWS 256 +`define RV_ICACHE_TADDR_HIGH 5 +`define RV_ICACHE_TAG_LOW 6 +`define RV_ICACHE_TAG_CELL ram_64x21 +`define RV_ICACHE_IC_DEPTH 8 +`define RV_ICACHE_IC_INDEX 8 +`define RV_ICACHE_ENABLE 1 +`define RV_ICACHE_DATA_CELL ram_256x34 +`define RV_ICACHE_TAG_DEPTH 64 +`define RV_EXTERNAL_PROG 'hb0000000 +`define RV_EXTERNAL_DATA_1 'h00000000 +`define RV_DEBUG_SB_MEM 'hb0580000 +`define RV_EXTERNAL_DATA 'hc0580000 +`define RV_SERIALIO 'hd0580000 +`define RV_NMI_VEC 'h11110000 +`define RV_BHT_HASH_STRING {ghr[3:2] ^ {ghr[3+1], {4-1-2{1'b0} } },hashin[5:4]^ghr[2-1:0]} +`define RV_BHT_ADDR_HI 7 +`define RV_BHT_GHR_RANGE 4:0 +`define RV_BHT_GHR_SIZE 5 +`define RV_BHT_GHR_PAD2 fghr[4:3],2'b0 +`define RV_BHT_SIZE 128 +`define RV_BHT_ADDR_LO 4 +`define RV_BHT_ARRAY_DEPTH 16 +`define RV_BHT_GHR_PAD fghr[4],3'b0 +`define RV_NUMIREGS 32 +`define RV_PIC_BITS 15 +`define RV_PIC_REGION 4'hf +`define RV_PIC_INT_WORDS 1 +`define RV_PIC_TOTAL_INT_PLUS1 9 +`define RV_PIC_MEIP_OFFSET 'h1000 +`define RV_PIC_BASE_ADDR 32'hf00c0000 +`define RV_PIC_MEIGWCTRL_OFFSET 'h4000 +`define RV_PIC_MEIPL_OFFSET 'h0000 +`define RV_PIC_TOTAL_INT 8 +`define RV_PIC_SIZE 32 +`define RV_PIC_MEIE_OFFSET 'h2000 +`define RV_PIC_OFFSET 10'hc0000 +`define RV_PIC_MEIPT_OFFSET 'h3004 +`define RV_PIC_MPICCFG_OFFSET 'h3000 +`define RV_PIC_MEIGWCLR_OFFSET 'h5000 +`define CLOCK_PERIOD 100 +`define CPU_TOP `RV_TOP.swerv +`define TOP tb_top +`define RV_BUILD_AHB_LITE 1 +`define RV_TOP `TOP.rvtop +`define DATAWIDTH 64 +`define RV_STERR_ROLLBACK 0 +`define RV_EXT_ADDRWIDTH 32 +`define RV_EXT_DATAWIDTH 64 +`define SDVT_AHB 1 +`define RV_LDERR_ROLLBACK 1 +`define ASSERT_ON +`define TEC_RV_ICG clockhdr +`define REGWIDTH 32 +`undef ASSERT_ON diff --git a/configs/snapshots/default/defines.h b/configs/snapshots/default/defines.h 
new file mode 100644 index 0000000..4ac3480 --- /dev/null +++ b/configs/snapshots/default/defines.h @@ -0,0 +1,132 @@ +// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE +// This is an automatically generated file by joseph.rahmeh on Tue Jun 4 07:50:46 PDT 2019 +// +// cmd: swerv -snapshot=default -ahb_lite +// +#define RV_INST_ACCESS_MASK5 0xffffffff +#define RV_DATA_ACCESS_ENABLE4 0x0 +#define RV_INST_ACCESS_ENABLE3 0x0 +#define RV_INST_ACCESS_ENABLE0 0x0 +#define RV_INST_ACCESS_MASK3 0xffffffff +#define RV_DATA_ACCESS_ENABLE5 0x0 +#define RV_DATA_ACCESS_MASK5 0xffffffff +#define RV_DATA_ACCESS_ADDR3 0x00000000 +#define RV_INST_ACCESS_ENABLE7 0x0 +#define RV_DATA_ACCESS_ADDR6 0x00000000 +#define RV_INST_ACCESS_MASK7 0xffffffff +#define RV_INST_ACCESS_ENABLE6 0x0 +#define RV_INST_ACCESS_ENABLE5 0x0 +#define RV_DATA_ACCESS_ADDR4 0x00000000 +#define RV_DATA_ACCESS_ADDR7 0x00000000 +#define RV_DATA_ACCESS_MASK3 0xffffffff +#define RV_INST_ACCESS_MASK4 0xffffffff +#define RV_DATA_ACCESS_ADDR1 0x00000000 +#define RV_INST_ACCESS_ADDR4 0x00000000 +#define RV_INST_ACCESS_ADDR3 0x00000000 +#define RV_DATA_ACCESS_ENABLE1 0x0 +#define RV_DATA_ACCESS_ADDR0 0x00000000 +#define RV_DATA_ACCESS_MASK0 0xffffffff +#define RV_DATA_ACCESS_MASK6 0xffffffff +#define RV_INST_ACCESS_ADDR7 0x00000000 +#define RV_INST_ACCESS_MASK0 0xffffffff +#define RV_DATA_ACCESS_ADDR5 0x00000000 +#define RV_DATA_ACCESS_ADDR2 0x00000000 +#define RV_DATA_ACCESS_MASK4 0xffffffff +#define RV_DATA_ACCESS_MASK1 0xffffffff +#define RV_INST_ACCESS_ADDR0 0x00000000 +#define RV_INST_ACCESS_ADDR2 0x00000000 +#define RV_DATA_ACCESS_ENABLE0 0x0 +#define RV_DATA_ACCESS_ENABLE2 0x0 +#define RV_DATA_ACCESS_ENABLE7 0x0 +#define RV_INST_ACCESS_ENABLE4 0x0 +#define RV_DATA_ACCESS_MASK7 0xffffffff +#define RV_INST_ACCESS_ADDR5 0x00000000 +#define RV_INST_ACCESS_ENABLE1 0x0 +#define RV_DATA_ACCESS_MASK2 0xffffffff +#define RV_INST_ACCESS_MASK6 0xffffffff +#define RV_DATA_ACCESS_ENABLE3 0x0 
+#define RV_INST_ACCESS_ADDR6 0x00000000 +#define RV_INST_ACCESS_MASK2 0xffffffff +#define RV_INST_ACCESS_ENABLE2 0x0 +#define RV_DATA_ACCESS_ENABLE6 0x0 +#define RV_INST_ACCESS_ADDR1 0x00000000 +#define RV_INST_ACCESS_MASK1 0xffffffff +#define RV_IFU_BUS_TAG 3 +#define RV_LSU_BUS_TAG 4 +#define RV_SB_BUS_TAG 1 +#define RV_DMA_BUS_TAG 1 +#define RV_DCCM_WIDTH_BITS 2 +#define RV_DCCM_REGION 0xf +#define RV_DCCM_RESERVED 0x1000 +#define RV_DCCM_SIZE 64 +#define RV_DCCM_DATA_WIDTH 32 +#define RV_DCCM_NUM_BANKS_8 +#define RV_DCCM_FDATA_WIDTH 39 +#define RV_DCCM_BYTE_WIDTH 4 +#define RV_DCCM_DATA_CELL ram_2048x39 +#define RV_DCCM_ENABLE 1 +#define RV_DCCM_BITS 16 +#define RV_DCCM_OFFSET 0x40000 +#define RV_DCCM_ECC_WIDTH 7 +#define RV_DCCM_SIZE_64 +#define RV_DCCM_ROWS 2048 +#define RV_DCCM_BANK_BITS 3 +#define RV_DCCM_NUM_BANKS 8 +#define RV_DCCM_INDEX_BITS 11 +#define RV_LSU_SB_BITS 16 +#define RV_DCCM_EADR 0xf004ffff +#define RV_DCCM_SADR 0xf0040000 +#ifndef RV_RESET_VEC +#define RV_RESET_VEC 0x80000000 +#endif +#define RV_XLEN 32 +#define RV_TARGET default +#define RV_ICCM_NUM_BANKS 8 +#define RV_ICCM_BITS 19 +#define RV_ICCM_BANK_BITS 3 +#define RV_ICCM_ROWS 16384 +#define RV_ICCM_OFFSET 0xe000000 +#define RV_ICCM_REGION 0xe +#define RV_ICCM_SADR 0xee000000 +#define RV_ICCM_RESERVED 0x1000 +#define RV_ICCM_DATA_CELL ram_16384x39 +#define RV_ICCM_INDEX_BITS 14 +#define RV_ICCM_NUM_BANKS_8 +#define RV_ICCM_SIZE 512 +#define RV_ICCM_EADR 0xee07ffff +#define RV_ICCM_SIZE_512 +#define RV_EXTERNAL_PROG 0xb0000000 +#define RV_EXTERNAL_DATA_1 0x00000000 +#define RV_DEBUG_SB_MEM 0xb0580000 +#define RV_EXTERNAL_DATA 0xc0580000 +#define RV_SERIALIO 0xd0580000 +#ifndef RV_NMI_VEC +#define RV_NMI_VEC 0x11110000 +#endif +#define RV_PIC_BITS 15 +#define RV_PIC_REGION 0xf +#define RV_PIC_INT_WORDS 1 +#define RV_PIC_TOTAL_INT_PLUS1 9 +#define RV_PIC_MEIP_OFFSET 0x1000 +#define RV_PIC_BASE_ADDR 0xf00c0000 +#define RV_PIC_MEIGWCTRL_OFFSET 0x4000 +#define RV_PIC_MEIPL_OFFSET 0x0000 
+#define RV_PIC_TOTAL_INT 8 +#define RV_PIC_SIZE 32 +#define RV_PIC_MEIE_OFFSET 0x2000 +#define RV_PIC_OFFSET 0xc0000 +#define RV_PIC_MEIPT_OFFSET 0x3004 +#define RV_PIC_MPICCFG_OFFSET 0x3000 +#define RV_PIC_MEIGWCLR_OFFSET 0x5000 +#define CLOCK_PERIOD 100 +#define CPU_TOP `RV_TOP.swerv +#define TOP tb_top +#define RV_BUILD_AHB_LITE 1 +#define RV_TOP `TOP.rvtop +#define DATAWIDTH 64 +#define RV_STERR_ROLLBACK 0 +#define RV_EXT_ADDRWIDTH 32 +#define RV_EXT_DATAWIDTH 64 +#define SDVT_AHB 1 +#define RV_LDERR_ROLLBACK 1 +#define ASSERT_ON diff --git a/configs/snapshots/default/pd_defines.vh b/configs/snapshots/default/pd_defines.vh new file mode 100644 index 0000000..b05d00b --- /dev/null +++ b/configs/snapshots/default/pd_defines.vh @@ -0,0 +1,11 @@ +// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE +// This is an automatically generated file by joseph.rahmeh on Tue Jun 4 07:50:46 PDT 2019 +// +// cmd: swerv -snapshot=default -ahb_lite +// + +`include "common_defines.vh" +`undef ASSERT_ON +`undef TEC_RV_ICG +`define TEC_RV_ICG CKLNQD12BWP35P140 +`define PHYSICAL 1 diff --git a/configs/snapshots/default/perl_configs.pl b/configs/snapshots/default/perl_configs.pl new file mode 100644 index 0000000..26c4121 --- /dev/null +++ b/configs/snapshots/default/perl_configs.pl @@ -0,0 +1,566 @@ +# NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE +# This is an automatically generated file by joseph.rahmeh on Tue Jun 4 07:50:46 PDT 2019 +# +# cmd: swerv -snapshot=default -ahb_lite +# +# To use this in a perl script, use 'require $RV_ROOT/configs/config.pl' +# Reference the hash via $config{name}.. 
+ + +%config = ( + 'protection' => { + 'inst_access_mask5' => '0xffffffff', + 'data_access_enable4' => '0x0', + 'inst_access_enable3' => '0x0', + 'inst_access_enable0' => '0x0', + 'inst_access_mask3' => '0xffffffff', + 'data_access_enable5' => '0x0', + 'data_access_mask5' => '0xffffffff', + 'data_access_addr3' => '0x00000000', + 'inst_access_enable7' => '0x0', + 'data_access_addr6' => '0x00000000', + 'inst_access_mask7' => '0xffffffff', + 'inst_access_enable6' => '0x0', + 'inst_access_enable5' => '0x0', + 'data_access_addr4' => '0x00000000', + 'data_access_addr7' => '0x00000000', + 'data_access_mask3' => '0xffffffff', + 'inst_access_mask4' => '0xffffffff', + 'data_access_addr1' => '0x00000000', + 'inst_access_addr4' => '0x00000000', + 'inst_access_addr3' => '0x00000000', + 'data_access_enable1' => '0x0', + 'data_access_addr0' => '0x00000000', + 'data_access_mask0' => '0xffffffff', + 'data_access_mask6' => '0xffffffff', + 'inst_access_addr7' => '0x00000000', + 'inst_access_mask0' => '0xffffffff', + 'data_access_addr5' => '0x00000000', + 'data_access_addr2' => '0x00000000', + 'data_access_mask4' => '0xffffffff', + 'data_access_mask1' => '0xffffffff', + 'inst_access_addr0' => '0x00000000', + 'inst_access_addr2' => '0x00000000', + 'data_access_enable0' => '0x0', + 'data_access_enable2' => '0x0', + 'data_access_enable7' => '0x0', + 'inst_access_enable4' => '0x0', + 'data_access_mask7' => '0xffffffff', + 'inst_access_addr5' => '0x00000000', + 'inst_access_enable1' => '0x0', + 'data_access_mask2' => '0xffffffff', + 'inst_access_mask6' => '0xffffffff', + 'data_access_enable3' => '0x0', + 'inst_access_addr6' => '0x00000000', + 'inst_access_mask2' => '0xffffffff', + 'inst_access_enable2' => '0x0', + 'data_access_enable6' => '0x0', + 'inst_access_addr1' => '0x00000000', + 'inst_access_mask1' => '0xffffffff' + }, + 'core' => { + 'dec_instbuf_depth' => '4', + 'dma_buf_depth' => '4', + 'lsu_num_nbload' => '8', + 'lsu_stbuf_depth' => '8', + 'lsu_num_nbload_width' => '3' + }, + 
'bus' => { + 'ifu_bus_tag' => '3', + 'lsu_bus_tag' => 4, + 'sb_bus_tag' => '1', + 'dma_bus_tag' => '1' + }, + 'dccm' => { + 'dccm_width_bits' => 2, + 'dccm_region' => '0xf', + 'dccm_reserved' => '0x1000', + 'dccm_size' => 64, + 'dccm_data_width' => 32, + 'dccm_num_banks_8' => '', + 'dccm_fdata_width' => 39, + 'dccm_byte_width' => '4', + 'dccm_data_cell' => 'ram_2048x39', + 'dccm_enable' => '1', + 'dccm_bits' => 16, + 'dccm_offset' => '0x40000', + 'dccm_ecc_width' => 7, + 'dccm_size_64' => '', + 'dccm_rows' => '2048', + 'dccm_bank_bits' => 3, + 'dccm_num_banks' => '8', + 'dccm_index_bits' => 11, + 'lsu_sb_bits' => 16, + 'dccm_eadr' => '0xf004ffff', + 'dccm_sadr' => '0xf0040000' + }, + 'reset_vec' => '0x80000000', + 'retstack' => { + 'ret_stack_size' => '4' + }, + 'triggers' => [ + { + 'poke_mask' => [ + '0x081818c7', + '0xffffffff', + '0x00000000' + ], + 'reset' => [ + '0x23e00000', + '0x00000000', + '0x00000000' + ], + 'mask' => [ + '0x081818c7', + '0xffffffff', + '0x00000000' + ] + }, + { + 'poke_mask' => [ + '0x081818c7', + '0xffffffff', + '0x00000000' + ], + 'reset' => [ + '0x23e00000', + '0x00000000', + '0x00000000' + ], + 'mask' => [ + '0x081818c7', + '0xffffffff', + '0x00000000' + ] + }, + { + 'poke_mask' => [ + '0x081818c7', + '0xffffffff', + '0x00000000' + ], + 'reset' => [ + '0x23e00000', + '0x00000000', + '0x00000000' + ], + 'mask' => [ + '0x081818c7', + '0xffffffff', + '0x00000000' + ] + }, + { + 'poke_mask' => [ + '0x081818c7', + '0xffffffff', + '0x00000000' + ], + 'reset' => [ + '0x23e00000', + '0x00000000', + '0x00000000' + ], + 'mask' => [ + '0x081818c7', + '0xffffffff', + '0x00000000' + ] + } + ], + 'xlen' => 32, + 'verilator' => '', + 'target' => 'default', + 'max_mmode_perf_event' => '50', + 'btb' => { + 'btb_btag_fold' => 1, + 'btb_index3_hi' => 9, + 'btb_index1_lo' => '4', + 'btb_addr_hi' => 5, + 'btb_addr_lo' => '4', + 'btb_index1_hi' => 5, + 'btb_index2_hi' => 7, + 'btb_index2_lo' => 6, + 'btb_array_depth' => 4, + 'btb_btag_size' => 9, + 
'btb_size' => 32, + 'btb_index3_lo' => 8 + }, + 'iccm' => { + 'iccm_num_banks' => '8', + 'iccm_bits' => 19, + 'iccm_bank_bits' => 3, + 'iccm_rows' => '16384', + 'iccm_offset' => '0xe000000', + 'iccm_region' => '0xe', + 'iccm_sadr' => '0xee000000', + 'iccm_reserved' => '0x1000', + 'iccm_data_cell' => 'ram_16384x39', + 'iccm_index_bits' => 14, + 'iccm_num_banks_8' => '', + 'iccm_size' => 512, + 'iccm_eadr' => '0xee07ffff', + 'iccm_size_512' => '' + }, + 'icache' => { + 'icache_size' => 16, + 'icache_tag_high' => 12, + 'icache_ic_rows' => '256', + 'icache_taddr_high' => 5, + 'icache_tag_low' => '6', + 'icache_tag_cell' => 'ram_64x21', + 'icache_ic_depth' => 8, + 'icache_ic_index' => 8, + 'icache_enable' => '1', + 'icache_data_cell' => 'ram_256x34', + 'icache_tag_depth' => 64 + }, + 'physical' => '1', + 'memmap' => { + 'external_prog' => '0xb0000000', + 'external_data_1' => '0x00000000', + 'debug_sb_mem' => '0xb0580000', + 'external_data' => '0xc0580000', + 'serialio' => '0xd0580000' + }, + 'nmi_vec' => '0x11110000', + 'num_mmode_perf_regs' => '4', + 'bht' => { + 'bht_hash_string' => '{ghr[3:2] ^ {ghr[3+1], {4-1-2{1\'b0} } },hashin[5:4]^ghr[2-1:0]}', + 'bht_addr_hi' => 7, + 'bht_ghr_range' => '4:0', + 'bht_ghr_size' => 5, + 'bht_ghr_pad2' => 'fghr[4:3],2\'b0', + 'bht_size' => 128, + 'bht_addr_lo' => '4', + 'bht_array_depth' => 16, + 'bht_ghr_pad' => 'fghr[4],3\'b0' + }, + 'numiregs' => '32', + 'even_odd_trigger_chains' => 'true', + 'pic' => { + 'pic_bits' => 15, + 'pic_region' => '0xf', + 'pic_int_words' => 1, + 'pic_total_int_plus1' => 9, + 'pic_meip_offset' => '0x1000', + 'pic_base_addr' => '0xf00c0000', + 'pic_meigwctrl_offset' => '0x4000', + 'pic_meipl_offset' => '0x0000', + 'pic_total_int' => 8, + 'pic_size' => 32, + 'pic_meie_offset' => '0x2000', + 'pic_offset' => '0xc0000', + 'pic_meipt_offset' => '0x3004', + 'pic_mpiccfg_offset' => '0x3000', + 'pic_meigwclr_offset' => '0x5000' + }, + 'testbench' => { + 'clock_period' => '100', + 'CPU_TOP' => '`RV_TOP.swerv', + 
'TOP' => 'tb_top', + 'build_ahb_lite' => '1', + 'RV_TOP' => '`TOP.rvtop', + 'datawidth' => '64', + 'sterr_rollback' => '0', + 'ext_addrwidth' => '32', + 'ext_datawidth' => '64', + 'SDVT_AHB' => '1', + 'lderr_rollback' => '1', + 'assert_on' => '' + }, + 'tec_rv_icg' => 'clockhdr', + 'csr' => { + 'pmpaddr9' => { + 'exists' => 'false' + }, + 'dicad1' => { + 'reset' => '0x0', + 'number' => '0x7ca', + 'comment' => 'Cache diagnostics.', + 'debug' => 'true', + 'exists' => 'true', + 'mask' => '0x3' + }, + 'pmpcfg0' => { + 'exists' => 'false' + }, + 'mhpmcounter4h' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'dicago' => { + 'reset' => '0x0', + 'number' => '0x7cb', + 'comment' => 'Cache diagnostics.', + 'debug' => 'true', + 'exists' => 'true', + 'mask' => '0x0' + }, + 'mie' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0x40000888' + }, + 'misa' => { + 'reset' => '0x40001104', + 'exists' => 'true', + 'mask' => '0x0' + }, + 'mhpmcounter6h' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'meicpct' => { + 'reset' => '0x0', + 'number' => '0xbca', + 'comment' => 'External claim id/priority capture.', + 'exists' => 'true', + 'mask' => '0x0' + }, + 'mimpid' => { + 'reset' => '0x1', + 'exists' => 'true', + 'mask' => '0x0' + }, + 'mcpc' => { + 'reset' => '0x0', + 'number' => '0x7c2', + 'exists' => 'true', + 'mask' => '0x0' + }, + 'mhpmevent4' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'pmpaddr8' => { + 'exists' => 'false' + }, + 'pmpcfg3' => { + 'exists' => 'false' + }, + 'marchid' => { + 'reset' => '0x0000000b', + 'exists' => 'true', + 'mask' => '0x0' + }, + 'pmpaddr5' => { + 'exists' => 'false' + }, + 'mfdc' => { + 'reset' => '0x00070000', + 'number' => '0x7f9', + 'exists' => 'true', + 'mask' => '0x000707ff' + }, + 'mhpmevent6' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'mvendorid' => { + 'reset' => '0x45', + 'exists' => 'true', + 
'mask' => '0x0' + }, + 'pmpaddr4' => { + 'exists' => 'false' + }, + 'dcsr' => { + 'poke_mask' => '0x00008dcc', + 'reset' => '0x40000003', + 'exists' => 'true', + 'mask' => '0x00008c04' + }, + 'cycle' => { + 'exists' => 'false' + }, + 'pmpaddr12' => { + 'exists' => 'false' + }, + 'pmpaddr3' => { + 'exists' => 'false' + }, + 'mhpmcounter3h' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'time' => { + 'exists' => 'false' + }, + 'meicidpl' => { + 'reset' => '0x0', + 'number' => '0xbcb', + 'comment' => 'External interrupt claim id priority level.', + 'exists' => 'true', + 'mask' => '0xf' + }, + 'pmpaddr14' => { + 'exists' => 'false' + }, + 'pmpaddr13' => { + 'exists' => 'false' + }, + 'pmpaddr1' => { + 'exists' => 'false' + }, + 'mhpmcounter6' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'dicad0' => { + 'reset' => '0x0', + 'number' => '0x7c9', + 'comment' => 'Cache diagnostics.', + 'debug' => 'true', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'meipt' => { + 'reset' => '0x0', + 'number' => '0xbc9', + 'comment' => 'External interrupt priority threshold.', + 'exists' => 'true', + 'mask' => '0xf' + }, + 'pmpaddr15' => { + 'exists' => 'false' + }, + 'mhpmcounter5' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'pmpcfg1' => { + 'exists' => 'false' + }, + 'pmpaddr10' => { + 'exists' => 'false' + }, + 'pmpaddr0' => { + 'exists' => 'false' + }, + 'pmpcfg2' => { + 'exists' => 'false' + }, + 'pmpaddr2' => { + 'exists' => 'false' + }, + 'mpmc' => { + 'reset' => '0x0', + 'number' => '0x7c6', + 'comment' => 'Core pause: Implemented as read only.', + 'exists' => 'true', + 'mask' => '0x0' + }, + 'dmst' => { + 'reset' => '0x0', + 'number' => '0x7c4', + 'comment' => 'Memory synch trigger: Flush caches in debug mode.', + 'debug' => 'true', + 'exists' => 'true', + 'mask' => '0x0' + }, + 'instret' => { + 'exists' => 'false' + }, + 'mhpmevent3' => { + 'reset' => '0x0', + 'exists' => 
'true', + 'mask' => '0xffffffff' + }, + 'dicawics' => { + 'reset' => '0x0', + 'number' => '0x7c8', + 'comment' => 'Cache diagnostics.', + 'debug' => 'true', + 'exists' => 'true', + 'mask' => '0x0130fffc' + }, + 'mip' => { + 'poke_mask' => '0x40000888', + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0x0' + }, + 'mhpmcounter5h' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'micect' => { + 'reset' => '0x0', + 'number' => '0x7f0', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'miccmect' => { + 'reset' => '0x0', + 'number' => '0x7f1', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'mhpmevent5' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'mhpmcounter3' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'pmpaddr6' => { + 'exists' => 'false' + }, + 'pmpaddr11' => { + 'exists' => 'false' + }, + 'mcgc' => { + 'poke_mask' => '0x000001ff', + 'reset' => '0x0', + 'number' => '0x7f8', + 'exists' => 'true', + 'mask' => '0x000001ff' + }, + 'mhpmcounter4' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'mdccmect' => { + 'reset' => '0x0', + 'number' => '0x7f2', + 'exists' => 'true', + 'mask' => '0xffffffff' + }, + 'pmpaddr7' => { + 'exists' => 'false' + }, + 'meicurpl' => { + 'reset' => '0x0', + 'number' => '0xbcc', + 'comment' => 'External interrupt current priority level.', + 'exists' => 'true', + 'mask' => '0xf' + }, + 'mstatus' => { + 'reset' => '0x1800', + 'exists' => 'true', + 'mask' => '0x88' + }, + 'tselect' => { + 'reset' => '0x0', + 'exists' => 'true', + 'mask' => '0x3' + } + }, + 'regwidth' => '32', + 'harts' => 1 + ); +1; diff --git a/configs/snapshots/default/pic_ctrl_verilator_unroll.sv b/configs/snapshots/default/pic_ctrl_verilator_unroll.sv new file mode 100644 index 0000000..8e15c4b --- /dev/null +++ b/configs/snapshots/default/pic_ctrl_verilator_unroll.sv @@ -0,0 +1,173 @@ +// argv=9 +// TOTAL_INT=9 NUM_LEVELS=4 
+`ifdef RV_PIC_2CYCLE +// LEVEL0 +logic [TOTAL_INT+2:0] [INTPRIORITY_BITS-1:0] level_intpend_w_prior_en_1; +logic [TOTAL_INT+2:0] [ID_BITS-1:0] level_intpend_id_1; + for (m=0; m<=(TOTAL_INT)/(2**(1)) ; m++) begin : COMPARE0 + if ( m == (TOTAL_INT)/(2**(1))) begin + assign level_intpend_w_prior_en_1[m+1] = '0 ; + assign level_intpend_id_1[m+1] = '0 ; + end + cmp_and_mux #( + .ID_BITS(ID_BITS), + .INTPRIORITY_BITS(INTPRIORITY_BITS)) cmp_l1 ( + .a_id(level_intpend_id[0][2*m]), + .a_priority(level_intpend_w_prior_en[0][2*m]), + .b_id(level_intpend_id[0][2*m+1]), + .b_priority(level_intpend_w_prior_en[0][2*m+1]), + .out_id(level_intpend_id_1[m]), + .out_priority(level_intpend_w_prior_en_1[m])) ; + + end + +// LEVEL1 +logic [TOTAL_INT+2:0] [INTPRIORITY_BITS-1:0] level_intpend_w_prior_en_2; +logic [TOTAL_INT+2:0] [ID_BITS-1:0] level_intpend_id_2; + for (m=0; m<=(TOTAL_INT)/(2**(2)) ; m++) begin : COMPARE1 + if ( m == (TOTAL_INT)/(2**(2))) begin + assign level_intpend_w_prior_en_2[m+1] = '0 ; + assign level_intpend_id_2[m+1] = '0 ; + end + cmp_and_mux #( + .ID_BITS(ID_BITS), + .INTPRIORITY_BITS(INTPRIORITY_BITS)) cmp_l2 ( + .a_id(level_intpend_id_1[2*m]), + .a_priority(level_intpend_w_prior_en_1[2*m]), + .b_id(level_intpend_id_1[2*m+1]), + .b_priority(level_intpend_w_prior_en_1[2*m+1]), + .out_id(level_intpend_id_2[m]), + .out_priority(level_intpend_w_prior_en_2[m])) ; + + end + +for (i=0; i<=TOTAL_INT/2**(NUM_LEVELS/2) ; i++) begin : MIDDLE_FLOPS + rvdff #(INTPRIORITY_BITS) level2_intpend_prior_reg (.*, .din (level_intpend_w_prior_en_2[i]), .dout(l2_intpend_w_prior_en_ff[i]), .clk(active_clk)); + rvdff #(ID_BITS) level2_intpend_id_reg (.*, .din (level_intpend_id_2[i]), .dout(l2_intpend_id_ff[i]), .clk(active_clk)); +end +// LEVEL2 +logic [TOTAL_INT+2:0] [INTPRIORITY_BITS-1:0] levelx_intpend_w_prior_en_3; +logic [TOTAL_INT+2:0] [ID_BITS-1:0] levelx_intpend_id_3; + for (m=0; m<=(TOTAL_INT)/(2**(3)) ; m++) begin : COMPARE2 + if ( m == (TOTAL_INT)/(2**(3))) begin + assign 
levelx_intpend_w_prior_en_3[m+1] = '0 ; + assign levelx_intpend_id_3[m+1] = '0 ; + end + cmp_and_mux #( + .ID_BITS(ID_BITS), + .INTPRIORITY_BITS(INTPRIORITY_BITS)) cmp_l3 ( + .a_id(levelx_intpend_id[2][2*m]), + .a_priority(levelx_intpend_w_prior_en[2][2*m]), + .b_id(levelx_intpend_id[2][2*m+1]), + .b_priority(levelx_intpend_w_prior_en[2][2*m+1]), + .out_id(levelx_intpend_id_3[m]), + .out_priority(levelx_intpend_w_prior_en_3[m])) ; + + end + +// LEVEL3 +logic [TOTAL_INT+2:0] [INTPRIORITY_BITS-1:0] levelx_intpend_w_prior_en_4; +logic [TOTAL_INT+2:0] [ID_BITS-1:0] levelx_intpend_id_4; + for (m=0; m<=(TOTAL_INT)/(2**(4)) ; m++) begin : COMPARE3 + if ( m == (TOTAL_INT)/(2**(4))) begin + assign levelx_intpend_w_prior_en_4[m+1] = '0 ; + assign levelx_intpend_id_4[m+1] = '0 ; + end + cmp_and_mux #( + .ID_BITS(ID_BITS), + .INTPRIORITY_BITS(INTPRIORITY_BITS)) cmp_l4 ( + .a_id(levelx_intpend_id_3[2*m]), + .a_priority(levelx_intpend_w_prior_en_3[2*m]), + .b_id(levelx_intpend_id_3[2*m+1]), + .b_priority(levelx_intpend_w_prior_en_3[2*m+1]), + .out_id(levelx_intpend_id_4[m]), + .out_priority(levelx_intpend_w_prior_en_4[m])) ; + + end + +assign claimid_in[ID_BITS-1:0] = levelx_intpend_id_4[0] ; // This is the last level output +assign selected_int_priority[INTPRIORITY_BITS-1:0] = levelx_intpend_w_prior_en_4[0] ; +`else +// LEVEL0 +logic [TOTAL_INT+2:0] [INTPRIORITY_BITS-1:0] level_intpend_w_prior_en_1; +logic [TOTAL_INT+2:0] [ID_BITS-1:0] level_intpend_id_1; + for (m=0; m<=(TOTAL_INT)/(2**(1)) ; m++) begin : COMPARE0 + if ( m == (TOTAL_INT)/(2**(1))) begin + assign level_intpend_w_prior_en_1[m+1] = '0 ; + assign level_intpend_id_1[m+1] = '0 ; + end + cmp_and_mux #( + .ID_BITS(ID_BITS), + .INTPRIORITY_BITS(INTPRIORITY_BITS)) cmp_l1 ( + .a_id(level_intpend_id[0][2*m]), + .a_priority(level_intpend_w_prior_en[0][2*m]), + .b_id(level_intpend_id[0][2*m+1]), + .b_priority(level_intpend_w_prior_en[0][2*m+1]), + .out_id(level_intpend_id_1[m]), + 
.out_priority(level_intpend_w_prior_en_1[m])) ; + + end + +// LEVEL1 +logic [TOTAL_INT+2:0] [INTPRIORITY_BITS-1:0] level_intpend_w_prior_en_2; +logic [TOTAL_INT+2:0] [ID_BITS-1:0] level_intpend_id_2; + for (m=0; m<=(TOTAL_INT)/(2**(2)) ; m++) begin : COMPARE1 + if ( m == (TOTAL_INT)/(2**(2))) begin + assign level_intpend_w_prior_en_2[m+1] = '0 ; + assign level_intpend_id_2[m+1] = '0 ; + end + cmp_and_mux #( + .ID_BITS(ID_BITS), + .INTPRIORITY_BITS(INTPRIORITY_BITS)) cmp_l2 ( + .a_id(level_intpend_id_1[2*m]), + .a_priority(level_intpend_w_prior_en_1[2*m]), + .b_id(level_intpend_id_1[2*m+1]), + .b_priority(level_intpend_w_prior_en_1[2*m+1]), + .out_id(level_intpend_id_2[m]), + .out_priority(level_intpend_w_prior_en_2[m])) ; + + end + +// LEVEL2 +logic [TOTAL_INT+2:0] [INTPRIORITY_BITS-1:0] level_intpend_w_prior_en_3; +logic [TOTAL_INT+2:0] [ID_BITS-1:0] level_intpend_id_3; + for (m=0; m<=(TOTAL_INT)/(2**(3)) ; m++) begin : COMPARE2 + if ( m == (TOTAL_INT)/(2**(3))) begin + assign level_intpend_w_prior_en_3[m+1] = '0 ; + assign level_intpend_id_3[m+1] = '0 ; + end + cmp_and_mux #( + .ID_BITS(ID_BITS), + .INTPRIORITY_BITS(INTPRIORITY_BITS)) cmp_l3 ( + .a_id(level_intpend_id_2[2*m]), + .a_priority(level_intpend_w_prior_en_2[2*m]), + .b_id(level_intpend_id_2[2*m+1]), + .b_priority(level_intpend_w_prior_en_2[2*m+1]), + .out_id(level_intpend_id_3[m]), + .out_priority(level_intpend_w_prior_en_3[m])) ; + + end + +// LEVEL3 +logic [TOTAL_INT+2:0] [INTPRIORITY_BITS-1:0] level_intpend_w_prior_en_4; +logic [TOTAL_INT+2:0] [ID_BITS-1:0] level_intpend_id_4; + for (m=0; m<=(TOTAL_INT)/(2**(4)) ; m++) begin : COMPARE3 + if ( m == (TOTAL_INT)/(2**(4))) begin + assign level_intpend_w_prior_en_4[m+1] = '0 ; + assign level_intpend_id_4[m+1] = '0 ; + end + cmp_and_mux #( + .ID_BITS(ID_BITS), + .INTPRIORITY_BITS(INTPRIORITY_BITS)) cmp_l4 ( + .a_id(level_intpend_id_3[2*m]), + .a_priority(level_intpend_w_prior_en_3[2*m]), + .b_id(level_intpend_id_3[2*m+1]), + 
.b_priority(level_intpend_w_prior_en_3[2*m+1]), + .out_id(level_intpend_id_4[m]), + .out_priority(level_intpend_w_prior_en_4[m])) ; + + end + +assign claimid_in[ID_BITS-1:0] = level_intpend_id_4[0] ; // This is the last level output +assign selected_int_priority[INTPRIORITY_BITS-1:0] = level_intpend_w_prior_en_4[0] ; +`endif diff --git a/configs/snapshots/default/pic_map_auto.h b/configs/snapshots/default/pic_map_auto.h new file mode 100644 index 0000000..60568b3 --- /dev/null +++ b/configs/snapshots/default/pic_map_auto.h @@ -0,0 +1,31 @@ +// mask[3:0] = { 4'b1000 - 30b mask,4'b0100 - 31b mask, 4'b0010 - 28b mask, 4'b0001 - 32b mask } +always_comb begin + case (address[14:0]) + 15'b011000000000000 : mask[3:0] = 4'b0100; + 15'b100000000000100 : mask[3:0] = 4'b1000; + 15'b100000000001000 : mask[3:0] = 4'b1000; + 15'b100000000001100 : mask[3:0] = 4'b1000; + 15'b100000000010000 : mask[3:0] = 4'b1000; + 15'b100000000010100 : mask[3:0] = 4'b1000; + 15'b100000000011000 : mask[3:0] = 4'b1000; + 15'b100000000011100 : mask[3:0] = 4'b1000; + 15'b100000000100000 : mask[3:0] = 4'b1000; + 15'b010000000000100 : mask[3:0] = 4'b0100; + 15'b010000000001000 : mask[3:0] = 4'b0100; + 15'b010000000001100 : mask[3:0] = 4'b0100; + 15'b010000000010000 : mask[3:0] = 4'b0100; + 15'b010000000010100 : mask[3:0] = 4'b0100; + 15'b010000000011000 : mask[3:0] = 4'b0100; + 15'b010000000011100 : mask[3:0] = 4'b0100; + 15'b010000000100000 : mask[3:0] = 4'b0100; + 15'b000000000000100 : mask[3:0] = 4'b0010; + 15'b000000000001000 : mask[3:0] = 4'b0010; + 15'b000000000001100 : mask[3:0] = 4'b0010; + 15'b000000000010000 : mask[3:0] = 4'b0010; + 15'b000000000010100 : mask[3:0] = 4'b0010; + 15'b000000000011000 : mask[3:0] = 4'b0010; + 15'b000000000011100 : mask[3:0] = 4'b0010; + 15'b000000000100000 : mask[3:0] = 4'b0010; + default : mask[3:0] = 4'b0001; + endcase +end diff --git a/configs/snapshots/default/whisper.json b/configs/snapshots/default/whisper.json new file mode 100644 index 0000000..2593f72 --- 
/dev/null +++ b/configs/snapshots/default/whisper.json @@ -0,0 +1,395 @@ +{ + "memmap" : { + "cosnoleio" : "0xd0580000" + }, + "nmi_vec" : "0x11110000", + "dccm" : { + "region" : "0xf", + "offset" : "0x40000", + "size" : "0x10000" + }, + "num_mmode_perf_regs" : "4", + "load_error_rollback" : "1", + "reset_vec" : "0x80000000", + "triggers" : [ + { + "poke_mask" : [ + "0x081818c7", + "0xffffffff", + "0x00000000" + ], + "reset" : [ + "0x23e00000", + "0x00000000", + "0x00000000" + ], + "mask" : [ + "0x081818c7", + "0xffffffff", + "0x00000000" + ] + }, + { + "poke_mask" : [ + "0x081818c7", + "0xffffffff", + "0x00000000" + ], + "reset" : [ + "0x23e00000", + "0x00000000", + "0x00000000" + ], + "mask" : [ + "0x081818c7", + "0xffffffff", + "0x00000000" + ] + }, + { + "poke_mask" : [ + "0x081818c7", + "0xffffffff", + "0x00000000" + ], + "reset" : [ + "0x23e00000", + "0x00000000", + "0x00000000" + ], + "mask" : [ + "0x081818c7", + "0xffffffff", + "0x00000000" + ] + }, + { + "poke_mask" : [ + "0x081818c7", + "0xffffffff", + "0x00000000" + ], + "reset" : [ + "0x23e00000", + "0x00000000", + "0x00000000" + ], + "mask" : [ + "0x081818c7", + "0xffffffff", + "0x00000000" + ] + } + ], + "xlen" : 32, + "pic" : { + "meigwctrl_offset" : "0x4000", + "region" : "0xf", + "total_int" : 8, + "size" : "0x8000", + "mpiccfg_offset" : "0x3000", + "meigwclr_offset" : "0x5000", + "total_int_plus1" : 9, + "meipt_offset" : "0x3004", + "int_words" : 1, + "meie_offset" : "0x2000", + "bits" : 15, + "meip_offset" : "0x1000", + "meipl_offset" : "0x0000", + "offset" : "0xc0000" + }, + "store_error_rollback" : "0", + "even_odd_trigger_chains" : "true", + "max_mmode_perf_event" : "50", + "csr" : { + "pmpaddr9" : { + "exists" : "false" + }, + "dicad1" : { + "reset" : "0x0", + "number" : "0x7ca", + "comment" : "Cache diagnostics.", + "debug" : "true", + "exists" : "true", + "mask" : "0x3" + }, + "pmpcfg0" : { + "exists" : "false" + }, + "mhpmcounter4h" : { + "reset" : "0x0", + "exists" : "true", + "mask" : 
"0xffffffff" + }, + "dicago" : { + "reset" : "0x0", + "number" : "0x7cb", + "comment" : "Cache diagnostics.", + "debug" : "true", + "exists" : "true", + "mask" : "0x0" + }, + "mie" : { + "reset" : "0x0", + "exists" : "true", + "mask" : "0x40000888" + }, + "misa" : { + "reset" : "0x40001104", + "exists" : "true", + "mask" : "0x0" + }, + "mhpmcounter6h" : { + "reset" : "0x0", + "exists" : "true", + "mask" : "0xffffffff" + }, + "meicpct" : { + "reset" : "0x0", + "number" : "0xbca", + "comment" : "External claim id/priority capture.", + "exists" : "true", + "mask" : "0x0" + }, + "mimpid" : { + "reset" : "0x1", + "exists" : "true", + "mask" : "0x0" + }, + "mcpc" : { + "reset" : "0x0", + "number" : "0x7c2", + "exists" : "true", + "mask" : "0x0" + }, + "mhpmevent4" : { + "reset" : "0x0", + "exists" : "true", + "mask" : "0xffffffff" + }, + "pmpaddr8" : { + "exists" : "false" + }, + "pmpcfg3" : { + "exists" : "false" + }, + "marchid" : { + "reset" : "0x0000000b", + "exists" : "true", + "mask" : "0x0" + }, + "pmpaddr5" : { + "exists" : "false" + }, + "mfdc" : { + "reset" : "0x00070000", + "number" : "0x7f9", + "exists" : "true", + "mask" : "0x000707ff" + }, + "mhpmevent6" : { + "reset" : "0x0", + "exists" : "true", + "mask" : "0xffffffff" + }, + "mvendorid" : { + "reset" : "0x45", + "exists" : "true", + "mask" : "0x0" + }, + "pmpaddr4" : { + "exists" : "false" + }, + "dcsr" : { + "poke_mask" : "0x00008dcc", + "reset" : "0x40000003", + "exists" : "true", + "mask" : "0x00008c04" + }, + "cycle" : { + "exists" : "false" + }, + "pmpaddr12" : { + "exists" : "false" + }, + "pmpaddr3" : { + "exists" : "false" + }, + "mhpmcounter3h" : { + "reset" : "0x0", + "exists" : "true", + "mask" : "0xffffffff" + }, + "time" : { + "exists" : "false" + }, + "meicidpl" : { + "reset" : "0x0", + "number" : "0xbcb", + "comment" : "External interrupt claim id priority level.", + "exists" : "true", + "mask" : "0xf" + }, + "pmpaddr14" : { + "exists" : "false" + }, + "pmpaddr13" : { + "exists" : "false" 
+ }, + "pmpaddr1" : { + "exists" : "false" + }, + "mhpmcounter6" : { + "reset" : "0x0", + "exists" : "true", + "mask" : "0xffffffff" + }, + "dicad0" : { + "reset" : "0x0", + "number" : "0x7c9", + "comment" : "Cache diagnostics.", + "debug" : "true", + "exists" : "true", + "mask" : "0xffffffff" + }, + "meipt" : { + "reset" : "0x0", + "number" : "0xbc9", + "comment" : "External interrupt priority threshold.", + "exists" : "true", + "mask" : "0xf" + }, + "pmpaddr15" : { + "exists" : "false" + }, + "mhpmcounter5" : { + "reset" : "0x0", + "exists" : "true", + "mask" : "0xffffffff" + }, + "pmpcfg1" : { + "exists" : "false" + }, + "pmpaddr10" : { + "exists" : "false" + }, + "pmpaddr0" : { + "exists" : "false" + }, + "pmpcfg2" : { + "exists" : "false" + }, + "pmpaddr2" : { + "exists" : "false" + }, + "mpmc" : { + "reset" : "0x0", + "number" : "0x7c6", + "comment" : "Core pause: Implemented as read only.", + "exists" : "true", + "mask" : "0x0" + }, + "dmst" : { + "reset" : "0x0", + "number" : "0x7c4", + "comment" : "Memory synch trigger: Flush caches in debug mode.", + "debug" : "true", + "exists" : "true", + "mask" : "0x0" + }, + "instret" : { + "exists" : "false" + }, + "mhpmevent3" : { + "reset" : "0x0", + "exists" : "true", + "mask" : "0xffffffff" + }, + "dicawics" : { + "reset" : "0x0", + "number" : "0x7c8", + "comment" : "Cache diagnostics.", + "debug" : "true", + "exists" : "true", + "mask" : "0x0130fffc" + }, + "mip" : { + "poke_mask" : "0x40000888", + "reset" : "0x0", + "exists" : "true", + "mask" : "0x0" + }, + "mhpmcounter5h" : { + "reset" : "0x0", + "exists" : "true", + "mask" : "0xffffffff" + }, + "micect" : { + "reset" : "0x0", + "number" : "0x7f0", + "exists" : "true", + "mask" : "0xffffffff" + }, + "miccmect" : { + "reset" : "0x0", + "number" : "0x7f1", + "exists" : "true", + "mask" : "0xffffffff" + }, + "mhpmevent5" : { + "reset" : "0x0", + "exists" : "true", + "mask" : "0xffffffff" + }, + "mhpmcounter3" : { + "reset" : "0x0", + "exists" : "true", + "mask" 
: "0xffffffff" + }, + "pmpaddr6" : { + "exists" : "false" + }, + "pmpaddr11" : { + "exists" : "false" + }, + "mcgc" : { + "poke_mask" : "0x000001ff", + "reset" : "0x0", + "number" : "0x7f8", + "exists" : "true", + "mask" : "0x000001ff" + }, + "mhpmcounter4" : { + "reset" : "0x0", + "exists" : "true", + "mask" : "0xffffffff" + }, + "mdccmect" : { + "reset" : "0x0", + "number" : "0x7f2", + "exists" : "true", + "mask" : "0xffffffff" + }, + "pmpaddr7" : { + "exists" : "false" + }, + "meicurpl" : { + "reset" : "0x0", + "number" : "0xbcc", + "comment" : "External interrupt current priority level.", + "exists" : "true", + "mask" : "0xf" + }, + "mstatus" : { + "reset" : "0x1800", + "exists" : "true", + "mask" : "0x88" + }, + "tselect" : { + "reset" : "0x0", + "exists" : "true", + "mask" : "0x3" + } + }, + "harts" : 1 +} diff --git a/configs/swerv.config b/configs/swerv.config new file mode 100755 index 0000000..bd6f694 --- /dev/null +++ b/configs/swerv.config @@ -0,0 +1,1706 @@ +#! /usr/bin/env perl +# +# SPDX-License-Identifier: Apache-2.0 +# Copyright 2019 Western Digital Corporation or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#use strict; +use Data::Dumper; +use Getopt::Long; +use lib "$ENV{RV_ROOT}/tools"; +use JSON; + +my ($self) = $0 =~ m/.*\/(\w+)/o; +my @argv_orig = @ARGV; + + +# Master configuration file +# +# Configuration is perl hash +# Output are define files for various flows +# Verilog (`defines common to RTL/TB) +# Software (#defines) +# Whisper (JSON/#defines) +# +# Default values and valid ranges should be specified +# Can be overridden via the cmd line (-set=name=value-string) +# +# Format of the hash is +# name => VALUE | LIST | HASH +# +# Special name "inside" followed by list .. values must be one of provided list +# Special name "derive" followed by equation to derive +# + +# Dump verilog/assembly macros in upper case +my $defines_case = "U"; + +# Include these macros in verilog (pattern matched) +my @verilog_vars = qw (xlen reset_vec numiregs nmi_vec protection.* icg target testbench.* dccm.* retstack core.* iccm.* btb.* bht.* icache.* pic.* regwidth memmap bus.*); + +# Include these macros in assembly (pattern matched) +my @asm_vars = qw (xlen reset_vec nmi_vec target dccm.* iccm.* pic.* memmap bus.* testbench.* protection.*); +my @asm_overridable = qw (reset_vec nmi_vec) ; + +# Include these macros in PD (pattern matched) +my @pd_vars = qw (physical retstack target btb.* bht.* dccm.* iccm.* icache.* pic.* reset_vec nmi_vec build_ahb_lite datawidth bus.*); + +# Dump non-derived/settable vars/values for these vars in stdout : +my @dvars = qw(retstack btb bht core dccm iccm icache pic bus.* reset_vec nmi_vec memmap bus); + + +# Prefix all macros with +my $prefix = "RV_"; +# No prefix if keyword has +my $no_prefix = 'RV|TOP|^tec|regwidth|clock_period|assert_on|^datawidth|^physical|verilator|SDVT_AHB'; + +my $vlog_use__wh = 1; + +# Cmd Line options#{{{ +our %sets; +our %unsets; +my $help; +my @sets = (); +my @unsets = (); + +#Configurations may be changed via the -set option +# +# -set=name=value : Change the default config parameter value (lowercase)\n"; +# 
-unset=name : Remove the default config parameter (lowercase)\n"; +# : Do not prepend RV_ prefix to -set/-unset variables\n"; +# : multiple -set/-unset options accepted\n\n"; +# + +$helpusage = " + +Main configuration database for SWERV + +This script documents, and generates the {`#} define/include files for verilog/assembly/backend flows + + +User options: + + -target = { default, magellan } + use default settings for one of the targets + + -set=var=value + set arbitrary variable to a value + -unset=var + unset any definitions for var + -snapshot=name + name the configuration (only if no -target specified) + +Direct options for the following parameters exist: + + -ret_stack_size = {2, 3, 4, ... 8} + size of return stack + -btb_size = { 32, 48, 64, 128, 256, 512 } + size of branch target buffer + -dccm_region = { 0x0, 0x1, ... 0xf } + number of 256Mb memory region containing DCCM + -dccm_offset = hexadecimal + offset (in bytes) of DCCM within dccm_region + dccm address will be: \"256M * dccm_region + dccm_offset\", and that must be aligned + to the dccm size or the next larger power of 2 if size is not a power of 2 + -dccm_size = { 4, 8, 16, 32, 48, 64, 128, 256, 512 } kB + size of DCCM + -iccm_enable = { 0, 1 } + whether or not ICCM is enabled + -icache_enable = { 0, 1 } + whether or not icache is enabled + -icache_ecc = { 0, 1 } + whether or not icache has ecc - EXPENSIVE 30% sram growth + default: icache_ecc==0 (parity) + -iccm_region = { 0x0, 0x1, ... 
0xf } + number of 256Mb memory region containing ICCM + -iccm_offset = hexadecimal + offset (in bytes) of ICCM within iccm_region + iccm address will be: \"256M * iccm_region + iccm_offset\", and that must be aligned + to the iccm size or the next larger power of 2 if size is not a power of 2 + -iccm_size = { 4 , 8 , 16 , 32, 64, 128, 256, 512 } kB + size of ICCM + -icache_size = { 16, 32, 64, 128, 256 } kB + size of icache + -pic_2cycle = { 0, 1 } + whether or not 2-cycle PIC is enabled (2 cycle pic may result + in an overall smaller cycle time) + -pic_region = { 0x0, 0x1, ... 0xf } + number of 256Mb memory region containing PIC memory-mapped registers + -pic_offset = hexadecimal + offset (in bytes) of PIC within pic_region + pic address will be: \"256M * pic_region + pic_offset\", and that must be aligned + to the pic size or the next larger power of 2 if size is not a power of 2 + -pic_size = { 32, 64, 128, 256 } kB + size of PIC + -pic_total_int = { 1, 2, 3, ..., 255 } + number of interrupt sources in PIC + -ahb_lite + build with AHB-lite bus interface + default is AXI4 +"; + + +my $ret_stack_size; +my $btb_size; +my $bht_size; +my $dccm_region; +my $dccm_offset; +my $dccm_size; +my $iccm_enable; +my $icache_enable; +my $icache_ecc; +my $iccm_region; +my $iccm_offset; +my $iccm_size; +my $icache_size; +my $pic_2cycle; +my $pic_region; +my $pic_offset; +my $pic_size; +my $pic_total_int; +my $ahb_lite; + +my $top_align_iccm = 0; + +my $target = "default"; +my $snapshot ; +my $build_path ; + +GetOptions( + "help" => \$help, + "target=s" => \$target, + "snapshot=s" => \$snapshot, + "verbose" => \$verbose, + "ret_stack_size=s" => \$ret_stack_size, + "btb_size=s" => \$btb_size, + "bht_size=s" => \$bht_size, + "dccm_enable=s" => \$dccm_enable, + "dccm_region=s" => \$dccm_region, + "dccm_offset=s" => \$dccm_offset, + "dccm_size=s" => \$dccm_size, + "iccm_enable=s" => \$iccm_enable, + "icache_enable=s" => \$icache_enable, + "icache_ecc=s" => \$icache_ecc, + 
"iccm_region=s" => \$iccm_region, + "iccm_offset=s" => \$iccm_offset, + "iccm_size=s" => \$iccm_size, + "pic_2cycle=s" => \$pic_2cycle, + "pic_region=s" => \$pic_region, + "pic_offset=s" => \$pic_offset, + "pic_size=s" => \$pic_size, + "pic_total_int=s" => \$pic_total_int, + "icache_size=s" => \$icache_size, + "ahb_lite" => \$ahb_lite, + "set=s@" => \@sets, + "unset=s@" => \@unsets, +) || die("$helpusage"); + +if ($help) { + print "$helpusage\n"; + exit; +} + +if (!defined $snapshot ) { + $snapshot = $target; +} + +if (!defined $ENV{BUILD_PATH}) { + $build_path = "$ENV{RV_ROOT}/configs/snapshots/$snapshot" ; +} else { + $build_path = $ENV{BUILD_PATH}; +} + +if (! -d "$build_path") { + system ("mkdir", "-p", $build_path) == 0 or die "$self: ERROR! Failed to create directory \"$build_path\"\n"; +} + +# Verilog defines file path +my $vlogfile = "$build_path/common_defines.vh"; + +# Assembly defines file path +my $asmfile = "$build_path/defines.h"; + +# PD defines file path +my $pdfile = "$build_path/pd_defines.vh"; + +# Whisper config file path +my $whisperfile = "$build_path/whisper.json"; + +# Perl defines file path +my $perlfile = "$build_path/perl_configs.pl"; + +my $no_secondary_alu=0; + +if ($target eq "magellan") { + print "$self: Using target \"magellan\"\n"; + if (!defined($ret_stack_size)) { $ret_stack_size=4; } + if (!defined($btb_size)) { $btb_size=32; } + if (!defined($bht_size)) { $bht_size=128; } + if (!defined($dccm_enable)) { $dccm_enable=1; } + if (!defined($dccm_region)) { $dccm_region="0xf"; } + if (!defined($dccm_offset)) { $dccm_offset="0x80000"; } #2*256*1024 + if (!defined($dccm_size)) { $dccm_size=512; } + if (!defined($dccm_num_banks)) { $dccm_num_banks=8; } + if (!defined($iccm_enable)) { $iccm_enable=1; } + if (!defined($iccm_region)) { $iccm_region="0xe"; } + if (!defined($iccm_offset)) { $iccm_offset="0xe000000"; } #0x380*256*1024 + if (!defined($iccm_size)) { $iccm_size=512; } + if (!defined($iccm_num_banks)) { $iccm_num_banks=8; } + if (!defined($icache_enable)) { $icache_enable=0; } + if (!defined($icache_ecc)) 
{ $icache_ecc=0; } + if (!defined($icache_size)) { $icache_size=16; } + if (!defined($pic_2cycle)) { $pic_2cycle=0; } + if (!defined($pic_region)) { $pic_region="0xf"; } + if (!defined($pic_offset)) { $pic_offset="0x100000"; } # 4*256*1024 + if (!defined($pic_size)) { $pic_size=32; } + if (!defined($pic_total_int)) { $pic_total_int=8; } + if (!defined($dec_instbuf_depth)) { $dec_instbuf_depth=2; } +} +elsif ($target eq "default") { + if (!defined($ret_stack_size)) { $ret_stack_size=4; } + if (!defined($btb_size)) { $btb_size=32; } + if (!defined($bht_size)) { $bht_size=128; } + if (!defined($dccm_enable)) { $dccm_enable=1; } + if (!defined($dccm_region)) { $dccm_region="0xf"; } + if (!defined($dccm_offset)) { $dccm_offset="0x40000"; } #1*256*1024 + if (!defined($dccm_size)) { $dccm_size=64; } + if (!defined($dccm_num_banks)) { $dccm_num_banks=8; } + if (!defined($iccm_enable)) { $iccm_enable=0; } + if (!defined($iccm_region)) { $iccm_region="0xe"; } + if (!defined($iccm_offset)) { $iccm_offset="0xe000000"; } #0x380*256*1024 + if (!defined($iccm_size)) { $iccm_size=512; } + if (!defined($iccm_num_banks)) { $iccm_num_banks=8; } + if (!defined($icache_enable)) { $icache_enable=1; } + if (!defined($icache_ecc)) { $icache_ecc=0; } + if (!defined($icache_size)) { $icache_size=16; } + if (!defined($pic_2cycle)) { $pic_2cycle=0; } + if (!defined($pic_region)) { $pic_region="0xf"; } + if (!defined($pic_offset)) { $pic_offset="0xc0000"; } # 3*256*1024 + if (!defined($pic_size)) { $pic_size=32; } + if (!defined($pic_total_int)) { $pic_total_int=8; } + + # default is AXI bus +} +else { + die "$self: ERROR! Unsupported target \"$target\". 
Supported targets are: \"default,magellan\"!\n"; +} + +# general stuff - can't set from command line other than -set + +if (!defined($lsu_stbuf_depth)) { $lsu_stbuf_depth=8; } +if (!defined($dma_buf_depth)) { $dma_buf_depth=4; } +if (!defined($lsu_num_nbload)) { $lsu_num_nbload=8; } +if (!defined($dec_instbuf_depth)) { $dec_instbuf_depth=4; } + + +# Configure triggers +our @triggers = (#{{{ + { + "reset" => ["0x23e00000", "0x00000000", "0x00000000"], + "mask" => ["0x081818c7", "0xffffffff", "0x00000000"], + "poke_mask" => ["0x081818c7", "0xffffffff", "0x00000000"] + }, + { + "reset" => ["0x23e00000", "0x00000000", "0x00000000"], + "mask" => ["0x081818c7", "0xffffffff", "0x00000000"], + "poke_mask" => ["0x081818c7", "0xffffffff", "0x00000000"] + }, + { + "reset" => ["0x23e00000", "0x00000000", "0x00000000"], + "mask" => ["0x081818c7", "0xffffffff", "0x00000000"], + "poke_mask" => ["0x081818c7", "0xffffffff", "0x00000000"] + }, + { + "reset" => ["0x23e00000", "0x00000000", "0x00000000"], + "mask" => ["0x081818c7", "0xffffffff", "0x00000000"], + "poke_mask" => ["0x081818c7", "0xffffffff", "0x00000000"] + }, + );#}}} + + +# Configure CSRs +our %csr = (#{{{ + "mstatus" => { + "reset" => "0x1800", # MPP bits hard wired to binary 11. 
+ "mask" => "0x88", # Only mpie(7) & mie(3) bits writeable + "exists" => "true", + }, + "mie" => { + "reset" => "0x0", + # Only external, timer, local, and software writeable + "mask" => "0x40000888", + "exists" => "true", + }, + "mip" => { + "reset" => "0x0", + # None of the bits are writeable using CSR instructions + "mask" => "0x0", + # Bits corresponding to error overflow, external, timer and software + # interrupts are modifiable + "poke_mask" => "0x40000888", + "exists" => "true", + }, + "mvendorid" => { + "reset" => "0x45", + "mask" => "0x0", + "exists" => "true", + }, + "marchid" => { + "reset" => "0x0000000b", + "mask" => "0x0", + "exists" => "true", + }, + "mimpid" => { + "reset" => "0x1", + "mask" => "0x0", + "exists" => "true", + }, + "misa" => { + "reset" => "0x40001104", + "mask" => "0x0", + "exists" => "true", + }, + "tselect" => { + "reset" => "0x0", + "mask" => "0x3", # Four triggers + "exists" => "true", + }, + "dcsr" => { + "reset" => "0x40000003", + "mask" => "0x00008c04", + "poke_mask" => "0x00008dcc", # cause field modifiable, nmip modifiable + "exists" => "true", + }, + "cycle" => { + "exists" => "false", + }, + "time" => { + "exists" => "false", + }, + "instret" => { + "exists" => "false", + }, + "mhpmcounter3" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter4" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter5" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter6" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter3h" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter4h" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter5h" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter6h" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + 
}, + "mhpmevent3" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmevent4" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmevent5" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmevent6" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, +# Remaining CSRs are non-standard. These are specific to SWERV + "dicawics" => { + "number" => "0x7c8", + "reset" => "0x0", + "mask" => "0x0130fffc", + "exists" => "true", + }, + "dicad0" => { + "number" => "0x7c9", + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "dicad1" => { + "number" => "0x7ca", + "reset" => "0x0", + "mask" => "0x3", + "exists" => "true", + }, + "dicago" => { + "number" => "0x7cb", + "reset" => "0x0", + "mask" => "0x0", + "exists" => "true", + }, + "mcpc" => { + "number" => "0x7c2", + "reset" => "0x0", + "mask" => "0x0", + "exists" => "true", + }, + "mpmc" => { + "comment" => "Core pause: Implemented as read only.", + "number" => "0x7c6", + "reset" => "0x0", + "mask" => "0x0", + "exists" => "true", + }, + "micect" => { + "number" => "0x7f0", + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "miccmect" => { + "number" => "0x7f1", + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mdccmect" => { + "number" => "0x7f2", + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mcgc" => { + "number" => "0x7f8", + "reset" => "0x0", + "mask" => "0x000001ff", + "poke_mask" => "0x000001ff", + "exists" => "true", + }, + "mfdc" => { + "number" => "0x7f9", + "reset" => "0x00070000", + "mask" => "0x000707ff", + "exists" => "true", + }, + "dmst" => { + "comment" => "Memory synch trigger: Flush caches in debug mode.", + "number" => "0x7c4", + "reset" => "0x0", + "mask" => "0x0", + "exists" => "true", + "debug" => "true", + }, + "dicawics" => { + "comment" => "Cache diagnostics.", + "number" => 
"0x7c8", + "reset" => "0x0", + "mask" => "0x0130fffc", + "exists" => "true", + "debug" => "true", + }, + "dicad0" => { + "comment" => "Cache diagnostics.", + "number" => "0x7c9", + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + "debug" => "true", + }, + "dicad1" => { + "comment" => "Cache diagnostics.", + "number" => "0x7ca", + "reset" => "0x0", + "mask" => "0x3", + "exists" => "true", + "debug" => "true", + }, + "dicago" => { + "comment" => "Cache diagnostics.", + "number" => "0x7cb", + "reset" => "0x0", + "mask" => "0x0", + "exists" => "true", + "debug" => "true", + }, + "meipt" => { + "comment" => "External interrupt priority threshold.", + "number" => "0xbc9", + "reset" => "0x0", + "mask" => "0xf", + "exists" => "true", + }, + "meicpct" => { + "comment" => "External claim id/priority capture.", + "number" => "0xbca", + "reset" => "0x0", + "mask" => "0x0", + "exists" => "true", + }, + "meicidpl" => { + "comment" => "External interrupt claim id priority level.", + "number" => "0xbcb", + "reset" => "0x0", + "mask" => "0xf", + "exists" => "true", + }, + "meicurpl" => { + "comment" => "External interrupt current priority level.", + "number" => "0xbcc", + "reset" => "0x0", + "mask" => "0xf", + "exists" => "true", + }, + +);#}}} + + +foreach my $i (0 .. 3) { + $csr{"pmpcfg$i"} = { "exists" => "false" }; +} + +foreach my $i (0 .. 
15) {
    $csr{"pmpaddr$i"} = { "exists" => "false" };
}



# }}}
# Main config hash, with default values
#
# Hash can be hierarchical with arbitrary levels
# Hexadecimal values are prefixed with 0x
#
# For verilog, if bit width is expected, add to %widths hash below

# Default access-protection windows: eight instruction and eight data
# windows, every one disabled, base address 0 and an all-ones mask.
my %protection_defaults;
foreach my $side (qw(inst data)) {
    foreach my $window (0 .. 7) {
        $protection_defaults{"${side}_access_enable$window"} = "0x0";
        $protection_defaults{"${side}_access_addr$window"}   = "0x00000000";
        $protection_defaults{"${side}_access_mask$window"}   = "0xffffffff";
    }
}

our %config = (#{{{
    "harts"                => "1",
    "xlen"                 => "32",
    "numiregs"             => "32",
    "regwidth"             => "32",
    "reset_vec"            => "0x80000000",
    "nmi_vec"              => "0x11110000",
    "physical"             => "1",
    "num_mmode_perf_regs"  => "4",      # Whisper only
    "max_mmode_perf_event" => "50",     # Whisper only: performance counters event ids will be clamped to this
    "target"               => $target,
    "tec_rv_icg"           => "clockhdr",
    "verilator"            => "$verilator",

    "retstack" => {
        "ret_stack_size" => "$ret_stack_size",
    },

    "btb" => {
        "btb_size"        => "$btb_size",
        "btb_index1_hi"   => "derived",
        "btb_index1_lo"   => "4",
        "btb_index2_hi"   => "derived",
        "btb_index2_lo"   => "derived",
        "btb_index3_hi"   => "derived",
        "btb_index3_lo"   => "derived",
        "btb_addr_hi"     => "derived",
        "btb_array_depth" => "derived",
        "btb_addr_lo"     => "4",
        "btb_btag_size"   => "derived",
    },
    "bht" => {
        "bht_size"        => "$bht_size",
        "bht_addr_hi"     => "derived",
        "bht_addr_lo"     => "4",
        "bht_array_depth" => "derived",
        "bht_ghr_size"    => "derived",
        "bht_ghr_range"   => "derived",
        "bht_ghr_pad"     => "derived",
        "bht_ghr_pad2"    => "derived",
        "bht_hash_string" => "derived",

    },

    "core" => {
        "dec_instbuf_depth" => "$dec_instbuf_depth",
        "lsu_stbuf_depth"   => "$lsu_stbuf_depth",
        "dma_buf_depth"     => "$dma_buf_depth",
        "lsu_num_nbload"    => "$lsu_num_nbload",
        "no_secondary_alu"  => "$no_secondary_alu",
    },

    "dccm" => {
        "dccm_enable"      => "$dccm_enable",       # Comment this out if no DCCM
        "dccm_region"      => "$dccm_region",       # 256M region number
        "dccm_offset"      => "$dccm_offset",       # 256K offset number
        "dccm_size"        => "$dccm_size",         # Size in Kbytes
        "dccm_num_banks"   => "$dccm_num_banks",
        "dccm_sadr"        => 'derived',
        "dccm_eadr"        => 'derived',
        "dccm_bits"        => 'derived',
        "dccm_bank_bits"   => 'derived',
        "dccm_data_width"  => 'derived',
        "dccm_fdata_width" => 'derived',
        "dccm_byte_width"  => 'derived',
        "dccm_width_bits"  => 'derived',
        "dccm_index_bits"  => 'derived',
        "dccm_ecc_width"   => 'derived',
        "lsu_sb_bits"      => 'derived',
        "dccm_data_cell"   => 'derived',
        "dccm_rows"        => 'derived',
        "dccm_reserved"    => 'derived',            # reserve dccm space for SW/stack - no random r/w
    },


    "iccm" => {
        "iccm_enable"     => "$iccm_enable",        # Comment this out if no ICCM
        "iccm_region"     => "$iccm_region",        # 256M region number
        "iccm_offset"     => "$iccm_offset",        # 256K offset number
        "iccm_size"       => "$iccm_size",          # Size in Kbytes
        "iccm_num_banks"  => "$iccm_num_banks",
        "iccm_bank_bits"  => 'derived',
        "iccm_index_bits" => 'derived',
        "iccm_rows"       => 'derived',
        "iccm_data_cell"  => 'derived',
        "iccm_sadr"       => 'derived',
        "iccm_eadr"       => 'derived',
        "iccm_reserved"   => 'derived',             # reserve iccm space for SW/handlers - no random r/w
    },
    "icache" => {
        "icache_enable"     => "$icache_enable",
        "icache_size"       => "$icache_size",
        "icache_data_cell"  => 'derived',
        "icache_tag_cell"   => 'derived',
        "icache_taddr_high" => 'derived',
        "icache_tag_high"   => 'derived',
        "icache_tag_depth"  => 'derived',
        "icache_ic_depth"   => 'derived',
        "icache_ic_rows"    => 'derived',
        "icache_ic_index"   => 'derived',
        "icache_tag_low"    => '6',
        "icache_ecc"        => "$icache_ecc",
    },
    "pic" => {
        "pic_2cycle"           => "$pic_2cycle",    # two cycle PIC for timing reasons
        "pic_region"           => "$pic_region",    # 256M region number
        "pic_offset"           => "$pic_offset",    # 256K offset number
        "pic_size"             => "$pic_size",      # Size in Kbytes
        "pic_base_addr"        => 'derived',        # base_addr = pic_region + offset
        "pic_total_int_plus1"  => 'derived',        # pic_total_int + 1
        "pic_total_int"        => "$pic_total_int", # number of interrupt pins (Smax)
        "pic_int_words"        => 'derived',        # number of 32 bit words for packed registers (Xmax)
        "pic_bits"             => 'derived',        # of bits needs to address the PICM
        "pic_meipl_offset"     => '0x0000',         # Offset of meipl relative to pic_base_addr
        "pic_meip_offset"      => '0x1000',         # Offset of meip relative to pic_base_addr
        "pic_meie_offset"      => '0x2000',         # Offset of meie relative to pic_base_addr
        "pic_mpiccfg_offset"   => '0x3000',         # Offset of mpiccfg relative to pic_base_addr
        "pic_meipt_offset"     => '0x3004',         # Offset of meipt relative to pic_base_addr -- deprecated
        "pic_meigwctrl_offset" => '0x4000',         # gateway control regs relative to pic_base_addr
        "pic_meigwclr_offset"  => '0x5000'          # gateway clear regs relative to pic_base_addr

    },
    "testbench" => {
        "TOP"            => "tb_top",
        "RV_TOP"         => "`TOP.rvtop",
        "CPU_TOP"        => "`RV_TOP.swerv",
        "clock_period"   => "100",
        "build_ahb_lite" => "$ahb_lite",            # one and only one bus build arg will ever be defined
        "build_axi4"     => "",
        "assert_on"      => "",
        "datawidth"      => "64",                   # deprecate this !! FIXME
        "ext_datawidth"  => "64",
        "ext_addrwidth"  => "32",
        "sterr_rollback" => "0",
        "lderr_rollback" => "1",
        "SDVT_AHB"       => "1",
    },
    "protection" => \%protection_defaults,
    "memmap" => {
        "serialio"        => 'derived, overridable',
        "external_data"   => 'derived, overridable',
        "external_prog"   => 'derived, overridable',
        "debug_sb_mem"    => 'derived, overridable',
        "external_data_1" => 'derived, overridable',
#       "consoleio"       => 'derived',             # Part of serial io.
    },
    "bus" => {
        "lsu_bus_tag" => 'derived',
        "dma_bus_tag" => '1',
        "sb_bus_tag"  => '1',
        "ifu_bus_tag" => '3',
    },
    "triggers" => \@triggers,
    "csr" => \%csr,
    "even_odd_trigger_chains" => "true",
);

# Boolean switches are emitted as bare defines, so a switch that is off (and
# was not explicitly requested with a set) must be removed from the hash
# entirely rather than left behind with the value 0.
# Each row: [ current value, key name, hash that actually holds the key ].
# "verilator" is a top-level key of %config, so it is removed from there.
my @optional_switches = (
    [ $iccm_enable,      "iccm_enable",      $config{"iccm"}   ],
    [ $dccm_enable,      "dccm_enable",      $config{"dccm"}   ],
    [ $icache_enable,    "icache_enable",    $config{"icache"} ],
    [ $verilator,        "verilator",        \%config          ],
    [ $no_secondary_alu, "no_secondary_alu", $config{"core"}   ],
    [ $pic_2cycle,       "pic_2cycle",       $config{"pic"}    ],
    [ $icache_ecc,       "icache_ecc",       $config{"icache"} ],
);
foreach my $switch (@optional_switches) {
    my ($value, $name, $holder) = @$switch;
    next if $value != 0;
    next if grep { /$name/ } @sets;
    delete $holder->{$name};
}

# Perform any overrides first before derived values
map_set_unset();
gen_define("","", \%config,[]);
print "\nSweRV configuration for target=$target\n\n";
dump_define("","", \%config,[]);

# perform final checks
my $c;
$c=$config{retstack}{ret_stack_size};
if (!($c >=2 && $c <=8)) { die("$helpusage\n\nFAIL: ret_stack_size == $c; ILLEGAL !!!\n\n"); }
$c=$config{btb}{btb_size};
if (!($c==32||$c==48||$c==64||$c==128||$c==256||$c==512)){ die("$helpusage\n\nFAIL: btb_size == $c; ILLEGAL !!!\n\n"); }
$c=$config{iccm}{iccm_region};
if (!($c>=0 && $c<16)) { die("$helpusage\n\nFAIL: iccm_region == $c ILLEGAL !!!\n\n"); }
$c=$config{iccm}{iccm_offset};
if (!($c>=0 && $c<256*1024*1024 && ($c&0xfff)==0)) { die("$helpusage\n\nFAIL: iccm_offset == $c ILLEGAL !!!\n\n"); }
$c=$config{iccm}{iccm_size};
if (!($c==4||$c==8||$c==16||$c==32||$c==64||$c==128||$c==256||$c==512)) { die("$helpusage\n\nFAIL: iccm_size == $c ILLEGAL !!!\n\n"); }
$c=$config{iccm}{iccm_num_banks};
if (!($c==4 || $c==8 || ($c==16 && $config{iccm}{iccm_size} != 4))) { die("$helpusage\n\nFAIL:
iccm_num_banks == $c ILLEGAL !!!\n\n"); }
$c=$config{iccm}{iccm_enable};
if (!($c==0 || $c==1)) { die("$helpusage\n\nFAIL: iccm_enable == $c ILLEGAL !!!\n\n"); }

# DCCM checks (the diagnostics name the dccm_* parameter that failed).
$c=$config{dccm}{dccm_region};
if (!($c>=0 && $c<16)) { die("$helpusage\n\nFAIL: dccm_region == $c ILLEGAL !!!\n\n"); }
$c=$config{dccm}{dccm_num_banks};
if (!($c==4 || $c==8 || ($c==16 && $config{dccm}{dccm_size} != 4) )) { die("$helpusage\n\nFAIL: dccm_num_banks == $c ILLEGAL !!!\n\n"); }
$c=$config{dccm}{dccm_offset};
if (!($c>=0 && $c<256*1024*1024 && ($c&0xfff)==0)) { die("$helpusage\n\nFAIL: dccm_offset == $c ILLEGAL !!!\n\n"); }
$c=$config{dccm}{dccm_size};
if (!($c==4||$c==8||$c==16||$c==32||$c==48||$c==64||$c==128||$c==256||$c==512)) { die("$helpusage\n\nFAIL: dccm_size == $c ILLEGAL !!!\n\n"); }

# PIC checks
$c=$config{pic}{pic_2cycle};
if (!($c==0 || $c==1)) { die("$helpusage\n\nFAIL: pic_2cycle == $c ILLEGAL !!!\n\n"); }
$c=$config{pic}{pic_region};
if (!($c>=0 && $c<16)) { die("$helpusage\n\nFAIL: pic_region == $c ILLEGAL !!!\n\n"); }
$c=$config{pic}{pic_offset};
if (!($c>=0 && $c<256*1024*1024 && ($c&0xfff)==0)) { die("$helpusage\n\nFAIL: pic_offset == $c ILLEGAL !!!\n\n"); }
$c=$config{pic}{pic_size};
if (!($c==32 || $c==64 || $c==128 || $c==256)) { die("$helpusage\n\nFAIL: pic_size == $c ILLEGAL !!!\n\n"); }
$c=$config{pic}{pic_total_int};
if ( $c<1 || $c>255) { die("$helpusage\n\nFAIL: pic_total_int == $c ILLEGAL !!!\n\n"); }

# I-cache checks
$c=$config{icache}{icache_enable};
if (!($c==0 || $c==1)) { die("$helpusage\n\nFAIL: icache_enable == $c ILLEGAL !!!\n\n"); }
$c=$config{icache}{icache_size};
if (!($c==16 || $c==32 || $c==64 || $c==128 || $c==256)) { die("$helpusage\n\nFAIL: icache_size == $c ILLEGAL !!!\n\n"); }

# Core buffer depths
$c=$config{core}{dec_instbuf_depth};
if (!($c==2 || $c==4)) { die("$helpusage\n\nFAIL: dec_instbuf_depth == $c ILLEGAL !!!\n\n"); }
$c=$config{core}{lsu_stbuf_depth};
if (!($c==2 || $c==4 || $c==8)) { die("$helpusage\n\nFAIL: lsu_stbuf_depth == $c ILLEGAL !!!\n\n"); }
$c=$config{core}{dma_buf_depth};
if (!($c==2 || $c==4)) { die("$helpusage\n\nFAIL: dma_buf_depth == $c ILLEGAL !!!\n\n"); }
$c=$config{core}{lsu_num_nbload};
if (!($c==2 || $c==4 || $c==8)) { die("$helpusage\n\nFAIL: lsu_num_nbload == $c ILLEGAL !!!\n\n"); }

# Access-protection windows: the low 6 bits of every address must be 0 and
# the low 6 bits of every mask must be 1.  Checked in the order inst addr,
# inst mask, data addr, data mask; each diagnostic reports the bad value.
foreach my $side (qw(inst data)) {
    foreach my $rule ([ "addr", 0, "0s" ], [ "mask", 63, "1s" ]) {
        my ($field, $expected_low_bits, $wanted) = @$rule;
        foreach my $window (0 .. 7) {
            my $name = "${side}_access_${field}$window";
            $c = $config{protection}{$name};
            if ((hex($c)&0x3f) != $expected_low_bits) {
                die("$helpusage\n\nFAIL: $name lower 6b must be $wanted $c !!!\n\n");
            }
        }
    }
}



# Exactly one bus flavour stays defined: AHB-lite when requested, AXI4 otherwise.
if (($config{"testbench"}{"build_ahb_lite"} ne "")) {
    delete $config{"testbench"}{"build_axi4"};
}
else { # default is AXI bus
    delete $config{"testbench"}{"build_ahb_lite"};
}


# Fill in derived configuration entries.
# BTB geometry for each legal btb_size.  Sizes 48 and 32 share the index
# layout but additionally set the folding switches listed in their rows.
my %btb_params_for = (
    512 => { btb_index1_hi => 9, btb_index2_hi => 15, btb_index3_hi => 21, btb_array_depth => 64, btb_btag_size => 5 },
    256 => { btb_index1_hi => 8, btb_index2_hi => 13, btb_index3_hi => 18, btb_array_depth => 32, btb_btag_size => 6 },
    128 => { btb_index1_hi => 7, btb_index2_hi => 11, btb_index3_hi => 15, btb_array_depth => 16, btb_btag_size => 7 },
    64  => { btb_index1_hi => 6, btb_index2_hi => 9,  btb_index3_hi => 12, btb_array_depth => 8,  btb_btag_size => 8 },
    48  => { btb_index1_hi => 5, btb_index2_hi => 7,  btb_index3_hi => 9,  btb_array_depth => 4,  btb_btag_size => 9,
             btb_48 => 1, btb_fold2_index_hash => 1, btb_btag_fold => 1 },
    32  => { btb_index1_hi => 5, btb_index2_hi => 7,  btb_index3_hi => 9,  btb_array_depth => 4,  btb_btag_size => 9,
             btb_btag_fold => 1 },
);
{
    # Numeric comparison, as before, so "64" and 64 select the same row.
    my ($btb_row) = grep { $config{btb}{btb_size} == $_ } keys %btb_params_for;
    if (defined $btb_row) {
        my $params = $btb_params_for{$btb_row};
        $config{btb}{$_} = $params->{$_} foreach keys %$params;
    }
}

$config{btb}{btb_index2_lo} = $config{btb}{btb_index1_hi}+1;
$config{btb}{btb_index3_lo} = $config{btb}{btb_index2_hi}+1;
$config{btb}{btb_addr_hi}   = $config{btb}{btb_index1_hi};

# BHT index is a hash of the GHR and PC_HASH
#
# Build the Verilog concatenation used as the BHT index.
#   $btb_index_hi - upper bit of the first BTB index field
#   $ghr_size     - history width as passed by the caller (see the call below)
# Returns the expression as a string.  The "+1"/"-1" arithmetic inside the
# strings is deliberately left for the Verilog compiler to evaluate.
sub ghrhash{
    my($btb_index_hi,$ghr_size) = @_;

    my $btb_addr_width = $btb_index_hi - 3;
    my $ghr_hi = $ghr_size - 1;
    my $ghr_lo = $btb_addr_width;

    if($ghr_size > $btb_addr_width){
        if($ghr_size-1 == $btb_addr_width){
            return "{ghr[$ghr_hi:$ghr_lo] ^ ghr[$ghr_hi+1],hashin[$config{btb}{btb_index1_hi}:4]^ghr[$ghr_lo-1:0]}";
        }
        return "{ghr[$ghr_hi:$ghr_lo] ^ {ghr[$ghr_hi+1], {$ghr_size-1-$btb_addr_width\{1'b0} } },hashin[$config{btb}{btb_index1_hi}:4]^ghr[$ghr_lo-1:0]}";
    }
    if($ghr_size < $btb_addr_width){
        return "{hashin[$ghr_size+3:4]^ghr[$ghr_size-1:0]^{ghr[$ghr_hi+1], {$ghr_hi\{1'b0} } }}";
    }
    return "{hashin[$config{btb}{btb_index1_hi}:4]^ghr[$ghr_lo-1:0]^{ghr[$ghr_hi+1], {$btb_addr_width-1\{1'b0} } } }";
}


# BHT geometry for each supported bht_size.  Column order matches @bht_fields.
# The trailing blank in two of the bht_ghr_pad strings is part of the value.
my @bht_fields = qw(bht_ghr_size bht_ghr_range bht_ghr_pad bht_ghr_pad2 bht_array_depth bht_addr_hi);
my %bht_params_for = (
    2048 => [ 9, "8:0", "fghr[8:4],3'b0", "fghr[8:3],2'b0", 256, 11 ],
    1024 => [ 8, "7:0", "fghr[7:4],3'b0", "fghr[7:3],2'b0", 128, 10 ],
    512  => [ 7, "6:0", "fghr[6:4],3'b0", "fghr[6:3],2'b0", 64,  9  ],
    256  => [ 6, "5:0", "fghr[5:4],3'b0", "fghr[5:3],2'b0", 32,  8  ],
    128  => [ 5, "4:0", "fghr[4],3'b0",   "fghr[4:3],2'b0", 16,  7  ],
    64   => [ 4, "3:0", "3'b0 ",          "fghr[3],2'b0",   8,   6  ],
    32   => [ 3, "2:0", "2'b0 ",          "2'b0",           4,   5  ],
);
{
    my ($bht_row) = grep { $config{bht}{bht_size} == $_ } keys %bht_params_for;
    if (defined $bht_row) {
        @{ $config{bht} }{@bht_fields} = @{ $bht_params_for{$bht_row} };
    }
}

# Disabled alternative GHR sizing, kept for reference only
# (bht_array_depth and bht_addr_hi were the same as in the live table):
#   bht_size  ghr_size  ghr_range  ghr_pad           ghr_pad2
#   2048      8         7:0        fghr[7:4],3'b0    fghr[7:3],2'b0
#   1024      7         6:0        fghr[6:4],3'b0    fghr[6:3],2'b0
#   512       6         5:0        fghr[5:4],3'b0    fghr[5:3],2'b0
#   256       5         4:0        fghr[4],3'b0      fghr[4:3],2'b0
#   128       5         4:0        fghr[4],3'b0      fghr[4:3],2'b0
#   64        4         3:0        "3'b0 "           fghr[4],2'b0
#   32        3         2:0        "2'b0 "           2'b0             (+ bht_ghr_size_2 = 1)

$config{bht}{bht_hash_string} = ghrhash($config{btb}{btb_index1_hi}, $config{bht}{bht_ghr_size}-1);

# PIC: base address is the 256M region number in bits 31:28 plus the offset.
$config{pic}{pic_base_addr} = sprintf("0x%x",
    (hex($config{pic}{pic_region})<<28) + (hex($config{pic}{pic_offset})));

# NOTE(review): "+0.9" is an approximate round-up, not an exact ceiling
# (e.g. 33/32 + 0.9 truncates to 1) - confirm this is the intended word count.
$config{pic}{pic_int_words} = int($config{pic}{pic_total_int}/32 +0.9);
$config{pic}{pic_bits} = 10 + log2($config{pic}{pic_size});

$config{core}{lsu_num_nbload_width} = log2($config{core}{lsu_num_nbload});

$config{bus}{lsu_bus_tag} = log2($config{core}{lsu_num_nbload}) + 1;

# DCCM: start/end address from region, offset and size (size is in Kbytes).
$config{dccm}{dccm_sadr} = sprintf("0x%x",
    (hex($config{dccm}{dccm_region})<<28) + (hex($config{dccm}{dccm_offset})));

$config{dccm}{dccm_eadr} = sprintf("0x%x",
    (hex($config{dccm}{dccm_region})<<28) + (hex($config{dccm}{dccm_offset})) + size($config{dccm}{dccm_size})-1);

# Reserved space is a quarter of the DCCM, capped at 4096 bytes above 30K.
$config{dccm}{dccm_reserved} = sprintf("0x%x", ($config{dccm}{dccm_size}>30)? 4096 : ($config{dccm}{dccm_size}*1024)/4);

$config{dccm}{dccm_bits} = ($config{dccm}{dccm_size}==48 ) ?
16 : 10 + log2($config{dccm}{dccm_size});

# Remaining DCCM geometry, all derived from the bank count and a fixed
# 32-bit data word.
{
    my $dccm = $config{dccm};

    $dccm->{dccm_bank_bits}   = log2($dccm->{dccm_num_banks});
    $dccm->{dccm_data_width}  = 32;
    $dccm->{dccm_fdata_width} = $dccm->{dccm_data_width} + log2($dccm->{dccm_data_width}) + 2;
    $dccm->{dccm_byte_width}  = $dccm->{dccm_data_width}/8;

    $dccm->{dccm_width_bits}  = log2($dccm->{dccm_byte_width});
    $dccm->{dccm_index_bits}  = $dccm->{dccm_bits} - $dccm->{dccm_bank_bits} - $dccm->{dccm_width_bits};

    $dccm->{dccm_ecc_width}   = log2($dccm->{dccm_data_width}) + 2;

    # lsu_sb_bits is the wider of the DCCM and PIC address widths.
    if ($dccm->{dccm_bits} > $config{pic}{pic_bits}) {
        $dccm->{lsu_sb_bits} = $dccm->{dccm_bits};
    }
    else {
        $dccm->{lsu_sb_bits} = $config{pic}{pic_bits};
    }

    # The 48K configuration gets the mean of two adjacent power-of-two depths.
    if ($dccm->{dccm_size}==48) {
        $dccm->{dccm_rows} = (2**($dccm->{dccm_index_bits}-1) + 2**$dccm->{dccm_index_bits})/2;
    }
    else {
        $dccm->{dccm_rows} = 2**$dccm->{dccm_index_bits};
    }
    $dccm->{dccm_data_cell} = "ram_$dccm->{dccm_rows}x39";
}


# I-cache geometry.  Sizes without a table row fall back to the smallest
# configuration (tag_high 12, tag_depth 64).
{
    my $icache = $config{icache};
    my %tag_geometry_for = (
        # size => [ icache_tag_high, icache_tag_depth ]
        256 => [ 16, 1024 ],
        128 => [ 15, 512  ],
        64  => [ 14, 256  ],
        32  => [ 13, 128  ],
    );
    my ($row) = grep { $icache->{icache_size} == $_ } keys %tag_geometry_for;
    my ($tag_high, $tag_depth) = defined($row) ? @{ $tag_geometry_for{$row} } : (12, 64);

    $icache->{icache_tag_high}  = $tag_high;
    $icache->{icache_tag_depth} = $tag_depth;


    $icache->{icache_ic_depth} = log2($icache->{icache_size}) + 4;
    $icache->{icache_ic_index} = log2($icache->{icache_size}) + 4;
    $icache->{icache_ic_rows}  = 2**$icache->{icache_ic_depth};


    $icache->{icache_taddr_high} = log2($icache->{icache_tag_depth}) - 1;

    # RAM cells are wider when icache_ecc is still present in the hash.
    my ($data_bits, $tag_bits) = defined($icache->{icache_ecc}) ? (42, 25) : (34, 21);
    $icache->{icache_data_cell} = "ram_$icache->{icache_ic_rows}x$data_bits";
    $icache->{icache_tag_cell}  = "ram_$icache->{icache_tag_depth}x$tag_bits";
}
$config{pic}{pic_total_int_plus1} = $config{pic}{pic_total_int} + 1;
# Defines with explicit values in the macro name
$config{dccm}{"dccm_num_banks_$config{dccm}{dccm_num_banks}"} = "";
$config{dccm}{"dccm_size_$config{dccm}{dccm_size}"} = "";

# If ICCM offset not explicitly provided, align to TOP of the region
if ($top_align_iccm && ($config{iccm}{iccm_offset} eq $iccm_offset) && ($config{iccm}{iccm_size} < 32)) {
    $config{iccm}{iccm_region} = "0xa";
    print "$self: Setting default iccm region to region $config{iccm}{iccm_region}\n";
    $config{iccm}{iccm_offset} = sprintf("0x%08x",256*1024*1024-size($config{iccm}{iccm_size}));
    print "$self: Aligning default iccm offset to top of region @ $config{iccm}{iccm_offset}\n";
}

# ICCM start and end addresses (region number in bits 31:28, plus offset).
{
    my $iccm_start = (hex($config{iccm}{iccm_region})<<28) + (hex($config{iccm}{iccm_offset}));
    my $iccm_end   = (hex($config{iccm}{iccm_region})<<28) + (hex($config{iccm}{iccm_offset})) + size($config{iccm}{iccm_size})-1;

    $config{iccm}{iccm_sadr} = sprintf("0x%08x", $iccm_start);
    $config{iccm}{iccm_eadr} = sprintf("0x%08x", $iccm_end);
}

$config{iccm}{iccm_reserved} = sprintf("0x%x", ($config{iccm}{iccm_size}>30)?
4096 : ($config{iccm}{iccm_size}*1024)/4);

$config{iccm}{iccm_bits} = 10 + log2($config{iccm}{iccm_size});
$config{iccm}{iccm_bank_bits} = log2($config{iccm}{iccm_num_banks}); # -1
$config{iccm}{iccm_index_bits} = $config{iccm}{iccm_bits} - $config{iccm}{iccm_bank_bits} - 2; # always 4 bytes
$config{iccm}{iccm_rows} = 2**$config{iccm}{iccm_index_bits};
$config{iccm}{iccm_data_cell} = "ram_$config{iccm}{iccm_rows}x39";
# Defines with explicit values in the macro name
$config{iccm}{"iccm_num_banks_$config{iccm}{iccm_num_banks}"} = "";
$config{iccm}{"iccm_size_$config{iccm}{iccm_size}"} = "";

# Return the highest-numbered 256M region (15 down to 0) that is not in the
# argument list, or nothing when all sixteen regions are taken.
sub _highest_free_region {
    my %taken = map { $_ => 1 } @_;
    foreach my $region (reverse 0 .. 15) {
        return $region unless $taken{$region};
    }
    return;
}

# Regions already claimed by the internal memories.
my @claimed_regions = (
    hex($config{iccm}{iccm_region}),
    hex($config{dccm}{dccm_region}),
    hex($config{pic}{pic_region}),
);
my $free_region;

# When no region is free the entry keeps its placeholder text, which the
# sprintf below formats as 0x00000000.

# Find an unused region for serial IO
$free_region = _highest_free_region(@claimed_regions);
$config{memmap}{serialio} = ($free_region << 28) + (22<<18) if defined $free_region;
$config{memmap}{serialio} = sprintf("0x%08x", $config{memmap}{serialio});

# Find an unused region for external data
$free_region = _highest_free_region(@claimed_regions,
                                    hex($config{memmap}{serialio})>>28);
$config{memmap}{external_data} = ($free_region << 28) + (22<<18) if defined $free_region;
$config{memmap}{external_data} = sprintf("0x%08x", $config{memmap}{external_data});
#
# Find an unused region for external prog
$free_region = _highest_free_region(@claimed_regions,
                                    hex($config{memmap}{serialio})>>28,
                                    hex($config{memmap}{external_data})>>28);
$config{memmap}{external_prog} = ($free_region << 28) if defined $free_region;
$config{memmap}{external_prog} = sprintf("0x%08x", $config{memmap}{external_prog});

# Unused region for second data
$free_region = _highest_free_region(@claimed_regions,
                                    hex($config{memmap}{serialio})>>28,
                                    hex($config{memmap}{external_data})>>28,
                                    hex($config{memmap}{external_prog})>>28);
$config{memmap}{external_data_1} = ($free_region << 28) if defined $free_region;
# Format the value computed just above; reading {memmap}{data_1}, which never
# exists, made this entry always come out as 0x00000000.
$config{memmap}{external_data_1} = sprintf("0x%08x", $config{memmap}{external_data_1});


#$config{memmap}{consoleio} = hex($config{memmap}{serialio}) + 0x100;
#$config{memmap}{consoleio} = sprintf("0x%x", $config{memmap}{consoleio});

# Find an unused region for debug_sb_memory data
# NOTE(review): the external_prog / external_data_1 regions are not excluded
# here, so debug_sb_mem can land in the external_prog region - confirm intended.
$free_region = _highest_free_region(@claimed_regions,
                                    hex($config{memmap}{serialio})>>28,
                                    hex($config{memmap}{external_data})>>28);
$config{memmap}{debug_sb_mem} = ($free_region << 28) + (22<<18) if defined $free_region;
$config{memmap}{debug_sb_mem} = sprintf("0x%08x", $config{memmap}{debug_sb_mem});

# Boot magellan from ICCM
if ($target eq "magellan") {
    $config{reset_vec} = $config{iccm}{iccm_sadr};
    $config{testbench}{magellan} = 1;
    print "$self: Setting reset_vec = ICCM start address for Magellan\n";
}



# Output bit-width specifiers for these variables
our %widths = (
    "dccm_region"      => "4",
    "dccm_offset"      => "28",
    "dccm_sadr"        => "32",
    "dccm_eadr"        => "32",
    "pic_region"       => "4",
    "pic_offset"       => "10",
    "pic_base_addr"    => "32",
    "iccm_region"      => "4",
    "iccm_offset"      => "10",
    "iccm_sadr"        => "32",
    "iccm_eadr"        => "32",
    "bus_prty_default" => "2",
    # every {inst,data}_access_enable0..7 flag is a single bit
    (map { ("inst_access_enable$_", "1") } 0 .. 7),
    (map { ("data_access_enable$_", "1") } 0 .. 7),
);
#}}}

#print Dumper(\%config);
#print Dumper(\%widths);

#print Dumper(\%sets);
#print Dumper(\%unsets);

# Sanity checks
check_addr_align("dccm", hex($config{dccm}{dccm_sadr}), $config{dccm}{dccm_size}*1024);
check_addr_align("iccm", hex($config{iccm}{iccm_sadr}), $config{iccm}{iccm_size}*1024);
check_addr_align("pic", hex($config{pic}{pic_base_addr}), $config{pic}{pic_size}*1024);

# Prevent overlap of internal memories
# (only identical region + offset pairs are detected, not partial overlap)
if ((hex($config{pic}{pic_region}) == hex($config{iccm}{iccm_region})) && (hex($config{pic}{pic_offset}) == hex($config{iccm}{iccm_offset}))) {
    die "$self: ERROR! PIC and ICCM blocks collide (region $config{iccm}{iccm_region}, offset $config{pic}{pic_offset})!\n";
}
if ((hex($config{pic}{pic_region}) == hex($config{dccm}{dccm_region})) && (hex($config{pic}{pic_offset}) == hex($config{dccm}{dccm_offset}))) {
    die "$self: ERROR! PIC and DCCM blocks collide (region $config{dccm}{dccm_region}, offset $config{pic}{pic_offset})!\n";
}
if ((hex($config{iccm}{iccm_region}) == hex($config{dccm}{dccm_region})) && (hex($config{iccm}{iccm_offset}) == hex($config{dccm}{dccm_offset}))) {
    die "$self: ERROR!
ICCM and DCCM blocks collide (region $config{iccm}{iccm_region}, offset $config{dccm}{dccm_offset})!\n";
}

##################### Add dumper routines here ##########################
#
# The bareword handle FILE is kept on purpose: print_header and print_define
# write to it by name.  The three-argument open keeps characters in the path
# from being interpreted as part of the open mode.
#
# Dump Verilog $RV_ROOT/configs/common_defines.vh
print "$self: Writing $vlogfile\n";
open (FILE, ">", $vlogfile) || die "Cannot open $vlogfile for writing $!\n";
print_header("//");
gen_define("","`", \%config, \@verilog_vars);
close FILE;

print "$self: Writing $asmfile\n";
open (FILE, ">", $asmfile) || die "Cannot open $asmfile for writing $!\n";
# Dump ASM/C $RV_ROOT/diags/env/defines.h
print_header("//");
gen_define("","#", \%config, \@asm_vars, \@asm_overridable);
close FILE;

# add `define PHYSICAL 1
# remove `undef RV_ICCM_ENABLE

my $pddata='
`include "common_defines.vh"
`undef ASSERT_ON
`undef TEC_RV_ICG
`define TEC_RV_ICG CKLNQD12BWP35P140
`define PHYSICAL 1
';


print "$self: Writing $pdfile\n";
open (FILE, ">", $pdfile) || die "Cannot open $pdfile for writing $!\n";
# Dump PD $RV_ROOT/configs/pd_defines.vh
print_header("//");
# plain print: the text is data, not a printf format string
print FILE $pddata;
close FILE;

print "$self: Writing $whisperfile\n";
dump_whisper_config(\%config, $whisperfile);


# change this to use config version
# The generators write to stdout; the shell redirects that into the build
# directory.  A failing generator is reported but does not stop the run.
#`$ENV{RV_ROOT}/tools/picmap -t $config{pic}{pic_total_int} > $ENV{RV_ROOT}/design/include/pic_map_auto.h`;
system("$ENV{RV_ROOT}/tools/picmap -t $config{pic}{pic_total_int} > $build_path/pic_map_auto.h") == 0
    or warn "$self: WARNING: picmap exited with status $?\n";
#`$ENV{RV_ROOT}/tools/unrollforverilator $config{pic}{pic_total_int_plus1} > $ENV{RV_ROOT}/design/include/pic_ctrl_verilator_unroll.sv`;
system("$ENV{RV_ROOT}/tools/unrollforverilator $config{pic}{pic_total_int_plus1} > $build_path/pic_ctrl_verilator_unroll.sv") == 0
    or warn "$self: WARNING: unrollforverilator exited with status $?\n";

# Perl vars for use by scripts
print "$self: Writing $perlfile\n";
open (FILE, ">", $perlfile) || die "Cannot open $perlfile for writing $!\n";
print_header("# ");
print FILE "# To use this in a perf script, use 'require \$RV_ROOT/configs/config.pl'\n";
print FILE "# Reference the hash via
# ###################### Helper subroutines ##########################{{{

# Convert a size expressed in kilobytes to bytes.
#   $ksize - size in KB
# Returns the byte count formatted as a decimal integer string.
sub size {#{{{
    my ($ksize) = @_;
    return sprintf("%d", $ksize * 1024);
}#}}}

# Print one define (optionally guarded by ifndef/endif) to the FILE handle.
#   $sym      - directive character: "`" selects Verilog value formatting,
#               anything else (e.g. "#") leaves the value untouched
#   $key      - parameter name
#   $value    - parameter value
#   $override - true => wrap the define in ${sym}ifndef / ${sym}endif
# Reads the file-level settings $prefix, $no_prefix, %widths and
# $defines_case; FILE must already be open for writing.
sub print_define {#{{{
    my ($sym, $key, $value, $override) = @_;

    # Keys matching $no_prefix are emitted without the common prefix.
    # (The former "my $x = ... if COND" form has undefined behaviour in Perl.)
    my $lprefix = ($key !~ /$no_prefix/) ? $prefix : '';
    $lprefix = '' unless defined $lprefix;

    if ($sym eq "`") {
        if (defined($widths{$key})) {
            # Sized Verilog literal: <width>'h<digits>
            $value =~ s/^(0x)*/$widths{$key}'h/;
        } else {
            $value =~ s/^0x/'h/;
        }
    }

    my $name = "$lprefix$key";
    $name = uc($name) if ($defines_case eq "U");

    print FILE "${sym}ifndef $name\n" if ($override);
    print FILE "${sym}define $name $value\n";
    print FILE "${sym}endif\n" if ($override);
}#}}}

# Print the "automatically generated" banner to the FILE handle.
#   $cs - comment leader of the target language (e.g. "//" or "# ")
# Reads $ENV{USER}, $self and @argv_orig, and shells out to `date`.
sub print_header {#{{{
    my $cs = shift;
    print FILE "$cs NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE\n";
    print FILE "$cs This is an automatically generated file by $ENV{USER} on ",`date`;
    print FILE "$cs\n$cs cmd: $self @argv_orig \n";
    print FILE "$cs\n";
}#}}}

# Evaluate a derivation expression and return the result as a 0x-prefixed
# lower-case hex string.
# NOTE(review): this is a string eval; $eqn must only ever come from the
# trusted configuration database, never from external input.
sub derive {#{{{
    my $eqn = shift;
    return sprintf("0x%x", eval($eqn));
}#}}}

# Traverse the configuration database and print every selected key/value
# pair through print_define().
#   $matched     - true when an enclosing hash key already matched
#   $prefix      - directive character handed on to print_define()
#   $hash        - hashref: the (sub)tree to traverse
#   $printvars   - arrayref of key patterns selecting what is printed
#   $overridable - optional arrayref of keys emitted with an ifndef guard
# Applies the command-line overrides recorded in %sets / %unsets on the way.
sub gen_define {#{{{
    my $matched = shift;
    my $prefix = shift;
    my $hash = $_[0];
    my @printvars = @{ $_[1] || [] };
    my @overridable = @{ $_[2] || [] };   # third argument is optional
    my $re = join("|",@printvars);
    $re = qr/($re)/;
    #print Dumper($hash);
    foreach my $key (keys %$hash) {
        next if $key eq "csr";
        #print "looking at $key:$matched ($re)\n";
        if (defined($unsets{$key})) {
            print "$self:unsetting $key\n";
            # NOTE(review): this deletes from the top level of %config even
            # when $hash is a nested sub-hash - confirm that is intended.
            delete($config{$key});
            next
        }
        if (defined($sets{$key}) && $sets{$key} ne $$hash{$key}) {
            if (($$hash{$key} =~ /derived/i) && ($$hash{$key} !~ /overridable/i)) {
                die ("$self: ERROR! $key is a derived and non-overridable parameter!\n");
            } else {
                print "$self: Overriding $key value $$hash{$key} with $sets{$key}\n";
                $$hash{$key} = $sets{$key};
            }
        }
        my $value = $$hash{$key};
        if (ref($value) eq "HASH") {
            if ($key =~ /$re/) {
                $matched = 1;
            }
            gen_define($matched,$prefix, $value, \@printvars, \@overridable);
            $matched = 0;
        } elsif (ref($value) eq "ARRAY") {
            # print "$key : @{$value}\n";
            $matched = 0;
        } else {
            if ($matched eq "1" || $key =~ /$re/) {
                if($value =~ /derive\(.*\)/o) {
                    # Value is a derive(...) expression: evaluate it now.
                    $value = eval($value);
                }
                # Count of exact matches; lexical so no stray global is set.
                my $override = grep { $_ eq $key } @overridable;
                print_define($prefix, $key, $value, $override);
            }
        }
    }
}#}}}

# Same traversal as gen_define(), but prints "swerv: key = value" lines to
# STDOUT instead of writing defines. Only keys listed in the file-level
# @dvars (or nested below a matched hash) are considered.
sub dump_define {#{{{
    my $matched = shift;
    my $prefix = shift;
    my $hash = $_[0];
    my @printvars = @{ $_[1] || [] };
    my @overridable = @{ $_[2] || [] };   # third argument is optional
    my $re = join("|",@printvars);
    $re = qr/($re)/;
    #print Dumper($hash);
    foreach my $key (keys %$hash) {
        next if $key eq "csr";
        next unless $matched || grep { $_ eq $key } @dvars;
        #print "looking at $key:$matched ($re)\n";
        if (defined($unsets{$key})) {
            print "$self:unsetting $key\n";
            delete($config{$key});
            next
        }
        if (defined($sets{$key}) && $sets{$key} ne $$hash{$key}) {
            if (($$hash{$key} =~ /derived/i) && ($$hash{$key} !~ /overridable/i)) {
                die ("$self: ERROR! $key is a derived and non-overridable parameter!\n");
            } else {
                print "$self: Overriding $key value $$hash{$key} with $sets{$key}\n";
                $$hash{$key} = $sets{$key};
            }
        }
        my $value = $$hash{$key};
        if (ref($value) eq "HASH") {
            if ($key =~ /$re/) {
                $matched = 1;
            }
            dump_define($matched,$prefix, $value, \@printvars, \@overridable);
            $matched = 0;
        } elsif (ref($value) eq "ARRAY") {
            # print "$key : @{$value}\n";
            $matched = 0;
        } else {
            if ($matched eq "1" || $key =~ /$re/) {
                if($value =~ /derive\(.*\)/o) {
                    $value = eval($value);
                }
                # Pass the value as an argument, not as part of the format,
                # so a '%' inside the value cannot corrupt the output.
                printf("swerv: %-30s = %s\n", $key, $value) if ($value !~ /derived/);
            }
        }
    }
}#}}}

# Perform cmd line set/unset ############################################{{{
# Translate the command-line lists @sets ("key=value" or bare "key") and
# @unsets ("key") into the lookup hashes %sets and %unsets.
# A bare key in @sets is recorded with the value 1.
sub map_set_unset {
    if (scalar(@sets)) {
        print "$self: Set(s) requested   : @sets\n";
        foreach my $request (@sets) {
            my ($key,$value) = $request =~ m/(\w+)=*(\w+)*/o;
            next unless defined($key);       # nothing usable in this request
            $value = 1 if (!defined($value));
            $sets{$key} = $value;
        }
    }
    if (scalar(@unsets)) {
        # Report the unset list (this message used to print @sets).
        print "$self: Unset(s) requested : @unsets\n";
        foreach my $request (@unsets) {
            $unsets{$request} = 1;
        }
    }
} #}}}
#}}}


# If arg looks like a hexadecimal string, then convert it to decimal.#{{{
# Otherwise, return arg.
sub decimal {
    my ($x) = @_;
    return hex($x) if $x =~ /^0x/o;
    return $x;
}#}}}

# Collect memory protection specs (array of address pairs) in the given
# results array. Tag is either "data" or "inst".
#   $tag     - "data" or "inst"
#   $config  - hashref of the configuration database
#   $results - arrayref; one [ $start, $end ] pair of "0x%08x" strings is
#              pushed for every enabled and valid protection entry
# Invalid entries are reported with warn() and skipped.
sub collect_mem_protection {
    my ($tag, $config, $results) = @_;
    # Use the hashref that was passed in rather than the global %config.
    return unless exists $config->{protection};

    my $prot = $config->{protection};

    my $enable_tag = $tag . "_access_enable";
    my $addr_tag = $tag . "_access_addr";
    my $mask_tag = $tag . "_access_mask";

    foreach my $key (keys %{$prot}) {
        next unless $key =~ /^$enable_tag(\d+)$/;
        my $ix = $1;

        my $enable = $prot->{$key};
        if ($enable !~ /[01]$/) {
            warn("Invalid value for protection entry $key: $enable\n");
            next;
        }

        next unless ($enable eq "1" or $enable eq "1'b1");

        if (! exists $prot->{"$addr_tag$ix"}) {
            warn("Missing $addr_tag$ix\n");
            next;
        }

        if (! exists $prot->{"$mask_tag$ix"}) {
            warn("Missing $mask_tag$ix\n");
            next;
        }

        my $addr = $prot->{"$addr_tag$ix"};
        my $mask = $prot->{"$mask_tag$ix"};

        if ($addr !~ /^0x[0-9a-fA-F]+$/) {
            warn("Invalid $addr_tag$ix: $addr\n");
            next;
        }

        if ($mask !~ /^0x[0-9a-fA-F]+$/) {
            warn("Invalid $mask_tag$ix: $mask\n");
            next;
        }

        # Overlap is only reported; the entry is still processed.
        if ((hex($addr) & hex($mask)) != 0) {
            warn("Protection mask bits overlap address bits in mask $mask and addr $addr\n");
        }

        # Contiguity check: zeros, at most one of 1/3/7, then only f's.
        # Case-insensitive so it agrees with the syntax check above.
        if ($mask !~ /^0x0*[137]?f*$/i) {
            warn("Protection mask ($mask) must have all its one bits to the right of its zero bits\n");
            next;
        }

        my $start = hex($addr) & ~hex($mask) & 0xffffffff;
        my $end = (hex($addr) | hex($mask)) & 0xffffffff;

        $start = sprintf("0x%08x", $start);
        $end = sprintf("0x%08x", $end);

        push(@{$results}, [ $start, $end ]);
    }
}

# Write the configuration parameters relevant to whisper as a JSON file.
#   $config - hashref of the configuration database
#   $path   - output file name
# Dies when the file cannot be opened or closed. Requires the JSON module
# to be loaded by the enclosing script.
sub dump_whisper_config{#{{{
    my ($config, $path) = @_;

    open(my $fh, ">", "$path") or die ("Failed to open $path for writing: $!\n");

    # Put the configuration parameters relevant to whisper into a hash
    # in preparation for a JSON dump.
    my %jh;  # Json hash

    # Collect top-level integer entries.
    foreach my $tag (qw( harts xlen )) {
        $jh{$tag} = $config->{$tag} + 0 if exists $config->{$tag};
    }

    # Collect top-level string/hex entries.
    foreach my $tag (qw ( reset_vec nmi_vec num_mmode_perf_regs max_mmode_perf_event
                          even_odd_trigger_chains)) {
        $jh{$tag} = $config->{$tag} if exists $config->{$tag};
    }

    # Collect memory map configs.
    my (@inst_mem_prot, @data_mem_prot);
    collect_mem_protection("inst", $config, \@inst_mem_prot);
    collect_mem_protection("data", $config, \@data_mem_prot);
    $jh{memmap}{inst} = [@inst_mem_prot] if @inst_mem_prot;
    $jh{memmap}{data} = [@data_mem_prot] if @data_mem_prot;
    # JSON key is "consoleio" (was misspelled "cosnoleio").
    $jh{memmap}{consoleio} = $config->{memmap}{serialio} if exists $config->{memmap}{serialio};

    # Collect load/store-error rollback parameter.
    if (exists $config->{testbench} and exists $config->{testbench}{sterr_rollback}) {
        $jh{store_error_rollback} = $config->{testbench}{sterr_rollback};
    }
    if (exists $config->{testbench} and exists $config->{testbench}{lderr_rollback}) {
        $jh{load_error_rollback} = $config->{testbench}{lderr_rollback};
    }

    # Collect dccm configs
    if (exists $config->{dccm} and exists $config->{dccm}{dccm_enable}) {
        $jh{dccm}{region} = $config->{dccm}{dccm_region};
        $jh{dccm}{size} = 1024*decimal($config->{dccm}{dccm_size});  # From 1k to bytes
        $jh{dccm}{offset} = $config->{dccm}{dccm_offset};

        $jh{dccm}{size} = sprintf("0x%x", $jh{dccm}{size});
    }

    # Collect iccm configs.
    if (exists $config->{iccm} and exists $config->{iccm}{iccm_enable}) {
        $jh{iccm}{region} = $config->{iccm}{iccm_region};
        $jh{iccm}{size} = 1024*decimal($config->{iccm}{iccm_size});  # From 1k to bytes
        $jh{iccm}{offset} = $config->{iccm}{iccm_offset};

        $jh{iccm}{size} = sprintf("0x%x", $jh{iccm}{size});
    }

    # Collect CSRs
    $jh{csr} = $config->{csr} if exists $config->{csr};

    # Collect pic configs.
    if (exists $config->{pic}) {
        while (my ($k, $v) = each %{$config->{pic}}) {
            next if $k eq 'pic_base_addr';  # derived from region and offset
            if ($k eq 'pic_size') {
                $v *= 1024;  # from kbytes to bytes
                $v = sprintf("0x%x", $v);
            }
            $k =~ s/^pic_//o;
            $v += 0 if $v =~ /^\d+$/o;   # emit plain decimals as JSON numbers
            $jh{pic}{$k} = $v;
        }
    }

    # Collect triggers.
    $jh{triggers} = $config->{triggers} if exists $config->{triggers};

    # Dump JSON config file.
    my $json = JSON->new->allow_nonref;
    my $text = $json->pretty->encode(\%jh);
    print($fh $text);

    # Buffered write errors only surface at close time.
    close($fh) or die ("Failed to close $path: $!\n");
}#}}}


# Checker for iccm/dccm/pic sub-region address alignment. Address must be a multiple
# of size or next higher power of 2 if size is not a power of 2.
#   $section - name used in the messages
#   $addr    - start address (number)
#   $size    - size in bytes (number, must be > 0)
# Dies on a non-positive size; prints a message and exits with status 1
# when the address is misaligned.
sub check_addr_align {
    my ($section, $addr, $size) = @_;

    die("Invalid $section size: $size\n") if $size <= 0;

    # Round the size up to the next power of two with integer arithmetic
    # instead of truncating a floating-point logarithm.
    my $p2 = 1;
    $p2 *= 2 while $p2 < $size;
    $size = $p2;

    if (($addr % $size) != 0) {
        printf("Address of $section area(0x%x) is not a multiple of its size (0x%x)\n",
               $addr, $size);
        exit(1);
    }
}


# Base-2 logarithm of $n (floating point; not rounded).
sub log2 {
    my ($n) = @_;
    return log($n)/log(2);
}
+// And then Resume the core to do the normal mode +// Author : +//******************************************************************************** +module dbg ( + // outputs to the core for command and data interface + output logic [31:0] dbg_cmd_addr, + output logic [31:0] dbg_cmd_wrdata, + output logic dbg_cmd_valid, + output logic dbg_cmd_write, // 1: write command, 0: read_command + output logic [1:0] dbg_cmd_type, // 0:gpr 1:csr 2: memory + output logic [1:0] dbg_cmd_size, // size of the abstract mem access debug command + output logic dbg_core_rst_l, // core reset from dm + + // inputs back from the core/dec + input logic [31:0] core_dbg_rddata, + input logic core_dbg_cmd_done, // This will be treated like a valid signal + input logic core_dbg_cmd_fail, // Exception during command run + + // Signals to dma to get a bubble + output logic dbg_dma_bubble, // Debug needs a bubble to send a valid + input logic dma_dbg_ready, // DMA is ready to accept debug request + + // interface with the rest of the core to halt/resume handshaking + output logic dbg_halt_req, // This is a pulse + output logic dbg_resume_req, // Debug sends a resume requests. Pulse + input logic dec_tlu_debug_mode, // Core is in debug mode + input logic dec_tlu_dbg_halted, // The core has finished the queiscing sequence. 
Core is halted now + input logic dec_tlu_mpc_halted_only, // Only halted due to MPC + input logic dec_tlu_resume_ack, // core sends back an ack for the resume (pulse) + + // inputs from the JTAG + input logic dmi_reg_en, // read or write + input logic [6:0] dmi_reg_addr, // address of DM register + input logic dmi_reg_wr_en, // write instruction + input logic [31:0] dmi_reg_wdata, // write data + // output + output logic [31:0] dmi_reg_rdata, // read data +// output logic dmi_reg_ack, + + // AXI signals + // AXI Write Channels + output logic sb_axi_awvalid, + input logic sb_axi_awready, + output logic [`RV_SB_BUS_TAG-1:0] sb_axi_awid, + output logic [31:0] sb_axi_awaddr, + output logic [3:0] sb_axi_awregion, + output logic [7:0] sb_axi_awlen, + output logic [2:0] sb_axi_awsize, + output logic [1:0] sb_axi_awburst, + output logic sb_axi_awlock, + output logic [3:0] sb_axi_awcache, + output logic [2:0] sb_axi_awprot, + output logic [3:0] sb_axi_awqos, + + output logic sb_axi_wvalid, + input logic sb_axi_wready, + output logic [63:0] sb_axi_wdata, + output logic [7:0] sb_axi_wstrb, + output logic sb_axi_wlast, + + input logic sb_axi_bvalid, + output logic sb_axi_bready, + input logic [1:0] sb_axi_bresp, + input logic [`RV_SB_BUS_TAG-1:0] sb_axi_bid, + + // AXI Read Channels + output logic sb_axi_arvalid, + input logic sb_axi_arready, + output logic [`RV_SB_BUS_TAG-1:0] sb_axi_arid, + output logic [31:0] sb_axi_araddr, + output logic [3:0] sb_axi_arregion, + output logic [7:0] sb_axi_arlen, + output logic [2:0] sb_axi_arsize, + output logic [1:0] sb_axi_arburst, + output logic sb_axi_arlock, + output logic [3:0] sb_axi_arcache, + output logic [2:0] sb_axi_arprot, + output logic [3:0] sb_axi_arqos, + + input logic sb_axi_rvalid, + output logic sb_axi_rready, + input logic [`RV_SB_BUS_TAG-1:0] sb_axi_rid, + input logic [63:0] sb_axi_rdata, + input logic [1:0] sb_axi_rresp, + input logic sb_axi_rlast, + + input logic dbg_bus_clk_en, + + // general inputs + input logic 
clk, + input logic free_clk, + input logic rst_l, + input logic clk_override, + input logic scan_mode +); + +`include "global.h" + + typedef enum logic [2:0] {IDLE=3'b000, HALTING=3'b001, HALTED=3'b010, CMD_START=3'b011, CMD_WAIT=3'b100, CMD_DONE=3'b101, RESUMING=3'b110} state_t; + typedef enum logic [3:0] {SBIDLE=4'b0, WAIT=4'b1, CMD_RD=4'b10, CMD_WR=4'b11, CMD_WR_ADDR=4'b100, CMD_WR_DATA=4'b101, RSP_RD=4'b110, RSP_WR=4'b111, DONE=4'b1000} sb_state_t; + + state_t dbg_state; + state_t dbg_nxtstate; + logic dbg_state_en; + // these are the registers that the debug module implements + logic [31:0] dmstatus_reg; // [26:24]-dmerr, [17:16]-resume ack, [9:8]-halted, [3:0]-version + logic [31:0] dmcontrol_reg; // dmcontrol register has only 6 bits implemented. 31: haltreq, 30: resumereq, 29: haltreset, 28: ackhavereset, 1: ndmreset, 0: dmactive. + logic [31:0] command_reg; + logic [31:0] abstractcs_reg; // bits implemted are [12] - busy and [10:8]= command error + logic [31:0] haltsum0_reg; + logic [31:0] data0_reg; + logic [31:0] data1_reg; + + // data 0 + logic [31:0] data0_din; + logic data0_reg_wren, data0_reg_wren0, data0_reg_wren1; + // data 1 + logic [31:0] data1_din; + logic data1_reg_wren, data1_reg_wren0, data1_reg_wren1; + // abstractcs + logic abstractcs_busy_wren; + logic abstractcs_busy_din; + logic [2:0] abstractcs_error_din; + logic abstractcs_error_sel0, abstractcs_error_sel1, abstractcs_error_sel2, abstractcs_error_sel3, abstractcs_error_sel4, abstractcs_error_sel5; + logic abstractcs_error_selor; + // dmstatus + //logic dmstatus_wren; + logic dmstatus_dmerr_wren; + logic dmstatus_resumeack_wren; + logic dmstatus_resumeack_din; + logic dmstatus_havereset_wren; + logic dmstatus_havereset_rst; + logic dmstatus_resumeack; + logic dmstatus_halted; + logic dmstatus_havereset; + + // dmcontrol + logic dmcontrol_wren, dmcontrol_wren_Q; + // command + logic command_wren; + // needed to send the read data back for dmi reads + logic [31:0] dmi_reg_rdata_din; + + 
sb_state_t sb_state; + sb_state_t sb_nxtstate; + logic sb_state_en; + + //System bus section + logic sbcs_wren; + logic sbcs_sbbusy_wren; + logic sbcs_sbbusy_din; + logic sbcs_sbbusyerror_wren; + logic sbcs_sbbusyerror_din; + + logic sbcs_sberror_wren; + logic [2:0] sbcs_sberror_din; + logic sbcs_unaligned; + logic sbcs_illegal_size; + + // data + logic sbdata0_reg_wren0; + logic sbdata0_reg_wren1; + logic sbdata0_reg_wren; + logic [31:0] sbdata0_din; + + logic sbdata1_reg_wren0; + logic sbdata1_reg_wren1; + logic sbdata1_reg_wren; + logic [31:0] sbdata1_din; + + logic sbaddress0_reg_wren0; + logic sbaddress0_reg_wren1; + logic sbaddress0_reg_wren; + logic [31:0] sbaddress0_reg_din; + logic [3:0] sbaddress0_incr; + logic sbreadonaddr_access; + logic sbreadondata_access; + logic sbdata0wr_access; + + logic sb_axi_awvalid_q, sb_axi_awready_q; + logic sb_axi_wvalid_q, sb_axi_wready_q; + logic sb_axi_arvalid_q, sb_axi_arready_q; + logic sb_axi_bvalid_q, sb_axi_bready_q; + logic sb_axi_rvalid_q, sb_axi_rready_q; + logic [1:0] sb_axi_bresp_q, sb_axi_rresp_q; + logic [63:0] sb_axi_rdata_q; + + logic [63:0] sb_bus_rdata; + + //registers + logic [31:0] sbcs_reg; + logic [31:0] sbaddress0_reg; + logic [31:0] sbdata0_reg; + logic [31:0] sbdata1_reg; + + logic dbg_dm_rst_l; + + //clken + logic dbg_free_clken; + logic dbg_free_clk; + + logic sb_free_clken; + logic sb_free_clk; + + logic bus_clken; + logic bus_clk; + + // clocking + // used for the abstract commands. 
+ assign dbg_free_clken = dmi_reg_en | (dbg_state != IDLE) | dbg_state_en | dec_tlu_dbg_halted | clk_override; + + // used for the system bus + assign sb_free_clken = dmi_reg_en | sb_state_en | (sb_state != SBIDLE) | clk_override; + assign bus_clken = (sb_axi_awvalid | sb_axi_wvalid | sb_axi_arvalid | sb_axi_bvalid | sb_axi_rvalid | clk_override) & dbg_bus_clk_en; + + rvclkhdr dbg_free_cgc (.en(dbg_free_clken), .l1clk(dbg_free_clk), .*); + rvclkhdr sb_free_cgc (.en(sb_free_clken), .l1clk(sb_free_clk), .*); + rvclkhdr bus_cgc (.en(bus_clken), .l1clk(bus_clk), .*); + + // end clocking section + + // Reset logic + assign dbg_dm_rst_l = rst_l & (dmcontrol_reg[0] | scan_mode); + assign dbg_core_rst_l = ~dmcontrol_reg[1]; + + // system bus register + // sbcs[31:29], sbcs - [22]:sbbusyerror, [21]: sbbusy, [20]:sbreadonaddr, [19:17]:sbaccess, [16]:sbautoincrement, [15]:sbreadondata, [14:12]:sberror, sbsize=32, 128=0, 64/32/16/8 are legal + assign sbcs_reg[31:29] = 3'b1; + assign sbcs_reg[28:23] = '0; + assign sbcs_reg[11:5] = 7'h20; + assign sbcs_reg[4:0] = 5'b01111; + assign sbcs_wren = (dmi_reg_addr == 7'h38) & dmi_reg_en & dmi_reg_wr_en & (sb_state == SBIDLE); // & (sbcs_reg[14:12] == 3'b000); + assign sbcs_sbbusyerror_wren = (sbcs_wren & dmi_reg_wdata[22]) | + ((sb_state != SBIDLE) & dmi_reg_en & ((dmi_reg_addr == 7'h39) | (dmi_reg_addr == 7'h3c) | (dmi_reg_addr == 7'h3d))); + assign sbcs_sbbusyerror_din = ~(sbcs_wren & dmi_reg_wdata[22]); // Clear when writing one + + rvdffs #(1) sbcs_sbbusyerror_reg (.din(sbcs_sbbusyerror_din), .dout(sbcs_reg[22]), .en(sbcs_sbbusyerror_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + rvdffs #(1) sbcs_sbbusy_reg (.din(sbcs_sbbusy_din), .dout(sbcs_reg[21]), .en(sbcs_sbbusy_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + rvdffs #(1) sbcs_sbreadonaddr_reg (.din(dmi_reg_wdata[20]), .dout(sbcs_reg[20]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + rvdffs #(5) sbcs_misc_reg (.din(dmi_reg_wdata[19:15]), 
.dout(sbcs_reg[19:15]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + rvdffs #(3) sbcs_error_reg (.din(sbcs_sberror_din[2:0]), .dout(sbcs_reg[14:12]), .en(sbcs_sberror_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + + assign sbcs_unaligned = ((sbcs_reg[19:17] == 3'b001) & sbaddress0_reg[0]) | + ((sbcs_reg[19:17] == 3'b010) & (|sbaddress0_reg[1:0])) | + ((sbcs_reg[19:17] == 3'b011) & (|sbaddress0_reg[2:0])); + + assign sbcs_illegal_size = sbcs_reg[19]; // Anything bigger than 64 bits is illegal + + assign sbaddress0_incr[3:0] = ({4{(sbcs_reg[19:17] == 3'b000)}} & 4'b0001) | + ({4{(sbcs_reg[19:17] == 3'b001)}} & 4'b0010) | + ({4{(sbcs_reg[19:17] == 3'b010)}} & 4'b0100) | + ({4{(sbcs_reg[19:17] == 3'b100)}} & 4'b1000); + + // sbdata + //assign sbdata0_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 32'h3c); + assign sbdata0_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3c); // write data only when single read is 0 + assign sbdata0_reg_wren1 = (sb_state == RSP_RD) & sb_state_en & ~sbcs_sberror_wren; + assign sbdata0_reg_wren = sbdata0_reg_wren0 | sbdata0_reg_wren1; + + assign sbdata1_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3d); // write data only when single read is 0; + assign sbdata1_reg_wren1 = (sb_state == RSP_RD) & sb_state_en & ~sbcs_sberror_wren; + assign sbdata1_reg_wren = sbdata1_reg_wren0 | sbdata1_reg_wren1; + + assign sbdata0_din[31:0] = ({32{sbdata0_reg_wren0}} & dmi_reg_wdata[31:0]) | + ({32{sbdata0_reg_wren1}} & sb_bus_rdata[31:0]); + assign sbdata1_din[31:0] = ({32{sbdata1_reg_wren0}} & dmi_reg_wdata[31:0]) | + ({32{sbdata1_reg_wren1}} & sb_bus_rdata[63:32]); + + rvdffe #(32) dbg_sbdata0_reg (.*, .din(sbdata0_din[31:0]), .dout(sbdata0_reg[31:0]), .en(sbdata0_reg_wren), .rst_l(dbg_dm_rst_l)); + rvdffe #(32) dbg_sbdata1_reg (.*, .din(sbdata1_din[31:0]), .dout(sbdata1_reg[31:0]), .en(sbdata1_reg_wren), .rst_l(dbg_dm_rst_l)); + + // sbaddress + assign sbaddress0_reg_wren0 = dmi_reg_en & 
dmi_reg_wr_en & (dmi_reg_addr == 7'h39); + assign sbaddress0_reg_wren = sbaddress0_reg_wren0 | sbaddress0_reg_wren1; + assign sbaddress0_reg_din[31:0]= ({32{sbaddress0_reg_wren0}} & dmi_reg_wdata[31:0]) | + ({32{sbaddress0_reg_wren1}} & (sbaddress0_reg[31:0] + {28'b0,sbaddress0_incr[3:0]})); + rvdffe #(32) dbg_sbaddress0_reg (.*, .din(sbaddress0_reg_din[31:0]), .dout(sbaddress0_reg[31:0]), .en(sbaddress0_reg_wren), .rst_l(dbg_dm_rst_l)); + + assign sbreadonaddr_access = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h39) & sbcs_reg[20]; // if readonaddr is set the next command will start upon writing of addr0 + assign sbreadondata_access = dmi_reg_en & ~dmi_reg_wr_en & (dmi_reg_addr == 7'h3c) & sbcs_reg[15]; // if readondata is set the next command will start upon reading of data0 + assign sbdata0wr_access = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3c); // write to sbdata0 will start write command to system bus + + // memory mapped registers + // dmcontrol register has only 6 bits implemented. 31: haltreq, 30: resumereq, 29: haltreset, 28: ackhavereset, 1: ndmreset, 0: dmactive. 
+ // rest all the bits are zeroed out + // dmactive flop is reset based on core rst_l, all other flops use dm_rst_l + assign dmcontrol_wren = (dmi_reg_addr == 7'h10) & dmi_reg_en & dmi_reg_wr_en; + assign dmcontrol_reg[27:2] = '0; + rvdffs #(5) dmcontrolff (.din({dmi_reg_wdata[31:28],dmi_reg_wdata[1]}), .dout({dmcontrol_reg[31:28], dmcontrol_reg[1]}), .en(dmcontrol_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + rvdffs #(1) dmcontrol_dmactive_ff (.din(dmi_reg_wdata[0]), .dout(dmcontrol_reg[0]), .en(dmcontrol_wren), .rst_l(rst_l), .clk(dbg_free_clk)); + rvdff #(1) dmcontrol_wrenff(.din(dmcontrol_wren), .dout(dmcontrol_wren_Q), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + + // dmstatus register bits that are implemented + // [19:18]-havereset,[17:16]-resume ack, [9:8]-halted, [3:0]-version + // rest all the bits are zeroed out + //assign dmstatus_wren = (dmi_reg_addr == 32'h11) & dmi_reg_en; + assign dmstatus_reg[31:20] = '0; + assign dmstatus_reg[19:18] = {2{dmstatus_havereset}}; + assign dmstatus_reg[15:10] = '0; + assign dmstatus_reg[7] = '1; + assign dmstatus_reg[6:4] = '0; + assign dmstatus_reg[17:16] = {2{dmstatus_resumeack}}; + assign dmstatus_reg[9:8] = {2{dmstatus_halted}}; + assign dmstatus_reg[3:0] = 4'h2; + + assign dmstatus_resumeack_wren = ((dbg_state == RESUMING) & dec_tlu_resume_ack) | (dmstatus_resumeack & ~dmcontrol_reg[30]); + assign dmstatus_resumeack_din = (dbg_state == RESUMING) & dec_tlu_resume_ack; + + assign dmstatus_havereset_wren = (dmi_reg_addr == 7'h10) & dmi_reg_wdata[1] & dmi_reg_en & dmi_reg_wr_en; + assign dmstatus_havereset_rst = (dmi_reg_addr == 7'h10) & dmi_reg_wdata[28] & dmi_reg_en & dmi_reg_wr_en; + + rvdffs #(1) dmstatus_resumeack_reg (.din(dmstatus_resumeack_din), .dout(dmstatus_resumeack), .en(dmstatus_resumeack_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + rvdff #(1) dmstatus_halted_reg (.din(dec_tlu_dbg_halted & ~dec_tlu_mpc_halted_only), .dout(dmstatus_halted), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + rvdffsc 
#(1) dmstatus_havereset_reg (.din(1'b1), .dout(dmstatus_havereset), .en(dmstatus_havereset_wren), .clear(dmstatus_havereset_rst), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + + // haltsum0 register + assign haltsum0_reg[31:1] = '0; + assign haltsum0_reg[0] = dmstatus_halted; + + // abstractcs register + // bits implemted are [12] - busy and [10:8]= command error + assign abstractcs_reg[31:13] = '0; + assign abstractcs_reg[11] = '0; + assign abstractcs_reg[7:4] = '0; + assign abstractcs_reg[3:0] = 4'h2; // One data register + assign abstractcs_error_sel0 = abstractcs_reg[12] & dmi_reg_en & ((dmi_reg_wr_en & ( (dmi_reg_addr == 7'h16) | (dmi_reg_addr == 7'h17))) | (dmi_reg_addr == 7'h4)); + assign abstractcs_error_sel1 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h17) & ~((dmi_reg_wdata[31:24] == 8'b0) | (dmi_reg_wdata[31:24] == 8'h2)); + assign abstractcs_error_sel2 = core_dbg_cmd_done & core_dbg_cmd_fail; + assign abstractcs_error_sel3 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h17) & (dbg_state != HALTED); + assign abstractcs_error_sel4 = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en & + ( ((dmi_reg_wdata[22:20] == 3'b001) & data1_reg[0]) | + ((dmi_reg_wdata[22:20] == 3'b010) & (|data1_reg[1:0])) | + dmi_reg_wdata[22] | (dmi_reg_wdata[22:20] == 3'b011) + ); + + assign abstractcs_error_sel5 = (dmi_reg_addr == 7'h16) & dmi_reg_en & dmi_reg_wr_en; + + assign abstractcs_error_selor = abstractcs_error_sel0 | abstractcs_error_sel1 | abstractcs_error_sel2 | abstractcs_error_sel3 | abstractcs_error_sel4 | abstractcs_error_sel5; + + assign abstractcs_error_din[2:0] = ({3{abstractcs_error_sel0}} & 3'b001) | // writing command or abstractcs while a command was executing. 
Or accessing data0 + ({3{abstractcs_error_sel1}} & 3'b010) | // writing a non-zero command to cmd field of command + ({3{abstractcs_error_sel2}} & 3'b011) | // exception while running command + ({3{abstractcs_error_sel3}} & 3'b100) | // writing a comnand when not in the halted state + ({3{abstractcs_error_sel4}} & 3'b111) | // unaligned abstract memory command + ({3{abstractcs_error_sel5}} & ~dmi_reg_wdata[10:8] & abstractcs_reg[10:8]) | // W1C + ({3{~abstractcs_error_selor}} & abstractcs_reg[10:8]); // hold + + rvdffs #(1) dmabstractcs_busy_reg (.din(abstractcs_busy_din), .dout(abstractcs_reg[12]), .en(abstractcs_busy_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + rvdff #(3) dmabstractcs_error_reg (.din(abstractcs_error_din[2:0]), .dout(abstractcs_reg[10:8]), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + + + // command register - implemented all the bits in this register + // command[16] = 1: write, 0: read + assign command_wren = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en & (dbg_state == HALTED); + rvdffe #(32) dmcommand_reg (.*, .din(dmi_reg_wdata[31:0]), .dout(command_reg[31:0]), .en(command_wren), .rst_l(dbg_dm_rst_l)); + + // data0 reg + assign data0_reg_wren0 = (dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h4) & (dbg_state == HALTED)); + assign data0_reg_wren1 = core_dbg_cmd_done & (dbg_state == CMD_WAIT) & ~command_reg[16]; + assign data0_reg_wren = data0_reg_wren0 | data0_reg_wren1; + + assign data0_din[31:0] = ({32{data0_reg_wren0}} & dmi_reg_wdata[31:0]) | + ({32{data0_reg_wren1}} & core_dbg_rddata[31:0]); + + rvdffe #(32) dbg_data0_reg (.*, .din(data0_din[31:0]), .dout(data0_reg[31:0]), .en(data0_reg_wren), .rst_l(dbg_dm_rst_l)); + + // data 1 + assign data1_reg_wren0 = (dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h5) & (dbg_state == HALTED)); + assign data1_reg_wren1 = 1'b0; // core_dbg_cmd_done & (dbg_state == CMD_WAIT) & ~command_reg[16]; + assign data1_reg_wren = data1_reg_wren0 | data1_reg_wren1; + + assign data1_din[31:0] = 
({32{data1_reg_wren0}} & dmi_reg_wdata[31:0]); + //({32{data0_reg_wren1}} & core_dbg_rddata[31:0]); + + rvdffe #(32) dbg_data1_reg (.*, .din(data1_din[31:0]), .dout(data1_reg[31:0]), .en(data1_reg_wren), .rst_l(dbg_dm_rst_l)); + + + // FSM to control the debug mode entry, command send/recieve, and Resume flow. + always_comb begin + dbg_nxtstate = IDLE; + dbg_state_en = 1'b0; + abstractcs_busy_wren = 1'b0; + abstractcs_busy_din = 1'b0; + dbg_halt_req = dmcontrol_wren_Q & dmcontrol_reg[31]; // single pulse output to the core + dbg_resume_req = 1'b0; // single pulse output to the core + + case (dbg_state) + IDLE: begin + dbg_nxtstate = (dmstatus_reg[9] | dec_tlu_mpc_halted_only) ? HALTED : HALTING; // initiate the halt command to the core + dbg_state_en = ((dmcontrol_reg[31] & ~dec_tlu_debug_mode) | dmstatus_reg[9] | dec_tlu_mpc_halted_only) & ~dmcontrol_reg[1]; // when the jtag writes the halt bit in the DM register, OR when the status indicates Halted + dbg_halt_req = dmcontrol_reg[31]; // Removed debug mode qualification during MPC changes + //dbg_halt_req = dmcontrol_reg[31] & ~dec_tlu_debug_mode; // only when jtag has written the halt_req bit in the control + end + HALTING : begin + dbg_nxtstate = HALTED; // Goto HALTED once the core sends an ACK + dbg_state_en = dmstatus_reg[9]; // core indicates halted + end + HALTED: begin + // wait for halted to go away before send to resume. Else start of new command + dbg_nxtstate = (dmstatus_reg[9] & ~dmcontrol_reg[1]) ? ((dmcontrol_reg[30] & ~dmcontrol_reg[31]) ? RESUMING : CMD_START) : + (dmcontrol_reg[31] ? HALTING : IDLE); // This is MPC halted case + //dbg_nxtstate = dmcontrol_reg[1] ? IDLE : (dmcontrol_reg[30] & ~dmcontrol_reg[31]) ? RESUMING : CMD_START; // wait for halted to go away before send to resume. 
Else start of new command + dbg_state_en = (dmstatus_reg[9] & dmcontrol_reg[30] & ~dmcontrol_reg[31] & dmcontrol_wren_Q) | command_wren | dmcontrol_reg[1] | ~(dmstatus_reg[9] | dec_tlu_mpc_halted_only); + abstractcs_busy_wren = dbg_state_en & (dbg_nxtstate == CMD_START); // write busy when a new command was written by jtag + abstractcs_busy_din = 1'b1; + dbg_resume_req = dbg_state_en & (dbg_nxtstate == RESUMING); // single cycle pulse to core if resuming + end + CMD_START: begin + dbg_nxtstate = (|abstractcs_reg[10:8]) ? CMD_DONE : CMD_WAIT; // new command sent to the core + dbg_state_en = dbg_cmd_valid | (|abstractcs_reg[10:8]); + end + CMD_WAIT: begin + dbg_nxtstate = CMD_DONE; + dbg_state_en = core_dbg_cmd_done; // go to done state for one cycle after completing current command + end + CMD_DONE: begin + dbg_nxtstate = HALTED; + dbg_state_en = 1'b1; + abstractcs_busy_wren = dbg_state_en; // remove the busy bit from the abstracts ( bit 12 ) + abstractcs_busy_din = 1'b0; + end + RESUMING : begin + dbg_nxtstate = IDLE; + dbg_state_en = dmstatus_reg[17]; // resume ack has been updated in the dmstatus register + end + default : begin + dbg_nxtstate = IDLE; + dbg_state_en = 1'b0; + abstractcs_busy_wren = 1'b0; + abstractcs_busy_din = 1'b0; + dbg_halt_req = 1'b0; // single pulse output to the core + dbg_resume_req = 1'b0; // single pulse output to the core + end + endcase + end // always_comb begin + + assign dmi_reg_rdata_din[31:0] = ({32{dmi_reg_addr == 7'h4}} & data0_reg[31:0]) | + ({32{dmi_reg_addr == 7'h5}} & data1_reg[31:0]) | + ({32{dmi_reg_addr == 7'h10}} & dmcontrol_reg[31:0]) | + ({32{dmi_reg_addr == 7'h11}} & dmstatus_reg[31:0]) | + ({32{dmi_reg_addr == 7'h16}} & abstractcs_reg[31:0]) | + ({32{dmi_reg_addr == 7'h17}} & command_reg[31:0]) | + ({32{dmi_reg_addr == 7'h40}} & haltsum0_reg[31:0]) | + ({32{dmi_reg_addr == 7'h38}} & sbcs_reg[31:0]) | + ({32{dmi_reg_addr == 7'h39}} & sbaddress0_reg[31:0]) | + ({32{dmi_reg_addr == 7'h3c}} & sbdata0_reg[31:0]) | + 
({32{dmi_reg_addr == 7'h3d}} & sbdata1_reg[31:0]); + + + rvdffs #($bits(state_t)) dbg_state_reg (.din(dbg_nxtstate), .dout({dbg_state}), .en(dbg_state_en), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + // Ack will use the power on reset only otherwise there won't be any ack until dmactive is 1 +// rvdff #(1) dmi_ack_reg (.din(dmi_reg_en), .dout(dmi_reg_ack), .rst_l(rst_l), .clk(free_clk)); + rvdffs #(32) dmi_rddata_reg(.din(dmi_reg_rdata_din), .dout(dmi_reg_rdata), .en(dmi_reg_en), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + + // interface for the core + assign dbg_cmd_addr[31:0] = (command_reg[31:24] == 8'h2) ? {data1_reg[31:2],2'b0} : {20'b0, command_reg[11:0]}; // Only word addresses for abstract memory + assign dbg_cmd_wrdata[31:0] = data0_reg[31:0]; + assign dbg_cmd_valid = (dbg_state == CMD_START) & ~(|abstractcs_reg[10:8]) & dma_dbg_ready; + assign dbg_cmd_write = command_reg[16]; + assign dbg_cmd_type[1:0] = (command_reg[31:24] == 8'h2) ? 2'b10 : {1'b0, (command_reg[15:12] == 4'b0)}; + assign dbg_cmd_size[1:0] = command_reg[21:20]; + + // Ask DMA to stop taking bus trxns since debug request is done + assign dbg_dma_bubble = ((dbg_state == CMD_START) & ~(|abstractcs_reg[10:8])) | (dbg_state == CMD_WAIT); + + // system bus FSM: SBIDLE, WAIT, then CMD_RD and RSP_RD for reads or CMD_WR (CMD_WR_ADDR / CMD_WR_DATA) and RSP_WR for writes, then DONE; WAIT goes straight to DONE on an unaligned or illegal size access + always_comb begin + sb_nxtstate = SBIDLE; + sb_state_en = 1'b0; + sbcs_sbbusy_wren = 1'b0; + sbcs_sbbusy_din = 1'b0; + sbcs_sberror_wren = 1'b0; + sbcs_sberror_din[2:0] = 3'b0; + sbaddress0_reg_wren1 = 1'b0; + case (sb_state) + SBIDLE: begin + sb_nxtstate = WAIT; + sb_state_en = sbdata0wr_access | sbreadondata_access | sbreadonaddr_access; + sbcs_sbbusy_wren = sb_state_en; // set sbcs_sbbusy when a new system bus access (sbdata0 write, read on data or read on addr) is started + sbcs_sbbusy_din = 1'b1; + sbcs_sberror_wren = sbcs_wren & (|dmi_reg_wdata[14:12]); // write to clear the error bits + sbcs_sberror_din[2:0] = ~dmi_reg_wdata[14:12] & sbcs_reg[14:12]; + end + WAIT: begin + sb_nxtstate = (sbcs_unaligned | sbcs_illegal_size) ? DONE : (sbcs_reg[15] | sbcs_reg[20]) ? 
CMD_RD : CMD_WR; + sb_state_en = dbg_bus_clk_en | sbcs_unaligned | sbcs_illegal_size; + sbcs_sberror_wren = sbcs_unaligned | sbcs_illegal_size; + sbcs_sberror_din[2:0] = sbcs_unaligned ? 3'b011 : 3'b100; + end + CMD_RD : begin + sb_nxtstate = RSP_RD; + sb_state_en = sb_axi_arvalid_q & sb_axi_arready_q & dbg_bus_clk_en; + end + CMD_WR : begin + sb_nxtstate = (sb_axi_awready_q & sb_axi_wready_q) ? RSP_WR : (sb_axi_awready_q ? CMD_WR_DATA : CMD_WR_ADDR); + sb_state_en = ((sb_axi_awvalid_q & sb_axi_awready_q) | (sb_axi_wvalid_q & sb_axi_wready_q)) & dbg_bus_clk_en; + end + CMD_WR_ADDR : begin + sb_nxtstate = RSP_WR; + sb_state_en = sb_axi_awvalid_q & sb_axi_awready_q & dbg_bus_clk_en; + end + CMD_WR_DATA : begin + sb_nxtstate = RSP_WR; + sb_state_en = sb_axi_wvalid_q & sb_axi_wready_q & dbg_bus_clk_en; + end + RSP_RD: begin + sb_nxtstate = DONE; + sb_state_en = sb_axi_rvalid_q & sb_axi_rready_q & dbg_bus_clk_en; + sbcs_sberror_wren = sb_state_en & sb_axi_rresp_q[1]; + sbcs_sberror_din[2:0] = 3'b010; + end + RSP_WR: begin + sb_nxtstate = DONE; + sb_state_en = sb_axi_bvalid_q & sb_axi_bready_q & dbg_bus_clk_en; + sbcs_sberror_wren = sb_state_en & sb_axi_bresp_q[1]; + sbcs_sberror_din[2:0] = 3'b010; + end + DONE: begin + sb_nxtstate = SBIDLE; + sb_state_en = 1'b1; + sbcs_sbbusy_wren = 1'b1; // clear sbcs_sbbusy once the access has finished + sbcs_sbbusy_din = 1'b0; + sbaddress0_reg_wren1 = sbcs_reg[16]; // auto increment was set. 
Update to new address after completing the current command + end + default : begin + sb_nxtstate = SBIDLE; + sb_state_en = 1'b0; + sbcs_sbbusy_wren = 1'b0; + sbcs_sbbusy_din = 1'b0; + sbcs_sberror_wren = 1'b0; + sbcs_sberror_din[2:0] = 3'b0; + sbaddress0_reg_wren1 = 1'b0; + end + endcase + end // always_comb begin + + rvdffs #($bits(sb_state_t)) sb_state_reg (.din(sb_nxtstate), .dout({sb_state}), .en(sb_state_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + + //rvdff #(.WIDTH(1)) bus_clken_ff (.din(dbg_bus_clk_en), .dout(dbg_bus_clk_en_q), .rst_l(dbg_dm_rst_l), .clk(dbg_sb_c2_free_clk), .*); + + rvdffs #(.WIDTH(1)) axi_awvalid_ff (.din(sb_axi_awvalid), .dout(sb_axi_awvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); + rvdffs #(.WIDTH(1)) axi_awready_ff (.din(sb_axi_awready), .dout(sb_axi_awready_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); + rvdffs #(.WIDTH(1)) axi_wvalid_ff (.din(sb_axi_wvalid), .dout(sb_axi_wvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); + rvdffs #(.WIDTH(1)) axi_wready_ff (.din(sb_axi_wready), .dout(sb_axi_wready_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); + rvdffs #(.WIDTH(1)) axi_arvalid_ff (.din(sb_axi_arvalid), .dout(sb_axi_arvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); + rvdffs #(.WIDTH(1)) axi_arready_ff (.din(sb_axi_arready), .dout(sb_axi_arready_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); + + rvdffs #(.WIDTH(1)) axi_bvalid_ff (.din(sb_axi_bvalid), .dout(sb_axi_bvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); + rvdffs #(.WIDTH(1)) axi_bready_ff (.din(sb_axi_bready), .dout(sb_axi_bready_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); + rvdff #(.WIDTH(2)) axi_bresp_ff (.din(sb_axi_bresp[1:0]), .dout(sb_axi_bresp_q[1:0]), .rst_l(dbg_dm_rst_l), .clk(bus_clk), .*); + rvdffs #(.WIDTH(1)) axi_rvalid_ff (.din(sb_axi_rvalid), 
.dout(sb_axi_rvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); + rvdffs #(.WIDTH(1)) axi_rready_ff (.din(sb_axi_rready), .dout(sb_axi_rready_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); + rvdff #(.WIDTH(2)) axi_rresp_ff (.din(sb_axi_rresp[1:0]), .dout(sb_axi_rresp_q[1:0]), .rst_l(dbg_dm_rst_l), .clk(bus_clk), .*); + rvdff #(.WIDTH(64)) axi_rdata_ff (.din(sb_axi_rdata[63:0]), .dout(sb_axi_rdata_q[63:0]), .rst_l(dbg_dm_rst_l), .clk(bus_clk), .*); + + // AXI Request signals + assign sb_axi_awvalid = ((sb_state == CMD_WR) | (sb_state == CMD_WR_ADDR)) & ~(sb_axi_awvalid_q & sb_axi_awready_q); + assign sb_axi_awaddr[31:0] = sbaddress0_reg[31:0]; + assign sb_axi_awid[SB_BUS_TAG-1:0] = '0; + assign sb_axi_awsize[2:0] = sbcs_reg[19:17]; + assign sb_axi_awprot[2:0] = '0; + assign sb_axi_awcache[3:0] = 4'b1111; + assign sb_axi_awregion[3:0] = sbaddress0_reg[31:28]; + assign sb_axi_awlen[7:0] = '0; + assign sb_axi_awburst[1:0] = 2'b01; + assign sb_axi_awqos[3:0] = '0; + assign sb_axi_awlock = '0; + + assign sb_axi_wvalid = ((sb_state == CMD_WR) | (sb_state == CMD_WR_DATA)) & ~(sb_axi_wvalid_q & sb_axi_wready_q); + assign sb_axi_wdata[63:0] = ({64{(sbcs_reg[19:17] == 3'h0)}} & {8{sbdata0_reg[7:0]}}) | + ({64{(sbcs_reg[19:17] == 3'h1)}} & {4{sbdata0_reg[15:0]}}) | + ({64{(sbcs_reg[19:17] == 3'h2)}} & {2{sbdata0_reg[31:0]}}) | + ({64{(sbcs_reg[19:17] == 3'h3)}} & {sbdata1_reg[31:0],sbdata0_reg[31:0]}); + assign sb_axi_wstrb[7:0] = ({8{(sbcs_reg[19:17] == 3'h0)}} & (8'h1 << sbaddress0_reg[2:0])) | + ({8{(sbcs_reg[19:17] == 3'h1)}} & (8'h3 << {sbaddress0_reg[2:1],1'b0})) | + ({8{(sbcs_reg[19:17] == 3'h2)}} & (8'hf << {sbaddress0_reg[2],2'b0})) | + ({8{(sbcs_reg[19:17] == 3'h3)}} & 8'hff); + assign sb_axi_wlast = '1; + + assign sb_axi_arvalid = (sb_state == CMD_RD) & ~(sb_axi_arvalid_q & sb_axi_arready_q); + assign sb_axi_araddr[31:0] = {sbaddress0_reg[31:3],3'b0}; + assign sb_axi_arid[SB_BUS_TAG-1:0] = '0; + assign 
sb_axi_arsize[2:0] = 3'b011; + assign sb_axi_arprot[2:0] = '0; + assign sb_axi_arcache[3:0] = 4'b0; + assign sb_axi_arregion[3:0] = sbaddress0_reg[31:28]; + assign sb_axi_arlen[7:0] = '0; + assign sb_axi_arburst[1:0] = 2'b01; + assign sb_axi_arqos[3:0] = '0; + assign sb_axi_arlock = '0; + + // AXI Response signals + assign sb_axi_bready = 1'b1; + + assign sb_axi_rready = 1'b1; + assign sb_bus_rdata[63:0] = ({64{sbcs_reg[19:17] == 3'h0}} & ((sb_axi_rdata_q[63:0] >> 8*sbaddress0_reg[2:0]) & 64'hff)) | + ({64{sbcs_reg[19:17] == 3'h1}} & ((sb_axi_rdata_q[63:0] >> 16*sbaddress0_reg[2:1]) & 64'hffff)) | + ({64{sbcs_reg[19:17] == 3'h2}} & ((sb_axi_rdata_q[63:0] >> 32*sbaddress0_reg[2]) & 64'hffff_ffff)) | + ({64{sbcs_reg[19:17] == 3'h3}} & sb_axi_rdata_q[63:0]); + +`ifdef ASSERT_ON +// assertion. +// when the resume_ack is asserted then the dec_tlu_dbg_halted should be 0 + dm_check_resume_and_halted: assert property (@(posedge clk) disable iff(~rst_l) (~dec_tlu_resume_ack | ~dec_tlu_dbg_halted)); +`endif +endmodule diff --git a/design/dec/cdecode b/design/dec/cdecode new file mode 100644 index 0000000..52b3dd3 --- /dev/null +++ b/design/dec/cdecode @@ -0,0 +1,254 @@ + +.definition + + + +# invalid rs2=0 +c.add0 = [1001.....1....10] +c.add1 = [1001......1...10] +c.add2 = [1001.......1..10] +c.add3 = [1001........1.10] +c.add4 = [1001.........110] + +# invalid rs2=0 +c.mv0 = [1000.....1....10] +c.mv1 = [1000......1...10] +c.mv2 = [1000.......1..10] +c.mv3 = [1000........1.10] +c.mv4 = [1000.........110] + + +# invalid if rs1=0 +c.jalr0 = [10011....0000010] +c.jalr1 = [1001.1...0000010] +c.jalr2 = [1001..1..0000010] +c.jalr3 = [1001...1.0000010] +c.jalr4 = [1001....10000010] + +c.addi = [000...........01] + +# invalid imm=0 +c.addi16sp0 = [011100010.....01] +c.addi16sp1 = [011.000101....01] +c.addi16sp2 = [011.00010.1...01] +c.addi16sp3 = [011.00010..1..01] +c.addi16sp4 = [011.00010...1.01] +c.addi16sp5 = [011.00010....101] + +# invalid uimm=0 +c.addi4spn0 = 
[0001..........00] +c.addi4spn1 = [000.1.........00] +c.addi4spn2 = [000..1........00] +c.addi4spn3 = [000...1.......00] +c.addi4spn4 = [000....1......00] +c.addi4spn5 = [000.....1.....00] +c.addi4spn6 = [000......1....00] +c.addi4spn7 = [000.......1...00] + + +c.and = [100011...11...01] +c.andi = [100.10........01] +c.beqz = [110...........01] +c.bnez = [111...........01] +c.ebreak = [1001000000000010] +c.j = [101...........01] +c.jal = [001...........01] + + +c.jr0 = [10001....0000010] +c.jr1 = [1000.1...0000010] +c.jr2 = [1000..1..0000010] +c.jr3 = [1000...1.0000010] +c.jr4 = [1000....10000010] + +c.li = [010...........01] + +# invalid rd=x2 or imm=0 +c.lui0 = [01111.........01] +c.lui1 = [0111.1........01] +c.lui2 = [0111..1.......01] +c.lui3 = [0111...0......01] +c.lui4 = [0111....1.....01] +c.lui5 = [011.1....1....01] +c.lui6 = [011..1...1....01] +c.lui7 = [011...1..1....01] +c.lui8 = [011....0.1....01] +c.lui9 = [011.....11....01] +c.lui10= [011.1.....1...01] +c.lui11= [011..1....1...01] +c.lui12 = [011...1...1...01] +c.lui13 = [011....0..1...01] +c.lui14 = [011.....1.1...01] +c.lui15 = [011.1......1..01] +c.lui16 = [011..1.....1..01] +c.lui17 = [011...1....1..01] +c.lui18 = [011....0...1..01] +c.lui19 = [011.....1..1..01] +c.lui20 = [011.1.......1.01] +c.lui21 = [011..1......1.01] +c.lui22 = [011...1.....1.01] +c.lui23 = [011....0....1.01] +c.lui24 = [011.....1...1.01] +c.lui25 = [011.1........101] +c.lui26 = [011..1.......101] +c.lui27 = [011...1......101] +c.lui28 = [011....0.....101] +c.lui29 = [011.....1....101] + + +c.lw = [010...........00] + + +c.lwsp = [010...........10] + +c.or = [100011...10...01] + +# bit 5 of the shift must be 0 to be legal +c.slli = [0000..........10] + +c.srai = [100001........01] + +c.srli = [100000........01] + +c.sub = [100011...00...01] +c.sw = [110...........00] +c.swsp = [110...........10] +c.xor = [100011...01...01] + + +.input +rv32c = { + i[15] + i[14] + i[13] + i[12] + i[11] + i[10] + i[9] + i[8] + i[7] + i[6] + i[5] 
+ i[4] + i[3] + i[2] + i[1] + i[0] +} + +.output +rv32c = { + rdrd + rdrs1 + rs2rs2 + rdprd + rdprs1 + rs2prs2 + rs2prd + uimm9_2 + ulwimm6_2 + ulwspimm7_2 + rdeq2 + rdeq1 + rs1eq2 + sbroffset8_1 + simm9_4 + simm5_0 + sjaloffset11_1 + sluimm17_12 + uimm5_0 + uswimm6_2 + uswspimm7_2 + o[31] + o[30] + o[29] + o[28] + o[27] + o[26] + o[25] + o[24] + o[23] + o[22] + o[21] + o[20] + o[19] + o[18] + o[17] + o[16] + o[15] + o[14] + o[13] + o[12] + o[11] + o[10] + o[9] + o[8] + o[7] + o[6] + o[5] + o[4] + o[3] + o[2] + o[1] + o[0] + } + +# assign rs2d[4:0] = i[6:2]; +# +# assign rdd[4:0] = i[11:7]; +# +# assign rdpd[4:0] = {2'b01, i[9:7]}; +# +# assign rs2pd[4:0] = {2'b01, i[4:2]}; + +.decode + + + + +rv32c[c.add{0-4}] = { rdrd rdrs1 rs2rs2 o[5] o[4] o[1] o[0] } + +rv32c[c.mv{0-4}] = { rdrd rs2rs2 o[5] o[4] o[1] o[0] } + +rv32c[c.addi] = { rdrd rdrs1 simm5_0 o[4] o[1] o[0] } + +rv32c[c.addi16sp{0-5}] = { rdeq2 rs1eq2 simm9_4 o[4] o[1] o[0] } +rv32c[c.addi4spn{0-7}] = { rs2prd rs1eq2 uimm9_2 o[4] o[1] o[0] } + + +rv32c[c.and] = { rdprd rdprs1 rs2prs2 o[14] o[13] o[12] o[5] o[4] o[1] o[0] } +rv32c[c.andi] = { rdprd rdprs1 simm5_0 o[14] o[13] o[12] o[4] o[1] o[0] } +rv32c[c.beqz] = { rdprs1 sbroffset8_1 o[6] o[5] o[1] o[0] } +rv32c[c.bnez] = { rdprs1 sbroffset8_1 o[12] o[6] o[5] o[1] o[0] } + + +rv32c[c.ebreak] = { o[20] o[6] o[5] o[4] o[1] o[0] } + +rv32c[c.j] = { sjaloffset11_1 o[6] o[5] o[3] o[2] o[1] o[0] } +rv32c[c.jal] = { sjaloffset11_1 rdeq1 o[6] o[5] o[3] o[2] o[1] o[0] } + + +rv32c[c.jalr{0-4}] = { rdeq1 rdrs1 o[6] o[5] o[2] o[1] o[0] } +rv32c[c.jr{0-4}] = { rdrs1 o[6] o[5] o[2] o[1] o[0] } +rv32c[c.li] = { rdrd simm5_0 o[4] o[1] o[0] } + +rv32c[c.lui{0-29}] = { rdrd sluimm17_12 o[5] o[4] o[2] o[1] o[0] } +rv32c[c.lw] = { rs2prd rdprs1 ulwimm6_2 o[13] o[1] o[0] } +rv32c[c.lwsp] = { rdrd rs1eq2 ulwspimm7_2 o[13] o[1] o[0] } + + +rv32c[c.or] = { rdprd rdprs1 rs2prs2 o[14] o[13] o[5] o[4] o[1] o[0] } + +rv32c[c.slli] = { rdrd rdrs1 uimm5_0 o[12] o[4] o[1] o[0] } 
+rv32c[c.srai] = { rdprd rdprs1 uimm5_0 o[30] o[14] o[12] o[4] o[1] o[0] } +rv32c[c.srli] = { rdprd rdprs1 uimm5_0 o[14] o[12] o[4] o[1] o[0] } + + +rv32c[c.sub] = { rdprd rdprs1 rs2prs2 o[30] o[5] o[4] o[1] o[0] } +rv32c[c.sw] = { rdprs1 rs2prs2 uswimm6_2 o[13] o[5] o[1] o[0] } +rv32c[c.swsp] = { rs2rs2 rs1eq2 uswspimm7_2 o[13] o[5] o[1] o[0] } +rv32c[c.xor] = { rdprd rdprs1 rs2prs2 o[14] o[5] o[4] o[1] o[0] } + + + +.end \ No newline at end of file diff --git a/design/dec/csrdecode b/design/dec/csrdecode new file mode 100644 index 0000000..4544cdd --- /dev/null +++ b/design/dec/csrdecode @@ -0,0 +1,229 @@ +.definition + +csr_misa = [001100000001] +csr_mvendorid = [111100010001] +csr_marchid = [111100010010] +csr_mimpid = [111100010011] +csr_mhartid = [111100010100] +csr_mstatus = [001100000000] +csr_mtvec = [001100000101] +csr_mip = [001101000100] +csr_mie = [001100000100] +csr_mcyclel = [101100000000] +csr_mcycleh = [101110000000] +csr_minstretl = [101100000010] +csr_minstreth = [101110000010] +csr_mscratch = [001101000000] +csr_mepc = [001101000001] +csr_mcause = [001101000010] +csr_mtval = [001101000011] +csr_mrac = [011111000000] +csr_dmst = [011111000100] +csr_mdeau = [101111000000] +csr_mdseac = [111111000000] +csr_meivt = [101111001000] +csr_meihap = [111111001000] +csr_meipt = [101111001001] +csr_meipt = [101111001001] +csr_meicpct = [101111001010] +csr_meicurpl = [101111001100] +csr_meicidpl = [101111001011] +csr_dcsr = [011110110000] +csr_dpc = [011110110001] +csr_dicawics = [011111001000] +csr_dicad0 = [011111001001] +csr_dicad1 = [011111001010] +csr_dicago = [011111001011] +csr_mtsel = [011110100000] +csr_mtdata1 = [011110100001] +csr_mtdata2 = [011110100010] +csr_mhpmc3 = [101100000011] +csr_mhpmc4 = [101100000100] +csr_mhpmc5 = [101100000101] +csr_mhpmc6 = [101100000110] +csr_mhpmc3h = [101110000011] +csr_mhpmc4h = [101110000100] +csr_mhpmc5h = [101110000101] +csr_mhpmc6h = [101110000110] +csr_mhpme3 = [001100100011] +csr_mhpme4 = [001100100100] 
+csr_mhpme5 = [001100100101] +csr_mhpme6 = [001100100110] +csr_micect = [011111110000] +csr_miccmect = [011111110001] +csr_mdccmect = [011111110010] +csr_mpmc = [011111000110] +csr_mcgc = [011111111000] +csr_mcpc = [011111000010] +csr_mfdc = [011111111001] +csr_mgpmc = [011111010000] +csr_perfva = [101100000111] +csr_perfvb = [101100001...] +csr_perfvc = [10110001....] +csr_perfvd = [101110000111] +csr_perfve = [101110001...] +csr_perfvf = [10111001....] +csr_perfvg = [001100100111] +csr_perfvh = [001100101...] +csr_perfvi = [00110011....] + +.input + +csr = { + dec_csr_rdaddr_d[11] + dec_csr_rdaddr_d[10] + dec_csr_rdaddr_d[9] + dec_csr_rdaddr_d[8] + dec_csr_rdaddr_d[7] + dec_csr_rdaddr_d[6] + dec_csr_rdaddr_d[5] + dec_csr_rdaddr_d[4] + dec_csr_rdaddr_d[3] + dec_csr_rdaddr_d[2] + dec_csr_rdaddr_d[1] + dec_csr_rdaddr_d[0] +} + +.output + +csr = { + csr_misa + csr_mvendorid + csr_marchid + csr_mimpid + csr_mhartid + csr_mstatus + csr_mtvec + csr_mip + csr_mie + csr_mcyclel + csr_mcycleh + csr_minstretl + csr_minstreth + csr_mscratch + csr_mepc + csr_mcause + csr_mtval + csr_mrac + csr_dmst + csr_mdeau + csr_mdseac + csr_meihap + csr_meivt + csr_meipt + csr_meicpct + csr_meicurpl + csr_meicidpl + csr_dcsr + csr_mpmc + csr_mcgc + csr_mcpc + csr_mfdc + csr_dpc + csr_mtsel + csr_mtdata1 + csr_mtdata2 + csr_mhpmc3 + csr_mhpmc4 + csr_mhpmc5 + csr_mhpmc6 + csr_mhpmc3h + csr_mhpmc4h + csr_mhpmc5h + csr_mhpmc6h + csr_mhpme3 + csr_mhpme4 + csr_mhpme5 + csr_mhpme6 + csr_mgpmc +csr_perfva +csr_perfvb +csr_perfvc +csr_perfvd +csr_perfve +csr_perfvf +csr_perfvg +csr_perfvh +csr_perfvi + csr_micect + csr_miccmect + csr_mdccmect +csr_dicawics +csr_dicad0 +csr_dicad1 +csr_dicago + valid_only + presync + postsync +} + +.decode + +csr[ csr_misa ] = { csr_misa } +csr[ csr_mvendorid ] = { csr_mvendorid } +csr[ csr_marchid ] = { csr_marchid } +csr[ csr_mimpid ] = { csr_mimpid } +csr[ csr_mhartid ] = { csr_mhartid } +csr[ csr_mstatus ] = { csr_mstatus postsync } +csr[ csr_mtvec ] = { 
csr_mtvec postsync} +csr[ csr_mip ] = { csr_mip } +csr[ csr_mie ] = { csr_mie } +csr[ csr_mcyclel ] = { csr_mcyclel } +csr[ csr_mcycleh ] = { csr_mcycleh } +csr[ csr_minstretl ] = { csr_minstretl presync } +csr[ csr_minstreth ] = { csr_minstreth presync } +csr[ csr_mscratch ] = { csr_mscratch } +csr[ csr_mepc ] = { csr_mepc postsync} +csr[ csr_mcause ] = { csr_mcause } +csr[ csr_mtval ] = { csr_mtval } +csr[ csr_mrac ] = { csr_mrac postsync } +csr[ csr_dmst ] = { csr_dmst postsync} +csr[ csr_mdeau ] = { csr_mdeau } +csr[ csr_mdseac ] = { csr_mdseac } +csr[ csr_meipt ] = { csr_meipt } +csr[ csr_meihap ] = { csr_meihap } +csr[ csr_meivt ] = { csr_meivt } +csr[ csr_meicurpl ] = { csr_meicurpl } +csr[ csr_meicpct ] = { csr_meicpct } +csr[ csr_meicidpl ] = { csr_meicidpl } +csr[ csr_mpmc ] = { csr_mpmc } +csr[ csr_mcgc ] = { csr_mcgc } +csr[ csr_mgpmc ] = { csr_mgpmc presync postsync } +csr[ csr_mcpc ] = { csr_mcpc presync postsync } +csr[ csr_mfdc ] = { csr_mfdc presync postsync } +csr[ csr_dcsr ] = { csr_dcsr } +csr[ csr_dpc ] = { csr_dpc } +csr[ csr_mtsel ] = { csr_mtsel } +csr[ csr_mtdata1 ] = { csr_mtdata1 postsync } +csr[ csr_mtdata2 ] = { csr_mtdata2 postsync } +csr[ csr_mhpmc3 ] = { csr_mhpmc3 presync } +csr[ csr_mhpmc4 ] = { csr_mhpmc4 presync } +csr[ csr_mhpmc5 ] = { csr_mhpmc5 presync } +csr[ csr_mhpmc6 ] = { csr_mhpmc6 presync } +csr[ csr_mhpmc3h ] = { csr_mhpmc3h presync } +csr[ csr_mhpmc4h ] = { csr_mhpmc4h presync } +csr[ csr_mhpmc5h ] = { csr_mhpmc5h presync } +csr[ csr_mhpmc6h ] = { csr_mhpmc6h presync } +csr[ csr_mhpme3 ] = { csr_mhpme3 } +csr[ csr_mhpme4 ] = { csr_mhpme4 } +csr[ csr_mhpme5 ] = { csr_mhpme5 } +csr[ csr_mhpme6 ] = { csr_mhpme6 } +csr[ csr_micect ] = { csr_micect } +csr[ csr_miccmect ] = { csr_miccmect } +csr[ csr_mdccmect ] = { csr_mdccmect } +csr[ csr_dicawics ] = { csr_dicawics } +csr[ csr_dicad0 ] = { csr_dicad0 } +csr[ csr_dicad1 ] = { csr_dicad1 } +csr[ csr_dicago ] = { csr_dicago } + +csr[ csr_perfva ] = { valid_only } +csr[ 
csr_perfvb ] = { valid_only } +csr[ csr_perfvc ] = { valid_only } +csr[ csr_perfvd ] = { valid_only } +csr[ csr_perfve ] = { valid_only } +csr[ csr_perfvf ] = { valid_only } +csr[ csr_perfvg ] = { valid_only } +csr[ csr_perfvh ] = { valid_only } +csr[ csr_perfvi ] = { valid_only } + +.end diff --git a/design/dec/dec.sv b/design/dec/dec.sv new file mode 100644 index 0000000..6062163 --- /dev/null +++ b/design/dec/dec.sv @@ -0,0 +1,579 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// dec: decode unit - decode, bypassing, ARF, interrupts +// +//******************************************************************************** +// $Id$ +// +// +// Function: Decode +// Comments: Decode, dependency scoreboard, ARF +// +// +// A -> D -> EX1 ... 
WB +// +//******************************************************************************** + +module dec + import swerv_types::*; +( + input logic clk, + input logic free_clk, + input logic active_clk, + + + output logic dec_pause_state_cg, // pause state for clock-gating + + input logic rst_l, // reset, active low + input logic [31:1] rst_vec, // reset vector, from core pins + + input logic nmi_int, // NMI pin + input logic [31:1] nmi_vec, // NMI vector, from pins + + input logic i_cpu_halt_req, // Asynchronous Halt request to CPU + input logic i_cpu_run_req, // Asynchronous Restart request to CPU + + output logic o_cpu_halt_status, // Halt status of core (pmu/fw) + output logic o_cpu_halt_ack, // Halt request ack + output logic o_cpu_run_ack, // Run request ack + output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sending a halt or run request + + // external MPC halt/run interface + input logic mpc_debug_halt_req, // Async halt request + input logic mpc_debug_run_req, // Async run request + input logic mpc_reset_run_req, // Run/halt after reset + output logic mpc_debug_halt_ack, // Halt ack + output logic mpc_debug_run_ack, // Run ack + output logic debug_brkpt_status, // debug breakpoint + + + output logic dec_ib0_valid_eff_d, // effective valid taking decode into account + output logic dec_ib1_valid_eff_d, + + input logic exu_pmu_i0_br_misp, // slot 0 branch misp + input logic exu_pmu_i0_br_ataken, // slot 0 branch actual taken + input logic exu_pmu_i0_pc4, // slot 0 4 byte branch + input logic exu_pmu_i1_br_misp, // slot 1 branch misp + input logic exu_pmu_i1_br_ataken, // slot 1 branch actual taken + input logic exu_pmu_i1_pc4, // slot 1 4 byte branch + + + input logic lsu_nonblock_load_valid_dc3, // valid nonblock load at dc3 + input logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_dc3, // -> corresponding tag + input logic lsu_nonblock_load_inv_dc5, // invalidate 
request for nonblock load dc5 + input logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_dc5, // -> corresponding tag + input logic lsu_nonblock_load_data_valid, // valid nonblock load data back + input logic lsu_nonblock_load_data_error, // nonblock load bus error + input logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // -> corresponding tag + input logic [31:0] lsu_nonblock_load_data, // nonblock load data + + input logic lsu_pmu_bus_trxn, // D side bus transaction + input logic lsu_pmu_bus_misaligned, // D side bus misaligned + input logic lsu_pmu_bus_error, // D side bus error + input logic lsu_pmu_bus_busy, // D side bus busy + input logic lsu_pmu_misaligned_dc3, // D side load or store misaligned + + input logic [1:0] ifu_pmu_instr_aligned, // aligned instructions + input logic ifu_pmu_align_stall, // aligner stalled + input logic ifu_pmu_fetch_stall, // fetch unit stalled + input logic ifu_pmu_ic_miss, // icache miss + input logic ifu_pmu_ic_hit, // icache hit + input logic ifu_pmu_bus_error, // Instruction side bus error + input logic ifu_pmu_bus_busy, // Instruction side bus busy + input logic ifu_pmu_bus_trxn, // Instruction side bus transaction + + input logic [3:0] lsu_trigger_match_dc3, + input logic dbg_cmd_valid, // debugger abstract command valid + input logic [1:0] dbg_cmd_size, // size of the abstract mem access debug command + input logic dbg_cmd_write, // command is a write + input logic [1:0] dbg_cmd_type, // command type + input logic [31:0] dbg_cmd_addr, // command address + input logic [1:0] dbg_cmd_wrdata, // command write data, for fence/fence_i + + + input logic ifu_i0_icaf, // icache access fault + input logic ifu_i1_icaf, + input logic ifu_i0_icaf_f1, // i0 has access fault on second fetch group + input logic ifu_i1_icaf_f1, + input logic ifu_i0_perr, // icache parity error + input logic ifu_i1_perr, + input logic ifu_i0_sbecc, // icache/iccm single-bit error + input logic ifu_i1_sbecc, + input logic 
ifu_i0_dbecc, // icache/iccm double-bit error + input logic ifu_i1_dbecc, + + input logic lsu_freeze_dc3, // freeze pipe: decode -> dc3 + input logic lsu_idle_any, // lsu idle for fence instructions + input logic lsu_halt_idle_any, // lsu idle for halting + + input br_pkt_t i0_brp, // branch packet + input br_pkt_t i1_brp, + + input lsu_error_pkt_t lsu_error_pkt_dc3, // LSU exception/error packet + + input logic lsu_imprecise_error_load_any, // LSU imprecise load bus error + input logic lsu_imprecise_error_store_any, // LSU imprecise store bus error + input logic [31:0] lsu_imprecise_error_addr_any, // LSU imprecise bus error address + input logic lsu_freeze_external_ints_dc3, // load to side effect region + + input logic exu_i0_flush_lower_e4, // slot 0 flush for mp + input logic exu_i1_flush_lower_e4, // slot 1 flush for mp + input logic [31:1] exu_i0_flush_path_e4, // slot 0 flush target for mp + input logic [31:1] exu_i1_flush_path_e4, // slot 1 flush target for mp + + input logic [15:0] ifu_illegal_inst, // 16b opcode for illegal inst + + input logic exu_div_stall, // stall decode for div executing + input logic [31:0] exu_div_result, // final div result + input logic exu_div_finish, // cycle div finishes + + input logic [31:0] exu_mul_result_e3, // 32b mul result + + input logic [31:0] exu_csr_rs1_e1, // rs1 for csr instruction + + input logic [31:0] lsu_result_dc3, // load result + input logic [31:0] lsu_result_corr_dc4, // corrected load result + + input logic lsu_load_stall_any, // This is for blocking loads + input logic lsu_store_stall_any, // This is for blocking stores + input logic dma_dccm_stall_any, // stall any load/store at decode, pmu event + input logic dma_iccm_stall_any, // iccm stalled, pmu event + + input logic iccm_dma_sb_error, // ICCM DMA single bit error + + input logic exu_i0_flush_final, // slot0 flush + input logic exu_i1_flush_final, // slot1 flush + + input logic [31:1] exu_npc_e4, // next PC + + input logic exu_flush_final, // 
final flush + + input logic [31:0] exu_i0_result_e1, // alu result e1 + input logic [31:0] exu_i1_result_e1, + + input logic [31:0] exu_i0_result_e4, // alu result e4 + input logic [31:0] exu_i1_result_e4, + + + input logic ifu_i0_valid, ifu_i1_valid, // fetch valids to instruction buffer + input logic [31:0] ifu_i0_instr, ifu_i1_instr, // fetch inst's to instruction buffer + input logic [31:1] ifu_i0_pc, ifu_i1_pc, // pc's for instruction buffer + input logic ifu_i0_pc4, ifu_i1_pc4, // indication of 4B or 2B for corresponding inst + input logic [31:1] exu_i0_pc_e1, // pc's for e1 from the alu's + input logic [31:1] exu_i1_pc_e1, + + input logic mexintpend, // External interrupt pending + input logic timer_int, // Timer interrupt pending (from pin) + + input logic [7:0] pic_claimid, // PIC claimid + input logic [3:0] pic_pl, // PIC priv level + input logic mhwakeup, // High priority wakeup + + output logic [3:0] dec_tlu_meicurpl, // to PIC, Current priv level + output logic [3:0] dec_tlu_meipt, // to PIC + +`ifdef RV_ICACHE_ECC + input logic [41:0] ifu_ic_debug_rd_data, // diagnostic icache read data +`else + input logic [33:0] ifu_ic_debug_rd_data, // diagnostic icache read data +`endif + input logic ifu_ic_debug_rd_data_valid, // diagnostic icache read data valid + output cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics + + +// Debug start + input logic dbg_halt_req, // DM requests a halt + input logic dbg_resume_req, // DM requests a resume + input logic ifu_miss_state_idle, // I-side miss buffer empty + + output logic dec_tlu_flush_noredir_wb , // Tell fetch to idle on this flush + output logic dec_tlu_mpc_halted_only, // Core is halted only due to MPC + output logic dec_tlu_dbg_halted, // Core is halted and ready for debug command + output logic dec_tlu_pmu_fw_halted, // Core is halted due to Power management unit or firmware halt + output logic dec_tlu_debug_mode, // Core is in debug mode + output 
logic dec_tlu_resume_ack, // Resume acknowledge + output logic dec_tlu_flush_leak_one_wb, // single step + output logic dec_tlu_flush_err_wb, // iside perr/ecc rfpc + output logic dec_tlu_stall_dma, // stall dma access when there's a halt request + + output logic dec_debug_wdata_rs1_d, // insert debug write data into rs1 at decode + + output logic [31:0] dec_dbg_rddata, // debug command read data + + output logic dec_dbg_cmd_done, // abstract command is done + output logic dec_dbg_cmd_fail, // abstract command failed (illegal reg address) + + output trigger_pkt_t [3:0] trigger_pkt_any, // info needed by debug trigger blocks + +// Debug end + // branch info from pipe0 for errors or counter updates + input logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] exu_i0_br_index_e4, // index + input logic [1:0] exu_i0_br_hist_e4, // history + input logic [1:0] exu_i0_br_bank_e4, // bank + input logic exu_i0_br_error_e4, // error + input logic exu_i0_br_start_error_e4, // start error + input logic exu_i0_br_valid_e4, // valid + input logic exu_i0_br_mp_e4, // mispredict + input logic exu_i0_br_middle_e4, // middle of bank + input logic [`RV_BHT_GHR_RANGE] exu_i0_br_fghr_e4, // FGHR when predicted + + // branch info from pipe1 for errors or counter updates + input logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] exu_i1_br_index_e4, // index + input logic [1:0] exu_i1_br_hist_e4, // history + input logic [1:0] exu_i1_br_bank_e4, // bank + input logic exu_i1_br_error_e4, // error + input logic exu_i1_br_start_error_e4, // start error + input logic exu_i1_br_valid_e4, // valid + input logic exu_i1_br_mp_e4, // mispredict + input logic exu_i1_br_middle_e4, // middle of bank + input logic [`RV_BHT_GHR_RANGE] exu_i1_br_fghr_e4, // FGHR when predicted + + +`ifdef RV_BTB_48 + input logic [1:0] exu_i1_br_way_e4, // way hit or repl + input logic [1:0] exu_i0_br_way_e4, // way hit or repl +`else + input logic exu_i1_br_way_e4, // way hit or repl + input logic exu_i0_br_way_e4, // way hit or repl +`endif + 
+ output logic [31:0] gpr_i0_rs1_d, // gpr rs1 data + output logic [31:0] gpr_i0_rs2_d, // gpr rs2 data + output logic [31:0] gpr_i1_rs1_d, + output logic [31:0] gpr_i1_rs2_d, + + output logic [31:0] dec_i0_immed_d, // immediate data + output logic [31:0] dec_i1_immed_d, + + output logic [12:1] dec_i0_br_immed_d, // br immediate data + output logic [12:1] dec_i1_br_immed_d, + + output alu_pkt_t i0_ap, // alu packet + output alu_pkt_t i1_ap, + + output logic dec_i0_alu_decode_d, // alu schedule on primary alu + output logic dec_i1_alu_decode_d, + + output logic dec_i0_select_pc_d, // select pc onto rs1 for jal's + output logic dec_i1_select_pc_d, + + output logic [31:1] dec_i0_pc_d, dec_i1_pc_d, // pc's at decode + output logic dec_i0_rs1_bypass_en_d, // rs1 bypass enable + output logic dec_i0_rs2_bypass_en_d, // rs2 bypass enable + output logic dec_i1_rs1_bypass_en_d, + output logic dec_i1_rs2_bypass_en_d, + + output logic [31:0] i0_rs1_bypass_data_d, // rs1 bypass data + output logic [31:0] i0_rs2_bypass_data_d, // rs2 bypass data + output logic [31:0] i1_rs1_bypass_data_d, + output logic [31:0] i1_rs2_bypass_data_d, + output logic dec_ib3_valid_d, // ib3 buffer valid + output logic dec_ib2_valid_d, // ib2 buffer valid + + output lsu_pkt_t lsu_p, // lsu packet + output mul_pkt_t mul_p, // mul packet + output div_pkt_t div_p, // div packet + + output logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses + output logic dec_i0_lsu_d, // is load/store + output logic dec_i1_lsu_d, + + output logic flush_final_e3, // final flush + output logic i0_flush_final_e3, // final flush from i0 + + output logic dec_csr_ren_d, // csr read enable + + output logic dec_tlu_cancel_e4, // Cancel lsu op at DC4 due to future trigger hit + + output logic dec_tlu_flush_lower_wb, // tlu flush due to late mp, exception, rfpc, or int + output logic [31:1] dec_tlu_flush_path_wb, // tlu flush target + output logic dec_tlu_i0_kill_writeb_wb, // I0 is flushed, don't writeback any 
results to arch state + output logic dec_tlu_i1_kill_writeb_wb, // I1 is flushed, don't writeback any results to arch state + output logic dec_tlu_fence_i_wb, // flush is a fence_i rfnpc, flush icache + + output logic dec_i0_mul_d, // chose which gpr value to use + output logic dec_i1_mul_d, + output logic dec_i0_div_d, // chose which gpr value to use + output logic dec_i1_div_d, + output logic dec_i1_valid_e1, // i1 valid at e1 stage + output logic dec_div_decode_e4, // div at e4 stage + output logic [31:1] pred_correct_npc_e2, // npc if prediction is correct at e2 stage + + output logic dec_i0_rs1_bypass_en_e3, // rs1 bypass enable e3 + output logic dec_i0_rs2_bypass_en_e3, // rs2 bypass enable e3 + output logic dec_i1_rs1_bypass_en_e3, + output logic dec_i1_rs2_bypass_en_e3, + output logic [31:0] i0_rs1_bypass_data_e3, // rs1 bypass data e3 + output logic [31:0] i0_rs2_bypass_data_e3, // rs2 bypass data e3 + output logic [31:0] i1_rs1_bypass_data_e3, + output logic [31:0] i1_rs2_bypass_data_e3, + output logic dec_i0_sec_decode_e3, // secondary decode e3 + output logic dec_i1_sec_decode_e3, + output logic [31:1] dec_i0_pc_e3, // pc at e3 + output logic [31:1] dec_i1_pc_e3, + + output logic dec_i0_rs1_bypass_en_e2, // rs1 bypass enable e2 + output logic dec_i0_rs2_bypass_en_e2, // rs2 bypass enable e2 + output logic dec_i1_rs1_bypass_en_e2, + output logic dec_i1_rs2_bypass_en_e2, + output logic [31:0] i0_rs1_bypass_data_e2, // rs1 bypass data e2 + output logic [31:0] i0_rs2_bypass_data_e2, // rs2 bypass data e2 + output logic [31:0] i1_rs1_bypass_data_e2, + output logic [31:0] i1_rs2_bypass_data_e2, + + output br_tlu_pkt_t dec_tlu_br0_wb_pkt, // slot 0 branch predictor update packet + output br_tlu_pkt_t dec_tlu_br1_wb_pkt, // slot 1 branch predictor update packet + + output logic [1:0] dec_tlu_perfcnt0, // toggles when perf counter 0 has an event inc + output logic [1:0] dec_tlu_perfcnt1, // toggles when perf counter 1 has an event inc + output logic [1:0] 
dec_tlu_perfcnt2, // toggles when perf counter 2 has an event inc + output logic [1:0] dec_tlu_perfcnt3, // toggles when perf counter 3 has an event inc + + output predict_pkt_t i0_predict_p_d, // prediction packet to alus + output predict_pkt_t i1_predict_p_d, + + output logic dec_i0_lsu_decode_d, // load/store decode + + output logic [31:0] i0_result_e4_eff, // alu result e4 + output logic [31:0] i1_result_e4_eff, + + output logic dec_tlu_i0_valid_e4, // slot 0 instruction is valid at e4 + output logic dec_tlu_i1_valid_e4, // slot 1 instruction is valid at e4, implies i0_valid_e4 + + output logic [31:0] i0_result_e2, // i0 result data e2 + output logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control + + output logic [31:1] dec_tlu_i0_pc_e4, // pc e4 + output logic [31:1] dec_tlu_i1_pc_e4, + + output logic [4:2] dec_i0_data_en, // clock-gate control logic + output logic [4:1] dec_i0_ctl_en, + output logic [4:2] dec_i1_data_en, + output logic [4:1] dec_i1_ctl_en, + + output logic dec_nonblock_load_freeze_dc2, // lsu must freeze nonblock load due to younger dependency in pipe + + input logic [15:0] ifu_i0_cinst, // 16b compressed instruction + input logic [15:0] ifu_i1_cinst, + + output trace_pkt_t trace_rv_trace_pkt, // trace packet + + // feature disable from mfdc + output logic dec_tlu_sideeffect_posted_disable, // disable posted writes to side-effect address + output logic dec_tlu_core_ecc_disable, // disable core ECC + output logic dec_tlu_sec_alu_disable, // disable secondary ALU + output logic dec_tlu_non_blocking_disable, // disable non blocking loads + output logic dec_tlu_fast_div_disable, // disable fast divider + output logic dec_tlu_bpred_disable, // disable branch prediction + output logic dec_tlu_wb_coalescing_disable, // disable writebuffer coalescing + output logic dec_tlu_ld_miss_byp_wb_disable, // disable loads miss bypass write buffer + output logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:16] + + // clock 
gating overrides from mcgc + output logic dec_tlu_misc_clk_override, // override misc clock domain gating + output logic dec_tlu_exu_clk_override, // override exu clock domain gating + output logic dec_tlu_ifu_clk_override, // override fetch clock domain gating + output logic dec_tlu_lsu_clk_override, // override load/store clock domain gating + output logic dec_tlu_bus_clk_override, // override bus clock domain gating + output logic dec_tlu_pic_clk_override, // override PIC clock domain gating + output logic dec_tlu_dccm_clk_override, // override DCCM clock domain gating + output logic dec_tlu_icm_clk_override, // override ICCM clock domain gating + + input logic scan_mode + + ); + + localparam GPR_BANKS = 1; + localparam GPR_BANKS_LOG2 = (GPR_BANKS == 1) ? 1 : $clog2(GPR_BANKS); + + logic dec_tlu_dec_clk_override; // to and from dec blocks + logic clk_override; + + logic dec_ib1_valid_d; + logic dec_ib0_valid_d; + + logic [1:0] dec_pmu_instr_decoded; + logic dec_pmu_decode_stall; + logic dec_pmu_presync_stall; + logic dec_pmu_postsync_stall; + + logic dec_tlu_wr_pause_wb; // CSR write to pause reg is at WB. 
+ + logic dec_i0_rs1_en_d; + logic dec_i0_rs2_en_d; + logic dec_fence_pending; // tell TLU to stall DMA + + logic [4:0] dec_i0_rs1_d; + logic [4:0] dec_i0_rs2_d; + + + logic dec_i1_rs1_en_d; + logic dec_i1_rs2_en_d; + + logic [4:0] dec_i1_rs1_d; + logic [4:0] dec_i1_rs2_d; + + + logic [31:0] dec_i0_instr_d, dec_i1_instr_d; + + logic dec_tlu_pipelining_disable; + logic dec_tlu_dual_issue_disable; + + + logic [4:0] dec_i0_waddr_wb; + logic dec_i0_wen_wb; + logic [31:0] dec_i0_wdata_wb; + + logic [4:0] dec_i1_waddr_wb; + logic dec_i1_wen_wb; + logic [31:0] dec_i1_wdata_wb; + + logic dec_csr_wen_wb; // csr write enable at wb + logic [11:0] dec_csr_rdaddr_d; // read address for csr + logic [11:0] dec_csr_wraddr_wb; // write address for csr + + logic [31:0] dec_csr_wrdata_wb; // csr write data at wb + + logic [31:0] dec_csr_rddata_d; // csr read data at wb + logic dec_csr_legal_d; // csr indicates legal operation + + logic dec_csr_wen_unq_d; // valid csr with write - for csr legal + logic dec_csr_any_unq_d; // valid csr - for csr legal + logic dec_csr_stall_int_ff; // csr is mie/mstatus + + + + trap_pkt_t dec_tlu_packet_e4; + + logic dec_i0_pc4_d, dec_i1_pc4_d; + logic dec_tlu_presync_d; + logic dec_tlu_postsync_d; + logic dec_tlu_debug_stall; + + logic [31:0] dec_illegal_inst; + + + // GPR Bank ID write signals + logic wen_bank_id; + logic [GPR_BANKS_LOG2-1:0] wr_bank_id; + + logic dec_i0_icaf_d; + logic dec_i1_icaf_d; + logic dec_i0_perr_d; + logic dec_i1_perr_d; + logic dec_i0_sbecc_d; + logic dec_i1_sbecc_d; + logic dec_i0_dbecc_d; + logic dec_i1_dbecc_d; + + logic dec_i0_icaf_f1_d; + + logic dec_i0_decode_d; + logic dec_i1_decode_d; + + logic [3:0] dec_i0_trigger_match_d; + logic [3:0] dec_i1_trigger_match_d; + + + logic dec_debug_fence_d; + + logic dec_nonblock_load_wen; + logic [4:0] dec_nonblock_load_waddr; + logic dec_tlu_flush_pause_wb; + + logic dec_i0_load_e4; + + logic dec_pause_state; + + br_pkt_t dec_i0_brp; + br_pkt_t dec_i1_brp; + + assign 
clk_override = dec_tlu_dec_clk_override; + + + assign dec_dbg_rddata[31:0] = dec_i0_wdata_wb[31:0]; + + dec_ib_ctl instbuff (.* + ); + + dec_decode_ctl decode (.*); + + dec_tlu_ctl tlu (.*); + + // Temp hookups + assign wen_bank_id = '0; + assign wr_bank_id = '0; + + + + dec_gpr_ctl #(.GPR_BANKS(GPR_BANKS), + .GPR_BANKS_LOG2(GPR_BANKS_LOG2)) arf (.*, + // inputs + .raddr0(dec_i0_rs1_d[4:0]), .rden0(dec_i0_rs1_en_d), + .raddr1(dec_i0_rs2_d[4:0]), .rden1(dec_i0_rs2_en_d), + .raddr2(dec_i1_rs1_d[4:0]), .rden2(dec_i1_rs1_en_d), + .raddr3(dec_i1_rs2_d[4:0]), .rden3(dec_i1_rs2_en_d), + + .waddr0(dec_i0_waddr_wb[4:0]), .wen0(dec_i0_wen_wb), .wd0(dec_i0_wdata_wb[31:0]), + .waddr1(dec_i1_waddr_wb[4:0]), .wen1(dec_i1_wen_wb), .wd1(dec_i1_wdata_wb[31:0]), + .waddr2(dec_nonblock_load_waddr[4:0]), .wen2(dec_nonblock_load_wen), .wd2(lsu_nonblock_load_data[31:0]), + + // outputs + .rd0(gpr_i0_rs1_d[31:0]), .rd1(gpr_i0_rs2_d[31:0]), + .rd2(gpr_i1_rs1_d[31:0]), .rd3(gpr_i1_rs2_d[31:0]) + ); + +// Trigger + + dec_trigger dec_trigger (.*); + + + + + +// trace + logic [15:0] dec_i0_cinst_d; + logic [15:0] dec_i1_cinst_d; + logic [31:0] dec_i0_inst_wb1; + logic [31:0] dec_i1_inst_wb1; + logic [31:1] dec_i0_pc_wb1; + logic [31:1] dec_i1_pc_wb1; + logic dec_tlu_i1_valid_wb1, dec_tlu_i0_valid_wb1, dec_tlu_int_valid_wb1; + logic [4:0] dec_tlu_exc_cause_wb1; + logic [31:0] dec_tlu_mtval_wb1; + + logic dec_tlu_i0_exc_valid_wb1, dec_tlu_i1_exc_valid_wb1; + + // also need retires_p==3 + + assign trace_rv_trace_pkt.trace_rv_i_insn_ip = { 32'b0, dec_i1_inst_wb1[31:0], dec_i0_inst_wb1[31:0] }; + assign trace_rv_trace_pkt.trace_rv_i_address_ip = { 32'b0, dec_i1_pc_wb1[31:1], 1'b0, dec_i0_pc_wb1[31:1], 1'b0 }; + + assign trace_rv_trace_pkt.trace_rv_i_valid_ip = {dec_tlu_int_valid_wb1, // always int + dec_tlu_i1_valid_wb1 | dec_tlu_i1_exc_valid_wb1, // not interrupts + dec_tlu_i0_valid_wb1 | dec_tlu_i0_exc_valid_wb1 + }; + assign trace_rv_trace_pkt.trace_rv_i_exception_ip = {dec_tlu_int_valid_wb1, 
dec_tlu_i1_exc_valid_wb1, dec_tlu_i0_exc_valid_wb1}; + assign trace_rv_trace_pkt.trace_rv_i_ecause_ip = dec_tlu_exc_cause_wb1[4:0]; // replicate across ports + assign trace_rv_trace_pkt.trace_rv_i_interrupt_ip = {dec_tlu_int_valid_wb1,2'b0}; + assign trace_rv_trace_pkt.trace_rv_i_tval_ip = dec_tlu_mtval_wb1[31:0]; // replicate across ports + + + +// end trace + +endmodule // dec + diff --git a/design/dec/dec_decode_ctl.sv b/design/dec/dec_decode_ctl.sv new file mode 100644 index 0000000..f5126b3 --- /dev/null +++ b/design/dec/dec_decode_ctl.sv @@ -0,0 +1,2660 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +module dec_decode_ctl + import swerv_types::*; +( + input logic [15:0] dec_i0_cinst_d, // 16b compressed instruction + input logic [15:0] dec_i1_cinst_d, + + output logic [31:0] dec_i0_inst_wb1, // 32b instruction at wb+1 for trace encoder + output logic [31:0] dec_i1_inst_wb1, + + output logic [31:1] dec_i0_pc_wb1, // 31b pc at wb+1 for trace encoder + output logic [31:1] dec_i1_pc_wb1, + + + input logic lsu_nonblock_load_valid_dc3, // valid nonblock load at dc3 + input logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_dc3, // -> corresponding tag + input logic lsu_nonblock_load_inv_dc5, // invalidate request for nonblock load dc5 + input logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_dc5, // -> corresponding tag + input logic lsu_nonblock_load_data_valid, // valid nonblock load data back + input logic lsu_nonblock_load_data_error, // nonblock load bus error + input logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // -> corresponding tag + + input logic [3:0] dec_i0_trigger_match_d, // i0 decode trigger matches + input logic [3:0] dec_i1_trigger_match_d, // i1 decode trigger matches + + input logic dec_tlu_wr_pause_wb, // pause instruction at wb + input logic dec_tlu_pipelining_disable, // pipeline disable - presync, i0 decode only + input logic dec_tlu_dual_issue_disable, // i0 decode only + + input logic dec_tlu_sec_alu_disable, // no alu ops sent to secondary alus + + input logic [3:0] lsu_trigger_match_dc3, // lsu trigger matches + + input logic lsu_pmu_misaligned_dc3, // perf mon: load/store misalign + input logic dec_tlu_debug_stall, // debug stall decode + input logic dec_tlu_flush_leak_one_wb, // leak1 instruction + + input logic dec_debug_fence_d, // debug fence instruction + + input logic [1:0] dbg_cmd_wrdata, // disambiguate fence, fence_i + + input logic dec_i0_icaf_d, // icache access fault + input logic dec_i1_icaf_d, + input logic dec_i0_icaf_f1_d, // i0 instruction access fault at decode for f1 fetch 
group + input logic dec_i0_perr_d, // icache parity error + input logic dec_i1_perr_d, + input logic dec_i0_sbecc_d, // icache/iccm single-bit error + input logic dec_i1_sbecc_d, + input logic dec_i0_dbecc_d, // icache/iccm double-bit error + input logic dec_i1_dbecc_d, + + input br_pkt_t dec_i0_brp, // branch packet + input br_pkt_t dec_i1_brp, + + input logic [15:0] ifu_illegal_inst, // 16b illegal inst from aligner + + input logic [31:1] dec_i0_pc_d, // pc + + input logic lsu_freeze_dc3, // freeze pipe: decode -> dc3 + input logic lsu_halt_idle_any, // lsu idle: if fence instr & ~lsu_halt_idle_any then stall decode + + input logic lsu_load_stall_any, // stall any store at load + input logic lsu_store_stall_any, // stall any store at decode + input logic dma_dccm_stall_any, // stall any load/store at decode + + input logic exu_div_finish, // div finish this cycle + input logic exu_div_stall, // div executing: stall decode + input logic [31:0] exu_div_result, // div result + + input logic dec_tlu_i0_kill_writeb_wb, // I0 is flushed, don't writeback any results to arch state + input logic dec_tlu_i1_kill_writeb_wb, // I1 is flushed, don't writeback any results to arch state + + input logic dec_tlu_flush_lower_wb, // trap lower flush + input logic dec_tlu_flush_pause_wb, // don't clear pause state on initial lower flush + input logic dec_tlu_presync_d, // CSR read needs to be presync'd + input logic dec_tlu_postsync_d, // CSR ops that need to be postsync'd + + input logic [31:0] exu_mul_result_e3, // multiply result + + input logic dec_i0_pc4_d, // inst is 4B inst else 2B + input logic dec_i1_pc4_d, + + input logic [31:0] dec_csr_rddata_d, // csr read data at wb + input logic dec_csr_legal_d, // csr indicates legal operation + + input logic [31:0] exu_csr_rs1_e1, // rs1 for csr instr + + input logic [31:0] lsu_result_dc3, // load result + input logic [31:0] lsu_result_corr_dc4, // corrected load result + + input logic exu_i0_flush_final, // lower flush or i0 flush 
at e2 + input logic exu_i1_flush_final, // lower flush or i1 flush at e2 + + input logic [31:1] exu_i0_pc_e1, // pcs at e1 + input logic [31:1] exu_i1_pc_e1, + + input logic [31:0] dec_i0_instr_d, // inst at decode + input logic [31:0] dec_i1_instr_d, + + input logic dec_ib0_valid_d, // inst valid at decode + input logic dec_ib1_valid_d, + + input logic [31:0] exu_i0_result_e1, // from primary alu's + input logic [31:0] exu_i1_result_e1, + + input logic [31:0] exu_i0_result_e4, // from secondary alu's + input logic [31:0] exu_i1_result_e4, + + input logic clk, // for rvdffe's + input logic active_clk, // clk except for halt / pause + input logic free_clk, // free running clock + + input logic clk_override, // test stuff + input logic rst_l, + + + output logic dec_i0_rs1_en_d, // rs1 enable at decode + output logic dec_i0_rs2_en_d, + + output logic [4:0] dec_i0_rs1_d, // rs1 logical source + output logic [4:0] dec_i0_rs2_d, + + + + output logic [31:0] dec_i0_immed_d, // 32b immediate data decode + + output logic dec_i1_rs1_en_d, + output logic dec_i1_rs2_en_d, + + output logic [4:0] dec_i1_rs1_d, + output logic [4:0] dec_i1_rs2_d, + + + + output logic [31:0] dec_i1_immed_d, + + output logic [12:1] dec_i0_br_immed_d, // 12b branch immediate + output logic [12:1] dec_i1_br_immed_d, + + output alu_pkt_t i0_ap, // alu packets + output alu_pkt_t i1_ap, + + output logic dec_i0_decode_d, // i0 decode + output logic dec_i1_decode_d, + + output logic dec_ib0_valid_eff_d, // effective valid taking decode into account + output logic dec_ib1_valid_eff_d, + + output logic dec_i0_alu_decode_d, // decode to primary alu's + output logic dec_i1_alu_decode_d, + + + output logic [31:0] i0_rs1_bypass_data_d, // i0 rs1 bypass data + output logic [31:0] i0_rs2_bypass_data_d, // i0 rs2 bypass data + output logic [31:0] i1_rs1_bypass_data_d, + output logic [31:0] i1_rs2_bypass_data_d, + + + output logic [4:0] dec_i0_waddr_wb, // i0 logical source to write to gpr's + output logic 
dec_i0_wen_wb, // i0 write enable + output logic [31:0] dec_i0_wdata_wb, // i0 write data + + output logic [4:0] dec_i1_waddr_wb, + output logic dec_i1_wen_wb, + output logic [31:0] dec_i1_wdata_wb, + + output logic dec_i0_select_pc_d, // i0 select pc for rs1 - branches + output logic dec_i1_select_pc_d, + + output logic dec_i0_rs1_bypass_en_d, // i0 rs1 bypass enable + output logic dec_i0_rs2_bypass_en_d, // i0 rs2 bypass enable + output logic dec_i1_rs1_bypass_en_d, + output logic dec_i1_rs2_bypass_en_d, + + output lsu_pkt_t lsu_p, // load/store packet + + output mul_pkt_t mul_p, // multiply packet + + output div_pkt_t div_p, // divide packet + + output logic [11:0] dec_lsu_offset_d, + output logic dec_i0_lsu_d, // chose which gpr value to use + output logic dec_i1_lsu_d, + output logic dec_i0_mul_d, // chose which gpr value to use + output logic dec_i1_mul_d, + output logic dec_i0_div_d, // chose which gpr value to use + output logic dec_i1_div_d, + + // review + output logic flush_final_e3, // flush final at e3: i0 or i1 + output logic i0_flush_final_e3, // i0 flush final at e3 + + output logic dec_csr_ren_d, // valid csr decode + output logic dec_csr_wen_unq_d, // valid csr with write - for csr legal + output logic dec_csr_any_unq_d, // valid csr - for csr legal + output logic dec_csr_wen_wb, // csr write enable at wb + output logic [11:0] dec_csr_rdaddr_d, // read address for csr + output logic [11:0] dec_csr_wraddr_wb, // write address for csr + output logic [31:0] dec_csr_wrdata_wb, // csr write data at wb + output logic dec_csr_stall_int_ff, // csr is mie/mstatus + + output dec_tlu_i0_valid_e4, // i0 valid inst at e4 + output dec_tlu_i1_valid_e4, + + output trap_pkt_t dec_tlu_packet_e4, // trap packet + + output logic dec_fence_pending, // tell TLU to stall DMA + output logic [31:1] dec_tlu_i0_pc_e4, // i0 trap pc + output logic [31:1] dec_tlu_i1_pc_e4, + + output logic [31:0] dec_illegal_inst, // illegal inst + output logic dec_i1_valid_e1, // i1 valid e1 
+ output logic dec_div_decode_e4, // i0 div e4 + output logic [31:1] pred_correct_npc_e2, // npc e2 if the prediction is correct + output logic dec_i0_rs1_bypass_en_e3, // i0 rs1 bypass enables e3 + output logic dec_i0_rs2_bypass_en_e3, // i0 rs2 bypass enables e3 + output logic dec_i1_rs1_bypass_en_e3, + output logic dec_i1_rs2_bypass_en_e3, + output logic [31:0] i0_rs1_bypass_data_e3, // i0 rs1 bypass data e3 + output logic [31:0] i0_rs2_bypass_data_e3, // i0 rs2 bypass data e3 + output logic [31:0] i1_rs1_bypass_data_e3, + output logic [31:0] i1_rs2_bypass_data_e3, + output logic dec_i0_sec_decode_e3, // i0 secondary alu e3 + output logic dec_i1_sec_decode_e3, // i1 secondary alu e3 + output logic [31:1] dec_i0_pc_e3, // i0 pc e3 + output logic [31:1] dec_i1_pc_e3, // i1 pc e3 + + output logic dec_i0_rs1_bypass_en_e2, // i0 rs1 bypass enable e2 + output logic dec_i0_rs2_bypass_en_e2, // i0 rs2 bypass enable e2 + output logic dec_i1_rs1_bypass_en_e2, + output logic dec_i1_rs2_bypass_en_e2, + output logic [31:0] i0_rs1_bypass_data_e2, // i0 rs1 bypass data e2 + output logic [31:0] i0_rs2_bypass_data_e2, // i0 rs2 bypass data e2 + output logic [31:0] i1_rs1_bypass_data_e2, + output logic [31:0] i1_rs2_bypass_data_e2, + + output predict_pkt_t i0_predict_p_d, // i0 predict packet decode + output predict_pkt_t i1_predict_p_d, + + output logic dec_i0_lsu_decode_d, // i0 lsu decode + + output logic [31:0] i0_result_e4_eff, // i0 e4 result taking freeze into account + output logic [31:0] i1_result_e4_eff, + output logic [31:0] i0_result_e2, // i0 result e2 + + output logic [4:2] dec_i0_data_en, // clock-gating logic + output logic [4:1] dec_i0_ctl_en, + output logic [4:2] dec_i1_data_en, + output logic [4:1] dec_i1_ctl_en, + + output logic [1:0] dec_pmu_instr_decoded, // number of instructions decoded this cycle, encoded + output logic dec_pmu_decode_stall, // decode is stalled + output logic dec_pmu_presync_stall, // decode has presync stall + output logic 
dec_pmu_postsync_stall, // decode has postsync stall + + output logic dec_nonblock_load_wen, // write enable for nonblock load + output logic [4:0] dec_nonblock_load_waddr, // logical write addr for nonblock load + output logic dec_nonblock_load_freeze_dc2, // lsu must freeze nonblock load due to younger dependency in pipe + output logic dec_pause_state, // core in pause state + output logic dec_pause_state_cg, // pause state for clock-gating + + output logic dec_i0_load_e4, // pipe down if load is i0 or not in case of lsu_freeze + + input logic scan_mode + ); + + + + + dec_pkt_t i0_dp_raw, i0_dp; + dec_pkt_t i1_dp_raw, i1_dp; + + + + logic [31:0] i0, i1; + logic i0_valid_d, i1_valid_d; + + logic [31:0] i0_result_e1, i1_result_e1; + logic [31:0] i1_result_e2; + logic [31:0] i0_result_e3, i1_result_e3; + logic [31:0] i0_result_e4, i1_result_e4; + logic [31:0] i0_result_wb, i1_result_wb; + + logic [31:1] i0_pc_e1, i1_pc_e1; + logic [31:1] i0_pc_e2, i1_pc_e2; + logic [31:1] i0_pc_e3, i1_pc_e3; + logic [31:1] i0_pc_e4, i1_pc_e4; + + logic [9:0] i0_rs1bypass, i0_rs2bypass; + logic [9:0] i1_rs1bypass, i1_rs2bypass; + + logic i0_jalimm20, i1_jalimm20; + logic i0_uiimm20, i1_uiimm20; + + //logic flush_final_e3; + + logic lsu_decode_d; + logic [31:0] i0_immed_d; + logic i0_presync; + logic i0_postsync; + + logic postsync_stall; + logic ps_stall; + + logic prior_inflight, prior_inflight_e1e4, prior_inflight_wb; + + logic csr_clr_d, csr_set_d, csr_write_d; + + + logic csr_clr_e1,csr_set_e1,csr_write_e1,csr_imm_e1; + logic [31:0] csr_mask_e1; + logic [31:0] write_csr_data_e1; + logic [31:0] write_csr_data_in; + logic [31:0] write_csr_data; + logic csr_data_wen; + + logic [4:0] csrimm_e1; + + logic [31:0] csr_rddata_e1; + + logic flush_lower_wb; + + logic i1_load_block_d; + logic i1_mul_block_d; + logic i1_load2_block_d; + logic i1_mul2_block_d; + logic mul_decode_d; + logic div_decode_d; + logic [31:1] div_pc; + logic div_stall, div_stall_ff; + logic [3:0] div_trigger; + + 
logic i0_legal; + logic shift_illegal; + logic illegal_inst_en; + logic [31:0] illegal_inst; + logic illegal_lockout_in, illegal_lockout; + logic i0_legal_decode_d; + + logic i1_flush_final_e3; + + logic [31:0] i0_result_e3_final, i1_result_e3_final; + logic [31:0] i0_result_wb_raw, i1_result_wb_raw; + logic [12:1] last_br_immed_d; + logic i1_depend_i0_d; + logic i0_rs1_depend_i0_e1, i0_rs1_depend_i0_e2, i0_rs1_depend_i0_e3, i0_rs1_depend_i0_e4, i0_rs1_depend_i0_wb; + logic i0_rs1_depend_i1_e1, i0_rs1_depend_i1_e2, i0_rs1_depend_i1_e3, i0_rs1_depend_i1_e4, i0_rs1_depend_i1_wb; + logic i0_rs2_depend_i0_e1, i0_rs2_depend_i0_e2, i0_rs2_depend_i0_e3, i0_rs2_depend_i0_e4, i0_rs2_depend_i0_wb; + logic i0_rs2_depend_i1_e1, i0_rs2_depend_i1_e2, i0_rs2_depend_i1_e3, i0_rs2_depend_i1_e4, i0_rs2_depend_i1_wb; + logic i1_rs1_depend_i0_e1, i1_rs1_depend_i0_e2, i1_rs1_depend_i0_e3, i1_rs1_depend_i0_e4, i1_rs1_depend_i0_wb; + logic i1_rs1_depend_i1_e1, i1_rs1_depend_i1_e2, i1_rs1_depend_i1_e3, i1_rs1_depend_i1_e4, i1_rs1_depend_i1_wb; + logic i1_rs2_depend_i0_e1, i1_rs2_depend_i0_e2, i1_rs2_depend_i0_e3, i1_rs2_depend_i0_e4, i1_rs2_depend_i0_wb; + logic i1_rs2_depend_i1_e1, i1_rs2_depend_i1_e2, i1_rs2_depend_i1_e3, i1_rs2_depend_i1_e4, i1_rs2_depend_i1_wb; + logic i1_rs1_depend_i0_d, i1_rs2_depend_i0_d; + + logic i0_secondary_d, i1_secondary_d; + logic i0_secondary_block_d, i1_secondary_block_d; + logic non_block_case_d; + logic i0_div_decode_d; + logic [31:0] i0_result_e4_final, i1_result_e4_final; + logic i0_load_block_d; + logic i0_mul_block_d; + logic [3:0] i0_rs1_depth_d, i0_rs2_depth_d; + logic [3:0] i1_rs1_depth_d, i1_rs2_depth_d; + + logic i0_rs1_match_e1_e2, i0_rs1_match_e1_e3; + logic i0_rs2_match_e1_e2, i0_rs2_match_e1_e3; + logic i1_rs1_match_e1_e2, i1_rs1_match_e1_e3; + logic i1_rs2_match_e1_e2, i1_rs2_match_e1_e3; + + logic i0_load_stall_d, i1_load_stall_d; + logic i0_store_stall_d, i1_store_stall_d; + + logic i0_predict_nt, i0_predict_t; + logic i1_predict_nt, 
i1_predict_t; + + logic i0_notbr_error, i0_br_toffset_error; + logic i1_notbr_error, i1_br_toffset_error; + logic i0_ret_error, i1_ret_error; + logic i0_br_error, i1_br_error; + logic i0_br_error_all, i1_br_error_all; + logic [11:0] i0_br_offset, i1_br_offset; + + logic freeze; + + logic [20:1] i0_pcall_imm, i1_pcall_imm; // predicted jal's + logic i0_pcall_12b_offset, i1_pcall_12b_offset; + logic i0_pcall_raw, i1_pcall_raw; + logic i0_pcall_case, i1_pcall_case; + logic i0_pcall, i1_pcall; + + logic i0_pja_raw, i1_pja_raw; + logic i0_pja_case, i1_pja_case; + logic i0_pja, i1_pja; + + logic i0_pret_case, i1_pret_case; + logic i0_pret_raw, i0_pret; + logic i1_pret_raw, i1_pret; + + logic i0_jal, i1_jal; // jal's that are not predicted + + + logic i0_predict_br, i1_predict_br; + + logic freeze_prior1, freeze_prior2; + + logic [31:0] i0_result_e4_freeze, i1_result_e4_freeze; + logic [31:0] i0_result_wb_freeze, i1_result_wb_freeze; + logic [31:0] i1_result_wb_eff, i0_result_wb_eff; + logic [2:0] i1rs1_intra, i1rs2_intra; + logic i1_rs1_intra_bypass, i1_rs2_intra_bypass; + logic store_data_bypass_c1, store_data_bypass_c2; + logic [1:0] store_data_bypass_e4_c1, store_data_bypass_e4_c2, store_data_bypass_e4_c3; + logic store_data_bypass_i0_e2_c2; + + class_pkt_t i0_rs1_class_d, i0_rs2_class_d; + class_pkt_t i1_rs1_class_d, i1_rs2_class_d; + + class_pkt_t i0_dc, i0_e1c, i0_e2c, i0_e3c, i0_e4c, i0_wbc; + class_pkt_t i1_dc, i1_e1c, i1_e2c, i1_e3c, i1_e4c, i1_wbc; + + + logic i0_rs1_match_e1, i0_rs1_match_e2, i0_rs1_match_e3; + logic i1_rs1_match_e1, i1_rs1_match_e2, i1_rs1_match_e3; + logic i0_rs2_match_e1, i0_rs2_match_e2, i0_rs2_match_e3; + logic i1_rs2_match_e1, i1_rs2_match_e2, i1_rs2_match_e3; + + logic i0_secondary_stall_d; + + logic i0_ap_pc2, i0_ap_pc4; + logic i1_ap_pc2, i1_ap_pc4; + + logic div_wen_wb; + logic i0_rd_en_d; + logic i1_rd_en_d; + logic [4:0] i1_rd_d; + logic [4:0] i0_rd_d; + + logic load_ldst_bypass_c1; + logic load_mul_rs1_bypass_e1; + logic 
load_mul_rs2_bypass_e1; + + logic leak1_i0_stall_in, leak1_i0_stall; + logic leak1_i1_stall_in, leak1_i1_stall; + logic leak1_mode; + + logic i0_csr_write_only_d; + + logic prior_inflight_e1e3, prior_inflight_eff; + logic any_csr_d; + + logic prior_csr_write; + + logic [5:0] i0_pipe_en; + logic i0_e1_ctl_en, i0_e2_ctl_en, i0_e3_ctl_en, i0_e4_ctl_en, i0_wb_ctl_en; + logic i0_e1_data_en, i0_e2_data_en, i0_e3_data_en, i0_e4_data_en, i0_wb_data_en, i0_wb1_data_en; + + logic [5:0] i1_pipe_en; + logic i1_e1_ctl_en, i1_e2_ctl_en, i1_e3_ctl_en, i1_e4_ctl_en, i1_wb_ctl_en; + logic i1_e1_data_en, i1_e2_data_en, i1_e3_data_en, i1_e4_data_en, i1_wb_data_en, i1_wb1_data_en; + + logic debug_fence_i; + logic debug_fence; + + logic i0_csr_write; + logic presync_stall; + + logic i0_instr_error; + logic i0_icaf_d; + logic i1_icaf_d; + + logic i0_not_alu_eff, i1_not_alu_eff; + + logic disable_secondary; + + logic clear_pause; + logic pause_state_in, pause_state; + logic pause_stall; + + logic [31:1] i1_pc_wb; + + logic i0_brp_valid; + logic nonblock_load_cancel; + logic lsu_idle; + logic csr_read_e1; + logic i0_block_d; + logic i1_block_d; + logic ps_stall_in; + + logic freeze_after_unfreeze1; + logic freeze_after_unfreeze2; + logic unfreeze_cycle1; + logic unfreeze_cycle2; + + logic tlu_wr_pause_wb1, tlu_wr_pause_wb2; + + assign freeze = lsu_freeze_dc3; + +`ifdef RV_NO_SECONDARY_ALU + assign disable_secondary = 1; +`else + assign disable_secondary = dec_tlu_sec_alu_disable; +`endif + + +// branch prediction + + + // in leak1_mode, ignore any predictions for i0, treat branch as if we haven't seen it before + // in leak1 mode, also ignore branch errors for i0 + assign i0_brp_valid = dec_i0_brp.valid & ~leak1_mode; + + assign i0_predict_p_d.misp = '0; + assign i0_predict_p_d.ataken = '0; + assign i0_predict_p_d.boffset = '0; + + assign i0_predict_p_d.pcall = i0_pcall; // dont mark as pcall if branch error + assign i0_predict_p_d.pja = i0_pja; + assign i0_predict_p_d.pret = i0_pret; + 
assign i0_predict_p_d.prett[31:1] = dec_i0_brp.prett[31:1]; + assign i0_predict_p_d.pc4 = dec_i0_pc4_d; + assign i0_predict_p_d.hist[1:0] = dec_i0_brp.hist[1:0]; + assign i0_predict_p_d.valid = i0_brp_valid & i0_legal_decode_d; + assign i0_notbr_error = i0_brp_valid & ~(i0_dp_raw.condbr | i0_pcall_raw | i0_pja_raw | i0_pret_raw); + + // no toffset error for a pret + assign i0_br_toffset_error = i0_brp_valid & dec_i0_brp.hist[1] & (dec_i0_brp.toffset[11:0] != i0_br_offset[11:0]) & !i0_pret_raw; + assign i0_ret_error = i0_brp_valid & dec_i0_brp.ret & ~i0_pret_raw; + assign i0_br_error = dec_i0_brp.br_error | i0_notbr_error | i0_br_toffset_error | i0_ret_error; + assign i0_predict_p_d.br_error = i0_br_error & i0_legal_decode_d & ~leak1_mode; + assign i0_predict_p_d.br_start_error = dec_i0_brp.br_start_error & i0_legal_decode_d & ~leak1_mode; + assign i0_predict_p_d.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = dec_i0_brp.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + assign i0_predict_p_d.bank[1:0] = dec_i0_brp.bank[1:0]; + assign i0_predict_p_d.btag[`RV_BTB_BTAG_SIZE-1:0] = dec_i0_brp.btag[`RV_BTB_BTAG_SIZE-1:0]; + assign i0_br_error_all = (i0_br_error | dec_i0_brp.br_start_error) & ~leak1_mode; + assign i0_predict_p_d.toffset[11:0] = i0_br_offset[11:0]; + assign i0_predict_p_d.fghr[`RV_BHT_GHR_RANGE] = dec_i0_brp.fghr[`RV_BHT_GHR_RANGE]; + assign i0_predict_p_d.way = dec_i0_brp.way; + + + assign i1_predict_p_d.misp = '0; + assign i1_predict_p_d.ataken = '0; + assign i1_predict_p_d.boffset = '0; + + assign i1_predict_p_d.pcall = i1_pcall; + assign i1_predict_p_d.pja = i1_pja; + assign i1_predict_p_d.pret = i1_pret; + assign i1_predict_p_d.prett[31:1] = dec_i1_brp.prett[31:1]; + assign i1_predict_p_d.pc4 = dec_i1_pc4_d; + assign i1_predict_p_d.hist[1:0] = dec_i1_brp.hist[1:0]; + assign i1_predict_p_d.valid = dec_i1_brp.valid & dec_i1_decode_d; + assign i1_notbr_error = dec_i1_brp.valid & ~(i1_dp_raw.condbr | i1_pcall_raw | i1_pja_raw | i1_pret_raw); + + + assign 
i1_br_toffset_error = dec_i1_brp.valid & dec_i1_brp.hist[1] & (dec_i1_brp.toffset[11:0] != i1_br_offset[11:0]) & !i1_pret_raw; + assign i1_ret_error = dec_i1_brp.valid & dec_i1_brp.ret & ~i1_pret_raw; + assign i1_br_error = dec_i1_brp.br_error | i1_notbr_error | i1_br_toffset_error | i1_ret_error; + assign i1_predict_p_d.br_error = i1_br_error & dec_i1_decode_d; + assign i1_predict_p_d.br_start_error = dec_i1_brp.br_start_error & dec_i1_decode_d; + assign i1_predict_p_d.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = dec_i1_brp.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + assign i1_predict_p_d.bank[1:0] = dec_i1_brp.bank[1:0]; + assign i1_predict_p_d.btag[`RV_BTB_BTAG_SIZE-1:0] = dec_i1_brp.btag[`RV_BTB_BTAG_SIZE-1:0]; + assign i1_br_error_all = (i1_br_error | dec_i1_brp.br_start_error); + assign i1_predict_p_d.toffset[11:0] = i1_br_offset[11:0]; + assign i1_predict_p_d.fghr[`RV_BHT_GHR_RANGE] = dec_i1_brp.fghr[`RV_BHT_GHR_RANGE]; + assign i1_predict_p_d.way = dec_i1_brp.way; + + // end + + // on br error turn anything into a nop + // on i0 instruction fetch access fault turn anything into a nop + // nop => alu rs1 imm12 rd lor + + assign i0_icaf_d = dec_i0_icaf_d | dec_i0_dbecc_d; + assign i1_icaf_d = dec_i1_icaf_d | dec_i1_dbecc_d; + + assign i0_instr_error = i0_icaf_d | dec_i0_perr_d | dec_i0_sbecc_d; + + always_comb begin + i0_dp = i0_dp_raw; + if (i0_br_error_all | i0_instr_error) begin + i0_dp = '0; + i0_dp.alu = 1'b1; + i0_dp.rs1 = 1'b1; + i0_dp.rs2 = 1'b1; + i0_dp.lor = 1'b1; + i0_dp.legal = 1'b1; + i0_dp.postsync = 1'b1; + end + + i1_dp = i1_dp_raw; + if (i1_br_error_all) begin + i1_dp = '0; + i1_dp.alu = 1'b1; + i1_dp.rs1 = 1'b1; + i1_dp.rs2 = 1'b1; + i1_dp.lor = 1'b1; + i1_dp.legal = 1'b1; + i1_dp.postsync = 1'b1; + end + + end + + assign flush_lower_wb = dec_tlu_flush_lower_wb; + + assign i0[31:0] = dec_i0_instr_d[31:0]; + + assign i1[31:0] = dec_i1_instr_d[31:0]; + + assign dec_i0_select_pc_d = i0_dp.pc; + assign dec_i1_select_pc_d = i1_dp.pc; + + // branches 
that can be predicted + + assign i0_predict_br = i0_dp.condbr | i0_pcall | i0_pja | i0_pret; + assign i1_predict_br = i1_dp.condbr | i1_pcall | i1_pja | i1_pret; + + assign i0_predict_nt = ~(dec_i0_brp.hist[1] & i0_brp_valid) & i0_predict_br; + assign i0_predict_t = (dec_i0_brp.hist[1] & i0_brp_valid) & i0_predict_br; + + assign i0_ap.valid = (i0_dc.sec | i0_dc.alu | i0_dp.alu ); + assign i0_ap.add = i0_dp.add; + assign i0_ap.sub = i0_dp.sub; + assign i0_ap.land = i0_dp.land; + assign i0_ap.lor = i0_dp.lor; + assign i0_ap.lxor = i0_dp.lxor; + assign i0_ap.sll = i0_dp.sll; + assign i0_ap.srl = i0_dp.srl; + assign i0_ap.sra = i0_dp.sra; + assign i0_ap.slt = i0_dp.slt; + assign i0_ap.unsign = i0_dp.unsign; + assign i0_ap.beq = i0_dp.beq; + assign i0_ap.bne = i0_dp.bne; + assign i0_ap.blt = i0_dp.blt; + assign i0_ap.bge = i0_dp.bge; + + + + assign i0_ap.csr_write = i0_csr_write_only_d; + assign i0_ap.csr_imm = i0_dp.csr_imm; + + + assign i0_ap.jal = i0_jal; + + + assign i0_ap_pc2 = ~dec_i0_pc4_d; + assign i0_ap_pc4 = dec_i0_pc4_d; + + assign i0_ap.predict_nt = i0_predict_nt; + assign i0_ap.predict_t = i0_predict_t; + + assign i1_predict_nt = ~(dec_i1_brp.hist[1] & dec_i1_brp.valid) & i1_predict_br; + assign i1_predict_t = (dec_i1_brp.hist[1] & dec_i1_brp.valid) & i1_predict_br; + + assign i1_ap.valid = (i1_dc.sec | i1_dc.alu | i1_dp.alu); + assign i1_ap.add = i1_dp.add; + assign i1_ap.sub = i1_dp.sub; + assign i1_ap.land = i1_dp.land; + assign i1_ap.lor = i1_dp.lor; + assign i1_ap.lxor = i1_dp.lxor; + assign i1_ap.sll = i1_dp.sll; + assign i1_ap.srl = i1_dp.srl; + assign i1_ap.sra = i1_dp.sra; + assign i1_ap.slt = i1_dp.slt; + assign i1_ap.unsign = i1_dp.unsign; + assign i1_ap.beq = i1_dp.beq; + assign i1_ap.bne = i1_dp.bne; + assign i1_ap.blt = i1_dp.blt; + assign i1_ap.bge = i1_dp.bge; + + assign i1_ap.csr_write = 1'b0; + assign i1_ap.csr_imm = 1'b0; + + assign i1_ap.jal = i1_jal; + + assign i1_ap_pc2 = ~dec_i1_pc4_d; + assign i1_ap_pc4 = dec_i1_pc4_d; + + assign 
i1_ap.predict_nt = i1_predict_nt; + assign i1_ap.predict_t = i1_predict_t; + + localparam NBLOAD_SIZE = `RV_LSU_NUM_NBLOAD; + localparam NBLOAD_SIZE_MSB = `RV_LSU_NUM_NBLOAD-1; + localparam NBLOAD_TAG_MSB = `RV_LSU_NUM_NBLOAD_WIDTH-1; + +// non block load cam logic + + logic cam_write, cam_inv_reset, cam_data_reset; + logic [NBLOAD_TAG_MSB:0] cam_write_tag, cam_inv_reset_tag, cam_data_reset_tag; + logic [NBLOAD_SIZE_MSB:0] cam_wen; + + logic [NBLOAD_TAG_MSB:0] load_data_tag; + logic [NBLOAD_SIZE_MSB:0] nonblock_load_write; + + load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam; + load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam_in; + + logic [4:0] nonblock_load_rd; + logic i1_nonblock_load_stall, i0_nonblock_load_stall; + logic i1_nonblock_boundary_stall, i0_nonblock_boundary_stall; + logic i0_depend_load_e1_d, i0_depend_load_e2_d; + logic i1_depend_load_e1_d, i1_depend_load_e2_d; + logic depend_load_e1_d, depend_load_e2_d, depend_load_same_cycle_d; + logic depend_load_e2_e1, depend_load_same_cycle_e1; + logic depend_load_same_cycle_e2; + + logic nonblock_load_valid_dc4, nonblock_load_valid_wb; + logic i0_load_kill_wen, i1_load_kill_wen; + + logic found; + always_comb begin + found = 0; + cam_wen[NBLOAD_SIZE_MSB:0] = '0; + for (int i=0; i used for clockgating for wb stage ctl logic + rvdff #(1) divwbff (.*, .clk(active_clk), .din(exu_div_finish), .dout(div_wen_wb)); + + + assign i0_result_e1[31:0] = exu_i0_result_e1[31:0]; + assign i1_result_e1[31:0] = exu_i1_result_e1[31:0]; + + // pipe the results down the pipe + rvdffe #(32) i0e2resultff (.*, .en(i0_e2_data_en), .din(i0_result_e1[31:0]), .dout(i0_result_e2[31:0])); + rvdffe #(32) i1e2resultff (.*, .en(i1_e2_data_en), .din(i1_result_e1[31:0]), .dout(i1_result_e2[31:0])); + + rvdffe #(32) i0e3resultff (.*, .en(i0_e3_data_en), .din(i0_result_e2[31:0]), .dout(i0_result_e3[31:0])); + rvdffe #(32) i1e3resultff (.*, .en(i1_e3_data_en), .din(i1_result_e2[31:0]), .dout(i1_result_e3[31:0])); + + + + assign i0_result_e3_final[31:0] = (e3d.i0v & 
e3d.i0load) ? lsu_result_dc3[31:0] : (e3d.i0v & e3d.i0mul) ? exu_mul_result_e3[31:0] : i0_result_e3[31:0]; + + assign i1_result_e3_final[31:0] = (e3d.i1v & e3d.i1load) ? lsu_result_dc3[31:0] : (e3d.i1v & e3d.i1mul) ? exu_mul_result_e3[31:0] : i1_result_e3[31:0]; + + + + rvdffe #(32) i0e4resultff (.*, .en(i0_e4_data_en), .din(i0_result_e3_final[31:0]), .dout(i0_result_e4[31:0])); + rvdffe #(32) i1e4resultff (.*, .en(i1_e4_data_en), .din(i1_result_e3_final[31:0]), .dout(i1_result_e4[31:0])); + + assign i0_result_e4_final[31:0] = + ( e4d.i0secondary) ? exu_i0_result_e4[31:0] : (e4d.i0v & e4d.i0load) ? lsu_result_corr_dc4[31:0] : i0_result_e4[31:0]; + + assign i1_result_e4_final[31:0] = + (e4d.i1v & e4d.i1secondary) ? exu_i1_result_e4[31:0] : (e4d.i1v & e4d.i1load) ? lsu_result_corr_dc4[31:0] :i1_result_e4[31:0]; + + rvdffe #(32) i0wbresultff (.*, .en(i0_wb_data_en), .din(i0_result_e4_final[31:0]), .dout(i0_result_wb_raw[31:0])); + rvdffe #(32) i1wbresultff (.*, .en(i1_wb_data_en), .din(i1_result_e4_final[31:0]), .dout(i1_result_wb_raw[31:0])); + + assign i0_result_wb[31:0] = (div_wen_wb) ? exu_div_result[31:0] : i0_result_wb_raw[31:0]; + + assign i1_result_wb[31:0] = i1_result_wb_raw[31:0]; + + logic [12:1] last_br_immed_e1, last_br_immed_e2; + + rvdffe #(12) e1brpcff (.*, .en(i0_e1_data_en), .din(last_br_immed_d[12:1] ), .dout(last_br_immed_e1[12:1])); + rvdffe #(12) e2brpcff (.*, .en(i0_e2_data_en), .din(last_br_immed_e1[12:1]), .dout(last_br_immed_e2[12:1])); + + + logic [31:0] i0_inst_d, i1_inst_d; + logic [31:0] i0_inst_e1, i1_inst_e1; + logic [31:0] i0_inst_e2, i1_inst_e2; + logic [31:0] i0_inst_e3, i1_inst_e3; + logic [31:0] i0_inst_e4, i1_inst_e4; + logic [31:0] i0_inst_wb, i1_inst_wb; + logic [31:0] i0_inst_wb1,i1_inst_wb1; + + logic [31:0] div_inst; + +// trace stuff + + rvdffe #(32) divinstff (.*, .en(i0_div_decode_d), .din(i0_inst_d[31:0]), .dout(div_inst[31:0])); + + assign i0_inst_d[31:0] = (dec_i0_pc4_d) ? 
i0[31:0] : {16'b0, dec_i0_cinst_d[15:0] }; + + rvdffe #(32) i0e1instff (.*, .en(i0_e1_data_en), .din(i0_inst_d[31:0]), .dout(i0_inst_e1[31:0])); + rvdffe #(32) i0e2instff (.*, .en(i0_e2_data_en), .din(i0_inst_e1[31:0]), .dout(i0_inst_e2[31:0])); + rvdffe #(32) i0e3instff (.*, .en(i0_e3_data_en), .din(i0_inst_e2[31:0]), .dout(i0_inst_e3[31:0])); + rvdffe #(32) i0e4instff (.*, .en(i0_e4_data_en), .din(i0_inst_e3[31:0]), .dout(i0_inst_e4[31:0])); + rvdffe #(32) i0wbinstff (.*, .en(i0_wb_data_en | exu_div_finish), .din( (exu_div_finish) ? div_inst[31:0] : i0_inst_e4[31:0]), .dout(i0_inst_wb[31:0])); + rvdffe #(32) i0wb1instff (.*, .en(i0_wb1_data_en | div_wen_wb), .din(i0_inst_wb[31:0]), .dout(i0_inst_wb1[31:0])); + + assign i1_inst_d[31:0] = (dec_i1_pc4_d) ? i1[31:0] : {16'b0, dec_i1_cinst_d[15:0] }; + + rvdffe #(32) i1e1instff (.*, .en(i1_e1_data_en), .din(i1_inst_d[31:0]), .dout(i1_inst_e1[31:0])); + rvdffe #(32) i1e2instff (.*, .en(i1_e2_data_en), .din(i1_inst_e1[31:0]), .dout(i1_inst_e2[31:0])); + rvdffe #(32) i1e3instff (.*, .en(i1_e3_data_en), .din(i1_inst_e2[31:0]), .dout(i1_inst_e3[31:0])); + rvdffe #(32) i1e4instff (.*, .en(i1_e4_data_en), .din(i1_inst_e3[31:0]), .dout(i1_inst_e4[31:0])); + rvdffe #(32) i1wbinstff (.*, .en(i1_wb_data_en), .din(i1_inst_e4[31:0]), .dout(i1_inst_wb[31:0])); + rvdffe #(32) i1wb1instff (.*, .en(i1_wb1_data_en),.din(i1_inst_wb[31:0]), .dout(i1_inst_wb1[31:0])); + + assign dec_i0_inst_wb1[31:0] = i0_inst_wb1[31:0]; + assign dec_i1_inst_wb1[31:0] = i1_inst_wb1[31:0]; + + logic [31:1] i0_pc_wb, i0_pc_wb1; + logic [31:1] i1_pc_wb1; + + rvdffe #(31) i0wbpcff (.*, .en(i0_wb_data_en | exu_div_finish), .din(dec_tlu_i0_pc_e4[31:1]), .dout(i0_pc_wb[31:1])); + rvdffe #(31) i0wb1pcff (.*, .en(i0_wb1_data_en | div_wen_wb), .din(i0_pc_wb[31:1]), .dout(i0_pc_wb1[31:1])); + + rvdffe #(31) i1wb1pcff (.*, .en(i1_wb1_data_en),.din(i1_pc_wb[31:1]), .dout(i1_pc_wb1[31:1])); + + assign dec_i0_pc_wb1[31:1] = i0_pc_wb1[31:1]; + assign dec_i1_pc_wb1[31:1] 
= i1_pc_wb1[31:1]; + + + // pipe the pc's down the pipe + assign i0_pc_e1[31:1] = exu_i0_pc_e1[31:1]; + assign i1_pc_e1[31:1] = exu_i1_pc_e1[31:1]; + + rvdffe #(31) i0e2pcff (.*, .en(i0_e2_data_en), .din(i0_pc_e1[31:1]), .dout(i0_pc_e2[31:1])); + rvdffe #(31) i0e3pcff (.*, .en(i0_e3_data_en), .din(i0_pc_e2[31:1]), .dout(i0_pc_e3[31:1])); + rvdffe #(31) i0e4pcff (.*, .en(i0_e4_data_en), .din(i0_pc_e3[31:1]), .dout(i0_pc_e4[31:1])); + rvdffe #(31) i1e2pcff (.*, .en(i1_e2_data_en), .din(i1_pc_e1[31:1]), .dout(i1_pc_e2[31:1])); + rvdffe #(31) i1e3pcff (.*, .en(i1_e3_data_en), .din(i1_pc_e2[31:1]), .dout(i1_pc_e3[31:1])); + rvdffe #(31) i1e4pcff (.*, .en(i1_e4_data_en), .din(i1_pc_e3[31:1]), .dout(i1_pc_e4[31:1])); + + assign dec_i0_pc_e3[31:1] = i0_pc_e3[31:1]; + assign dec_i1_pc_e3[31:1] = i1_pc_e3[31:1]; + + + assign dec_tlu_i0_pc_e4[31:1] = (exu_div_finish) ? div_pc[31:1] : i0_pc_e4[31:1]; + assign dec_tlu_i1_pc_e4[31:1] = i1_pc_e4[31:1]; + + logic [31:1] last_pc_e2; + + // generate the correct npc for correct br predictions + assign last_pc_e2[31:1] = (e2d.i1valid) ? i1_pc_e2[31:1] : i0_pc_e2[31:1]; + + rvbradder ibradder_correct ( + .pc(last_pc_e2[31:1]), + .offset(last_br_immed_e2[12:1]), + .dout(pred_correct_npc_e2[31:1]) + ); + + + + // needed for debug triggers + rvdffe #(31) i1wbpcff (.*, .en(i1_wb_data_en), .din(dec_tlu_i1_pc_e4[31:1]), .dout(i1_pc_wb[31:1])); + + + + + + + // bit 9 is priority match, bit 0 lowest priority, i1_e1, i0_e1, i1_e2, ... 
i1_wb, i0_wb + + + + assign i0_rs1bypass[9:0] = { i0_rs1_depth_d[3:0] == 4'd1 & i0_rs1_class_d.alu, + i0_rs1_depth_d[3:0] == 4'd2 & i0_rs1_class_d.alu, + i0_rs1_depth_d[3:0] == 4'd3 & i0_rs1_class_d.alu, + i0_rs1_depth_d[3:0] == 4'd4 & i0_rs1_class_d.alu, + i0_rs1_depth_d[3:0] == 4'd5 & (i0_rs1_class_d.alu | i0_rs1_class_d.load | i0_rs1_class_d.mul), + i0_rs1_depth_d[3:0] == 4'd6 & (i0_rs1_class_d.alu | i0_rs1_class_d.load | i0_rs1_class_d.mul), + i0_rs1_depth_d[3:0] == 4'd7 & (i0_rs1_class_d.alu | i0_rs1_class_d.load | i0_rs1_class_d.mul | i0_rs1_class_d.sec), + i0_rs1_depth_d[3:0] == 4'd8 & (i0_rs1_class_d.alu | i0_rs1_class_d.load | i0_rs1_class_d.mul | i0_rs1_class_d.sec), + i0_rs1_depth_d[3:0] == 4'd9 & (i0_rs1_class_d.alu | i0_rs1_class_d.load | i0_rs1_class_d.mul | i0_rs1_class_d.sec), + i0_rs1_depth_d[3:0] == 4'd10 & (i0_rs1_class_d.alu | i0_rs1_class_d.load | i0_rs1_class_d.mul | i0_rs1_class_d.sec) }; + + + assign i0_rs2bypass[9:0] = { i0_rs2_depth_d[3:0] == 4'd1 & i0_rs2_class_d.alu, + i0_rs2_depth_d[3:0] == 4'd2 & i0_rs2_class_d.alu, + i0_rs2_depth_d[3:0] == 4'd3 & i0_rs2_class_d.alu, + i0_rs2_depth_d[3:0] == 4'd4 & i0_rs2_class_d.alu, + i0_rs2_depth_d[3:0] == 4'd5 & (i0_rs2_class_d.alu | i0_rs2_class_d.load | i0_rs2_class_d.mul), + i0_rs2_depth_d[3:0] == 4'd6 & (i0_rs2_class_d.alu | i0_rs2_class_d.load | i0_rs2_class_d.mul), + i0_rs2_depth_d[3:0] == 4'd7 & (i0_rs2_class_d.alu | i0_rs2_class_d.load | i0_rs2_class_d.mul | i0_rs2_class_d.sec), + i0_rs2_depth_d[3:0] == 4'd8 & (i0_rs2_class_d.alu | i0_rs2_class_d.load | i0_rs2_class_d.mul | i0_rs2_class_d.sec), + i0_rs2_depth_d[3:0] == 4'd9 & (i0_rs2_class_d.alu | i0_rs2_class_d.load | i0_rs2_class_d.mul | i0_rs2_class_d.sec), + i0_rs2_depth_d[3:0] == 4'd10 & (i0_rs2_class_d.alu | i0_rs2_class_d.load | i0_rs2_class_d.mul | i0_rs2_class_d.sec) }; + + + assign i1_rs1bypass[9:0] = { i1_rs1_depth_d[3:0] == 4'd1 & i1_rs1_class_d.alu, + i1_rs1_depth_d[3:0] == 4'd2 & i1_rs1_class_d.alu, + i1_rs1_depth_d[3:0] == 
4'd3 & i1_rs1_class_d.alu, + i1_rs1_depth_d[3:0] == 4'd4 & i1_rs1_class_d.alu, + i1_rs1_depth_d[3:0] == 4'd5 & (i1_rs1_class_d.alu | i1_rs1_class_d.load | i1_rs1_class_d.mul), + i1_rs1_depth_d[3:0] == 4'd6 & (i1_rs1_class_d.alu | i1_rs1_class_d.load | i1_rs1_class_d.mul), + i1_rs1_depth_d[3:0] == 4'd7 & (i1_rs1_class_d.alu | i1_rs1_class_d.load | i1_rs1_class_d.mul | i1_rs1_class_d.sec), + i1_rs1_depth_d[3:0] == 4'd8 & (i1_rs1_class_d.alu | i1_rs1_class_d.load | i1_rs1_class_d.mul | i1_rs1_class_d.sec), + i1_rs1_depth_d[3:0] == 4'd9 & (i1_rs1_class_d.alu | i1_rs1_class_d.load | i1_rs1_class_d.mul | i1_rs1_class_d.sec), + i1_rs1_depth_d[3:0] == 4'd10 & (i1_rs1_class_d.alu | i1_rs1_class_d.load | i1_rs1_class_d.mul | i1_rs1_class_d.sec) }; + + + assign i1_rs2bypass[9:0] = { i1_rs2_depth_d[3:0] == 4'd1 & i1_rs2_class_d.alu, + i1_rs2_depth_d[3:0] == 4'd2 & i1_rs2_class_d.alu, + i1_rs2_depth_d[3:0] == 4'd3 & i1_rs2_class_d.alu, + i1_rs2_depth_d[3:0] == 4'd4 & i1_rs2_class_d.alu, + i1_rs2_depth_d[3:0] == 4'd5 & (i1_rs2_class_d.alu | i1_rs2_class_d.load | i1_rs2_class_d.mul), + i1_rs2_depth_d[3:0] == 4'd6 & (i1_rs2_class_d.alu | i1_rs2_class_d.load | i1_rs2_class_d.mul), + i1_rs2_depth_d[3:0] == 4'd7 & (i1_rs2_class_d.alu | i1_rs2_class_d.load | i1_rs2_class_d.mul | i1_rs2_class_d.sec), + i1_rs2_depth_d[3:0] == 4'd8 & (i1_rs2_class_d.alu | i1_rs2_class_d.load | i1_rs2_class_d.mul | i1_rs2_class_d.sec), + i1_rs2_depth_d[3:0] == 4'd9 & (i1_rs2_class_d.alu | i1_rs2_class_d.load | i1_rs2_class_d.mul | i1_rs2_class_d.sec), + i1_rs2_depth_d[3:0] == 4'd10 & (i1_rs2_class_d.alu | i1_rs2_class_d.load | i1_rs2_class_d.mul | i1_rs2_class_d.sec) }; + + + + + assign dec_i0_rs1_bypass_en_d = |i0_rs1bypass[9:0]; + assign dec_i0_rs2_bypass_en_d = |i0_rs2bypass[9:0]; + assign dec_i1_rs1_bypass_en_d = |i1_rs1bypass[9:0]; + assign dec_i1_rs2_bypass_en_d = |i1_rs2bypass[9:0]; + + + + assign i0_rs1_bypass_data_d[31:0] = ({32{i0_rs1bypass[9]}} & i1_result_e1[31:0]) | + ({32{i0_rs1bypass[8]}} & 
i0_result_e1[31:0]) | + ({32{i0_rs1bypass[7]}} & i1_result_e2[31:0]) | + ({32{i0_rs1bypass[6]}} & i0_result_e2[31:0]) | + ({32{i0_rs1bypass[5]}} & i1_result_e3_final[31:0]) | + ({32{i0_rs1bypass[4]}} & i0_result_e3_final[31:0]) | + ({32{i0_rs1bypass[3]}} & i1_result_e4_final[31:0]) | + ({32{i0_rs1bypass[2]}} & i0_result_e4_final[31:0]) | + ({32{i0_rs1bypass[1]}} & i1_result_wb[31:0]) | + ({32{i0_rs1bypass[0]}} & i0_result_wb[31:0]); + + + assign i0_rs2_bypass_data_d[31:0] = ({32{i0_rs2bypass[9]}} & i1_result_e1[31:0]) | + ({32{i0_rs2bypass[8]}} & i0_result_e1[31:0]) | + ({32{i0_rs2bypass[7]}} & i1_result_e2[31:0]) | + ({32{i0_rs2bypass[6]}} & i0_result_e2[31:0]) | + ({32{i0_rs2bypass[5]}} & i1_result_e3_final[31:0]) | + ({32{i0_rs2bypass[4]}} & i0_result_e3_final[31:0]) | + ({32{i0_rs2bypass[3]}} & i1_result_e4_final[31:0]) | + ({32{i0_rs2bypass[2]}} & i0_result_e4_final[31:0]) | + ({32{i0_rs2bypass[1]}} & i1_result_wb[31:0]) | + ({32{i0_rs2bypass[0]}} & i0_result_wb[31:0]); + + assign i1_rs1_bypass_data_d[31:0] = ({32{i1_rs1bypass[9]}} & i1_result_e1[31:0]) | + ({32{i1_rs1bypass[8]}} & i0_result_e1[31:0]) | + ({32{i1_rs1bypass[7]}} & i1_result_e2[31:0]) | + ({32{i1_rs1bypass[6]}} & i0_result_e2[31:0]) | + ({32{i1_rs1bypass[5]}} & i1_result_e3_final[31:0]) | + ({32{i1_rs1bypass[4]}} & i0_result_e3_final[31:0]) | + ({32{i1_rs1bypass[3]}} & i1_result_e4_final[31:0]) | + ({32{i1_rs1bypass[2]}} & i0_result_e4_final[31:0]) | + ({32{i1_rs1bypass[1]}} & i1_result_wb[31:0]) | + ({32{i1_rs1bypass[0]}} & i0_result_wb[31:0]); + + + assign i1_rs2_bypass_data_d[31:0] = ({32{i1_rs2bypass[9]}} & i1_result_e1[31:0]) | + ({32{i1_rs2bypass[8]}} & i0_result_e1[31:0]) | + ({32{i1_rs2bypass[7]}} & i1_result_e2[31:0]) | + ({32{i1_rs2bypass[6]}} & i0_result_e2[31:0]) | + ({32{i1_rs2bypass[5]}} & i1_result_e3_final[31:0]) | + ({32{i1_rs2bypass[4]}} & i0_result_e3_final[31:0]) | + ({32{i1_rs2bypass[3]}} & i1_result_e4_final[31:0]) | + ({32{i1_rs2bypass[2]}} & i0_result_e4_final[31:0]) | + 
({32{i1_rs2bypass[1]}} & i1_result_wb[31:0]) | + ({32{i1_rs2bypass[0]}} & i0_result_wb[31:0]); + + + + + + +endmodule + +// The file "decode" is a human-readable file in which all of the instruction decodes are defined; it is part of the git repo. +// Modify that file as needed. + +// To generate all of the equations below from "decode", except the legal equation: + +// 1) coredecode -in decode > coredecode.e + +// 2) espresso -Dso -oeqntott coredecode.e | addassign -pre out. > equations + +// To generate the legal (32b instruction is legal) equation below: + +// 1) coredecode -in decode -legal > legal.e + +// 2) espresso -Dso -oeqntott legal.e | addassign -pre out. > legal_equation +// dec_dec_ctl: combinational decode of the 32b instruction "inst" into the fields of the dec_pkt_t packet "out"; every field is a continuous assign over instruction bits. +module dec_dec_ctl + import swerv_types::*; +( + input logic [31:0] inst, + + output dec_pkt_t out + ); + + logic [31:0] i; // local copy of inst (assigned below); the equations index its bits as i[n] + + + assign i[31:0] = inst[31:0]; + + +assign out.alu = (i[2]) | (i[6]) | (!i[25]&i[4]) | (!i[5]&i[4]); + +assign out.rs1 = (!i[14]&!i[13]&!i[2]) | (!i[13]&i[11]&!i[2]) | (i[19]&i[13]&!i[2]) | ( + !i[13]&i[10]&!i[2]) | (i[18]&i[13]&!i[2]) | (!i[13]&i[9]&!i[2]) | ( + i[17]&i[13]&!i[2]) | (!i[13]&i[8]&!i[2]) | (i[16]&i[13]&!i[2]) | ( + !i[13]&i[7]&!i[2]) | (i[15]&i[13]&!i[2]) | (!i[4]&!i[3]) | (!i[6] + &!i[2]); + +assign out.rs2 = (i[5]&!i[4]&!i[2]) | (!i[6]&i[5]&!i[2]); + +assign out.imm12 = (!i[4]&!i[3]&i[2]) | (i[13]&!i[5]&i[4]&!i[2]) | (!i[13]&!i[12] + &i[6]&i[4]) | (!i[12]&!i[5]&i[4]&!i[2]); + +assign out.rd = (!i[5]&!i[2]) | (i[5]&i[2]) | (i[4]); + +assign out.shimm5 = (!i[13]&i[12]&!i[5]&i[4]&!i[2]); + +assign out.imm20 = (i[5]&i[3]) | (i[4]&i[2]); + +assign out.pc = (!i[5]&!i[3]&i[2]) | (i[5]&i[3]); + +assign out.load = (!i[5]&!i[4]&!i[2]); + +assign out.store = (!i[6]&i[5]&!i[4]); + +assign out.lsu = (!i[6]&!i[4]&!i[2]); + +assign out.add = (!i[14]&!i[13]&!i[12]&!i[5]&i[4]) | (!i[5]&!i[3]&i[2]) | (!i[30] + &!i[25]&!i[14]&!i[13]&!i[12]&!i[6]&i[4]&!i[2]); + +assign out.sub = (i[30]&!i[12]&!i[6]&i[5]&i[4]&!i[2]) | (!i[25]&!i[14]&i[13]&!i[6] + &i[4]&!i[2]) |
(!i[14]&i[13]&!i[5]&i[4]&!i[2]) | (i[6]&!i[4]&!i[2]); + +assign out.land = (i[14]&i[13]&i[12]&!i[5]&!i[2]) | (!i[25]&i[14]&i[13]&i[12]&!i[6] + &!i[2]); + +assign out.lor = (!i[6]&i[3]) | (!i[25]&i[14]&i[13]&!i[12]&i[4]&!i[2]) | (i[5]&i[4] + &i[2]) | (!i[12]&i[6]&i[4]) | (i[13]&i[6]&i[4]) | (i[14]&i[13]&!i[12] + &!i[5]&!i[2]) | (i[7]&i[6]&i[4]) | (i[8]&i[6]&i[4]) | (i[9]&i[6]&i[4]) | ( + i[10]&i[6]&i[4]) | (i[11]&i[6]&i[4]); + +assign out.lxor = (!i[25]&i[14]&!i[13]&!i[12]&i[4]&!i[2]) | (i[14]&!i[13]&!i[12] + &!i[5]&i[4]&!i[2]); + +assign out.sll = (!i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + +assign out.sra = (i[30]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + +assign out.srl = (!i[30]&!i[25]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + +assign out.slt = (!i[25]&!i[14]&i[13]&!i[6]&i[4]&!i[2]) | (!i[14]&i[13]&!i[5]&i[4] + &!i[2]); + +assign out.unsign = (!i[14]&i[13]&i[12]&!i[5]&!i[2]) | (i[13]&i[6]&!i[4]&!i[2]) | ( + i[14]&!i[5]&!i[4]) | (!i[25]&!i[14]&i[13]&i[12]&!i[6]&!i[2]) | ( + i[25]&i[14]&i[12]&!i[6]&i[5]&!i[2]); + +assign out.condbr = (i[6]&!i[4]&!i[2]); + +assign out.beq = (!i[14]&!i[12]&i[6]&!i[4]&!i[2]); + +assign out.bne = (!i[14]&i[12]&i[6]&!i[4]&!i[2]); + +assign out.bge = (i[14]&i[12]&i[5]&!i[4]&!i[2]); + +assign out.blt = (i[14]&!i[12]&i[5]&!i[4]&!i[2]); + +assign out.jal = (i[6]&i[2]); + +assign out.by = (!i[13]&!i[12]&!i[6]&!i[4]&!i[2]); + +assign out.half = (i[12]&!i[6]&!i[4]&!i[2]); + +assign out.word = (i[13]&!i[6]&!i[4]); + +assign out.csr_read = (i[13]&i[6]&i[4]) | (i[7]&i[6]&i[4]) | (i[8]&i[6]&i[4]) | ( + i[9]&i[6]&i[4]) | (i[10]&i[6]&i[4]) | (i[11]&i[6]&i[4]); + +assign out.csr_clr = (i[15]&i[13]&i[12]&i[6]&i[4]) | (i[16]&i[13]&i[12]&i[6]&i[4]) | ( + i[17]&i[13]&i[12]&i[6]&i[4]) | (i[18]&i[13]&i[12]&i[6]&i[4]) | ( + i[19]&i[13]&i[12]&i[6]&i[4]); + +assign out.csr_set = (i[15]&!i[12]&i[6]&i[4]) | (i[16]&!i[12]&i[6]&i[4]) | (i[17] + &!i[12]&i[6]&i[4]) | (i[18]&!i[12]&i[6]&i[4]) | (i[19]&!i[12]&i[6] + &i[4]); + +assign out.csr_write =
(!i[13]&i[12]&i[6]&i[4]); + +assign out.csr_imm = (i[14]&!i[13]&i[6]&i[4]) | (i[15]&i[14]&i[6]&i[4]) | (i[16] + &i[14]&i[6]&i[4]) | (i[17]&i[14]&i[6]&i[4]) | (i[18]&i[14]&i[6]&i[4]) | ( + i[19]&i[14]&i[6]&i[4]); + +assign out.presync = (!i[5]&i[3]) | (i[25]&i[14]&!i[6]&i[5]&!i[2]) | (!i[13]&i[7] + &i[6]&i[4]) | (!i[13]&i[8]&i[6]&i[4]) | (!i[13]&i[9]&i[6]&i[4]) | ( + !i[13]&i[10]&i[6]&i[4]) | (!i[13]&i[11]&i[6]&i[4]) | (i[15]&i[13] + &i[6]&i[4]) | (i[16]&i[13]&i[6]&i[4]) | (i[17]&i[13]&i[6]&i[4]) | ( + i[18]&i[13]&i[6]&i[4]) | (i[19]&i[13]&i[6]&i[4]); + +assign out.postsync = (i[12]&!i[5]&i[3]) | (!i[22]&!i[13]&!i[12]&i[6]&i[4]) | ( + i[25]&i[14]&!i[6]&i[5]&!i[2]) | (!i[13]&i[7]&i[6]&i[4]) | (!i[13] + &i[8]&i[6]&i[4]) | (!i[13]&i[9]&i[6]&i[4]) | (!i[13]&i[10]&i[6]&i[4]) | ( + !i[13]&i[11]&i[6]&i[4]) | (i[15]&i[13]&i[6]&i[4]) | (i[16]&i[13]&i[6] + &i[4]) | (i[17]&i[13]&i[6]&i[4]) | (i[18]&i[13]&i[6]&i[4]) | (i[19] + &i[13]&i[6]&i[4]); + +assign out.ebreak = (!i[22]&i[20]&!i[13]&!i[12]&i[6]&i[4]); + +assign out.ecall = (!i[21]&!i[20]&!i[13]&!i[12]&i[6]&i[4]); + +assign out.mret = (i[29]&!i[13]&!i[12]&i[6]&i[4]); + +assign out.mul = (i[25]&!i[14]&!i[6]&i[5]&i[4]&!i[2]); + +assign out.rs1_sign = (i[25]&!i[14]&i[13]&!i[12]&!i[6]&i[5]&i[4]&!i[2]) | (i[25] + &!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + +assign out.rs2_sign = (i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + +assign out.low = (i[25]&!i[14]&!i[13]&!i[12]&i[5]&i[4]&!i[2]); + +assign out.div = (i[25]&i[14]&!i[6]&i[5]&!i[2]); + +assign out.rem = (i[25]&i[14]&i[13]&!i[6]&i[5]&!i[2]); + +assign out.fence = (!i[5]&i[3]); + +assign out.fence_i = (i[12]&!i[5]&i[3]); + +assign out.pm_alu = (i[28]&i[22]&!i[13]&!i[12]&i[4]) | (i[4]&i[2]) | (!i[25]&!i[6] + &i[4]) | (!i[5]&i[4]); + +// legal equation (32b instruction is legal); generated separately from the other equations with "coredecode -legal" +assign out.legal = (!i[31]&!i[30]&i[29]&i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23] + &!i[22]&i[21]&!i[20]&!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[11] + &!i[10]&!i[9]&!i[8]&!i[7]&i[6]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | ( +
!i[31]&!i[30]&!i[29]&i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23]&i[22] + &!i[21]&i[20]&!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[11]&!i[10] + &!i[9]&!i[8]&!i[7]&i[6]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | (!i[31] + &!i[30]&!i[29]&!i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23]&!i[22]&!i[21] + &!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[11]&!i[10]&!i[9]&!i[8] + &!i[7]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | (!i[31]&!i[30]&!i[29]&!i[28] + &!i[27]&!i[26]&!i[25]&!i[6]&i[4]&!i[3]&i[1]&i[0]) | (!i[31]&!i[29] + &!i[28]&!i[27]&!i[26]&!i[25]&!i[14]&!i[13]&!i[12]&!i[6]&!i[3]&!i[2] + &i[1]&i[0]) | (!i[31]&!i[29]&!i[28]&!i[27]&!i[26]&!i[25]&i[14]&!i[13] + &i[12]&!i[6]&i[4]&!i[3]&i[1]&i[0]) | (!i[31]&!i[30]&!i[29]&!i[28] + &!i[27]&!i[26]&!i[6]&i[5]&i[4]&!i[3]&i[1]&i[0]) | (!i[14]&!i[13] + &!i[12]&i[6]&i[5]&!i[4]&!i[3]&i[1]&i[0]) | (i[14]&i[6]&i[5]&!i[4] + &!i[3]&!i[2]&i[1]&i[0]) | (!i[12]&!i[6]&!i[5]&i[4]&!i[3]&i[1]&i[0]) | ( + !i[14]&!i[13]&i[5]&!i[4]&!i[3]&!i[2]&i[1]&i[0]) | (i[12]&i[6]&i[5] + &i[4]&!i[3]&!i[2]&i[1]&i[0]) | (!i[31]&!i[30]&!i[29]&!i[28]&!i[27] + &!i[26]&!i[25]&!i[24]&!i[23]&!i[22]&!i[21]&!i[20]&!i[19]&!i[18]&!i[17] + &!i[16]&!i[15]&!i[14]&!i[13]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[6] + &!i[5]&!i[4]&i[3]&i[2]&i[1]&i[0]) | (!i[31]&!i[30]&!i[29]&!i[28] + &!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[13]&!i[12]&!i[11]&!i[10] + &!i[9]&!i[8]&!i[7]&!i[6]&!i[5]&!i[4]&i[3]&i[2]&i[1]&i[0]) | (i[13] + &i[6]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | (!i[13]&!i[6]&!i[5]&!i[4] + &!i[3]&!i[2]&i[1]&i[0]) | (i[6]&i[5]&!i[4]&i[3]&i[2]&i[1]&i[0]) | ( + i[13]&!i[6]&!i[5]&i[4]&!i[3]&i[1]&i[0]) | (!i[14]&!i[12]&!i[6]&!i[4] + &!i[3]&!i[2]&i[1]&i[0]) | (!i[6]&i[4]&!i[3]&i[2]&i[1]&i[0]); + + +endmodule diff --git a/design/dec/dec_gpr_ctl.sv b/design/dec/dec_gpr_ctl.sv new file mode 100644 index 0000000..02119db --- /dev/null +++ b/design/dec/dec_gpr_ctl.sv @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +module dec_gpr_ctl #(parameter GPR_BANKS = 1, + GPR_BANKS_LOG2 = 1) ( + input logic active_clk, + + input logic [4:0] raddr0, // logical read addresses + input logic [4:0] raddr1, + input logic [4:0] raddr2, + input logic [4:0] raddr3, + + input logic rden0, // read enables + input logic rden1, + input logic rden2, + input logic rden3, + + input logic [4:0] waddr0, // logical write addresses + input logic [4:0] waddr1, + input logic [4:0] waddr2, + + input logic wen0, // write enables + input logic wen1, + input logic wen2, + + input logic [31:0] wd0, // write data + input logic [31:0] wd1, + input logic [31:0] wd2, + + input logic wen_bank_id, // write enable for banks + input logic [GPR_BANKS_LOG2-1:0] wr_bank_id, // read enable for banks + + input logic clk, + input logic rst_l, + + output logic [31:0] rd0, // read data + output logic [31:0] rd1, + output logic [31:0] rd2, + output logic [31:0] rd3, + + input logic scan_mode +); + + logic [GPR_BANKS-1:0][31:1] [31:0] gpr_out; // 31 x 32 bit GPRs + logic [31:1] [31:0] gpr_in; + logic [31:1] w0v,w1v,w2v; + logic [31:1] gpr_wr_en; + logic [GPR_BANKS-1:0][31:1] gpr_bank_wr_en; + logic [GPR_BANKS_LOG2-1:0] gpr_bank_id; + + //assign gpr_bank_id[GPR_BANKS_LOG2-1:0] = '0; + rvdffs #(GPR_BANKS_LOG2) bankid_ff (.*, .clk(active_clk), .en(wen_bank_id), .din(wr_bank_id[GPR_BANKS_LOG2-1:0]), .dout(gpr_bank_id[GPR_BANKS_LOG2-1:0])); + + // GPR Write Enables for power 
savings + assign gpr_wr_en[31:1] = (w0v[31:1] | w1v[31:1] | w2v[31:1]); + for (genvar i=0; i or %x0, %reg,%x0 {000000000000,reg[4:0],110000000110011} + +// put write date on rs1 +// write -> or %reg, %x0, %x0 {00000000000000000110,reg[4:0],0110011} + + +// CSR accesses +// csr is of form rd, csr, rs1 + +// read -> csrrs %x0, %csr, %x0 {csr[11:0],00000010000001110011} + +// put write data on rs1 +// write -> csrrw %x0, %csr, %x0 {csr[11:0],00000001000001110011} + +// abstract memory command not done here + assign debug_valid = dbg_cmd_valid & (dbg_cmd_type[1:0] != 2'h2); + + + assign debug_read = debug_valid & ~dbg_cmd_write; + assign debug_write = debug_valid & dbg_cmd_write; + + assign debug_read_gpr = debug_read & (dbg_cmd_type[1:0]==2'h0); + assign debug_write_gpr = debug_write & (dbg_cmd_type[1:0]==2'h0); + assign debug_read_csr = debug_read & (dbg_cmd_type[1:0]==2'h1); + assign debug_write_csr = debug_write & (dbg_cmd_type[1:0]==2'h1); + + assign dreg[4:0] = dbg_cmd_addr[4:0]; + assign dcsr[11:0] = dbg_cmd_addr[11:0]; + + + assign ib0_debug_in[31:0] = ({32{debug_read_gpr}} & {12'b000000000000,dreg[4:0],15'b110000000110011}) | + ({32{debug_write_gpr}} & {20'b00000000000000000110,dreg[4:0],7'b0110011}) | + ({32{debug_read_csr}} & {dcsr[11:0],20'b00000010000001110011}) | + ({32{debug_write_csr}} & {dcsr[11:0],20'b00000001000001110011}); + + + // machine is in halted state, pipe empty, write will always happen next cycle + rvdff #(1) debug_wdata_rs1ff (.*, .clk(free_clk), .din(debug_write_gpr | debug_write_csr), .dout(dec_debug_wdata_rs1_d)); + + + // special fence csr for use only in debug mode + + logic debug_fence_in; + + assign debug_fence_in = debug_write_csr & (dcsr[11:0] == 12'h7c4); + + rvdff #(1) debug_fence_ff (.*, .clk(free_clk), .din(debug_fence_in), .dout(dec_debug_fence_d)); + + + assign ib0_in[31:0] = ({32{write_i0_ib0}} & ((debug_valid) ? 
ib0_debug_in[31:0] : ifu_i0_instr[31:0])) | + ({32{shift_ib1_ib0}} & ib1[31:0]) | + ({32{shift_ib2_ib0}} & ib2[31:0]); + + rvdffe #(32) ib0ff (.*, .en(ibwrite[0]), .din(ib0_in[31:0]), .dout(ib0[31:0])); + + assign dec_ib3_valid_d = ibval[3]; + assign dec_ib2_valid_d = ibval[2]; + assign dec_ib1_valid_d = ibval[1]; + assign dec_ib0_valid_d = ibval[0]; + + assign dec_i0_instr_d[31:0] = ib0[31:0]; + + assign dec_i1_instr_d[31:0] = ib1[31:0]; + + assign dec_i0_brp = bp0; + assign dec_i1_brp = bp1; + + + assign shift1 = dec_i0_decode_d & ~dec_i1_decode_d; + + assign shift2 = dec_i0_decode_d & dec_i1_decode_d; + + assign shift0 = ~dec_i0_decode_d; + + + // compute shifted ib valids to determine where to write + assign shift_ibval[3:0] = ({4{shift1}} & {1'b0, ibval[3:1] }) | + ({4{shift2}} & {2'b0, ibval[3:2]}) | + ({4{shift0}} & ibval[3:0]); + + assign write_i0_ib0 = ~shift_ibval[0] & (ifu_i0_val | debug_valid); + assign write_i0_ib1 = shift_ibval[0] & ~shift_ibval[1] & ifu_i0_val; + assign write_i0_ib2 = shift_ibval[1] & ~shift_ibval[2] & ifu_i0_val; + assign write_i0_ib3 = shift_ibval[2] & ~shift_ibval[3] & ifu_i0_val; + + assign write_i1_ib1 = ~shift_ibval[0] & ifu_i1_val; + assign write_i1_ib2 = shift_ibval[0] & ~shift_ibval[1] & ifu_i1_val; + assign write_i1_ib3 = shift_ibval[1] & ~shift_ibval[2] & ifu_i1_val; + + + assign shift_ib1_ib0 = shift1 & ibval[1]; + assign shift_ib2_ib1 = shift1 & ibval[2]; + assign shift_ib3_ib2 = shift1 & ibval[3]; + + assign shift_ib2_ib0 = shift2 & ibval[2]; + assign shift_ib3_ib1 = shift2 & ibval[3]; + + + +endmodule diff --git a/design/dec/dec_tlu_ctl.sv b/design/dec/dec_tlu_ctl.sv new file mode 100644 index 0000000..a53012d --- /dev/null +++ b/design/dec/dec_tlu_ctl.sv @@ -0,0 +1,2570 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +//******************************************************************************** +// dec_tlu_ctl.sv +// +// +// Function: CSRs, Commit/WB, flushing, exceptions, interrupts +// Comments: +// +//******************************************************************************** + +module dec_tlu_ctl + import swerv_types::*; +( + input logic clk, + input logic active_clk, + input logic free_clk, + input logic rst_l, + input logic scan_mode, + + input logic [31:1] rst_vec, // reset vector, from core pins + input logic nmi_int, // nmi pin + input logic [31:1] nmi_vec, // nmi vector + input logic i_cpu_halt_req, // Asynchronous Halt request to CPU + input logic i_cpu_run_req, // Asynchronous Restart request to CPU + + input logic mpc_debug_halt_req, // Async halt request + input logic mpc_debug_run_req, // Async run request + input logic mpc_reset_run_req, // Run/halt after reset + + // perf counter inputs + input logic [1:0] ifu_pmu_instr_aligned, // aligned instructions + input logic ifu_pmu_align_stall, // aligner stalled + input logic ifu_pmu_fetch_stall, // fetch unit stalled + input logic ifu_pmu_ic_miss, // icache miss + input logic ifu_pmu_ic_hit, // icache hit + input logic ifu_pmu_bus_error, // Instruction side bus error + input logic ifu_pmu_bus_busy, // Instruction side bus busy + input logic ifu_pmu_bus_trxn, // Instruction side bus transaction + input logic [1:0] dec_pmu_instr_decoded, // decoded instructions + input logic dec_pmu_decode_stall, // decode stall + input logic dec_pmu_presync_stall, // decode stall due to presync'd inst + 
input logic dec_pmu_postsync_stall,// decode stall due to postsync'd inst + input logic lsu_freeze_dc3, // lsu freeze stall + input logic lsu_store_stall_any, // SB or WB is full, stall decode + input logic dma_dccm_stall_any, // DMA stall of lsu + input logic dma_iccm_stall_any, // DMA stall of ifu + input logic exu_pmu_i0_br_misp, // pipe 0 branch misp + input logic exu_pmu_i0_br_ataken, // pipe 0 branch actual taken + input logic exu_pmu_i0_pc4, // pipe 0 4 byte branch + input logic exu_pmu_i1_br_misp, // pipe 1 branch misp + input logic exu_pmu_i1_br_ataken, // pipe 1 branch actual taken + input logic exu_pmu_i1_pc4, // pipe 1 4 byte branch + input logic lsu_pmu_bus_trxn, // D side bus transaction + input logic lsu_pmu_bus_misaligned, // D side bus misaligned + input logic lsu_pmu_bus_error, // D side bus error + input logic lsu_pmu_bus_busy, // D side bus busy + + + input logic iccm_dma_sb_error, // I side dma single bit error + + input lsu_error_pkt_t lsu_error_pkt_dc3, // lsu precise exception/error packet + + input logic dec_pause_state, // Pause counter not zero + input logic lsu_imprecise_error_store_any, // store bus error + input logic lsu_imprecise_error_load_any, // store bus error + input logic [31:0] lsu_imprecise_error_addr_any, // store bus error address + input logic lsu_freeze_external_ints_dc3, // load to side effect region + + input logic dec_csr_wen_unq_d, // valid csr with write - for csr legal + input logic dec_csr_any_unq_d, // valid csr - for csr legal + input logic dec_csr_wen_wb, // csr write enable at wb + input logic [11:0] dec_csr_rdaddr_d, // read address for csr + input logic [11:0] dec_csr_wraddr_wb, // write address for csr + input logic [31:0] dec_csr_wrdata_wb, // csr write data at wb + input logic dec_csr_stall_int_ff, // csr is mie/mstatus + + input logic dec_tlu_i0_valid_e4, // pipe 0 op at e4 is valid + input logic dec_tlu_i1_valid_e4, // pipe 1 op at e4 is valid + + input logic dec_i0_load_e4, // during cycle after freeze 
asserts, load is in i0 + + input logic dec_fence_pending, // tell TLU to stall DMA + + input logic [31:1] exu_npc_e4, // for NPC tracking + input logic exu_i0_flush_lower_e4, // pipe 0 branch mp flush + input logic exu_i1_flush_lower_e4, // pipe 1 branch mp flush + input logic [31:1] exu_i0_flush_path_e4, // pipe 0 correct path for mp, merge with lower path + input logic [31:1] exu_i1_flush_path_e4, // pipe 1 correct path for mp, merge with lower path + + input logic [31:1] dec_tlu_i0_pc_e4, // for PC/NPC tracking + input logic [31:1] dec_tlu_i1_pc_e4, // for PC/NPC tracking + + input trap_pkt_t dec_tlu_packet_e4, // exceptions known at decode + + input logic [31:0] dec_illegal_inst, // For mtval + input logic dec_i0_decode_d, // decode valid, used for clean icache diagnostics + + // branch info from pipe0 for errors or counter updates + input logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] exu_i0_br_index_e4, // index + input logic [1:0] exu_i0_br_hist_e4, // history + input logic [1:0] exu_i0_br_bank_e4, // bank + input logic exu_i0_br_error_e4, // error + input logic exu_i0_br_start_error_e4, // start error + input logic exu_i0_br_valid_e4, // valid + input logic exu_i0_br_mp_e4, // mispredict + input logic exu_i0_br_middle_e4, // middle of bank + input logic [`RV_BHT_GHR_RANGE] exu_i0_br_fghr_e4, // FGHR when predicted + + // branch info from pipe1 for errors or counter updates + input logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] exu_i1_br_index_e4, // index + input logic [1:0] exu_i1_br_hist_e4, // history + input logic [1:0] exu_i1_br_bank_e4, // bank + input logic exu_i1_br_error_e4, // error + input logic exu_i1_br_start_error_e4, // start error + input logic exu_i1_br_valid_e4, // valid + input logic exu_i1_br_mp_e4, // mispredict + input logic exu_i1_br_middle_e4, // middle of bank + input logic [`RV_BHT_GHR_RANGE] exu_i1_br_fghr_e4, // FGHR when predicted + +`ifdef RV_BTB_48 + input logic [1:0] exu_i1_br_way_e4, // way hit or repl + input logic [1:0] 
exu_i0_br_way_e4, // way hit or repl +`else + input logic exu_i1_br_way_e4, // way hit or repl + input logic exu_i0_br_way_e4, // way hit or repl +`endif + + // Debug start + output logic dec_dbg_cmd_done, // abstract command done + output logic dec_dbg_cmd_fail, // abstract command failed + output logic dec_tlu_flush_noredir_wb , // Tell fetch to idle on this flush + output logic dec_tlu_mpc_halted_only, // Core is halted only due to MPC + output logic dec_tlu_dbg_halted, // Core is halted and ready for debug command + output logic dec_tlu_pmu_fw_halted, // Core is halted due to Power management unit or firmware halt + output logic dec_tlu_debug_mode, // Core is in debug mode + output logic dec_tlu_resume_ack, // Resume acknowledge + output logic dec_tlu_debug_stall, // stall decode while waiting on core to empty + output logic dec_tlu_flush_leak_one_wb, // single step + output logic dec_tlu_flush_err_wb, // iside perr/ecc rfpc + output logic dec_tlu_stall_dma, // stall dma access when there's a halt request + input logic dbg_halt_req, // DM requests a halt + input logic dbg_resume_req, // DM requests a resume + input logic ifu_miss_state_idle, // I-side miss buffer empty + input logic lsu_halt_idle_any, // lsu is idle + output trigger_pkt_t [3:0] trigger_pkt_any, // trigger info for trigger blocks + +`ifdef RV_ICACHE_ECC + input logic [41:0] ifu_ic_debug_rd_data, // diagnostic icache read data +`else + input logic [33:0] ifu_ic_debug_rd_data, // diagnostic icache read data +`endif + input logic ifu_ic_debug_rd_data_valid, // diagnostic icache read data valid + output cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics + // Debug end + + input logic [7:0] pic_claimid, // pic claimid for csr + input logic [3:0] pic_pl, // pic priv level for csr + input logic mhwakeup, // high priority external int, wakeup if halted + + input logic mexintpend, // external interrupt pending + input logic timer_int, // timer 
interrupt pending + + output logic o_cpu_halt_status, // PMU interface, halted + output logic o_cpu_halt_ack, // halt req ack + output logic o_cpu_run_ack, // run req ack + output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sending a halt or run request + + output logic mpc_debug_halt_ack, // Halt ack + output logic mpc_debug_run_ack, // Run ack + output logic debug_brkpt_status, // debug breakpoint + + output logic [3:0] dec_tlu_meicurpl, // to PIC + output logic [3:0] dec_tlu_meipt, // to PIC + + output br_tlu_pkt_t dec_tlu_br0_wb_pkt, // branch pkt to bp + output br_tlu_pkt_t dec_tlu_br1_wb_pkt, // branch pkt to bp + + output logic [31:0] dec_csr_rddata_d, // csr read data at wb + output logic dec_csr_legal_d, // csr indicates legal operation + + output logic dec_tlu_i0_kill_writeb_wb, // I0 is flushed, don't writeback any results to arch state + output logic dec_tlu_i1_kill_writeb_wb, // I1 is flushed, don't writeback any results to arch state + + output logic dec_tlu_flush_lower_wb, // commit has a flush (exception, int, mispredict at e4) + output logic [31:1] dec_tlu_flush_path_wb, // flush pc + output logic dec_tlu_fence_i_wb, // flush is a fence_i rfnpc, flush icache + + output logic dec_tlu_presync_d, // CSR read needs to be presync'd + output logic dec_tlu_postsync_d, // CSR needs to be postsync'd + + output logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control + + output logic dec_tlu_cancel_e4, // Cancel lsu op at DC4 due to future trigger hit + + output logic dec_tlu_wr_pause_wb, // CSR write to pause reg is at WB.
+ output logic dec_tlu_flush_pause_wb, // Flush is due to pause + + output logic [1:0] dec_tlu_perfcnt0, // toggles when pipe0 perf counter 0 has an event inc + output logic [1:0] dec_tlu_perfcnt1, // toggles when pipe0 perf counter 1 has an event inc + output logic [1:0] dec_tlu_perfcnt2, // toggles when pipe0 perf counter 2 has an event inc + output logic [1:0] dec_tlu_perfcnt3, // toggles when pipe0 perf counter 3 has an event inc + + + output logic dec_tlu_i0_valid_wb1, // pipe 0 valid + output logic dec_tlu_i1_valid_wb1, // pipe 1 valid + output logic dec_tlu_i0_exc_valid_wb1, // pipe 0 exception valid + output logic dec_tlu_i1_exc_valid_wb1, // pipe 1 exception valid + output logic dec_tlu_int_valid_wb1, // pipe 2 int valid + output logic [4:0] dec_tlu_exc_cause_wb1, // exception or int cause + output logic [31:0] dec_tlu_mtval_wb1, // MTVAL value + + // feature disable from mfdc + output logic dec_tlu_sideeffect_posted_disable, // disable posted writes to side-effect address + output logic dec_tlu_dual_issue_disable, // disable dual issue + output logic dec_tlu_core_ecc_disable, // disable core ECC + output logic dec_tlu_sec_alu_disable, // disable secondary ALU + output logic dec_tlu_non_blocking_disable, // disable non blocking loads + output logic dec_tlu_fast_div_disable, // disable fast divider + output logic dec_tlu_bpred_disable, // disable branch prediction + output logic dec_tlu_wb_coalescing_disable, // disable writebuffer coalescing + output logic dec_tlu_ld_miss_byp_wb_disable, // disable loads miss bypass write buffer + output logic dec_tlu_pipelining_disable, // disable pipelining + output logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:16] + + // clock gating overrides from mcgc + output logic dec_tlu_misc_clk_override, // override misc clock domain gating + output logic dec_tlu_dec_clk_override, // override decode clock domain gating + output logic dec_tlu_exu_clk_override, // override exu clock domain gating + 
output logic dec_tlu_ifu_clk_override, // override fetch clock domain gating + output logic dec_tlu_lsu_clk_override, // override load/store clock domain gating + output logic dec_tlu_bus_clk_override, // override bus clock domain gating + output logic dec_tlu_pic_clk_override, // override PIC clock domain gating + output logic dec_tlu_dccm_clk_override, // override DCCM clock domain gating + output logic dec_tlu_icm_clk_override // override ICCM clock domain gating + + ); + + logic dec_csr_wen_wb_mod, clk_override, e4e5_int_clk, nmi_lsu_load_type, nmi_lsu_store_type, nmi_int_detected_f, nmi_lsu_load_type_f, + nmi_lsu_store_type_f, allow_dbg_halt_csr_write, dbg_cmd_done_ns, i_cpu_run_req_d1_raw, debug_mode_status, lsu_single_ecc_error_wb, + i0_mp_e4, i1_mp_e4, sel_npc_e4, sel_npc_wb, ce_int, mtval_capture_lsu_wb, wr_mdeau_wb, micect_cout_nc, miccmect_cout_nc, + mdccmect_cout_nc, nmi_in_debug_mode, dpc_capture_npc, dpc_capture_pc, tdata_load, tdata_opcode, tdata_action, perfcnt_halted; + + + logic reset_delayed, reset_detect, reset_detected; + logic wr_mstatus_wb, wr_mtvec_wb, wr_mie_wb, wr_mcyclel_wb, wr_mcycleh_wb, + wr_minstretl_wb, wr_minstreth_wb, wr_mscratch_wb, wr_mepc_wb, wr_mcause_wb, wr_mtval_wb, + wr_mrac_wb, wr_meihap_wb, wr_meicurpl_wb, wr_meipt_wb, wr_dcsr_wb, + wr_dpc_wb, wr_meicidpl_wb, wr_meivt_wb, wr_meicpct_wb, wr_micect_wb, wr_miccmect_wb, + wr_mdccmect_wb,wr_mhpme3_wb, wr_mhpme4_wb, wr_mhpme5_wb, wr_mhpme6_wb; + logic wr_mgpmc_wb, mgpmc_b, mgpmc; + logic wr_mtsel_wb, wr_mtdata1_t0_wb, wr_mtdata1_t1_wb, wr_mtdata1_t2_wb, wr_mtdata1_t3_wb, wr_mtdata2_t0_wb, wr_mtdata2_t1_wb, wr_mtdata2_t2_wb, wr_mtdata2_t3_wb; + logic [31:0] mtdata2_t0, mtdata2_t1, mtdata2_t2, mtdata2_t3, mtdata2_tsel_out, mtdata1_tsel_out; + logic [9:0] mtdata1_t0_ns, mtdata1_t0, mtdata1_t1_ns, mtdata1_t1, mtdata1_t2_ns, mtdata1_t2, mtdata1_t3_ns, mtdata1_t3; + logic [27:0] tdata_wrdata_wb; + logic [1:0] mtsel_ns, mtsel; + logic tlu_i0_kill_writeb_e4, tlu_i1_kill_writeb_e4; + 
logic [1:0] mstatus_ns, mstatus; + logic mstatus_mie_ns; + logic [30:0] mtvec_ns, mtvec; + logic [15:2] dcsr_ns, dcsr; + logic [3:0] mip_ns, mip; + logic [3:0] mie_ns, mie; + logic [31:0] mcyclel_ns, mcyclel; + logic [31:0] mcycleh_ns, mcycleh; + logic [31:0] minstretl_ns, minstretl; + logic [31:0] minstreth_ns, minstreth; + logic [31:0] micect_ns, micect, miccmect_ns, miccmect, mdccmect_ns, mdccmect; + logic [26:0] micect_inc, miccmect_inc, mdccmect_inc; + logic [31:0] mscratch; + logic [31:0] mhpmc3, mhpmc3_ns, mhpmc4, mhpmc4_ns, mhpmc5, mhpmc5_ns, mhpmc6, mhpmc6_ns; + logic [31:0] mhpmc3h, mhpmc3h_ns, mhpmc4h, mhpmc4h_ns, mhpmc5h, mhpmc5h_ns, mhpmc6h, mhpmc6h_ns; + logic [5:0] mhpme3, mhpme4, mhpme5, mhpme6; + logic [31:0] mrac; + logic [9:2] meihap; + logic [31:10] meivt; + logic [3:0] meicurpl_ns, meicurpl; + logic [3:0] meicidpl_ns, meicidpl; + logic [3:0] meipt_ns, meipt; + logic [31:0] mdseac; + logic mdseac_locked_ns, mdseac_locked_f, mdseac_en, nmi_lsu_detected; + logic [31:1] mepc_ns, mepc; + logic [31:1] dpc_ns, dpc; + logic [31:0] mcause_ns, mcause; + logic [31:0] mtval_ns, mtval; + logic mret_wb; + logic dec_pause_state_f, dec_tlu_wr_pause_wb_f, pause_expired_e4, pause_expired_wb; + logic tlu_flush_lower_e4, tlu_flush_lower_wb; + logic [31:1] tlu_flush_path_e4, tlu_flush_path_wb; + logic i0_valid_wb, i1_valid_wb; + logic [5:0] vectored_cause; + logic vpath_overflow_nc; + logic [31:1] vectored_path, interrupt_path; + logic [18:2] dicawics_ns, dicawics; + logic wr_dicawics_wb, wr_dicad0_wb, wr_dicad1_wb; + logic [31:0] dicad0_ns, dicad0; +`ifdef RV_ICACHE_ECC + logic [9:0] dicad1_ns, dicad1; + `else + logic [1:0] dicad1_ns, dicad1; + `endif + logic ebreak_e4, ebreak_to_debug_mode_e4, ecall_e4, illegal_e4, illegal_e4_qual, mret_e4, inst_acc_e4, fence_i_e4, + ic_perr_e4, iccm_sbecc_e4, ebreak_to_debug_mode_wb, kill_ebreak_count_wb, inst_acc_second_e4; + logic ebreak_wb, illegal_wb, illegal_raw_wb, inst_acc_wb, inst_acc_second_wb, fence_i_wb, ic_perr_wb, 
iccm_sbecc_wb; + logic ce_int_ready, ext_int_ready, timer_int_ready, mhwakeup_ready, + take_ext_int, take_ce_int, take_timer_int, take_nmi, take_nmi_wb; + logic i0_exception_valid_e4, interrupt_valid, i0_exception_valid_wb, interrupt_valid_wb, exc_or_int_valid, exc_or_int_valid_wb, mdccme_ce_req, miccme_ce_req, mice_ce_req; + logic synchronous_flush_e4; + logic [4:0] exc_cause_e4, exc_cause_wb; + logic mcyclel_cout, mcyclel_cout_f; + logic [31:0] mcyclel_inc; + logic mcycleh_cout_nc; + logic [31:0] mcycleh_inc; + logic minstretl_cout, minstretl_cout_f, minstret_enable; + logic [31:0] minstretl_inc, minstretl_read; + logic minstreth_cout_nc; + logic [31:0] minstreth_inc, minstreth_read; + logic [31:1] pc_e4, pc_wb, npc_e4, npc_wb; + logic mtval_capture_pc_wb, mtval_capture_inst_wb, mtval_clear_wb, mtval_capture_pc_plus2_wb; + logic valid_csr; + logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] dec_tlu_br0_addr_e4, dec_tlu_br1_addr_e4; + logic [1:0] dec_tlu_br0_bank_e4, dec_tlu_br1_bank_e4; + logic rfpc_i0_e4, rfpc_i1_e4; + logic lsu_i0_rfnpc_dc4, lsu_i1_rfnpc_dc4; + logic dec_tlu_br0_error_e4, dec_tlu_br0_start_error_e4, dec_tlu_br0_v_e4; + logic dec_tlu_br1_error_e4, dec_tlu_br1_start_error_e4, dec_tlu_br1_v_e4; + logic lsu_i0_exc_dc4, lsu_i1_exc_dc4, lsu_i0_exc_dc4_raw, lsu_i1_exc_dc4_raw, lsu_exc_ma_dc4, lsu_exc_acc_dc4, lsu_exc_st_dc4, + lsu_exc_valid_e4, lsu_exc_valid_e4_raw, lsu_exc_valid_wb, lsu_i0_exc_wb, + block_interrupts, lsu_block_interrupts_dc3, lsu_block_interrupts_e4; + logic tlu_i0_commit_cmt, tlu_i1_commit_cmt; + logic i0_trigger_eval_e4, i1_trigger_eval_e4, lsu_freeze_e4, lsu_freeze_pulse_e3, lsu_freeze_pulse_e4; + + logic request_debug_mode_e4, request_debug_mode_wb, request_debug_mode_done, request_debug_mode_done_f; + logic take_halt, take_halt_f, halt_taken, halt_taken_f, internal_dbg_halt_mode, dbg_tlu_halted_f, take_reset, + dbg_tlu_halted, core_empty, lsu_halt_idle_any_f, ifu_miss_state_idle_f, resume_ack_ns, + debug_halt_req_f, debug_resume_req_f, 
enter_debug_halt_req, dcsr_single_step_done, dcsr_single_step_done_f, + debug_halt_req_d1, debug_halt_req_ns, dcsr_single_step_running, dcsr_single_step_running_f, internal_dbg_halt_timers; + + logic [3:0] i0_trigger_e4, i1_trigger_e4, trigger_action, trigger_enabled, + i0_trigger_chain_masked_e4, i1_trigger_chain_masked_e4; + logic [2:0] trigger_chain; + logic i0_trigger_hit_e4, i0_trigger_hit_raw_e4, i0_trigger_action_e4, + trigger_hit_e4, trigger_hit_wb, i0_trigger_hit_wb, + mepc_trigger_hit_sel_pc_e4, + mepc_trigger_hit_sel_pc_wb; + logic i1_trigger_hit_e4, i1_trigger_hit_raw_e4, i1_trigger_action_e4; + logic [3:0] update_hit_bit_e4, update_hit_bit_wb, i0_iside_trigger_has_pri_e4, i1_iside_trigger_has_pri_e4, + i0_lsu_trigger_has_pri_e4, i1_lsu_trigger_has_pri_e4; + logic cpu_halt_status, cpu_halt_ack, cpu_run_ack, ext_halt_pulse, i_cpu_halt_req_d1, i_cpu_run_req_d1; + + logic inst_acc_e4_raw, trigger_hit_dmode_e4, trigger_hit_dmode_wb, trigger_hit_for_dscr_cause_wb; + logic wr_mcgc_wb, wr_mfdc_wb; + logic [8:0] mcgc; + logic [18:0] mfdc; + logic [13:0] mfdc_int, mfdc_ns; + logic i_cpu_halt_req_sync_qual, i_cpu_run_req_sync_qual, pmu_fw_halt_req_ns, pmu_fw_halt_req_f, + fw_halt_req, enter_pmu_fw_halt_req, pmu_fw_tlu_halted, pmu_fw_tlu_halted_f, internal_pmu_fw_halt_mode, + internal_pmu_fw_halt_mode_f; + logic dcsr_single_step_running_ff; + logic nmi_int_delayed, nmi_int_detected; + logic [3:0] trigger_execute, trigger_data, trigger_store; + logic mpc_run_state_ns, debug_brkpt_status_ns, mpc_debug_halt_ack_ns, mpc_debug_run_ack_ns, dbg_halt_state_ns, dbg_run_state_ns, + dbg_halt_state_f, mpc_debug_halt_req_sync_f, mpc_debug_run_req_sync_f, mpc_halt_state_f, mpc_halt_state_ns, mpc_run_state_f, debug_brkpt_status_f, + mpc_debug_halt_ack_f, mpc_debug_run_ack_f, dbg_run_state_f, dbg_halt_state_ff, mpc_debug_halt_req_sync_pulse, + mpc_debug_run_req_sync_pulse, debug_brkpt_valid, debug_halt_req, debug_resume_req, dec_tlu_mpc_halted_only_ns; + + + assign clk_override = 
dec_tlu_dec_clk_override; + + // Async inputs to the core have to be sync'd to the core clock. + logic nmi_int_sync, timer_int_sync, i_cpu_halt_req_sync, i_cpu_run_req_sync, mpc_debug_halt_req_sync, mpc_debug_run_req_sync; + rvsyncss #(6) syncro_ff(.*, + .clk(free_clk), + .din ({nmi_int, timer_int, i_cpu_halt_req, i_cpu_run_req, mpc_debug_halt_req, mpc_debug_run_req}), + .dout({nmi_int_sync, timer_int_sync, i_cpu_halt_req_sync, i_cpu_run_req_sync, mpc_debug_halt_req_sync, mpc_debug_run_req_sync})); + + // for CSRs that have inpipe writes only + + logic csr_wr_clk; + rvclkhdr csrwr_wb_cgc ( .en(dec_csr_wen_wb_mod | clk_override), .l1clk(csr_wr_clk), .* ); + logic lsu_e3_e4_clk, lsu_e4_e5_clk; + rvclkhdr lsu_e3_e4_cgc ( .en(lsu_error_pkt_dc3.exc_valid | lsu_error_pkt_dc4.exc_valid | lsu_error_pkt_dc3.single_ecc_error | lsu_error_pkt_dc4.single_ecc_error | clk_override), .l1clk(lsu_e3_e4_clk), .* ); + rvclkhdr lsu_e4_e5_cgc ( .en(lsu_error_pkt_dc4.exc_valid | lsu_exc_valid_wb | clk_override), .l1clk(lsu_e4_e5_clk), .* ); + + logic e4e5_clk, e4_valid, e5_valid, e4e5_valid, internal_dbg_halt_mode_f; + assign e4_valid = dec_tlu_i0_valid_e4 | dec_tlu_i1_valid_e4; + assign e4e5_valid = e4_valid | e5_valid; + rvclkhdr e4e5_cgc ( .en(e4e5_valid | clk_override), .l1clk(e4e5_clk), .* ); + rvclkhdr e4e5_int_cgc ( .en(e4e5_valid | internal_dbg_halt_mode_f | i_cpu_run_req_d1 | interrupt_valid | interrupt_valid_wb | reset_delayed | pause_expired_e4 | pause_expired_wb | clk_override), .l1clk(e4e5_int_clk), .* ); + + + assign lsu_freeze_pulse_e3 = lsu_freeze_dc3 & ~lsu_freeze_e4; + rvdff #(8) freeff (.*, .clk(free_clk), .din({lsu_freeze_dc3, lsu_freeze_pulse_e3, e4_valid, lsu_block_interrupts_dc3, internal_dbg_halt_mode, tlu_flush_lower_e4, tlu_i0_kill_writeb_e4, tlu_i1_kill_writeb_e4 }), + .dout({lsu_freeze_e4, lsu_freeze_pulse_e4, e5_valid, lsu_block_interrupts_e4, internal_dbg_halt_mode_f, tlu_flush_lower_wb, dec_tlu_i0_kill_writeb_wb, dec_tlu_i1_kill_writeb_wb})); + + + rvdff 
#(2) reset_ff (.*, .clk(free_clk), .din({1'b1, reset_detect}), .dout({reset_detect, reset_detected})); + assign reset_delayed = reset_detect ^ reset_detected; + + rvdff #(4) nmi_ff (.*, .clk(free_clk), .din({nmi_int_sync, nmi_int_detected, nmi_lsu_load_type, nmi_lsu_store_type}), .dout({nmi_int_delayed, nmi_int_detected_f, nmi_lsu_load_type_f, nmi_lsu_store_type_f})); + + // Filter subsequent bus errors after the first, until the lock on MDSEAC is cleared + assign nmi_lsu_detected = ~mdseac_locked_f & (lsu_imprecise_error_load_any | lsu_imprecise_error_store_any); + + assign nmi_int_detected = (nmi_int_sync & ~nmi_int_delayed) | nmi_lsu_detected | (nmi_int_detected_f & ~take_nmi_wb); + // if the first nmi is a lsu type, note it. If there's already an nmi pending, ignore + assign nmi_lsu_load_type = (nmi_lsu_detected & lsu_imprecise_error_load_any & ~(nmi_int_detected_f & ~take_nmi_wb)) | (nmi_lsu_load_type_f & ~take_nmi_wb); + assign nmi_lsu_store_type = (nmi_lsu_detected & lsu_imprecise_error_store_any & ~(nmi_int_detected_f & ~take_nmi_wb)) | (nmi_lsu_store_type_f & ~take_nmi_wb); + +`define MSTATUS_MIE 0 +`define MIP_MCEIP 3 +`define MIP_MEIP 2 +`define MIP_MTIP 1 +`define MIP_MSIP 0 + +`define MIE_MCEIE 3 +`define MIE_MEIE 2 +`define MIE_MTIE 1 +`define MIE_MSIE 0 + +`define DCSR_EBREAKM 15 +`define DCSR_STEPIE 11 +`define DCSR_STOPC 10 +//`define DCSR_STOPT 9 +`define DCSR_STEP 2 + + // ---------------------------------------------------------------------- + // MPC halt + // - can interact with debugger halt and v-v + + rvdff #(11) mpvhalt_ff (.*, .clk(free_clk), + .din({mpc_debug_halt_req_sync, mpc_debug_run_req_sync, + mpc_halt_state_ns, mpc_run_state_ns, debug_brkpt_status_ns, + mpc_debug_halt_ack_ns, mpc_debug_run_ack_ns, + dbg_halt_state_ns, dbg_run_state_ns, dbg_halt_state_f, + dec_tlu_mpc_halted_only_ns}), + .dout({mpc_debug_halt_req_sync_f, mpc_debug_run_req_sync_f, + mpc_halt_state_f, mpc_run_state_f, debug_brkpt_status_f, + mpc_debug_halt_ack_f, 
mpc_debug_run_ack_f, + dbg_halt_state_f, dbg_run_state_f, dbg_halt_state_ff, + dec_tlu_mpc_halted_only})); + + // turn level sensitive requests into pulses + assign mpc_debug_halt_req_sync_pulse = mpc_debug_halt_req_sync & ~mpc_debug_halt_req_sync_f; + assign mpc_debug_run_req_sync_pulse = mpc_debug_run_req_sync & ~mpc_debug_run_req_sync_f; + + // states + assign mpc_halt_state_ns = (mpc_halt_state_f | mpc_debug_halt_req_sync_pulse) & ~mpc_debug_run_req_sync; + assign mpc_run_state_ns = (mpc_run_state_f | (mpc_debug_run_req_sync_pulse & ~mpc_debug_run_ack_f)) & (internal_dbg_halt_mode_f & ~dcsr_single_step_running_f); + + // note, MPC halt can allow the jtag debugger to just start sending commands. When that happens, set the internal debugger halt state to prevent + // MPC run from starting the core. + assign dbg_halt_state_ns = (dbg_halt_state_f | (dbg_halt_req | dcsr_single_step_done_f | trigger_hit_dmode_wb | ebreak_to_debug_mode_wb)) & ~dbg_resume_req; + assign dbg_run_state_ns = (dbg_run_state_f | dbg_resume_req) & (internal_dbg_halt_mode_f & ~dcsr_single_step_running_f); + + // tell dbg we are only MPC halted + assign dec_tlu_mpc_halted_only_ns = ~dbg_halt_state_f & mpc_halt_state_f; + + // this asserts from detection of bkpt until after we leave debug mode + assign debug_brkpt_valid = ebreak_to_debug_mode_wb | trigger_hit_dmode_wb; + assign debug_brkpt_status_ns = (debug_brkpt_valid | debug_brkpt_status_f) & (internal_dbg_halt_mode & ~dcsr_single_step_running_f); + + // acks back to interface + assign mpc_debug_halt_ack_ns = mpc_halt_state_f & internal_dbg_halt_mode_f & mpc_debug_halt_req_sync & core_empty; + assign mpc_debug_run_ack_ns = (mpc_debug_run_req_sync & ~dbg_halt_state_ns & ~mpc_debug_halt_req_sync) | (mpc_debug_run_ack_f & mpc_debug_run_req_sync) ; + + // Pins + assign mpc_debug_halt_ack = mpc_debug_halt_ack_f; + assign mpc_debug_run_ack = mpc_debug_run_ack_f; + assign debug_brkpt_status = debug_brkpt_status_f; + + + // combine MPC and DBG halt
requests + assign debug_halt_req = (dbg_halt_req | mpc_debug_halt_req_sync | (reset_delayed & ~mpc_reset_run_req)) & ~internal_dbg_halt_mode_f; + + assign debug_resume_req = ~debug_resume_req_f & // squash back to back resumes + ((mpc_run_state_ns & ~dbg_halt_state_ns) | // MPC run req + (dbg_run_state_ns & ~mpc_halt_state_ns)); // dbg request is a pulse + + // HALT + + // dbg/pmu/fw requests halt, service as soon as lsu is not blocking interrupts + assign take_halt = (debug_halt_req_f | pmu_fw_halt_req_f) & ~lsu_block_interrupts_e4 & ~synchronous_flush_e4 & ~mret_e4 & ~halt_taken_f & ~dec_tlu_flush_noredir_wb & ~take_reset; + + // hold after we take a halt, so we don't keep taking halts + assign halt_taken = (dec_tlu_flush_noredir_wb & ~dec_tlu_flush_pause_wb) | (halt_taken_f & ~dbg_tlu_halted_f & ~pmu_fw_tlu_halted_f & ~interrupt_valid_wb); + + // After doing halt flush (RFNPC) wait until core is idle before asserting a particular halt mode + // It takes a cycle for mb_empty to assert after a fetch, take_halt covers that cycle + assign core_empty = lsu_halt_idle_any & lsu_halt_idle_any_f & ifu_miss_state_idle & ifu_miss_state_idle_f & ~debug_halt_req & ~debug_halt_req_d1; + +//-------------------------------------------------------------------------------- +// Debug start +// + + assign enter_debug_halt_req = (~internal_dbg_halt_mode_f & debug_halt_req) | dcsr_single_step_done_f | trigger_hit_dmode_wb | ebreak_to_debug_mode_wb; + + // dbg halt state active from request until non-step resume + assign internal_dbg_halt_mode = debug_halt_req_ns | (internal_dbg_halt_mode_f & ~(debug_resume_req_f & ~dcsr[`DCSR_STEP])); + // dbg halt can access csrs as long as we are not stepping + assign allow_dbg_halt_csr_write = internal_dbg_halt_mode_f & ~dcsr_single_step_running_f; + + + // hold debug_halt_req_ns high until we enter debug halt + assign debug_halt_req_ns = enter_debug_halt_req | (debug_halt_req_f & ~dbg_tlu_halted); + + assign dbg_tlu_halted = (debug_halt_req_f & 
core_empty & halt_taken) | (dbg_tlu_halted_f & ~debug_resume_req_f); + + assign resume_ack_ns = (debug_resume_req_f & dbg_tlu_halted_f & dbg_run_state_ns); + + assign dcsr_single_step_done = dec_tlu_i0_valid_e4 & ~dec_tlu_dbg_halted & dcsr[`DCSR_STEP] & ~rfpc_i0_e4; + + assign dcsr_single_step_running = (debug_resume_req_f & dcsr[`DCSR_STEP]) | (dcsr_single_step_running_f & ~dcsr_single_step_done_f); + + assign dbg_cmd_done_ns = dec_tlu_i0_valid_e4 & dec_tlu_dbg_halted; + + // used to hold off commits after an in-pipe debug mode request (triggers, DCSR) + assign request_debug_mode_e4 = (trigger_hit_dmode_e4 | ebreak_to_debug_mode_e4) | (request_debug_mode_wb & ~dec_tlu_flush_lower_wb); + + assign request_debug_mode_done = (request_debug_mode_wb | request_debug_mode_done_f) & ~dbg_tlu_halted_f; + + rvdff #(22) halt_ff (.*, .clk(free_clk), .din({halt_taken, take_halt, lsu_halt_idle_any, ifu_miss_state_idle, dbg_tlu_halted, + resume_ack_ns, dbg_cmd_done_ns, debug_halt_req_ns, debug_resume_req, trigger_hit_dmode_e4, + dcsr_single_step_done, debug_halt_req, update_hit_bit_e4[3:0], dec_tlu_wr_pause_wb, dec_pause_state, + request_debug_mode_e4, request_debug_mode_done, dcsr_single_step_running, dcsr_single_step_running_f}), + .dout({halt_taken_f, take_halt_f, lsu_halt_idle_any_f, ifu_miss_state_idle_f, dbg_tlu_halted_f, + dec_tlu_resume_ack, dec_dbg_cmd_done, debug_halt_req_f, debug_resume_req_f, trigger_hit_dmode_wb, + dcsr_single_step_done_f, debug_halt_req_d1, update_hit_bit_wb[3:0], dec_tlu_wr_pause_wb_f, dec_pause_state_f, + request_debug_mode_wb, request_debug_mode_done_f, dcsr_single_step_running_f, dcsr_single_step_running_ff})); + + assign dec_tlu_debug_stall = debug_halt_req_f; + assign dec_tlu_dbg_halted = dbg_tlu_halted_f; + assign dec_tlu_debug_mode = internal_dbg_halt_mode_f; + assign dec_tlu_pmu_fw_halted = pmu_fw_tlu_halted_f; + + // kill fetch redirection on flush if going to halt, or if there's a fence during db-halt + assign dec_tlu_flush_noredir_wb = 
take_halt_f | (fence_i_wb & internal_dbg_halt_mode_f) | dec_tlu_flush_pause_wb | (trigger_hit_wb & trigger_hit_dmode_wb); + + // 1 cycle after writing the PAUSE counter, flush with noredir to idle F1-D. + assign dec_tlu_flush_pause_wb = dec_tlu_wr_pause_wb_f & ~interrupt_valid_wb; + + // detect end of pause counter and rfpc + assign pause_expired_e4 = ~dec_pause_state & dec_pause_state_f & ~(ext_int_ready | ce_int_ready | timer_int_ready | nmi_int_detected) & ~interrupt_valid_wb & ~debug_halt_req_f & ~pmu_fw_halt_req_f & ~halt_taken_f; + + // stall dma fifo if a fence is pending, decode is waiting for lsu to idle before decoding the fence inst. + assign dec_tlu_stall_dma = dec_fence_pending; + assign dec_tlu_flush_leak_one_wb = dec_tlu_flush_lower_wb & dcsr[`DCSR_STEP] & (dec_tlu_resume_ack | dcsr_single_step_running); + assign dec_tlu_flush_err_wb = dec_tlu_flush_lower_wb & (ic_perr_wb | iccm_sbecc_wb); + + // If DM attempts to access an illegal CSR, send cmd_fail back + assign dec_dbg_cmd_fail = illegal_raw_wb & dec_dbg_cmd_done; + + + //-------------------------------------------------------------------------------- + //-------------------------------------------------------------------------------- + // Triggers + // +`define MTDATA1_DMODE 9 +`define MTDATA1_SEL 7 +`define MTDATA1_ACTION 6 +`define MTDATA1_CHAIN 5 +`define MTDATA1_MATCH 4 +`define MTDATA1_M_ENABLED 3 +`define MTDATA1_EXE 2 +`define MTDATA1_ST 1 +`define MTDATA1_LD 0 + + // Prioritize trigger hits with other exceptions. + // + // Trigger should have highest priority except: + // - trigger is an execute-data and there is an inst_access exception (lsu triggers won't fire, inst. is nop'd by decode) + // - trigger is a store-data and there is a lsu_acc_exc or lsu_ma_exc. 
+ assign trigger_execute[3:0] = {mtdata1_t3[`MTDATA1_EXE], mtdata1_t2[`MTDATA1_EXE], mtdata1_t1[`MTDATA1_EXE], mtdata1_t0[`MTDATA1_EXE]}; + assign trigger_data[3:0] = {mtdata1_t3[`MTDATA1_SEL], mtdata1_t2[`MTDATA1_SEL], mtdata1_t1[`MTDATA1_SEL], mtdata1_t0[`MTDATA1_SEL]}; + assign trigger_store[3:0] = {mtdata1_t3[`MTDATA1_ST], mtdata1_t2[`MTDATA1_ST], mtdata1_t1[`MTDATA1_ST], mtdata1_t0[`MTDATA1_ST]}; + + + // MSTATUS[MIE] needs to be on to take triggers unless the action is trigger to debug mode. + assign trigger_enabled[3:0] = {(mtdata1_t3[`MTDATA1_ACTION] | mstatus[`MSTATUS_MIE]) & mtdata1_t3[`MTDATA1_M_ENABLED], + (mtdata1_t2[`MTDATA1_ACTION] | mstatus[`MSTATUS_MIE]) & mtdata1_t2[`MTDATA1_M_ENABLED], + (mtdata1_t1[`MTDATA1_ACTION] | mstatus[`MSTATUS_MIE]) & mtdata1_t1[`MTDATA1_M_ENABLED], + (mtdata1_t0[`MTDATA1_ACTION] | mstatus[`MSTATUS_MIE]) & mtdata1_t0[`MTDATA1_M_ENABLED]}; + + // iside exceptions are always in i0 + assign i0_iside_trigger_has_pri_e4[3:0] = ~( (trigger_execute[3:0] & trigger_data[3:0] & {4{inst_acc_e4_raw}}) | // exe-data with inst_acc + ({4{exu_i0_br_error_e4 | exu_i0_br_start_error_e4}})); // branch error in i0 + + assign i1_iside_trigger_has_pri_e4[3:0] = ~( ({4{exu_i1_br_error_e4 | exu_i1_br_start_error_e4}}) ); // branch error in i1 + + // lsu excs have to line up with their respective triggers since the lsu op can be in either i0 or i1 but not both + assign i0_lsu_trigger_has_pri_e4[3:0] = ~(trigger_store[3:0] & trigger_data[3:0] & {4{lsu_i0_exc_dc4_raw}}); + assign i1_lsu_trigger_has_pri_e4[3:0] = ~(trigger_store[3:0] & trigger_data[3:0] & {4{lsu_i1_exc_dc4_raw}}); + + // Qual trigger hits + assign i0_trigger_eval_e4 = dec_tlu_i0_valid_e4 | ( dec_i0_load_e4 & lsu_freeze_pulse_e4); + assign i1_trigger_eval_e4 = dec_tlu_i1_valid_e4 | (~dec_i0_load_e4 & lsu_freeze_pulse_e4); + + assign i0_trigger_e4[3:0] = {4{i0_trigger_eval_e4}} & dec_tlu_packet_e4.i0trigger[3:0] & i0_iside_trigger_has_pri_e4[3:0] & i0_lsu_trigger_has_pri_e4[3:0] & 
trigger_enabled[3:0]; + assign i1_trigger_e4[3:0] = {4{i1_trigger_eval_e4}} & dec_tlu_packet_e4.i1trigger[3:0] & i1_iside_trigger_has_pri_e4[3:0] & i1_lsu_trigger_has_pri_e4[3:0] & trigger_enabled[3:0]; + + assign trigger_chain[2:0] = {mtdata1_t2[`MTDATA1_CHAIN], mtdata1_t1[`MTDATA1_CHAIN], mtdata1_t0[`MTDATA1_CHAIN]}; + + // chaining can mask raw trigger info + assign i0_trigger_chain_masked_e4[3:0] = {i0_trigger_e4[3] & (~trigger_chain[2] | i0_trigger_e4[2]), + i0_trigger_e4[2] & (~trigger_chain[2] | i0_trigger_e4[3]), + i0_trigger_e4[1] & (~trigger_chain[0] | i0_trigger_e4[0]), + i0_trigger_e4[0] & (~trigger_chain[0] | i0_trigger_e4[1])}; + + assign i1_trigger_chain_masked_e4[3:0] = {i1_trigger_e4[3] & (~trigger_chain[2] | i1_trigger_e4[2]), + i1_trigger_e4[2] & (~trigger_chain[2] | i1_trigger_e4[3]), + i1_trigger_e4[1] & (~trigger_chain[0] | i1_trigger_e4[0]), + i1_trigger_e4[0] & (~trigger_chain[0] | i1_trigger_e4[1])}; + + // This is the highest priority by this point. + assign i0_trigger_hit_raw_e4 = |i0_trigger_chain_masked_e4[3:0]; + assign i1_trigger_hit_raw_e4 = |i1_trigger_chain_masked_e4[3:0]; + + // Qual trigger hits + assign i0_trigger_hit_e4 = ~(dec_tlu_flush_lower_wb | dec_tlu_dbg_halted | lsu_freeze_pulse_e4) & i0_trigger_hit_raw_e4; + assign i1_trigger_hit_e4 = ~(dec_tlu_flush_lower_wb | ~tlu_i0_commit_cmt | exu_i0_br_mp_e4 | dec_tlu_dbg_halted | lsu_freeze_pulse_e4 | lsu_i0_rfnpc_dc4) & i1_trigger_hit_raw_e4; + + assign dec_tlu_cancel_e4 = (i0_trigger_hit_raw_e4 | i1_trigger_hit_raw_e4) & lsu_freeze_pulse_e4; + + // Actions include breakpoint, or dmode. Dmode is only possible if the DMODE bit is set. + // Otherwise, take a breakpoint. 
+ assign trigger_action[3:0] = {mtdata1_t3[`MTDATA1_ACTION] & mtdata1_t3[`MTDATA1_DMODE], + mtdata1_t2[`MTDATA1_ACTION] & mtdata1_t2[`MTDATA1_DMODE], + mtdata1_t1[`MTDATA1_ACTION] & mtdata1_t1[`MTDATA1_DMODE], + mtdata1_t0[`MTDATA1_ACTION] & mtdata1_t0[`MTDATA1_DMODE]}; + + // this is needed to set the HIT bit in the triggers + assign update_hit_bit_e4[3:0] = ({4{i0_trigger_hit_e4 }} & i0_trigger_chain_masked_e4[3:0]) | + ({4{i1_trigger_hit_e4 & ~i0_trigger_hit_e4}} & i1_trigger_chain_masked_e4[3:0]); + + // action, 1 means dmode. Simultaneous triggers with at least 1 set for dmode force entire action to dmode. + assign i0_trigger_action_e4 = |(i0_trigger_chain_masked_e4[3:0] & trigger_action[3:0]); + assign i1_trigger_action_e4 = |(i1_trigger_chain_masked_e4[3:0] & trigger_action[3:0]); + + assign trigger_hit_e4 = i0_trigger_hit_e4 | i1_trigger_hit_e4; + assign trigger_hit_dmode_e4 = (i0_trigger_hit_e4 & i0_trigger_action_e4) | (i1_trigger_hit_e4 & ~i0_trigger_hit_e4 & i1_trigger_action_e4); + + assign mepc_trigger_hit_sel_pc_e4 = trigger_hit_e4 & ~trigger_hit_dmode_e4; + + +// +// Debug end +//-------------------------------------------------------------------------------- + + //---------------------------------------------------------------------- + // + // Commit + // + //---------------------------------------------------------------------- + + + + //-------------------------------------------------------------------------------- + // External halt (not debug halt) + // - Fully interlocked handshake + // i_cpu_halt_req ____|--------------|_______________ + // core_empty ---------------|___________ + // o_cpu_halt_ack _________________|----|__________ + // o_cpu_halt_status _______________|---------------------|_________ + // i_cpu_run_req ______|----------|____ + // o_cpu_run_ack ____________|------|________ + // + + + // debug mode has priority, ignore PMU/FW halt/run while in debug mode + assign i_cpu_halt_req_sync_qual = i_cpu_halt_req_sync & 
~dec_tlu_debug_mode; + assign i_cpu_run_req_sync_qual = i_cpu_run_req_sync & ~dec_tlu_debug_mode & pmu_fw_tlu_halted_f; + + rvdff #(8) exthaltff (.*, .clk(free_clk), .din({i_cpu_halt_req_sync_qual, i_cpu_run_req_sync_qual, cpu_halt_status, + cpu_halt_ack, cpu_run_ack, internal_pmu_fw_halt_mode, + pmu_fw_halt_req_ns, pmu_fw_tlu_halted}), + .dout({i_cpu_halt_req_d1, i_cpu_run_req_d1_raw, o_cpu_halt_status, + o_cpu_halt_ack, o_cpu_run_ack, internal_pmu_fw_halt_mode_f, + pmu_fw_halt_req_f, pmu_fw_tlu_halted_f})); + + // only happens if we aren't in dbg_halt + assign ext_halt_pulse = i_cpu_halt_req_sync_qual & ~i_cpu_halt_req_d1; + + assign enter_pmu_fw_halt_req = ext_halt_pulse | fw_halt_req; + + assign pmu_fw_halt_req_ns = (enter_pmu_fw_halt_req | (pmu_fw_halt_req_f & ~pmu_fw_tlu_halted)) & ~debug_halt_req_f; + + assign internal_pmu_fw_halt_mode = pmu_fw_halt_req_ns | (internal_pmu_fw_halt_mode_f & ~i_cpu_run_req_d1 & ~debug_halt_req_f); + + // debug halt has priority + assign pmu_fw_tlu_halted = ((pmu_fw_halt_req_f & core_empty & halt_taken & ~enter_debug_halt_req) | (pmu_fw_tlu_halted_f & ~i_cpu_run_req_d1)) & ~debug_halt_req_f; + + assign cpu_halt_ack = i_cpu_halt_req_d1 & pmu_fw_tlu_halted_f; + assign cpu_halt_status = (pmu_fw_tlu_halted_f & ~i_cpu_run_req_d1) | (o_cpu_halt_status & ~i_cpu_run_req_d1 & ~internal_dbg_halt_mode_f); + assign cpu_run_ack = (o_cpu_halt_status & i_cpu_run_req_sync_qual) | (o_cpu_run_ack & i_cpu_run_req_sync_qual); + assign debug_mode_status = internal_dbg_halt_mode_f; + assign o_debug_mode_status = debug_mode_status;// & ~mpc_debug_run_ack_f; + +`ifdef ASSERT_ON + assert_commit_while_halted: assert #0 (~((tlu_i0_commit_cmt | tlu_i1_commit_cmt) & o_cpu_halt_status)) else $display("ERROR: Commiting while cpu_halt_status asserted!"); +`endif + + // high priority interrupts can wakeup from external halt, so can unmasked timer interrupts + assign i_cpu_run_req_d1 = i_cpu_run_req_d1_raw | ((nmi_int_detected | timer_int_ready | (mhwakeup &
mhwakeup_ready)) & o_cpu_halt_status); + + //-------------------------------------------------------------------------------- + //-------------------------------------------------------------------------------- + + // LSU exceptions (LSU responsible for prioritizing simultaneous cases) + lsu_error_pkt_t lsu_error_pkt_dc4; + + rvdff #( $bits(lsu_error_pkt_t) ) lsu_error_dc4ff (.*, .clk(lsu_e3_e4_clk), .din(lsu_error_pkt_dc3), .dout(lsu_error_pkt_dc4)); + + logic lsu_single_ecc_error_wb_ns; + assign lsu_single_ecc_error_wb_ns = lsu_error_pkt_dc4.single_ecc_error;// & ((~lsu_error_pkt_dc4.inst_pipe & tlu_i0_commit_cmt) | (lsu_error_pkt_dc4.inst_pipe & tlu_i1_commit_cmt)); + rvdff #(2) lsu_dccm_errorff (.*, .clk(free_clk), .din({mdseac_locked_ns, lsu_single_ecc_error_wb_ns}), .dout({mdseac_locked_f, lsu_single_ecc_error_wb})); + + logic [31:0] lsu_error_pkt_addr_dc4, lsu_error_pkt_addr_wb; + assign lsu_error_pkt_addr_dc4[31:0] = lsu_error_pkt_dc4.addr[31:0]; + rvdff #(34) lsu_error_wbff (.*, .clk(lsu_e4_e5_clk), .din({lsu_error_pkt_addr_dc4[31:0], lsu_exc_valid_e4, lsu_i0_exc_dc4}), .dout({lsu_error_pkt_addr_wb[31:0], lsu_exc_valid_wb, lsu_i0_exc_wb})); + + + // lsu exception is valid unless it's in pipe1 and there was a rfpc_i0_e4, brmp, or an iside exception in pipe0. 
+ assign lsu_exc_valid_e4_raw = lsu_error_pkt_dc4.exc_valid & ~(lsu_error_pkt_dc4.inst_pipe & (rfpc_i0_e4 | i0_exception_valid_e4 | exu_i0_br_mp_e4)) & ~dec_tlu_flush_lower_wb; + + assign lsu_i0_exc_dc4_raw = lsu_error_pkt_dc4.exc_valid & ~lsu_error_pkt_dc4.inst_pipe; + assign lsu_i1_exc_dc4_raw = lsu_error_pkt_dc4.exc_valid & lsu_error_pkt_dc4.inst_pipe; + assign lsu_i0_exc_dc4 = lsu_i0_exc_dc4_raw & lsu_exc_valid_e4_raw & ~i0_trigger_hit_e4; + assign lsu_i1_exc_dc4 = lsu_i1_exc_dc4_raw & lsu_exc_valid_e4_raw & ~trigger_hit_e4; + assign lsu_exc_valid_e4 = lsu_i0_exc_dc4 | lsu_i1_exc_dc4; + + assign lsu_exc_ma_dc4 = (lsu_i0_exc_dc4 | lsu_i1_exc_dc4) & ~lsu_error_pkt_dc4.exc_type; + assign lsu_exc_acc_dc4 = (lsu_i0_exc_dc4 | lsu_i1_exc_dc4) & lsu_error_pkt_dc4.exc_type; + assign lsu_exc_st_dc4 = (lsu_i0_exc_dc4 | lsu_i1_exc_dc4) & lsu_error_pkt_dc4.inst_type; + + // Single bit ECC errors on loads are RFNPC corrected, with the corrected data written to the GPR. + // LSU turns the load into a store and patches the data in the DCCM + assign lsu_i0_rfnpc_dc4 = dec_tlu_i0_valid_e4 & ~lsu_error_pkt_dc4.inst_pipe & ~lsu_error_pkt_dc4.inst_type & + lsu_error_pkt_dc4.single_ecc_error & ~lsu_error_pkt_dc4.dma_valid & ~i0_trigger_hit_e4; + assign lsu_i1_rfnpc_dc4 = dec_tlu_i1_valid_e4 & lsu_error_pkt_dc4.inst_pipe & ~lsu_error_pkt_dc4.inst_type & + lsu_error_pkt_dc4.single_ecc_error & ~lsu_error_pkt_dc4.dma_valid & ~i0_trigger_hit_e4 & ~i1_trigger_hit_e4; + + // Branch prediction updating + assign dec_tlu_br0_addr_e4[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = exu_i0_br_index_e4[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + assign dec_tlu_br0_bank_e4[1:0] = exu_i0_br_bank_e4[1:0]; + assign dec_tlu_br1_addr_e4[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = exu_i1_br_index_e4[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + assign dec_tlu_br1_bank_e4[1:0] = exu_i1_br_bank_e4[1:0]; + + + // Final commit valids + assign tlu_i0_commit_cmt = dec_tlu_i0_valid_e4 & + ~rfpc_i0_e4 & + ~lsu_i0_exc_dc4 & + ~inst_acc_e4 & + 
~dec_tlu_dbg_halted & + ~request_debug_mode_wb & + ~i0_trigger_hit_e4; + + assign tlu_i1_commit_cmt = dec_tlu_i1_valid_e4 & + ~rfpc_i0_e4 & ~rfpc_i1_e4 & + ~exu_i0_br_mp_e4 & + ~lsu_i0_exc_dc4 & ~lsu_i1_exc_dc4 & + ~lsu_i0_rfnpc_dc4 & + ~inst_acc_e4 & + ~request_debug_mode_wb & + ~trigger_hit_e4; + + // unified place to manage the killing of arch state writebacks + assign tlu_i0_kill_writeb_e4 = rfpc_i0_e4 | lsu_i0_exc_dc4 | inst_acc_e4 | (illegal_e4 & dec_tlu_dbg_halted) | i0_trigger_hit_e4 ; + assign tlu_i1_kill_writeb_e4 = rfpc_i0_e4 | rfpc_i1_e4 | lsu_exc_valid_e4 | exu_i0_br_mp_e4 | inst_acc_e4 | (illegal_e4 & dec_tlu_dbg_halted) | trigger_hit_e4 | lsu_i0_rfnpc_dc4; + + // refetch PC, microarch flush + // ic errors only in pipe0 + assign rfpc_i0_e4 = dec_tlu_i0_valid_e4 & ~tlu_flush_lower_wb & (exu_i0_br_error_e4 | exu_i0_br_start_error_e4 | ic_perr_e4 | iccm_sbecc_e4) & ~i0_trigger_hit_e4; + assign rfpc_i1_e4 = dec_tlu_i1_valid_e4 & ~tlu_flush_lower_wb & ~i0_exception_valid_e4 & ~exu_i0_br_mp_e4 & ~lsu_i0_exc_dc4 & ~lsu_i0_rfnpc_dc4 & + ~(exu_i0_br_error_e4 | exu_i0_br_start_error_e4 | ic_perr_e4 | iccm_sbecc_e4) & + (exu_i1_br_error_e4 | exu_i1_br_start_error_e4) & + ~trigger_hit_e4; + + // go ahead and repair the branch error on other flushes, doesn't have to be the rfpc flush + assign dec_tlu_br0_error_e4 = exu_i0_br_error_e4 & dec_tlu_i0_valid_e4 & ~tlu_flush_lower_wb; + assign dec_tlu_br0_start_error_e4 = exu_i0_br_start_error_e4 & dec_tlu_i0_valid_e4 & ~tlu_flush_lower_wb; + assign dec_tlu_br0_v_e4 = exu_i0_br_valid_e4 & dec_tlu_i0_valid_e4 & ~tlu_flush_lower_wb & ~exu_i0_br_mp_e4; + + assign dec_tlu_br1_error_e4 = exu_i1_br_error_e4 & dec_tlu_i1_valid_e4 & ~tlu_flush_lower_wb & ~exu_i0_br_mp_e4; + assign dec_tlu_br1_start_error_e4 = exu_i1_br_start_error_e4 & dec_tlu_i1_valid_e4 & ~tlu_flush_lower_wb & ~exu_i0_br_mp_e4; + assign dec_tlu_br1_v_e4 = exu_i1_br_valid_e4 & ~tlu_flush_lower_wb & dec_tlu_i1_valid_e4 & ~exu_i0_br_mp_e4 & ~exu_i1_br_mp_e4; + 
+`ifdef RV_BTB_48 + rvdff #(20) +`else + rvdff #(18) +`endif + bp_wb_ff (.*, .clk(e4e5_clk), + .din({exu_i0_br_hist_e4[1:0], + dec_tlu_br0_error_e4, + dec_tlu_br0_start_error_e4, + dec_tlu_br0_v_e4, + exu_i1_br_hist_e4[1:0], + dec_tlu_br1_error_e4, + dec_tlu_br1_start_error_e4, + dec_tlu_br1_v_e4, + dec_tlu_br0_bank_e4[1:0], + dec_tlu_br1_bank_e4[1:0], + exu_i0_br_way_e4, + exu_i1_br_way_e4, + exu_i0_br_middle_e4, + exu_i1_br_middle_e4 + }), + .dout({dec_tlu_br0_wb_pkt.hist[1:0], + dec_tlu_br0_wb_pkt.br_error, + dec_tlu_br0_wb_pkt.br_start_error, + dec_tlu_br0_wb_pkt.valid, + dec_tlu_br1_wb_pkt.hist[1:0], + dec_tlu_br1_wb_pkt.br_error, + dec_tlu_br1_wb_pkt.br_start_error, + dec_tlu_br1_wb_pkt.valid, + dec_tlu_br0_wb_pkt.bank[1:0], + dec_tlu_br1_wb_pkt.bank[1:0], + dec_tlu_br0_wb_pkt.way, + dec_tlu_br1_wb_pkt.way, + dec_tlu_br0_wb_pkt.middle, + dec_tlu_br1_wb_pkt.middle + })); + + rvdff #(`RV_BHT_GHR_SIZE*2) bp_wb_ghrff (.*, .clk(e4e5_clk), + .din({exu_i0_br_fghr_e4[`RV_BHT_GHR_RANGE], + exu_i1_br_fghr_e4[`RV_BHT_GHR_RANGE] + }), + .dout({dec_tlu_br0_wb_pkt.fghr[`RV_BHT_GHR_RANGE], + dec_tlu_br1_wb_pkt.fghr[`RV_BHT_GHR_RANGE] + })); + + rvdff #(2*$bits(dec_tlu_br0_addr_e4[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO])) + bp_wb_index_ff (.*, .clk(e4e5_clk), + .din({dec_tlu_br0_addr_e4[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO], + dec_tlu_br1_addr_e4[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]}), + .dout({dec_tlu_br0_wb_pkt.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO], + dec_tlu_br1_wb_pkt.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]})); + + // only expect these in pipe 0 + assign ebreak_e4 = (dec_tlu_packet_e4.pmu_i0_itype == EBREAK) & dec_tlu_i0_valid_e4 & ~i0_trigger_hit_e4 & ~dcsr[`DCSR_EBREAKM]; + assign ecall_e4 = (dec_tlu_packet_e4.pmu_i0_itype == ECALL) & dec_tlu_i0_valid_e4 & ~i0_trigger_hit_e4; + assign illegal_e4 = ~dec_tlu_packet_e4.legal & dec_tlu_i0_valid_e4 & ~i0_trigger_hit_e4; + assign mret_e4 = (dec_tlu_packet_e4.pmu_i0_itype == MRET) & dec_tlu_i0_valid_e4 & ~i0_trigger_hit_e4; + // fence_i 
includes debug only fence_i's + assign fence_i_e4 = (dec_tlu_packet_e4.fence_i & dec_tlu_i0_valid_e4 & ~i0_trigger_hit_e4); //| csr_fence_i_wb; + assign ic_perr_e4 = dec_tlu_packet_e4.perr & dec_tlu_i0_valid_e4 & ~i0_trigger_hit_e4; + assign iccm_sbecc_e4 = dec_tlu_packet_e4.sbecc & dec_tlu_i0_valid_e4 & ~i0_trigger_hit_e4; + assign inst_acc_e4_raw = dec_tlu_packet_e4.icaf & dec_tlu_i0_valid_e4; + assign inst_acc_e4 = inst_acc_e4_raw & ~rfpc_i0_e4 & ~i0_trigger_hit_e4; + assign inst_acc_second_e4 = dec_tlu_packet_e4.icaf_f1; + + assign ebreak_to_debug_mode_e4 = (dec_tlu_packet_e4.pmu_i0_itype == EBREAK) & dec_tlu_i0_valid_e4 & ~i0_trigger_hit_e4 & dcsr[`DCSR_EBREAKM]; + + assign illegal_e4_qual = illegal_e4 & ~dec_tlu_dbg_halted; + + rvdff #(10) exctype_wb_ff (.*, .clk(e4e5_clk), + .din({ic_perr_e4, iccm_sbecc_e4, ebreak_e4, ebreak_to_debug_mode_e4, illegal_e4, + illegal_e4_qual, inst_acc_e4, inst_acc_second_e4, fence_i_e4, mret_e4}), + .dout({ic_perr_wb, iccm_sbecc_wb, ebreak_wb, ebreak_to_debug_mode_wb, illegal_raw_wb, + illegal_wb, inst_acc_wb, inst_acc_second_wb, fence_i_wb, mret_wb})); + + assign dec_tlu_fence_i_wb = fence_i_wb; + // + // Exceptions + // + // - MEPC <- PC + // - PC <- MTVEC, assert flush_lower + // - MCAUSE <- cause + // - MTVAL <- + // - MPIE <- MIE + // - MIE <- 0 + // + assign i0_exception_valid_e4 = (ebreak_e4 | ecall_e4 | illegal_e4 | inst_acc_e4) & ~rfpc_i0_e4 & ~dec_tlu_dbg_halted; + + // Cause: + // + // 0x2 : illegal + // 0x3 : breakpoint + // 0xb : Environment call M-mode + + + assign exc_cause_e4[4:0] = ( ({5{take_ext_int}} & 5'h0b) | + ({5{take_timer_int}} & 5'h07) | + ({5{take_ce_int}} & 5'h1e) | + ({5{illegal_e4}} & 5'h02) | + ({5{ecall_e4}} & 5'h0b) | + ({5{inst_acc_e4}} & 5'h01) | + ({5{ebreak_e4 | trigger_hit_e4}} & 5'h03) | + ({5{lsu_exc_ma_dc4 & ~lsu_exc_st_dc4}} & 5'h04) | + ({5{lsu_exc_acc_dc4 & ~lsu_exc_st_dc4}} & 5'h05) | + ({5{lsu_exc_ma_dc4 & lsu_exc_st_dc4}} & 5'h06) | + ({5{lsu_exc_acc_dc4 & lsu_exc_st_dc4}} & 5'h07) 
+ ) & ~{5{take_nmi}}; + + // + // Interrupts + // + // Priv spec 1.10, 3.1.14 + // "Multiple simultaneous interrupts and traps at the same privilege level are handled in the following + // decreasing priority order: external interrupts, software interrupts, timer interrupts, then finally any + // synchronous traps." + // + // For above purposes, exceptions that are committed have already happened and will cause an int at E4 to wait a cycle + // or more if MSTATUS[MIE] is cleared. + // + // -in priority order, highest to lowest + // -single cycle window where a csr write to MIE/MSTATUS is at E4 when the other conditions for externals are met. + // Hold off externals for a cycle to make sure we are consistent with what was just written + assign mhwakeup_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[`MIP_MEIP] & mie_ns[`MIE_MEIE]; + assign ext_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[`MIP_MEIP] & mie_ns[`MIE_MEIE]; + assign ce_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[`MIP_MCEIP] & mie_ns[`MIE_MCEIE]; + assign timer_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[`MIP_MTIP] & mie_ns[`MIE_MTIE]; + + // mispredicts + assign i0_mp_e4 = exu_i0_flush_lower_e4 & ~i0_trigger_hit_e4; + assign i1_mp_e4 = exu_i1_flush_lower_e4 & ~trigger_hit_e4 & ~lsu_i0_rfnpc_dc4; + + assign internal_dbg_halt_timers = internal_dbg_halt_mode_f & ~dcsr_single_step_running; + + // Prioritize externals + assign block_interrupts = ( (lsu_block_interrupts_e4 & ~dec_tlu_flush_lower_wb) | // I/O transaction on the bus pending + (internal_dbg_halt_mode & (~dcsr_single_step_running | dec_tlu_i0_valid_e4)) | // No ints in db-halt unless we are single stepping + internal_pmu_fw_halt_mode | i_cpu_halt_req_d1 |// No ints in PMU/FW halt. 
First we exit halt + take_nmi | // NMI is top priority + ebreak_to_debug_mode_e4 | // Heading to debug mode, hold off ints + synchronous_flush_e4 | // synchronous flush (exception, mispredict, rfpc, ...) this cycle + exc_or_int_valid_wb | // exc/int in the previous cycle (need time for MIE to update) + mret_wb | // mret (need time for MIE to update) + mret_e4 // mret in progress, for cases where the ISR enables ints before mret + ); + + assign take_ext_int = ext_int_ready & ~block_interrupts; + assign take_ce_int = ce_int_ready & ~ext_int_ready & ~block_interrupts; + assign take_timer_int = timer_int_ready & ~ext_int_ready & ~ce_int_ready & ~block_interrupts; + + assign take_reset = reset_delayed & mpc_reset_run_req; + assign take_nmi = nmi_int_detected & ~internal_pmu_fw_halt_mode & (~internal_dbg_halt_mode | (dcsr_single_step_running_f & dcsr[`DCSR_STEPIE] & ~dec_tlu_i0_valid_e4 & ~dcsr_single_step_done_f)) & ~synchronous_flush_e4 & ~mret_e4 & ~take_reset & ~ebreak_to_debug_mode_e4; + + assign interrupt_valid = take_ext_int | take_timer_int | take_nmi | take_ce_int; + + + // Compute interrupt path: + // If vectored async is set in mtvec, flush path for interrupts is MTVEC + (4 * CAUSE); paths are indexed [31:1], so the cause is shifted left by 1 (not 2) before the add + assign vectored_cause[5:0] = ({1'b0, exc_cause_e4[4:0]} << 1); + assign {vpath_overflow_nc, vectored_path[31:1]} = {mtvec[30:1], 1'b0} + {25'b0, vectored_cause[5:0]}; + assign interrupt_path[31:1] = take_nmi ? nmi_vec[31:1] : ((mtvec[0] == 1'b1) ? 
vectored_path[31:1] : {mtvec[30:1], 1'b0}); + + assign sel_npc_e4 = lsu_i0_rfnpc_dc4 | (lsu_i1_rfnpc_dc4 & tlu_i1_commit_cmt) | fence_i_e4 | (i_cpu_run_req_d1 & ~interrupt_valid); + assign sel_npc_wb = (i_cpu_run_req_d1 & pmu_fw_tlu_halted_f) | pause_expired_e4; + + + assign synchronous_flush_e4 = i0_exception_valid_e4 | // exception + i0_mp_e4 | i1_mp_e4 | // mispredict + rfpc_i0_e4 | rfpc_i1_e4 | // rfpc + lsu_exc_valid_e4 | // lsu exception in either pipe 0 or pipe 1 + fence_i_e4 | // fence, a rfnpc + lsu_i0_rfnpc_dc4 | lsu_i1_rfnpc_dc4 | + debug_resume_req_f | // resume from debug halt, fetch the dpc + sel_npc_wb | // resume from pmu/fw halt, or from pause and fetch the NPC + dec_tlu_wr_pause_wb | // flush at start of pause + trigger_hit_e4; // trigger hit, ebreak or goto debug mode + + assign tlu_flush_lower_e4 = interrupt_valid | mret_e4 | synchronous_flush_e4 | take_halt | take_reset; + + assign tlu_flush_path_e4[31:1] = take_reset ? rst_vec[31:1] : + + ( ({31{~take_nmi & i0_mp_e4}} & exu_i0_flush_path_e4[31:1]) | + ({31{~take_nmi & ~i0_mp_e4 & i1_mp_e4 & ~rfpc_i0_e4 & ~lsu_i0_exc_dc4}} & exu_i1_flush_path_e4[31:1]) | + ({31{~take_nmi & sel_npc_e4}} & npc_e4[31:1]) | + ({31{~take_nmi & rfpc_i0_e4}} & dec_tlu_i0_pc_e4[31:1]) | + ({31{~take_nmi & rfpc_i1_e4}} & dec_tlu_i1_pc_e4[31:1]) | + ({31{interrupt_valid}} & interrupt_path[31:1]) | + ({31{(i0_exception_valid_e4 | lsu_exc_valid_e4 | (trigger_hit_e4 & ~trigger_hit_dmode_e4)) & ~interrupt_valid}} & {mtvec[30:1],1'b0}) | + ({31{~take_nmi & mret_e4 & ~wr_mepc_wb}} & mepc[31:1]) | + ({31{~take_nmi & debug_resume_req_f}} & dpc[31:1]) | + ({31{~take_nmi & sel_npc_wb}} & npc_wb[31:1]) | + ({31{~take_nmi & mret_e4 & wr_mepc_wb}} & dec_csr_wrdata_wb[31:1]) ); + + rvdff #(31) flush_lower_ff (.*, .clk(e4e5_int_clk), + .din({tlu_flush_path_e4[31:1]}), + .dout({tlu_flush_path_wb[31:1]})); + + assign dec_tlu_flush_lower_wb = tlu_flush_lower_wb; + assign dec_tlu_flush_path_wb[31:1] = tlu_flush_path_wb[31:1]; + + + // this 
is used to capture mepc, etc. + assign exc_or_int_valid = lsu_exc_valid_e4 | i0_exception_valid_e4 | interrupt_valid | (trigger_hit_e4 & ~trigger_hit_dmode_e4); + + assign lsu_block_interrupts_dc3 = lsu_freeze_external_ints_dc3 & ~dec_tlu_flush_lower_wb; + + rvdff #(15) excinfo_wb_ff (.*, .clk(e4e5_int_clk), + .din({interrupt_valid, i0_exception_valid_e4, exc_or_int_valid, + exc_cause_e4[4:0], tlu_i0_commit_cmt & ~illegal_e4, tlu_i1_commit_cmt, + mepc_trigger_hit_sel_pc_e4, trigger_hit_e4, i0_trigger_hit_e4, + take_nmi, pause_expired_e4 }), + .dout({interrupt_valid_wb, i0_exception_valid_wb, exc_or_int_valid_wb, + exc_cause_wb[4:0], i0_valid_wb, i1_valid_wb, + mepc_trigger_hit_sel_pc_wb, trigger_hit_wb, i0_trigger_hit_wb, + take_nmi_wb, pause_expired_wb})); + + //---------------------------------------------------------------------- + // + // CSRs + // + //---------------------------------------------------------------------- + + + // ---------------------------------------------------------------------- + // MISA (RO) + // [31:30] XLEN - implementation width, 2'b01 - 32 bits + // [12] M - integer mul/div + // [8] I - RV32I + // [2] C - Compressed extension + `define MISA 12'h301 + + // MVENDORID, MARCHID, MIMPID, MHARTID + `define MVENDORID 12'hf11 + `define MARCHID 12'hf12 + `define MIMPID 12'hf13 + `define MHARTID 12'hf14 + + + // ---------------------------------------------------------------------- + // MSTATUS (RW) + // [12:11] MPP : Prior priv level, always 2'b11, not flopped + // [7] MPIE : Int enable previous [1] + // [3] MIE : Int enable [0] + `define MSTATUS 12'h300 + + + //When executing a MRET instruction, supposing MPP holds the value 3, MIE + //is set to MPIE; the privilege mode is changed to 3; MPIE is set to 1; and MPP is set to 3 + + assign dec_csr_wen_wb_mod = dec_csr_wen_wb & ~trigger_hit_wb; + assign wr_mstatus_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MSTATUS); + + assign mstatus_ns[1:0] = ( ({2{exc_or_int_valid_wb}} & 
{mstatus[`MSTATUS_MIE], 1'b0}) | + ({2{mret_wb & ~exc_or_int_valid_wb}} & {1'b1, mstatus[1]}) | + ({2{wr_mstatus_wb & ~exc_or_int_valid_wb}} & {dec_csr_wrdata_wb[7], dec_csr_wrdata_wb[3]}) | + ({2{~wr_mstatus_wb & ~exc_or_int_valid_wb & ~mret_wb}} & mstatus[1:0]) ); + + // gate MIE if we are single stepping and DCSR[STEPIE] is off + assign mstatus_mie_ns = mstatus_ns[`MSTATUS_MIE] & (~dcsr_single_step_running_f | dcsr[`DCSR_STEPIE]); + rvdff #(2) mstatus_ff (.*, .clk(free_clk), .din(mstatus_ns[1:0]), .dout(mstatus[1:0])); + + // ---------------------------------------------------------------------- + // MTVEC (RW) + // [31:2] BASE : Trap vector base address + // [1] - Reserved, not implemented, reads zero (not stored: the flop holds {BASE, MODE} as mtvec[30:0]) + // [0] MODE : 0 = Direct, 1 = Asyncs are vectored to BASE + (4 * CAUSE) + `define MTVEC 12'h305 + + assign wr_mtvec_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MTVEC); + assign mtvec_ns[30:0] = {dec_csr_wrdata_wb[31:2], dec_csr_wrdata_wb[0]} ; + rvdffe #(31) mtvec_ff (.*, .en(wr_mtvec_wb), .din(mtvec_ns[30:0]), .dout(mtvec[30:0])); + + // ---------------------------------------------------------------------- + // MIP (RW) + // + // [30] MCEIP : (RO) M-Mode Correctable Error interrupt pending + // [11] MEIP : (RO) M-Mode external interrupt pending + // [7] MTIP : (RO) M-Mode timer interrupt pending + // [3] MSIP : (RO) M-Mode software interrupt pending + `define MIP 12'h344 + + assign ce_int = (mdccme_ce_req | miccme_ce_req | mice_ce_req); + + assign mip_ns[3:0] = {ce_int, mexintpend, timer_int_sync, mip[0]}; + rvdff #(4) mip_ff (.*, .clk(free_clk), .din(mip_ns[3:0]), .dout(mip[3:0])); + + // ---------------------------------------------------------------------- + // MIE (RW) + // [30] MCEIE : (RW) M-Mode Correctable Error interrupt enable + // [11] MEIE : (RW) M-Mode external interrupt enable + // [7] MTIE : (RW) M-Mode timer interrupt enable + // [3] MSIE : (RW) M-Mode software interrupt enable + `define MIE 12'h304 + + assign wr_mie_wb = 
dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MIE); + assign mie_ns[3:0] = wr_mie_wb ? {dec_csr_wrdata_wb[30], dec_csr_wrdata_wb[11], dec_csr_wrdata_wb[7], dec_csr_wrdata_wb[3]} : mie[3:0]; + rvdff #(4) mie_ff (.*, .clk(csr_wr_clk), .din(mie_ns[3:0]), .dout(mie[3:0])); + + + // ---------------------------------------------------------------------- + // MCYCLEL (RW) + // [31:0] : Lower Cycle count + + `define MCYCLEL 12'hb00 + + + assign wr_mcyclel_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MCYCLEL); + + logic mcyclel_cout_in; + + assign mcyclel_cout_in = ~(kill_ebreak_count_wb | (dec_tlu_dbg_halted & dcsr[`DCSR_STOPC]) | dec_tlu_pmu_fw_halted); + + assign {mcyclel_cout, mcyclel_inc[31:0]} = mcyclel[31:0] + {31'b0, mcyclel_cout_in}; + assign mcyclel_ns[31:0] = wr_mcyclel_wb ? dec_csr_wrdata_wb[31:0] : mcyclel_inc[31:0]; + + rvdffe #(32) mcyclel_ff (.*, .en(wr_mcyclel_wb | mcyclel_cout_in), .din(mcyclel_ns[31:0]), .dout(mcyclel[31:0])); + rvdff #(1) mcyclef_cout_ff (.*, .clk(free_clk), .din(mcyclel_cout & ~wr_mcycleh_wb), .dout(mcyclel_cout_f)); + // ---------------------------------------------------------------------- + // MCYCLEH (RW) + // [63:32] : Higher Cycle count + // Chained with mcyclel. Note: mcyclel overflow due to a mcycleh write gets ignored. + + `define MCYCLEH 12'hb80 + + assign wr_mcycleh_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MCYCLEH); + + assign {mcycleh_cout_nc, mcycleh_inc[31:0]} = mcycleh[31:0] + {31'b0, mcyclel_cout_f}; + assign mcycleh_ns[31:0] = wr_mcycleh_wb ? dec_csr_wrdata_wb[31:0] : mcycleh_inc[31:0]; + + rvdffe #(32) mcycleh_ff (.*, .en(wr_mcycleh_wb | mcyclel_cout_f), .din(mcycleh_ns[31:0]), .dout(mcycleh[31:0])); + + // ---------------------------------------------------------------------- + // MINSTRETL (RW) + // [31:0] : Lower Instruction retired count + // From the spec "Some CSRs, such as the instructions retired counter, instret, may be modified as side effects + // of instruction execution. 
In these cases, if a CSR access instruction reads a CSR, it reads the + // value prior to the execution of the instruction. If a CSR access instruction writes a CSR, the + // update occurs after the execution of the instruction. In particular, a value written to instret by + // one instruction will be the value read by the following instruction (i.e., the increment of instret + // caused by the first instruction retiring happens before the write of the new value)." + `define MINSTRETL 12'hb02 + + assign kill_ebreak_count_wb = ebreak_to_debug_mode_wb & dcsr[`DCSR_STOPC]; + + assign wr_minstretl_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MINSTRETL); + + assign {minstretl_cout, minstretl_inc[31:0]} = minstretl[31:0] + {31'b0,i0_valid_wb} + {31'b0,i1_valid_wb}; + + assign minstret_enable = (i0_valid_wb & ~(dec_tlu_dbg_halted & dcsr[`DCSR_STOPC]) & ~kill_ebreak_count_wb) | i1_valid_wb | wr_minstretl_wb; + + assign minstretl_ns[31:0] = wr_minstretl_wb ? dec_csr_wrdata_wb[31:0] : minstretl_inc[31:0]; + rvdffe #(32) minstretl_ff (.*, .en(minstret_enable), .din(minstretl_ns[31:0]), .dout(minstretl[31:0])); + logic minstret_enable_f; + rvdff #(2) minstretf_cout_ff (.*, .clk(free_clk), .din({minstret_enable, minstretl_cout & ~wr_minstreth_wb}), .dout({minstret_enable_f, minstretl_cout_f})); + + assign minstretl_read[31:0] = minstretl[31:0]; + // ---------------------------------------------------------------------- + // MINSTRETH (RW) + // [63:32] : Higher Instret count + // Chained with minstretl. Note: minstretl overflow due to a minstreth write gets ignored. + + `define MINSTRETH 12'hb82 + + assign wr_minstreth_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MINSTRETH); + + assign {minstreth_cout_nc, minstreth_inc[31:0]} = minstreth[31:0] + {31'b0, minstretl_cout_f}; + assign minstreth_ns[31:0] = wr_minstreth_wb ? 
dec_csr_wrdata_wb[31:0] : minstreth_inc[31:0]; + rvdffe #(32) minstreth_ff (.*, .en(minstret_enable_f | wr_minstreth_wb), .din(minstreth_ns[31:0]), .dout(minstreth[31:0])); + + assign minstreth_read[31:0] = minstreth_inc[31:0]; + + // ---------------------------------------------------------------------- + // MSCRATCH (RW) + // [31:0] : Scratch register + `define MSCRATCH 12'h340 + + assign wr_mscratch_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MSCRATCH); + + rvdffe #(32) mscratch_ff (.*, .en(wr_mscratch_wb), .din(dec_csr_wrdata_wb[31:0]), .dout(mscratch[31:0])); + + // ---------------------------------------------------------------------- + // MEPC (RW) + // [31:1] : Exception PC + `define MEPC 12'h341 + + // NPC + logic sel_exu_npc_e4, sel_flush_npc_e4, sel_i0_npc_e4, sel_hold_npc_e4; + + // commit all ops + assign sel_exu_npc_e4 = ~dec_tlu_dbg_halted & ~tlu_flush_lower_wb & (dec_tlu_i0_valid_e4 | dec_tlu_i1_valid_e4) & ~(dec_tlu_i1_valid_e4 & lsu_i0_rfnpc_dc4); + // commit just i0 when there's a valid i1 that should be flushed + assign sel_i0_npc_e4 = ~dec_tlu_dbg_halted & ~tlu_flush_lower_wb & dec_tlu_i0_valid_e4 & lsu_i0_rfnpc_dc4 & dec_tlu_i1_valid_e4; + // flush, update npc + assign sel_flush_npc_e4 = ~dec_tlu_dbg_halted & tlu_flush_lower_wb & ~dec_tlu_flush_noredir_wb; + // hold prior npc + assign sel_hold_npc_e4 = ~sel_exu_npc_e4 & ~sel_flush_npc_e4 & ~sel_i0_npc_e4; + + assign npc_e4[31:1] = ( ({31{sel_exu_npc_e4}} & exu_npc_e4[31:1]) | + ({31{sel_i0_npc_e4}} & dec_tlu_i1_pc_e4[31:1]) | + ({31{~mpc_reset_run_req & reset_delayed}} & rst_vec[31:1]) | // init to reset vector for mpc halt on reset case + ({31{(sel_flush_npc_e4)}} & tlu_flush_path_wb[31:1]) | + ({31{(sel_hold_npc_e4)}} & npc_wb[31:1]) ); + + rvdffe #(31) npwbc_ff (.*, .en(sel_i0_npc_e4 | sel_exu_npc_e4 | sel_flush_npc_e4 | reset_delayed), .din(npc_e4[31:1]), .dout(npc_wb[31:1])); + + // PC has to be captured for exceptions and interrupts. 
For MRET, we could execute it and then take an + // interrupt before the next instruction. + logic pc0_valid_e4, pc1_valid_e4; + assign pc0_valid_e4 = ~dec_tlu_dbg_halted & dec_tlu_i0_valid_e4; + assign pc1_valid_e4 = ~dec_tlu_dbg_halted & dec_tlu_i0_valid_e4 & dec_tlu_i1_valid_e4 & ~lsu_i0_exc_dc4 & ~rfpc_i0_e4 & ~inst_acc_e4 & ~i0_trigger_hit_e4; + + assign pc_e4[31:1] = ( ({31{ pc0_valid_e4 & ~pc1_valid_e4}} & dec_tlu_i0_pc_e4[31:1]) | + ({31{ pc1_valid_e4}} & dec_tlu_i1_pc_e4[31:1]) | + ({31{~pc0_valid_e4 & ~pc1_valid_e4}} & pc_wb[31:1]) + ); + + rvdffe #(31) pwbc_ff (.*, .en(pc0_valid_e4 | pc1_valid_e4), .din(pc_e4[31:1]), .dout(pc_wb[31:1])); + + assign wr_mepc_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MEPC); + + assign mepc_ns[31:1] = ( ({31{i0_exception_valid_wb | lsu_exc_valid_wb | mepc_trigger_hit_sel_pc_wb}} & pc_wb[31:1]) | + ({31{interrupt_valid_wb}} & npc_wb[31:1]) | + ({31{wr_mepc_wb & ~exc_or_int_valid_wb}} & dec_csr_wrdata_wb[31:1]) | + ({31{~wr_mepc_wb & ~exc_or_int_valid_wb}} & mepc[31:1]) ); + + + rvdff #(31) mepc_ff (.*, .clk(e4e5_int_clk), .din(mepc_ns[31:1]), .dout(mepc[31:1])); + + // ---------------------------------------------------------------------- + // MCAUSE (RW) + // [31:0] : Exception Cause + `define MCAUSE 12'h342 + + assign wr_mcause_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MCAUSE); + + assign mcause_ns[31:0] = ( ({32{exc_or_int_valid_wb & take_nmi_wb & nmi_lsu_store_type_f}} & {32'hf000_0000}) | + ({32{exc_or_int_valid_wb & take_nmi_wb & nmi_lsu_load_type_f}} & {32'hf000_0001}) | + ({32{exc_or_int_valid_wb & ~take_nmi_wb}} & {interrupt_valid_wb, 26'b0, exc_cause_wb[4:0]}) | + ({32{wr_mcause_wb & ~exc_or_int_valid_wb}} & dec_csr_wrdata_wb[31:0]) | + ({32{~wr_mcause_wb & ~exc_or_int_valid_wb}} & mcause[31:0]) ); + + rvdff #(32) mcause_ff (.*, .clk(e4e5_int_clk), .din(mcause_ns[31:0]), .dout(mcause[31:0])); + + // ---------------------------------------------------------------------- + // MTVAL (RW) + // 
[31:0] : Exception address if relevant + `define MTVAL 12'h343 + + assign wr_mtval_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MTVAL); + assign mtval_capture_pc_wb = exc_or_int_valid_wb & (ebreak_wb | (inst_acc_wb & ~inst_acc_second_wb) | mepc_trigger_hit_sel_pc_wb) & ~take_nmi_wb; + assign mtval_capture_pc_plus2_wb = exc_or_int_valid_wb & (inst_acc_wb & inst_acc_second_wb) & ~take_nmi_wb; + assign mtval_capture_inst_wb = exc_or_int_valid_wb & illegal_wb & ~take_nmi_wb; + assign mtval_capture_lsu_wb = exc_or_int_valid_wb & lsu_exc_valid_wb & ~take_nmi_wb; + assign mtval_clear_wb = exc_or_int_valid_wb & ~mtval_capture_pc_wb & ~mtval_capture_inst_wb & ~mtval_capture_lsu_wb & ~mepc_trigger_hit_sel_pc_wb; + + + assign mtval_ns[31:0] = (({32{mtval_capture_pc_wb}} & {pc_wb[31:1], 1'b0}) | + ({32{mtval_capture_pc_plus2_wb}} & {pc_wb[31:1] + 31'b1, 1'b0}) | + ({32{mtval_capture_inst_wb}} & dec_illegal_inst[31:0]) | + ({32{mtval_capture_lsu_wb}} & lsu_error_pkt_addr_wb[31:0]) | + ({32{wr_mtval_wb & ~interrupt_valid_wb}} & dec_csr_wrdata_wb[31:0]) | + ({32{~take_nmi_wb & ~wr_mtval_wb & ~mtval_capture_pc_wb & ~mtval_capture_inst_wb & ~mtval_clear_wb & ~mtval_capture_lsu_wb}} & mtval[31:0]) ); + + + rvdff #(32) mtval_ff (.*, .clk(e4e5_int_clk), .din(mtval_ns[31:0]), .dout(mtval[31:0])); + + // ---------------------------------------------------------------------- + // MCGC (RW) Clock gating control + // [31:9] : Reserved, reads 0x0 + // [8] : misc_clk_override + // [7] : dec_clk_override + // [6] : exu_clk_override + // [5] : ifu_clk_override + // [4] : lsu_clk_override + // [3] : bus_clk_override + // [2] : pic_clk_override + // [1] : dccm_clk_override + // [0] : icm_clk_override + // + `define MCGC 12'h7f8 + assign wr_mcgc_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MCGC); + + rvdffe #(9) mcgc_ff (.*, .en(wr_mcgc_wb), .din(dec_csr_wrdata_wb[8:0]), .dout(mcgc[8:0])); + + assign dec_tlu_misc_clk_override = mcgc[8]; + assign dec_tlu_dec_clk_override = 
mcgc[7]; + assign dec_tlu_exu_clk_override = mcgc[6]; + assign dec_tlu_ifu_clk_override = mcgc[5]; + assign dec_tlu_lsu_clk_override = mcgc[4]; + assign dec_tlu_bus_clk_override = mcgc[3]; + assign dec_tlu_pic_clk_override = mcgc[2]; + assign dec_tlu_dccm_clk_override = mcgc[1]; + assign dec_tlu_icm_clk_override = mcgc[0]; + + // ---------------------------------------------------------------------- + // MFDC (RW) Feature Disable Control + // [31:19] : Reserved, reads 0x0 + // [18:16] : DMA QoS Prty + // [15:11] : Reserved, reads 0x0 + // [10] : Disable dual issue + // [9] : Disable pic multiple ints + // [8] : Disable core ecc + // [7] : Disable secondary ALUs + // [6] : Disable multiple outstanding sideeffect accesses to bus + // [5] : Disable non-blocking loads/divides + // [4] : Disable fast divide + // [3] : Disable branch prediction and return stack + // [2] : Disable write buffer coalescing + // [1] : Disable load misses that bypass the write buffer + // [0] : Disable pipelining - Enable single instruction execution + // + `define MFDC 12'h7f9 + + assign wr_mfdc_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MFDC); + + rvdffe #(14) mfdc_ff (.*, .en(wr_mfdc_wb), .din(mfdc_ns[13:0]), .dout(mfdc_int[13:0])); + + `ifdef RV_BUILD_AXI4 + // flip poweron value of bit 6 for AXI build (the bit is stored inverted in mfdc_int and inverted again when driven onto mfdc) + assign mfdc_ns[13:0] = {~dec_csr_wrdata_wb[18:16],dec_csr_wrdata_wb[10:7], ~dec_csr_wrdata_wb[6], dec_csr_wrdata_wb[5:0]}; + assign mfdc[18:0] = {~mfdc_int[13:11], 5'b0, mfdc_int[10:7], ~mfdc_int[6], mfdc_int[5:0]}; + `else + assign mfdc_ns[13:0] = {~dec_csr_wrdata_wb[18:16],dec_csr_wrdata_wb[10:0]}; + assign mfdc[18:0] = {~mfdc_int[13:11], 5'b0, mfdc_int[10:0]}; + `endif + + assign dec_tlu_dma_qos_prty[2:0] = mfdc[18:16]; + assign dec_tlu_dual_issue_disable = mfdc[10]; + assign dec_tlu_core_ecc_disable = mfdc[8]; + assign dec_tlu_sec_alu_disable = mfdc[7]; + assign dec_tlu_sideeffect_posted_disable = mfdc[6]; + assign dec_tlu_non_blocking_disable = mfdc[5]; + assign 
dec_tlu_fast_div_disable = mfdc[4]; + assign dec_tlu_bpred_disable = mfdc[3]; + assign dec_tlu_wb_coalescing_disable = mfdc[2]; + assign dec_tlu_ld_miss_byp_wb_disable = mfdc[1]; + assign dec_tlu_pipelining_disable = mfdc[0]; + + // ---------------------------------------------------------------------- + // MCPC (RW) Pause counter + // [31:0] : Reads 0x0, decs in the wb register in decode_ctl + + `define MCPC 12'h7c2 + assign dec_tlu_wr_pause_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MCPC) & ~interrupt_valid_wb; + + // ---------------------------------------------------------------------- + // MRAC (RW) + // [31:0] : Region Access Control Register, 16 regions, {side_effect, cacheable} pairs + `define MRAC 12'h7c0 + + assign wr_mrac_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MRAC); + + // prevent pairs of 2'b11 (side_effect and cacheable both set): the cacheable bit is forced to 0 whenever the side_effect bit is written as 1 + logic [31:0] mrac_in; + assign mrac_in[31:0] = {dec_csr_wrdata_wb[31], dec_csr_wrdata_wb[30] & ~dec_csr_wrdata_wb[31], + dec_csr_wrdata_wb[29], dec_csr_wrdata_wb[28] & ~dec_csr_wrdata_wb[29], + dec_csr_wrdata_wb[27], dec_csr_wrdata_wb[26] & ~dec_csr_wrdata_wb[27], + dec_csr_wrdata_wb[25], dec_csr_wrdata_wb[24] & ~dec_csr_wrdata_wb[25], + dec_csr_wrdata_wb[23], dec_csr_wrdata_wb[22] & ~dec_csr_wrdata_wb[23], + dec_csr_wrdata_wb[21], dec_csr_wrdata_wb[20] & ~dec_csr_wrdata_wb[21], + dec_csr_wrdata_wb[19], dec_csr_wrdata_wb[18] & ~dec_csr_wrdata_wb[19], + dec_csr_wrdata_wb[17], dec_csr_wrdata_wb[16] & ~dec_csr_wrdata_wb[17], + dec_csr_wrdata_wb[15], dec_csr_wrdata_wb[14] & ~dec_csr_wrdata_wb[15], + dec_csr_wrdata_wb[13], dec_csr_wrdata_wb[12] & ~dec_csr_wrdata_wb[13], + dec_csr_wrdata_wb[11], dec_csr_wrdata_wb[10] & ~dec_csr_wrdata_wb[11], + dec_csr_wrdata_wb[9], dec_csr_wrdata_wb[8] & ~dec_csr_wrdata_wb[9], + dec_csr_wrdata_wb[7], dec_csr_wrdata_wb[6] & ~dec_csr_wrdata_wb[7], + dec_csr_wrdata_wb[5], dec_csr_wrdata_wb[4] & ~dec_csr_wrdata_wb[5], + dec_csr_wrdata_wb[3], dec_csr_wrdata_wb[2] & 
~dec_csr_wrdata_wb[3], + dec_csr_wrdata_wb[1], dec_csr_wrdata_wb[0] & ~dec_csr_wrdata_wb[1]}; + + rvdffe #(32) mrac_ff (.*, .en(wr_mrac_wb), .din(mrac_in[31:0]), .dout(mrac[31:0])); + + // drive to LSU/IFU + assign dec_tlu_mrac_ff[31:0] = mrac[31:0]; + + // ---------------------------------------------------------------------- + // MDEAU (WAR0) + // [31:0] : Dbus Error Address Unlock register + // + `define MDEAU 12'hbc0 + + assign wr_mdeau_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MDEAU); + + + // ---------------------------------------------------------------------- + // MDSEAC (R) + // [31:0] : Dbus Store Error Address Capture register + // + `define MDSEAC 12'hfc0 + + // only capture error bus if the MDSEAC reg is not locked + assign mdseac_locked_ns = mdseac_en | (mdseac_locked_f & ~wr_mdeau_wb); + + assign mdseac_en = (lsu_imprecise_error_store_any | lsu_imprecise_error_load_any) & ~mdseac_locked_f; + + rvdffe #(32) mdseac_ff (.*, .en(mdseac_en), .din(lsu_imprecise_error_addr_any[31:0]), .dout(mdseac[31:0])); + + // ---------------------------------------------------------------------- + // MPMC (R0W1) + // [0:0] : FW halt + // + `define MPMC 12'h7c6 + logic wr_mpmc_wb; + assign wr_mpmc_wb = dec_csr_wrdata_wb[0] & dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MPMC); + assign fw_halt_req = wr_mpmc_wb & ~internal_dbg_halt_mode_f; + + // ---------------------------------------------------------------------- + // MICECT (I-Cache error counter/threshold) + // [31:27] : Icache parity error threshold + // [26:0] : Icache parity error count + `define MICECT 12'h7f0 + + logic [31:27] csr_sat; + assign csr_sat[31:27] = (dec_csr_wrdata_wb[31:27] > 5'd26) ? 5'd26 : dec_csr_wrdata_wb[31:27]; + + assign wr_micect_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MICECT); + assign {micect_cout_nc, micect_inc[26:0]} = micect[26:0] + {26'b0, ic_perr_wb}; + assign micect_ns = wr_micect_wb ? 
{csr_sat[31:27], dec_csr_wrdata_wb[26:0]} : {micect[31:27], micect_inc[26:0]}; + + rvdffe #(32) micect_ff (.*, .en(wr_micect_wb | ic_perr_wb), .din(micect_ns[31:0]), .dout(micect[31:0])); + + assign mice_ce_req = |({32'b1 << micect[31:27]} & {5'b0, micect[26:0]}); + + // ---------------------------------------------------------------------- + // MICCMECT (ICCM error counter/threshold) + // [31:27] : ICCM parity error threshold + // [26:0] : ICCM parity error count + `define MICCMECT 12'h7f1 + + assign wr_miccmect_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MICCMECT); + assign {miccmect_cout_nc, miccmect_inc[26:0]} = miccmect[26:0] + {26'b0, iccm_sbecc_wb | iccm_dma_sb_error}; + assign miccmect_ns = wr_miccmect_wb ? {csr_sat[31:27], dec_csr_wrdata_wb[26:0]} : {miccmect[31:27], miccmect_inc[26:0]}; + + rvdffe #(32) miccmect_ff (.*, .en(wr_miccmect_wb | iccm_sbecc_wb | iccm_dma_sb_error), .din(miccmect_ns[31:0]), .dout(miccmect[31:0])); + + assign miccme_ce_req = |({32'b1 << miccmect[31:27]} & {5'b0, miccmect[26:0]}); + + // ---------------------------------------------------------------------- + // MDCCMECT (DCCM error counter/threshold) + // [31:27] : DCCM parity error threshold + // [26:0] : DCCM parity error count + `define MDCCMECT 12'h7f2 + + assign wr_mdccmect_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MDCCMECT); + assign {mdccmect_cout_nc, mdccmect_inc[26:0]} = mdccmect[26:0] + {26'b0, lsu_single_ecc_error_wb}; + assign mdccmect_ns = wr_mdccmect_wb ? 
{csr_sat[31:27], dec_csr_wrdata_wb[26:0]} : {mdccmect[31:27], mdccmect_inc[26:0]}; + + rvdffe #(32) mdccmect_ff (.*, .en(wr_mdccmect_wb | lsu_single_ecc_error_wb), .din(mdccmect_ns[31:0]), .dout(mdccmect[31:0])); + + assign mdccme_ce_req = |({32'b1 << mdccmect[31:27]} & {5'b0, mdccmect[26:0]}); + + // ---------------------------------------------------------------------- + // MEIVT (External Interrupt Vector Table (R/W)) + // [31:10]: Base address (R/W) + // [9:0] : Reserved, reads 0x0 + `define MEIVT 12'hbc8 + + assign wr_meivt_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MEIVT); + + rvdffe #(22) meivt_ff (.*, .en(wr_meivt_wb), .din(dec_csr_wrdata_wb[31:10]), .dout(meivt[31:10])); + + // ---------------------------------------------------------------------- + // MEIHAP (External Interrupt Handler Access Pointer (R)) + // [31:10]: Base address (R/W) + // [9:2] : ClaimID (R) + // [1:0] : Reserved, 0x0 + `define MEIHAP 12'hfc8 + + assign wr_meihap_wb = wr_meicpct_wb; + + rvdffe #(8) meihap_ff (.*, .en(wr_meihap_wb), .din(pic_claimid[7:0]), .dout(meihap[9:2])); + + // ---------------------------------------------------------------------- + // MEICURPL (R/W) + // [31:4] : Reserved (read 0x0) + // [3:0] : CURRPRI - Priority level of current interrupt service routine (R/W) + `define MEICURPL 12'hbcc + + assign wr_meicurpl_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MEICURPL); + assign meicurpl_ns[3:0] = wr_meicurpl_wb ? 
dec_csr_wrdata_wb[3:0] : meicurpl[3:0]; + + rvdff #(4) meicurpl_ff (.*, .clk(csr_wr_clk), .din(meicurpl_ns[3:0]), .dout(meicurpl[3:0])); + + // PIC needs this reg + assign dec_tlu_meicurpl[3:0] = meicurpl[3:0]; + + + // ---------------------------------------------------------------------- + // MEICIDPL (R/W) + // [31:4] : Reserved (read 0x0) + // [3:0] : External Interrupt Claim ID's Priority Level Register + `define MEICIDPL 12'hbcb + + assign wr_meicidpl_wb = (dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MEICIDPL)); + + assign meicidpl_ns[3:0] = wr_meicpct_wb ? pic_pl[3:0] : (wr_meicidpl_wb ? dec_csr_wrdata_wb[3:0] : meicidpl[3:0]); + + rvdff #(4) meicidpl_ff (.*, .clk(csr_wr_clk), .din(meicidpl_ns[3:0]), .dout(meicidpl[3:0])); + + // ---------------------------------------------------------------------- + // MEICPCT (Capture CLAIMID in MEIHAP and PL in MEICIDPL) + // [31:1] : Reserved (read 0x0) + // [0] : Capture (W1, Read 0) - NOTE: wr_meicpct_wb below is not qualified with the write data, so any write to this CSR triggers the capture + `define MEICPCT 12'hbca + + assign wr_meicpct_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MEICPCT); + + // ---------------------------------------------------------------------- + // MEIPT (External Interrupt Priority Threshold) + // [31:4] : Reserved (read 0x0) + // [3:0] : PRITHRESH + `define MEIPT 12'hbc9 + + assign wr_meipt_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MEIPT); + assign meipt_ns[3:0] = wr_meipt_wb ?
dec_csr_wrdata_wb[3:0] : meipt[3:0]; + + rvdff #(4) meipt_ff (.*, .clk(active_clk), .din(meipt_ns[3:0]), .dout(meipt[3:0])); + + // to PIC + assign dec_tlu_meipt[3:0] = meipt[3:0]; + // ---------------------------------------------------------------------- + // DCSR (R/W) (Only accessible in debug mode) + // [31:28] : xdebugver (hard coded to 0x4) RO + // [27:16] : 0x0, reserved + // [15] : ebreakm + // [14] : 0x0, reserved + // [13] : ebreaks (0x0 for this core) + // [12] : ebreaku (0x0 for this core) + // [11] : stepie + // [10] : stopcount + // [9] : 0x0 //stoptime + // [8:6] : cause (RO) + // [5:4] : 0x0, reserved + // [3] : nmip + // [2] : step + // [1:0] : prv (0x3 for this core) + // + `define DCSR 12'h7b0 + logic [8:6] dcsr_cause; + + // RV has clarified that 'priority 4' in the spec means top priority. + // Cause encodings used below: 4 = single step, 3 = debugger request, 1 = ebreak, 2 = trigger. The mux gives trigger top priority, then ebreak, then debugger request, then single step. + + // RV debug spec indicates a cause priority change for trigger hits during single step. + assign trigger_hit_for_dscr_cause_wb = trigger_hit_dmode_wb | (trigger_hit_wb & dcsr_single_step_done_f); + + assign dcsr_cause[8:6] = ( ({3{dcsr_single_step_done_f & ~ebreak_to_debug_mode_wb & ~trigger_hit_for_dscr_cause_wb & ~debug_halt_req}} & 3'b100) | + ({3{debug_halt_req & ~ebreak_to_debug_mode_wb & ~trigger_hit_for_dscr_cause_wb}} & 3'b011) | + ({3{ebreak_to_debug_mode_wb & ~trigger_hit_for_dscr_cause_wb}} & 3'b001) | + ({3{trigger_hit_for_dscr_cause_wb}} & 3'b010)); + + assign wr_dcsr_wb = allow_dbg_halt_csr_write & dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `DCSR); + + + + // Multiple halt enter requests can happen before we are halted. + // We have to continue to upgrade based on dcsr_cause priority but we can't downgrade.
+ logic enter_debug_halt_req_le, dcsr_cause_upgradeable; + assign dcsr_cause_upgradeable = internal_dbg_halt_mode_f & (dcsr[8:6] == 3'b011); + assign enter_debug_halt_req_le = enter_debug_halt_req & (~dbg_tlu_halted | dcsr_cause_upgradeable); + + assign nmi_in_debug_mode = nmi_int_detected_f & internal_dbg_halt_mode_f; + assign dcsr_ns[15:2] = enter_debug_halt_req_le ? {dcsr[15:9], dcsr_cause[8:6], dcsr[5:2]} : + (wr_dcsr_wb ? {dec_csr_wrdata_wb[15], 3'b0, dec_csr_wrdata_wb[11:10], 1'b0, dcsr[8:6], 2'b00, nmi_in_debug_mode | dcsr[3], dec_csr_wrdata_wb[2]} : + {dcsr[15:4], nmi_in_debug_mode, dcsr[2]}); + + rvdffe #(14) dcsr_ff (.*, .en(enter_debug_halt_req_le | wr_dcsr_wb | internal_dbg_halt_mode | take_nmi_wb), .din(dcsr_ns[15:2]), .dout(dcsr[15:2])); + + // ---------------------------------------------------------------------- + // DPC (R/W) (Only accessible in debug mode) + // [31:0] : Debug PC + `define DPC 12'h7b1 + + assign wr_dpc_wb = allow_dbg_halt_csr_write & dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `DPC); + assign dpc_capture_npc = dbg_tlu_halted & ~dbg_tlu_halted_f & ~request_debug_mode_done_f; + assign dpc_capture_pc = request_debug_mode_wb; + + assign dpc_ns[31:1] = ( ({31{~dpc_capture_pc & ~dpc_capture_npc & wr_dpc_wb}} & dec_csr_wrdata_wb[31:1]) | + ({31{dpc_capture_pc}} & pc_wb[31:1]) | + ({31{~dpc_capture_pc & dpc_capture_npc}} & npc_wb[31:1]) ); + + rvdffe #(31) dpc_ff (.*, .en(wr_dpc_wb | dpc_capture_pc | dpc_capture_npc), .din(dpc_ns[31:1]), .dout(dpc[31:1])); + + // ---------------------------------------------------------------------- + // DICAWICS (R/W) (Only accessible in debug mode) + // [31:25] : Reserved + // [24] : Array select, 0 is data, 1 is tag + // [23:22] : Reserved + // [21:20] : Way select + // [19:16] : Reserved + // [15:2] : Index + // [1:0] : Reserved + `define DICAWICS 12'h7c8 + + assign dicawics_ns[18:2] = {dec_csr_wrdata_wb[24], dec_csr_wrdata_wb[21:20], dec_csr_wrdata_wb[15:2]}; + assign wr_dicawics_wb = 
allow_dbg_halt_csr_write & dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `DICAWICS); + + rvdffe #(17) dicawics_ff (.*, .en(wr_dicawics_wb), .din(dicawics_ns[18:2]), .dout(dicawics[18:2])); + + // ---------------------------------------------------------------------- + // DICAD0 (R/W) (Only accessible in debug mode) + // + // If dicawics[array] is 0 + // [31:0] : inst data + // + // If dicawics[array] is 1 + // [31:16] : Tag + // [15:7] : Reserved + // [6:4] : LRU + // [3:1] : Reserved + // [0] : Valid + `define DICAD0 12'h7c9 + + assign dicad0_ns[31:0] = wr_dicad0_wb ? dec_csr_wrdata_wb[31:0] : ifu_ic_debug_rd_data[31:0]; + + assign wr_dicad0_wb = allow_dbg_halt_csr_write & dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `DICAD0); + + rvdffe #(32) dicad0_ff (.*, .en(wr_dicad0_wb | ifu_ic_debug_rd_data_valid), .din(dicad0_ns[31:0]), .dout(dicad0[31:0])); + + +`ifdef RV_ICACHE_ECC + // ---------------------------------------------------------------------- + // DICAD1 (R/W) (Only accessible in debug mode) + // [9:0] : ECC + `define DICAD1 12'h7ca + + assign dicad1_ns[9:0] = wr_dicad1_wb ? dec_csr_wrdata_wb[9:0] : ifu_ic_debug_rd_data[41:32]; + + assign wr_dicad1_wb = allow_dbg_halt_csr_write & dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `DICAD1); + + rvdffs #(10) dicad1_ff (.*, .clk(active_clk), .en(wr_dicad1_wb | ifu_ic_debug_rd_data_valid), .din(dicad1_ns[9:0]), .dout(dicad1[9:0])); + +`else + // ---------------------------------------------------------------------- + // DICAD1 (R/W) (Only accessible in debug mode) + // [1:0] : Parity + `define DICAD1 12'h7ca + + assign dicad1_ns[1:0] = wr_dicad1_wb ? 
dec_csr_wrdata_wb[1:0] : ifu_ic_debug_rd_data[33:32]; + + assign wr_dicad1_wb = allow_dbg_halt_csr_write & dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `DICAD1); + + rvdffs #(2) dicad1_ff (.*, .clk(active_clk), .en(wr_dicad1_wb | ifu_ic_debug_rd_data_valid), .din(dicad1_ns[1:0]), .dout(dicad1[1:0])); +`endif + // ---------------------------------------------------------------------- + // DICAGO (R/W) (Only accessible in debug mode) + // [0] : Go + `define DICAGO 12'h7cb + +`ifdef RV_ICACHE_ECC + assign dec_tlu_ic_diag_pkt.icache_wrdata[41:0] = {dicad1[9:0], dicad0[31:0]}; +`else + assign dec_tlu_ic_diag_pkt.icache_wrdata[33:0] = {dicad1[1:0], dicad0[31:0]}; +`endif + assign dec_tlu_ic_diag_pkt.icache_dicawics[18:2] = dicawics[18:2]; + + logic icache_rd_valid, icache_wr_valid, icache_rd_valid_f, icache_wr_valid_f; + assign icache_rd_valid = allow_dbg_halt_csr_write & dec_csr_any_unq_d & dec_i0_decode_d & ~dec_csr_wen_unq_d & (dec_csr_rdaddr_d[11:0] == `DICAGO); + assign icache_wr_valid = allow_dbg_halt_csr_write & dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `DICAGO); + + rvdff #(2) dicgo_ff (.*, .clk(active_clk), .din({icache_rd_valid, icache_wr_valid}), .dout({icache_rd_valid_f, icache_wr_valid_f})); + + assign dec_tlu_ic_diag_pkt.icache_rd_valid = icache_rd_valid_f; + assign dec_tlu_ic_diag_pkt.icache_wr_valid = icache_wr_valid_f; + + // ---------------------------------------------------------------------- + // MTSEL (R/W) + // [1:0] : Trigger select : 00, 01, 10 are data/address triggers. 11 is inst count + `define MTSEL 12'h7a0 + + assign wr_mtsel_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MTSEL); + assign mtsel_ns[1:0] = wr_mtsel_wb ? 
{dec_csr_wrdata_wb[1:0]} : mtsel[1:0]; + + rvdff #(2) mtsel_ff (.*, .clk(csr_wr_clk), .din(mtsel_ns[1:0]), .dout(mtsel[1:0])); + + // ---------------------------------------------------------------------- + // MTDATA1 (R/W) + // [31:0] : Trigger Data 1 + `define MTDATA1 12'h7a1 + + // for triggers 0, 1, 2 and 3 aka Match Control + // [31:28] : type, hard coded to 0x2 + // [27] : dmode + // [26:21] : hard coded to 0x1f + // [20] : hit + // [19] : select (0 - address, 1 - data) + // [18] : timing, always 'before', reads 0x0 + // [17:12] : action, bits [17:13] not implemented and reads 0x0 + // [11] : chain + // [10:7] : match, bits [10:8] not implemented and reads 0x0 + // [6] : M + // [5:3] : not implemented, reads 0x0 + // [2] : execute + // [1] : store + // [0] : load + // + // decoder ring + // [27] : => 9 + // [20] : => 8 + // [19] : => 7 + // [12] : => 6 + // [11] : => 5 + // [7] : => 4 + // [6] : => 3 + // [2] : => 2 + // [1] : => 1 + // [0] : => 0 + + + // don't allow setting load-data. + assign tdata_load = dec_csr_wrdata_wb[0] & ~dec_csr_wrdata_wb[19]; + // don't allow setting execute-data. + assign tdata_opcode = dec_csr_wrdata_wb[2] & ~dec_csr_wrdata_wb[19]; + // don't allow clearing DMODE and action=1 + assign tdata_action = (dec_csr_wrdata_wb[27] & dbg_tlu_halted_f) & dec_csr_wrdata_wb[12]; + + assign tdata_wrdata_wb[9:0] = {dec_csr_wrdata_wb[27] & dbg_tlu_halted_f, + dec_csr_wrdata_wb[20:19], + tdata_action, + dec_csr_wrdata_wb[11], + dec_csr_wrdata_wb[7:6], + tdata_opcode, + dec_csr_wrdata_wb[1], + tdata_load}; + + // If the DMODE bit is set, tdata1 can only be updated in debug_mode + assign wr_mtdata1_t0_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MTDATA1) & (mtsel[1:0] == 2'b0) & (~mtdata1_t0[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign mtdata1_t0_ns[9:0] = wr_mtdata1_t0_wb ? 
tdata_wrdata_wb[9:0] : + {mtdata1_t0[9], update_hit_bit_wb[0] | mtdata1_t0[8], mtdata1_t0[7:0]}; + + assign wr_mtdata1_t1_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MTDATA1) & (mtsel[1:0] == 2'b01) & (~mtdata1_t1[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign mtdata1_t1_ns[9:0] = wr_mtdata1_t1_wb ? tdata_wrdata_wb[9:0] : + {mtdata1_t1[9], update_hit_bit_wb[1] | mtdata1_t1[8], mtdata1_t1[7:0]}; + + assign wr_mtdata1_t2_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MTDATA1) & (mtsel[1:0] == 2'b10) & (~mtdata1_t2[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign mtdata1_t2_ns[9:0] = wr_mtdata1_t2_wb ? tdata_wrdata_wb[9:0] : + {mtdata1_t2[9], update_hit_bit_wb[2] | mtdata1_t2[8], mtdata1_t2[7:0]}; + + assign wr_mtdata1_t3_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MTDATA1) & (mtsel[1:0] == 2'b11) & (~mtdata1_t3[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign mtdata1_t3_ns[9:0] = wr_mtdata1_t3_wb ? tdata_wrdata_wb[9:0] : + {mtdata1_t3[9], update_hit_bit_wb[3] | mtdata1_t3[8], mtdata1_t3[7:0]}; + + + rvdff #(10) mtdata1_t0_ff (.*, .clk(active_clk), .din(mtdata1_t0_ns[9:0]), .dout(mtdata1_t0[9:0])); + rvdff #(10) mtdata1_t1_ff (.*, .clk(active_clk), .din(mtdata1_t1_ns[9:0]), .dout(mtdata1_t1[9:0])); + rvdff #(10) mtdata1_t2_ff (.*, .clk(active_clk), .din(mtdata1_t2_ns[9:0]), .dout(mtdata1_t2[9:0])); + rvdff #(10) mtdata1_t3_ff (.*, .clk(active_clk), .din(mtdata1_t3_ns[9:0]), .dout(mtdata1_t3[9:0])); + + assign mtdata1_tsel_out[31:0] = ( ({32{(mtsel[1:0] == 2'b00)}} & {4'h2, mtdata1_t0[9], 6'b011111, mtdata1_t0[8:7], 6'b0, mtdata1_t0[6:5], 3'b0, mtdata1_t0[4:3], 3'b0, mtdata1_t0[2:0]}) | + ({32{(mtsel[1:0] == 2'b01)}} & {4'h2, mtdata1_t1[9], 6'b011111, mtdata1_t1[8:7], 6'b0, mtdata1_t1[6:5], 3'b0, mtdata1_t1[4:3], 3'b0, mtdata1_t1[2:0]}) | + ({32{(mtsel[1:0] == 2'b10)}} & {4'h2, mtdata1_t2[9], 6'b011111, mtdata1_t2[8:7], 6'b0, mtdata1_t2[6:5], 3'b0, mtdata1_t2[4:3], 3'b0, mtdata1_t2[2:0]}) | + ({32{(mtsel[1:0] == 2'b11)}} & {4'h2, mtdata1_t3[9], 
6'b011111, mtdata1_t3[8:7], 6'b0, mtdata1_t3[6:5], 3'b0, mtdata1_t3[4:3], 3'b0, mtdata1_t3[2:0]})); + + assign trigger_pkt_any[0].select = mtdata1_t0[`MTDATA1_SEL]; + assign trigger_pkt_any[0].match = mtdata1_t0[`MTDATA1_MATCH]; + assign trigger_pkt_any[0].store = mtdata1_t0[`MTDATA1_ST]; + assign trigger_pkt_any[0].load = mtdata1_t0[`MTDATA1_LD]; + assign trigger_pkt_any[0].execute = mtdata1_t0[`MTDATA1_EXE]; + assign trigger_pkt_any[0].m = mtdata1_t0[`MTDATA1_M_ENABLED]; + + assign trigger_pkt_any[1].select = mtdata1_t1[`MTDATA1_SEL]; + assign trigger_pkt_any[1].match = mtdata1_t1[`MTDATA1_MATCH]; + assign trigger_pkt_any[1].store = mtdata1_t1[`MTDATA1_ST]; + assign trigger_pkt_any[1].load = mtdata1_t1[`MTDATA1_LD]; + assign trigger_pkt_any[1].execute = mtdata1_t1[`MTDATA1_EXE]; + assign trigger_pkt_any[1].m = mtdata1_t1[`MTDATA1_M_ENABLED]; + + assign trigger_pkt_any[2].select = mtdata1_t2[`MTDATA1_SEL]; + assign trigger_pkt_any[2].match = mtdata1_t2[`MTDATA1_MATCH]; + assign trigger_pkt_any[2].store = mtdata1_t2[`MTDATA1_ST]; + assign trigger_pkt_any[2].load = mtdata1_t2[`MTDATA1_LD]; + assign trigger_pkt_any[2].execute = mtdata1_t2[`MTDATA1_EXE]; + assign trigger_pkt_any[2].m = mtdata1_t2[`MTDATA1_M_ENABLED]; + + assign trigger_pkt_any[3].select = mtdata1_t3[`MTDATA1_SEL]; + assign trigger_pkt_any[3].match = mtdata1_t3[`MTDATA1_MATCH]; + assign trigger_pkt_any[3].store = mtdata1_t3[`MTDATA1_ST]; + assign trigger_pkt_any[3].load = mtdata1_t3[`MTDATA1_LD]; + assign trigger_pkt_any[3].execute = mtdata1_t3[`MTDATA1_EXE]; + assign trigger_pkt_any[3].m = mtdata1_t3[`MTDATA1_M_ENABLED]; + + + + + + // ---------------------------------------------------------------------- + // MTDATA2 (R/W) + // [31:0] : Trigger Data 2 + `define MTDATA2 12'h7a2 + + // If the DMODE bit is set, tdata2 can only be updated in debug_mode + assign wr_mtdata2_t0_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MTDATA2) & (mtsel[1:0] == 2'b0) & (~mtdata1_t0[`MTDATA1_DMODE] | 
dbg_tlu_halted_f); + assign wr_mtdata2_t1_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MTDATA2) & (mtsel[1:0] == 2'b01) & (~mtdata1_t1[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign wr_mtdata2_t2_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MTDATA2) & (mtsel[1:0] == 2'b10) & (~mtdata1_t2[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign wr_mtdata2_t3_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MTDATA2) & (mtsel[1:0] == 2'b11) & (~mtdata1_t3[`MTDATA1_DMODE] | dbg_tlu_halted_f); + + rvdffe #(32) mtdata2_t0_ff (.*, .en(wr_mtdata2_t0_wb), .din(dec_csr_wrdata_wb[31:0]), .dout(mtdata2_t0[31:0])); + rvdffe #(32) mtdata2_t1_ff (.*, .en(wr_mtdata2_t1_wb), .din(dec_csr_wrdata_wb[31:0]), .dout(mtdata2_t1[31:0])); + rvdffe #(32) mtdata2_t2_ff (.*, .en(wr_mtdata2_t2_wb), .din(dec_csr_wrdata_wb[31:0]), .dout(mtdata2_t2[31:0])); + rvdffe #(32) mtdata2_t3_ff (.*, .en(wr_mtdata2_t3_wb), .din(dec_csr_wrdata_wb[31:0]), .dout(mtdata2_t3[31:0])); + + assign mtdata2_tsel_out[31:0] = ( ({32{(mtsel[1:0] == 2'b00)}} & mtdata2_t0[31:0]) | + ({32{(mtsel[1:0] == 2'b01)}} & mtdata2_t1[31:0]) | + ({32{(mtsel[1:0] == 2'b10)}} & mtdata2_t2[31:0]) | + ({32{(mtsel[1:0] == 2'b11)}} & mtdata2_t3[31:0])); + + assign trigger_pkt_any[0].tdata2[31:0] = mtdata2_t0[31:0]; + assign trigger_pkt_any[1].tdata2[31:0] = mtdata2_t1[31:0]; + assign trigger_pkt_any[2].tdata2[31:0] = mtdata2_t2[31:0]; + assign trigger_pkt_any[3].tdata2[31:0] = mtdata2_t3[31:0]; + + + //---------------------------------------------------------------------- + // Performance Monitor Counters section starts + //---------------------------------------------------------------------- + `define MHPME_NOEVENT 6'd0 + `define MHPME_CLK_ACTIVE 6'd1 // OOP - out of pipe + `define MHPME_ICACHE_HIT 6'd2 // OOP + `define MHPME_ICACHE_MISS 6'd3 // OOP + `define MHPME_INST_COMMIT 6'd4 + `define MHPME_INST_COMMIT_16B 6'd5 + `define MHPME_INST_COMMIT_32B 6'd6 + `define MHPME_INST_ALIGNED 6'd7 // OOP + `define 
MHPME_INST_DECODED 6'd8 // OOP + `define MHPME_INST_MUL 6'd9 + `define MHPME_INST_DIV 6'd10 + `define MHPME_INST_LOAD 6'd11 + `define MHPME_INST_STORE 6'd12 + `define MHPME_INST_MALOAD 6'd13 + `define MHPME_INST_MASTORE 6'd14 + `define MHPME_INST_ALU 6'd15 + `define MHPME_INST_CSRREAD 6'd16 + `define MHPME_INST_CSRRW 6'd17 + `define MHPME_INST_CSRWRITE 6'd18 + `define MHPME_INST_EBREAK 6'd19 + `define MHPME_INST_ECALL 6'd20 + `define MHPME_INST_FENCE 6'd21 + `define MHPME_INST_FENCEI 6'd22 + `define MHPME_INST_MRET 6'd23 + `define MHPME_INST_BRANCH 6'd24 + `define MHPME_BRANCH_MP 6'd25 + `define MHPME_BRANCH_TAKEN 6'd26 + `define MHPME_BRANCH_NOTP 6'd27 + `define MHPME_FETCH_STALL 6'd28 // OOP + `define MHPME_ALGNR_STALL 6'd29 // OOP + `define MHPME_DECODE_STALL 6'd30 // OOP + `define MHPME_POSTSYNC_STALL 6'd31 // OOP + `define MHPME_PRESYNC_STALL 6'd32 // OOP + `define MHPME_LSU_FREEZE 6'd33 // OOP + `define MHPME_LSU_SB_WB_STALL 6'd34 // OOP + `define MHPME_DMA_DCCM_STALL 6'd35 // OOP + `define MHPME_DMA_ICCM_STALL 6'd36 // OOP + `define MHPME_EXC_TAKEN 6'd37 + `define MHPME_TIMER_INT_TAKEN 6'd38 + `define MHPME_EXT_INT_TAKEN 6'd39 + `define MHPME_FLUSH_LOWER 6'd40 + `define MHPME_BR_ERROR 6'd41 + `define MHPME_IBUS_TRANS 6'd42 // OOP + `define MHPME_DBUS_TRANS 6'd43 // OOP + `define MHPME_DBUS_MA_TRANS 6'd44 // OOP + `define MHPME_IBUS_ERROR 6'd45 // OOP + `define MHPME_DBUS_ERROR 6'd46 // OOP + `define MHPME_IBUS_STALL 6'd47 // OOP + `define MHPME_DBUS_STALL 6'd48 // OOP + `define MHPME_INT_DISABLED 6'd49 // OOP + `define MHPME_INT_STALLED 6'd50 // OOP + + + logic [3:0][1:0] mhpmc_inc_e4, mhpmc_inc_wb; + logic [3:0][5:0] mhpme_vec; + logic mhpmc3_wr_en0, mhpmc3_wr_en1, mhpmc3_wr_en; + logic mhpmc4_wr_en0, mhpmc4_wr_en1, mhpmc4_wr_en; + logic mhpmc5_wr_en0, mhpmc5_wr_en1, mhpmc5_wr_en; + logic mhpmc6_wr_en0, mhpmc6_wr_en1, mhpmc6_wr_en; + logic mhpmc3h_wr_en0, mhpmc3h_wr_en; + logic mhpmc4h_wr_en0, mhpmc4h_wr_en; + logic mhpmc5h_wr_en0, mhpmc5h_wr_en; + logic 
mhpmc6h_wr_en0, mhpmc6h_wr_en; + logic [63:0] mhpmc3_incr, mhpmc4_incr, mhpmc5_incr, mhpmc6_incr; + + // Pack the event selects into a vector for genvar + assign mhpme_vec[0][5:0] = mhpme3[5:0]; + assign mhpme_vec[1][5:0] = mhpme4[5:0]; + assign mhpme_vec[2][5:0] = mhpme5[5:0]; + assign mhpme_vec[3][5:0] = mhpme6[5:0]; + + // only consider committed itypes + logic [3:0] pmu_i0_itype_qual, pmu_i1_itype_qual; + assign pmu_i0_itype_qual[3:0] = dec_tlu_packet_e4.pmu_i0_itype[3:0] & {4{tlu_i0_commit_cmt}}; + assign pmu_i1_itype_qual[3:0] = dec_tlu_packet_e4.pmu_i1_itype[3:0] & {4{tlu_i1_commit_cmt}}; + + // Generate the muxed incs for all counters based on event type + for (genvar i=0 ; i < 4; i++) begin + assign mhpmc_inc_e4[i][1:0] = {2{mgpmc}} & + ( + ({2{(mhpme_vec[i][5:0] == `MHPME_CLK_ACTIVE )}} & 2'b01) | + ({2{(mhpme_vec[i][5:0] == `MHPME_ICACHE_HIT )}} & {1'b0, ifu_pmu_ic_hit}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_ICACHE_MISS )}} & {1'b0, ifu_pmu_ic_miss}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_COMMIT )}} & {tlu_i1_commit_cmt, tlu_i0_commit_cmt & ~illegal_e4}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_COMMIT_16B )}} & {tlu_i1_commit_cmt & ~exu_pmu_i1_pc4, tlu_i0_commit_cmt & ~exu_pmu_i0_pc4 & ~illegal_e4}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_COMMIT_32B )}} & {tlu_i1_commit_cmt & exu_pmu_i1_pc4, tlu_i0_commit_cmt & exu_pmu_i0_pc4 & ~illegal_e4}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_ALIGNED )}} & ifu_pmu_instr_aligned[1:0]) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_DECODED )}} & dec_pmu_instr_decoded[1:0]) | + ({2{(mhpme_vec[i][5:0] == `MHPME_ALGNR_STALL )}} & {1'b0,ifu_pmu_align_stall}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_DECODE_STALL )}} & {1'b0,dec_pmu_decode_stall}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_MUL )}} & {(pmu_i1_itype_qual[3:0] == MUL), (pmu_i0_itype_qual[3:0] == MUL)}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_DIV )}} & {1'b0, dec_tlu_packet_e4.pmu_divide & tlu_i0_commit_cmt}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_LOAD )}} & 
{(pmu_i1_itype_qual[3:0] == LOAD), (pmu_i0_itype_qual[3:0] == LOAD)}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_STORE )}} & {(pmu_i1_itype_qual[3:0] == STORE), (pmu_i0_itype_qual[3:0] == STORE)}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_MALOAD )}} & {(pmu_i1_itype_qual[3:0] == LOAD), (pmu_i0_itype_qual[3:0] == LOAD)} & + {2{dec_tlu_packet_e4.pmu_lsu_misaligned}}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_MASTORE )}} & {(pmu_i1_itype_qual[3:0] == STORE), (pmu_i0_itype_qual[3:0] == STORE)} & + {2{dec_tlu_packet_e4.pmu_lsu_misaligned}}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_ALU )}} & {(pmu_i1_itype_qual[3:0] == ALU), (pmu_i0_itype_qual[3:0] == ALU)}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_CSRREAD )}} & {1'b0, (pmu_i0_itype_qual[3:0] == CSRREAD)}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_CSRWRITE )}} & {1'b0, (pmu_i0_itype_qual[3:0] == CSRWRITE)})| + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_CSRRW )}} & {1'b0, (pmu_i0_itype_qual[3:0] == CSRRW)}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_EBREAK )}} & {1'b0, (pmu_i0_itype_qual[3:0] == EBREAK)}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_ECALL )}} & {1'b0, (pmu_i0_itype_qual[3:0] == ECALL)}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_FENCE )}} & {1'b0, (pmu_i0_itype_qual[3:0] == FENCE)}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_FENCEI )}} & {1'b0, (pmu_i0_itype_qual[3:0] == FENCEI)}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_MRET )}} & {1'b0, (pmu_i0_itype_qual[3:0] == MRET)}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INST_BRANCH )}} & {((pmu_i1_itype_qual[3:0] == CONDBR) | (pmu_i1_itype_qual[3:0] == JAL)), + ((pmu_i0_itype_qual[3:0] == CONDBR) | (pmu_i0_itype_qual[3:0] == JAL))}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_BRANCH_MP )}} & {exu_pmu_i1_br_misp & tlu_i1_commit_cmt, exu_pmu_i0_br_misp & tlu_i0_commit_cmt}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_BRANCH_TAKEN )}} & {exu_pmu_i1_br_ataken & tlu_i1_commit_cmt, exu_pmu_i0_br_ataken & tlu_i0_commit_cmt}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_BRANCH_NOTP )}} & 
{dec_tlu_packet_e4.pmu_i1_br_unpred & tlu_i1_commit_cmt, dec_tlu_packet_e4.pmu_i0_br_unpred & tlu_i0_commit_cmt}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_FETCH_STALL )}} & {1'b0, ifu_pmu_fetch_stall}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_ALGNR_STALL )}} & {1'b0, ifu_pmu_align_stall}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_DECODE_STALL )}} & {1'b0, dec_pmu_decode_stall}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_POSTSYNC_STALL )}} & {1'b0,dec_pmu_postsync_stall}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_PRESYNC_STALL )}} & {1'b0,dec_pmu_presync_stall}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_LSU_FREEZE )}} & {1'b0, lsu_freeze_dc3}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_LSU_SB_WB_STALL )}} & {1'b0, lsu_store_stall_any}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_DMA_DCCM_STALL )}} & {1'b0, dma_dccm_stall_any}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_DMA_ICCM_STALL )}} & {1'b0, dma_iccm_stall_any}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_EXC_TAKEN )}} & {1'b0, (i0_exception_valid_e4 | trigger_hit_e4 | lsu_exc_valid_e4)}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_TIMER_INT_TAKEN )}} & {1'b0, take_timer_int}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_EXT_INT_TAKEN )}} & {1'b0, take_ext_int}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_FLUSH_LOWER )}} & {1'b0, tlu_flush_lower_e4}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_BR_ERROR )}} & {(dec_tlu_br1_error_e4 | dec_tlu_br1_start_error_e4) & rfpc_i1_e4, (dec_tlu_br0_error_e4 | dec_tlu_br0_start_error_e4) & rfpc_i0_e4}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_IBUS_TRANS )}} & {1'b0, ifu_pmu_bus_trxn}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_DBUS_TRANS )}} & {1'b0, lsu_pmu_bus_trxn}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_DBUS_MA_TRANS )}} & {1'b0, lsu_pmu_bus_misaligned}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_IBUS_ERROR )}} & {1'b0, ifu_pmu_bus_error}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_DBUS_ERROR )}} & {1'b0, lsu_pmu_bus_error}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_IBUS_STALL )}} & {1'b0, ifu_pmu_bus_busy}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_DBUS_STALL )}} & {1'b0, 
lsu_pmu_bus_busy}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INT_DISABLED )}} & {1'b0, ~mstatus[`MSTATUS_MIE]}) | + ({2{(mhpme_vec[i][5:0] == `MHPME_INT_STALLED )}} & {1'b0, ~mstatus[`MSTATUS_MIE] & |(mip[3:0] & mie[3:0])}) + ); + end + + rvdff #(2) pmu0inc_ff (.*, .clk(free_clk), .din(mhpmc_inc_e4[0][1:0]), .dout(mhpmc_inc_wb[0][1:0])); + rvdff #(2) pmu1inc_ff (.*, .clk(free_clk), .din(mhpmc_inc_e4[1][1:0]), .dout(mhpmc_inc_wb[1][1:0])); + rvdff #(2) pmu2inc_ff (.*, .clk(free_clk), .din(mhpmc_inc_e4[2][1:0]), .dout(mhpmc_inc_wb[2][1:0])); + rvdff #(2) pmu3inc_ff (.*, .clk(free_clk), .din(mhpmc_inc_e4[3][1:0]), .dout(mhpmc_inc_wb[3][1:0])); + + assign perfcnt_halted = ((dec_tlu_dbg_halted & dcsr[`DCSR_STOPC]) | dec_tlu_pmu_fw_halted); + + assign dec_tlu_perfcnt0[1:0] = mhpmc_inc_wb[0][1:0] & ~{2{perfcnt_halted}}; + assign dec_tlu_perfcnt1[1:0] = mhpmc_inc_wb[1][1:0] & ~{2{perfcnt_halted}}; + assign dec_tlu_perfcnt2[1:0] = mhpmc_inc_wb[2][1:0] & ~{2{perfcnt_halted}}; + assign dec_tlu_perfcnt3[1:0] = mhpmc_inc_wb[3][1:0] & ~{2{perfcnt_halted}}; + + // ---------------------------------------------------------------------- + // MHPMC3H(RW), MHPMC3(RW) + // [63:32][31:0] : Hardware Performance Monitor Counter 3 + `define MHPMC3 12'hB03 + `define MHPMC3H 12'hB83 + + assign mhpmc3_wr_en0 = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MHPMC3); + assign mhpmc3_wr_en1 = ~perfcnt_halted & (|(mhpmc_inc_wb[0][1:0])); + assign mhpmc3_wr_en = mhpmc3_wr_en0 | mhpmc3_wr_en1; + assign mhpmc3_incr[63:0] = {mhpmc3h[31:0],mhpmc3[31:0]} + {63'b0,mhpmc_inc_wb[0][1]} + {63'b0,mhpmc_inc_wb[0][0]}; + assign mhpmc3_ns[31:0] = mhpmc3_wr_en0 ? dec_csr_wrdata_wb[31:0] : mhpmc3_incr[31:0]; + rvdffe #(32) mhpmc3_ff (.*, .en(mhpmc3_wr_en), .din(mhpmc3_ns[31:0]), .dout(mhpmc3[31:0])); + + assign mhpmc3h_wr_en0 = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MHPMC3H); + assign mhpmc3h_wr_en = mhpmc3h_wr_en0 | mhpmc3_wr_en1; + assign mhpmc3h_ns[31:0] = mhpmc3h_wr_en0 ? 
dec_csr_wrdata_wb[31:0] : mhpmc3_incr[63:32]; + rvdffe #(32) mhpmc3h_ff (.*, .en(mhpmc3h_wr_en), .din(mhpmc3h_ns[31:0]), .dout(mhpmc3h[31:0])); + + // ---------------------------------------------------------------------- + // MHPMC4H(RW), MHPMC4(RW) + // [63:32][31:0] : Hardware Performance Monitor Counter 4 + `define MHPMC4 12'hB04 + `define MHPMC4H 12'hB84 + + assign mhpmc4_wr_en0 = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MHPMC4); + assign mhpmc4_wr_en1 = ~perfcnt_halted & (|(mhpmc_inc_wb[1][1:0])); + assign mhpmc4_wr_en = mhpmc4_wr_en0 | mhpmc4_wr_en1; + assign mhpmc4_incr[63:0] = {mhpmc4h[31:0],mhpmc4[31:0]} + {63'b0,mhpmc_inc_wb[1][1]} + {63'b0,mhpmc_inc_wb[1][0]}; + assign mhpmc4_ns[31:0] = mhpmc4_wr_en0 ? dec_csr_wrdata_wb[31:0] : mhpmc4_incr[31:0]; + rvdffe #(32) mhpmc4_ff (.*, .en(mhpmc4_wr_en), .din(mhpmc4_ns[31:0]), .dout(mhpmc4[31:0])); + + assign mhpmc4h_wr_en0 = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MHPMC4H); + assign mhpmc4h_wr_en = mhpmc4h_wr_en0 | mhpmc4_wr_en1; + assign mhpmc4h_ns[31:0] = mhpmc4h_wr_en0 ? dec_csr_wrdata_wb[31:0] : mhpmc4_incr[63:32]; + rvdffe #(32) mhpmc4h_ff (.*, .en(mhpmc4h_wr_en), .din(mhpmc4h_ns[31:0]), .dout(mhpmc4h[31:0])); + + // ---------------------------------------------------------------------- + // MHPMC5H(RW), MHPMC5(RW) + // [63:32][31:0] : Hardware Performance Monitor Counter 5 + `define MHPMC5 12'hB05 + `define MHPMC5H 12'hB85 + + assign mhpmc5_wr_en0 = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MHPMC5); + assign mhpmc5_wr_en1 = ~perfcnt_halted & (|(mhpmc_inc_wb[2][1:0])); + assign mhpmc5_wr_en = mhpmc5_wr_en0 | mhpmc5_wr_en1; + assign mhpmc5_incr[63:0] = {mhpmc5h[31:0],mhpmc5[31:0]} + {63'b0,mhpmc_inc_wb[2][1]} + {63'b0,mhpmc_inc_wb[2][0]}; + assign mhpmc5_ns[31:0] = mhpmc5_wr_en0 ? 
dec_csr_wrdata_wb[31:0] : mhpmc5_incr[31:0]; + rvdffe #(32) mhpmc5_ff (.*, .en(mhpmc5_wr_en), .din(mhpmc5_ns[31:0]), .dout(mhpmc5[31:0])); + + assign mhpmc5h_wr_en0 = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MHPMC5H); + assign mhpmc5h_wr_en = mhpmc5h_wr_en0 | mhpmc5_wr_en1; + assign mhpmc5h_ns[31:0] = mhpmc5h_wr_en0 ? dec_csr_wrdata_wb[31:0] : mhpmc5_incr[63:32]; + rvdffe #(32) mhpmc5h_ff (.*, .en(mhpmc5h_wr_en), .din(mhpmc5h_ns[31:0]), .dout(mhpmc5h[31:0])); + + // ---------------------------------------------------------------------- + // MHPMC6H(RW), MHPMC6(RW) + // [63:32][31:0] : Hardware Performance Monitor Counter 6 + `define MHPMC6 12'hB06 + `define MHPMC6H 12'hB86 + + assign mhpmc6_wr_en0 = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MHPMC6); + assign mhpmc6_wr_en1 = ~perfcnt_halted & (|(mhpmc_inc_wb[3][1:0])); + assign mhpmc6_wr_en = mhpmc6_wr_en0 | mhpmc6_wr_en1; + assign mhpmc6_incr[63:0] = {mhpmc6h[31:0],mhpmc6[31:0]} + {63'b0,mhpmc_inc_wb[3][1]} + {63'b0,mhpmc_inc_wb[3][0]}; + assign mhpmc6_ns[31:0] = mhpmc6_wr_en0 ? dec_csr_wrdata_wb[31:0] : mhpmc6_incr[31:0]; + rvdffe #(32) mhpmc6_ff (.*, .en(mhpmc6_wr_en), .din(mhpmc6_ns[31:0]), .dout(mhpmc6[31:0])); + + assign mhpmc6h_wr_en0 = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MHPMC6H); + assign mhpmc6h_wr_en = mhpmc6h_wr_en0 | mhpmc6_wr_en1; + assign mhpmc6h_ns[31:0] = mhpmc6h_wr_en0 ? dec_csr_wrdata_wb[31:0] : mhpmc6_incr[63:32]; + rvdffe #(32) mhpmc6h_ff (.*, .en(mhpmc6h_wr_en), .din(mhpmc6h_ns[31:0]), .dout(mhpmc6h[31:0])); + + // ---------------------------------------------------------------------- + // MHPME3(RW) + // [5:0] : Hardware Performance Monitor Event 3 + `define MHPME3 12'h323 + + // we only have 50 events, HPME* are WARL so saturate at 50 + logic [5:0] event_saturate_wb; + assign event_saturate_wb[5:0] = ((dec_csr_wrdata_wb[5:0] > 6'd50) | (|dec_csr_wrdata_wb[31:6])) ? 
6'd50 : dec_csr_wrdata_wb[5:0]; + + assign wr_mhpme3_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MHPME3); + rvdffs #(6) mhpme3_ff (.*, .clk(active_clk), .en(wr_mhpme3_wb), .din(event_saturate_wb[5:0]), .dout(mhpme3[5:0])); + // ---------------------------------------------------------------------- + // MHPME4(RW) + // [5:0] : Hardware Performance Monitor Event 4 + `define MHPME4 12'h324 + + assign wr_mhpme4_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MHPME4); + rvdffs #(6) mhpme4_ff (.*, .clk(active_clk), .en(wr_mhpme4_wb), .din(event_saturate_wb[5:0]), .dout(mhpme4[5:0])); + // ---------------------------------------------------------------------- + // MHPME5(RW) + // [5:0] : Hardware Performance Monitor Event 5 + `define MHPME5 12'h325 + + assign wr_mhpme5_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MHPME5); + rvdffs #(6) mhpme5_ff (.*, .clk(active_clk), .en(wr_mhpme5_wb), .din(event_saturate_wb[5:0]), .dout(mhpme5[5:0])); + // ---------------------------------------------------------------------- + // MHPME6(RW) + // [5:0] : Hardware Performance Monitor Event 6 + `define MHPME6 12'h326 + + assign wr_mhpme6_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MHPME6); + rvdffs #(6) mhpme6_ff (.*, .clk(active_clk), .en(wr_mhpme6_wb), .din(event_saturate_wb[5:0]), .dout(mhpme6[5:0])); + + //---------------------------------------------------------------------- + // Performance Monitor Counters section ends + //---------------------------------------------------------------------- + // ---------------------------------------------------------------------- + + // MGPMC(RW) + // [31:1] : Reserved, read 0x0 + // [0] : Perfmon controls 0: disable perf counters 1: enable. 
+ // + // Resets to 1'b1, counters enabled + `define MGPMC 12'h7d0 + + assign wr_mgpmc_wb = dec_csr_wen_wb_mod & (dec_csr_wraddr_wb[11:0] == `MGPMC); + rvdffs #(1) mgpmc_ff (.*, .clk(active_clk), .en(wr_mgpmc_wb), .din(~dec_csr_wrdata_wb[0]), .dout(mgpmc_b)); + assign mgpmc = ~mgpmc_b; + + + //-------------------------------------------------------------------------------- + // trace + //-------------------------------------------------------------------------------- + logic usoc_tclk; + + rvclkhdr usoctrace_cgc ( .en(i0_valid_wb | exc_or_int_valid_wb | interrupt_valid_wb | dec_tlu_i0_valid_wb1 | + dec_tlu_i0_exc_valid_wb1 | dec_tlu_i1_exc_valid_wb1 | dec_tlu_int_valid_wb1 | clk_override), .l1clk(usoc_tclk), .* ); + rvdff #(10) traceff (.*, .clk(usoc_tclk), + .din ({i0_valid_wb, i1_valid_wb, + i0_exception_valid_wb | lsu_i0_exc_wb | (i0_trigger_hit_wb & ~trigger_hit_dmode_wb), + ~(i0_exception_valid_wb | lsu_i0_exc_wb | i0_trigger_hit_wb) & exc_or_int_valid_wb & ~interrupt_valid_wb, + exc_cause_wb[4:0], + interrupt_valid_wb}), + .dout({dec_tlu_i0_valid_wb1, dec_tlu_i1_valid_wb1, + dec_tlu_i0_exc_valid_wb1, dec_tlu_i1_exc_valid_wb1, + dec_tlu_exc_cause_wb1[4:0], + dec_tlu_int_valid_wb1})); + + assign dec_tlu_mtval_wb1 = mtval[31:0]; + + // end trace + //-------------------------------------------------------------------------------- + + + // ---------------------------------------------------------------------- + // CSR read mux + // ---------------------------------------------------------------------- + +// file "csrdecode" is human readable file that has all of the CSR decodes defined and is part of git repo +// modify this file as needed + +// to generate all the equations below from "csrdecode" except legal equation: + +// 1) coredecode -in csrdecode > corecsrdecode.e + +// 2) espresso -Dso -oeqntott corecsrdecode.e | addassign -pre out. 
> csrequations + +// to generate the legal CSR equation below: + +// 1) coredecode -in csrdecode -legal > csrlegal.e + +// 2) espresso -Dso -oeqntott csrlegal.e | addassign -pre out. > csrlegal_equation + + +logic csr_misa; +logic csr_mvendorid; +logic csr_marchid; +logic csr_mimpid; +logic csr_mhartid; +logic csr_mstatus; +logic csr_mtvec; +logic csr_mip; +logic csr_mie; +logic csr_mcyclel; +logic csr_mcycleh; +logic csr_minstretl; +logic csr_minstreth; +logic csr_mscratch; +logic csr_mepc; +logic csr_mcause; +logic csr_mtval; +logic csr_mrac; +logic csr_dmst; +logic csr_mdseac; +logic csr_meihap; +logic csr_meivt; +logic csr_meipt; +logic csr_meicurpl; +logic csr_meicidpl; +logic csr_dcsr; +logic csr_mpmc; +logic csr_mcgc; +logic csr_mcpc; +logic csr_mfdc; +logic csr_dpc; +logic csr_mtsel; +logic csr_mtdata1; +logic csr_mtdata2; +logic csr_mhpmc3; +logic csr_mhpmc4; +logic csr_mhpmc5; +logic csr_mhpmc6; +logic csr_mhpmc3h; +logic csr_mhpmc4h; +logic csr_mhpmc5h; +logic csr_mhpmc6h; +logic csr_mhpme3; +logic csr_mhpme4; +logic csr_mhpme5; +logic csr_mhpme6; +logic csr_mgpmc; +logic csr_micect; +logic csr_miccmect; +logic csr_mdccmect; +logic csr_dicawics; +logic csr_dicad0; +logic csr_dicad1; +logic csr_dicago; +logic presync; +logic postsync; + +assign csr_misa = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[0]); + +assign csr_mvendorid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); + +assign csr_marchid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mimpid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[3] + &dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); + +assign csr_mhartid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7] + &dec_csr_rdaddr_d[2]); + +assign csr_mstatus = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]); + +assign csr_mtvec = 
(!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[0]); + +assign csr_mip = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[2]); + +assign csr_mie = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]); + +assign csr_mcyclel = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]); + +assign csr_mcycleh = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]); + +assign csr_minstretl = (!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_minstreth = (!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mscratch = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mepc = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_mcause = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mtval = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_mrac = (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]); + +assign csr_dmst = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]); + +assign csr_mdseac = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]); + +assign csr_meihap = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10] + &dec_csr_rdaddr_d[3]); + +assign csr_meivt = 
(!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] + &!dec_csr_rdaddr_d[0]); + +assign csr_meipt = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_meicurpl = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[3] + &dec_csr_rdaddr_d[2]); + +assign csr_meicidpl = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); + +assign csr_dcsr = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[0]); + +assign csr_mpmc = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]); + +assign csr_mcgc = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[0]); + +assign csr_mcpc = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]); + +assign csr_mfdc = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + &dec_csr_rdaddr_d[0]); + +assign csr_dpc = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[0]); + +assign csr_mtsel = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mtdata1 = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[0]); + +assign csr_mtdata2 = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[1]); + +assign csr_mhpmc3 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[0]); + +assign csr_mhpmc4 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mhpmc5 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1] + 
&dec_csr_rdaddr_d[0]); + +assign csr_mhpmc6 = (!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mhpmc3h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_mhpmc4h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mhpmc5h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_mhpmc6h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mhpme3 = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]); + +assign csr_mhpme4 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] + &!dec_csr_rdaddr_d[0]); + +assign csr_mhpme5 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_mhpme6 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1] + &!dec_csr_rdaddr_d[0]); + +assign csr_mgpmc = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]); + +assign csr_micect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_miccmect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[0]); + +assign csr_mdccmect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[4] + &dec_csr_rdaddr_d[1]); + +assign csr_dicawics = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[5] + 
&dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_dicad0 = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); + +assign csr_dicad1 = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[3] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_dicago = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[7] + &dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); + +assign presync = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + &dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[4]) | (!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]) | (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]) | ( + dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]); + +assign postsync = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + &dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]) | ( + !dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[10] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]) | (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]) | ( + !dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[0]); + + +logic legal_csr; +assign legal_csr = (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + 
&dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]) | ( + dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11] + &dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]) | ( + !dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11] + &dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]) | ( + dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]) | (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10] + &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[2]) | ( + !dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + 
&!dec_csr_rdaddr_d[2]) | (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | ( + dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10] + &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]) | (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10] + &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11] + &dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[0]) | ( + !dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[2]) | ( + !dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]) | (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10] + 
&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[1]) | ( + !dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[3]) | (!dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[4]) | (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10] + &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[3]) | (dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]); + + + + + +assign dec_tlu_presync_d = presync & dec_csr_any_unq_d & ~dec_csr_wen_unq_d; +assign dec_tlu_postsync_d = postsync & dec_csr_any_unq_d; +assign valid_csr = ( legal_csr & (~(csr_dcsr | csr_dpc | csr_dmst | csr_dicawics | csr_dicad0 | csr_dicad1 | csr_dicago) | dbg_tlu_halted_f)); + +assign dec_csr_legal_d = ( dec_csr_any_unq_d & + valid_csr & // of a valid CSR + ~(dec_csr_wen_unq_d & (csr_mvendorid | csr_marchid | csr_mimpid | csr_mhartid | csr_mdseac | csr_meihap)) // that's not a write to a RO CSR + ); + // CSR read mux +assign dec_csr_rddata_d[31:0] = ( ({32{csr_misa}} & 32'h40001104) | + ({32{csr_mvendorid}} & 32'h00000045) | + ({32{csr_marchid}} & 32'h0000000b) | + ({32{csr_mimpid}} & 32'h1) | + ({32{csr_mstatus}} & {19'b0, 2'b11, 3'b0, mstatus[1], 3'b0, mstatus[0], 3'b0}) | + ({32{csr_mtvec}} & {mtvec[30:1], 1'b0, mtvec[0]}) | + ({32{csr_mip}} & {1'b0, mip[3], 18'b0, mip[2], 3'b0, mip[1], 3'b0, mip[0], 3'b0}) | + ({32{csr_mie}} & {1'b0, mie[3], 18'b0, mie[2], 3'b0, 
mie[1], 3'b0, mie[0], 3'b0}) | + ({32{csr_mcyclel}} & mcyclel[31:0]) | + ({32{csr_mcycleh}} & mcycleh_inc[31:0]) | + ({32{csr_minstretl}} & minstretl_read[31:0]) | + ({32{csr_minstreth}} & minstreth_read[31:0]) | + ({32{csr_mscratch}} & mscratch[31:0]) | + ({32{csr_mepc}} & {mepc[31:1], 1'b0}) | + ({32{csr_mcause}} & mcause[31:0]) | + ({32{csr_mtval}} & mtval[31:0]) | + ({32{csr_mrac}} & mrac[31:0]) | + ({32{csr_mdseac}} & mdseac[31:0]) | + ({32{csr_meivt}} & {meivt[31:10], 10'b0}) | + ({32{csr_meihap}} & {meivt[31:10], meihap[9:2], 2'b0}) | + ({32{csr_meicurpl}} & {28'b0, meicurpl[3:0]}) | + ({32{csr_meicidpl}} & {28'b0, meicidpl[3:0]}) | + ({32{csr_meipt}} & {28'b0, meipt[3:0]}) | + ({32{csr_mcgc}} & {23'b0, mcgc[8:0]}) | + ({32{csr_mfdc}} & {13'b0, mfdc[18:0]}) | + ({32{csr_dcsr}} & {16'h4000, dcsr[15:2], 2'b11}) | + ({32{csr_dpc}} & {dpc[31:1], 1'b0}) | + ({32{csr_dicad0}} & dicad0[31:0]) | +`ifdef RV_ICACHE_ECC + ({32{csr_dicad1}} & {22'b0, dicad1[9:0]}) | +`else + ({32{csr_dicad1}} & {30'b0, dicad1[1:0]}) | +`endif + ({32{csr_dicawics}} & {7'b0, dicawics[18], 2'b0, dicawics[17:16], 4'b0, dicawics[15:2], 2'b0}) | + ({32{csr_mtsel}} & {30'b0, mtsel[1:0]}) | + ({32{csr_mtdata1}} & {mtdata1_tsel_out[31:0]}) | + ({32{csr_mtdata2}} & {mtdata2_tsel_out[31:0]}) | + ({32{csr_micect}} & {micect[31:0]}) | + ({32{csr_miccmect}} & {miccmect[31:0]}) | + ({32{csr_mdccmect}} & {mdccmect[31:0]}) | + ({32{csr_mhpmc3}} & mhpmc3[31:0]) | + ({32{csr_mhpmc4}} & mhpmc4[31:0]) | + ({32{csr_mhpmc5}} & mhpmc5[31:0]) | + ({32{csr_mhpmc6}} & mhpmc6[31:0]) | + ({32{csr_mhpmc3h}} & mhpmc3h[31:0]) | + ({32{csr_mhpmc4h}} & mhpmc4h[31:0]) | + ({32{csr_mhpmc5h}} & mhpmc5h[31:0]) | + ({32{csr_mhpmc6h}} & mhpmc6h[31:0]) | + ({32{csr_mhpme3}} & {26'b0,mhpme3[5:0]}) | + ({32{csr_mhpme4}} & {26'b0,mhpme4[5:0]}) | + ({32{csr_mhpme5}} & {26'b0,mhpme5[5:0]}) | + ({32{csr_mhpme6}} & {26'b0,mhpme6[5:0]}) | + ({32{csr_mgpmc}} & {31'b0, mgpmc}) + ); + + + +`undef MSTATUS_MIE +`undef MISA +`undef 
MVENDORID +`undef MARCHID +`undef MIMPID +`undef MHARTID +`undef MSTATUS +`undef MTVEC +`undef MIP +`undef MIP_MEIP +`undef MIP_MTIP +`undef MIP_MSIP +`undef MIE +`undef MIE_MEIE +`undef MIE_MTIE +`undef MCYCLEL +`undef MCYCLEH +`undef MINSTRETL +`undef MINSTRETH +`undef MSCRATCH +`undef MEPC +`undef MCAUSE +`undef MTVAL +`undef MRAC +`undef MDSEAC +`undef MEIHAP +`undef MEIPT +`undef MEICURPL + + +endmodule // dec_tlu_ctl + diff --git a/design/dec/dec_trigger.sv b/design/dec/dec_trigger.sv new file mode 100644 index 0000000..012599f --- /dev/null +++ b/design/dec/dec_trigger.sv @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// +// Owner: +// Function: DEC Trigger Logic +// Comments: Compares the i0/i1 decode-stage PCs against the four trigger packets and drives one match bit per trigger. +// +//******************************************************************************** +module dec_trigger + import swerv_types::*; +( + input logic clk, // not referenced inside this module + input logic rst_l, // not referenced inside this module + + input trigger_pkt_t [3:0] trigger_pkt_any, // Packet from tlu. 'select':0-pc,1-Opcode 'Execute' needs to be set for dec triggers to fire. 
'match'-1 do mask, 0: full match + input logic [31:1] dec_i0_pc_d, // i0 pc + input logic [31:1] dec_i1_pc_d, // i1 pc + + output logic [3:0] dec_i0_trigger_match_d, // bit i is set when trigger i matches the i0 pc + output logic [3:0] dec_i1_trigger_match_d // bit i is set when trigger i matches the i1 pc +); + + logic [3:0][31:0] dec_i0_match_data; // per-trigger compare data for i0; all zeros unless select=0 and execute=1 + logic [3:0] dec_i0_trigger_data_match; // per-trigger rvmaskandmatch result for i0, before execute/m gating + logic [3:0][31:0] dec_i1_match_data; // per-trigger compare data for i1; all zeros unless select=0 and execute=1 + logic [3:0] dec_i1_trigger_data_match; // per-trigger rvmaskandmatch result for i1, before execute/m gating + + for (genvar i=0; i<4; i++) begin + assign dec_i0_match_data[i][31:0] = ({32{~trigger_pkt_any[i].select & trigger_pkt_any[i].execute}} & {dec_i0_pc_d[31:1], trigger_pkt_any[i].tdata2[0]}); // select=0; do a PC match; dec_i0_pc_d has no bit 0, so tdata2[0] supplies data bit 0 + + assign dec_i1_match_data[i][31:0] = ({32{~trigger_pkt_any[i].select & trigger_pkt_any[i].execute}} & {dec_i1_pc_d[31:1], trigger_pkt_any[i].tdata2[0]} ); // select=0; do a PC match; dec_i1_pc_d has no bit 0, so tdata2[0] supplies data bit 0 + + rvmaskandmatch trigger_i0_match (.mask(trigger_pkt_any[i].tdata2[31:0]), .data(dec_i0_match_data[i][31:0]), .masken(trigger_pkt_any[i].match), .match(dec_i0_trigger_data_match[i])); + rvmaskandmatch trigger_i1_match (.mask(trigger_pkt_any[i].tdata2[31:0]), .data(dec_i1_match_data[i][31:0]), .masken(trigger_pkt_any[i].match), .match(dec_i1_trigger_data_match[i])); + + assign dec_i0_trigger_match_d[i] = trigger_pkt_any[i].execute & trigger_pkt_any[i].m & dec_i0_trigger_data_match[i]; // gated by the packet's execute and m bits + assign dec_i1_trigger_match_d[i] = trigger_pkt_any[i].execute & trigger_pkt_any[i].m & dec_i1_trigger_data_match[i]; // gated by the packet's execute and m bits + end + +endmodule // dec_trigger + diff --git a/design/dec/decode b/design/dec/decode new file mode 100644 index 0000000..e4e85dd --- /dev/null +++ b/design/dec/decode @@ -0,0 +1,322 @@ + +.definition + +add = [0000000..........000.....0110011] +addi = [.................000.....0010011] + +sub = [0100000..........000.....0110011] + +and = [0000000..........111.....0110011] +andi = [.................111.....0010011] + +or = [0000000..........110.....0110011] +ori = [.................110.....0010011] + +xor = [0000000..........100.....0110011] +xori = [.................100.....0010011] + +sll = 
[0000000..........001.....0110011] +slli = [0000000..........001.....0010011] + +sra = [0100000..........101.....0110011] +srai = [0100000..........101.....0010011] + +srl = [0000000..........101.....0110011] +srli = [0000000..........101.....0010011] + +lui = [.........................0110111] +auipc= [.........................0010111] + +slt = [0000000..........010.....0110011] +sltu = [0000000..........011.....0110011] +slti = [.................010.....0010011] +sltiu= [.................011.....0010011] + +beq = [.................000.....1100011] +bne = [.................001.....1100011] +bge = [.................101.....1100011] +blt = [.................100.....1100011] +bgeu = [.................111.....1100011] +bltu = [.................110.....1100011] + +jal = [.........................1101111] +jalr = [.................000.....1100111] + +lb = [.................000.....0000011] +lh = [.................001.....0000011] +lw = [.................010.....0000011] + +sb = [.................000.....0100011] +sh = [.................001.....0100011] +sw = [.................010.....0100011] + +lbu = [.................100.....0000011] +lhu = [.................101.....0000011] + +fence = [0000........00000000000000001111] +fence.i = [00000000000000000001000000001111] + +ebreak = [00000000000100000000000001110011] +ecall = [00000000000000000000000001110011] + +mret = [00110000001000000000000001110011] + +wfi = [00010000010100000000000001110011] + +csrrc_ro = [............00000011.....1110011] +csrrc_rw0 = [............1....011.....1110011] +csrrc_rw1 = [.............1...011.....1110011] +csrrc_rw2 = [..............1..011.....1110011] +csrrc_rw3 = [...............1.011.....1110011] +csrrc_rw4 = [................1011.....1110011] + +csrrci_ro = [............00000111.....1110011] +csrrci_rw0 = [............1....111.....1110011] +csrrci_rw1 = [.............1...111.....1110011] +csrrci_rw2 = [..............1..111.....1110011] +csrrci_rw3 = [...............1.111.....1110011] 
+csrrci_rw4 = [................1111.....1110011] + +csrrs_ro = [............00000010.....1110011] +csrrs_rw0 = [............1....010.....1110011] +csrrs_rw1 = [.............1...010.....1110011] +csrrs_rw2 = [..............1..010.....1110011] +csrrs_rw3 = [...............1.010.....1110011] +csrrs_rw4 = [................1010.....1110011] + +csrrsi_ro = [............00000110.....1110011] +csrrsi_rw0 = [............1....110.....1110011] +csrrsi_rw1 = [.............1...110.....1110011] +csrrsi_rw2 = [..............1..110.....1110011] +csrrsi_rw3 = [...............1.110.....1110011] +csrrsi_rw4 = [................1110.....1110011] + + +csrw = [.................001000001110011] +csrrw0 = [.................001....11110011] +csrrw1 = [.................001...1.1110011] +csrrw2 = [.................001..1..1110011] +csrrw3 = [.................001.1...1110011] +csrrw4 = [.................0011....1110011] + +csrwi = [.................101000001110011] +csrrwi0 = [.................101....11110011] +csrrwi1 = [.................101...1.1110011] +csrrwi2 = [.................101..1..1110011] +csrrwi3 = [.................101.1...1110011] +csrrwi4 = [.................1011....1110011] + +mul = [0000001..........000.....0110011] +mulh = [0000001..........001.....0110011] +mulhsu = [0000001..........010.....0110011] +mulhu = [0000001..........011.....0110011] + +div = [0000001..........100.....0110011] +divu = [0000001..........101.....0110011] +rem = [0000001..........110.....0110011] +remu = [0000001..........111.....0110011] + + +.input + +rv32i = { + i[31] + i[30] + i[29] + i[28] + i[27] + i[26] + i[25] + i[24] + i[23] + i[22] + i[21] + i[20] + i[19] + i[18] + i[17] + i[16] + i[15] + i[14] + i[13] + i[12] + i[11] + i[10] + i[9] + i[8] + i[7] + i[6] + i[5] + i[4] + i[3] + i[2] + i[1] + i[0] +} + + +.output + +rv32i = { + alu + rs1 + rs2 + imm12 + rd + shimm5 + imm20 + pc + load + store + lsu + add + sub + land + lor + lxor + sll + sra + srl + slt + unsign + condbr + beq + bne + bge + 
blt + jal + by + half + word + csr_read + csr_clr + csr_set + csr_write + csr_imm + presync + postsync + ebreak + ecall + mret + mul + rs1_sign + rs2_sign + low + div + rem + fence + fence_i + pm_alu +} + +.decode + +rv32i[mul] = { mul rs1 rs2 rd low } +rv32i[mulh] = { mul rs1 rs2 rd rs1_sign rs2_sign } +rv32i[mulhu] = { mul rs1 rs2 rd } +rv32i[mulhsu] = { mul rs1 rs2 rd rs1_sign } + +rv32i[div] = { div rs1 rs2 rd presync postsync} +rv32i[divu] = { div rs1 rs2 rd unsign presync postsync} +rv32i[rem] = { div rs1 rs2 rd presync postsync rem} +rv32i[remu] = { div rs1 rs2 rd unsign presync postsync rem} + +rv32i[add] = { alu rs1 rs2 rd add pm_alu } +rv32i[addi] = { alu rs1 imm12 rd add pm_alu } + +rv32i[sub] = { alu rs1 rs2 rd sub pm_alu } + +rv32i[and] = { alu rs1 rs2 rd land pm_alu } +rv32i[andi] = { alu rs1 imm12 rd land pm_alu } + +rv32i[or] = { alu rs1 rs2 rd lor pm_alu } +rv32i[ori] = { alu rs1 imm12 rd lor pm_alu } + +rv32i[xor] = { alu rs1 rs2 rd lxor pm_alu } +rv32i[xori] = { alu rs1 imm12 rd lxor pm_alu } + +rv32i[sll] = { alu rs1 rs2 rd sll pm_alu } +rv32i[slli] = { alu rs1 shimm5 rd sll pm_alu } + +rv32i[sra] = { alu rs1 rs2 rd sra pm_alu } +rv32i[srai] = { alu rs1 shimm5 rd sra pm_alu } + +rv32i[srl] = { alu rs1 rs2 rd srl pm_alu } +rv32i[srli] = { alu rs1 shimm5 rd srl pm_alu } + +rv32i[lui] = { alu imm20 rd lor pm_alu } +rv32i[auipc] = { alu imm20 pc rd add pm_alu } + + +rv32i[slt] = { alu rs1 rs2 rd sub slt pm_alu } +rv32i[sltu] = { alu rs1 rs2 rd sub slt unsign pm_alu } +rv32i[slti] = { alu rs1 imm12 rd sub slt pm_alu } +rv32i[sltiu] = { alu rs1 imm12 rd sub slt unsign pm_alu } + +rv32i[beq] = { alu rs1 rs2 sub condbr beq } +rv32i[bne] = { alu rs1 rs2 sub condbr bne } +rv32i[bge] = { alu rs1 rs2 sub condbr bge } +rv32i[blt] = { alu rs1 rs2 sub condbr blt } +rv32i[bgeu] = { alu rs1 rs2 sub condbr bge unsign } +rv32i[bltu] = { alu rs1 rs2 sub condbr blt unsign } + +rv32i[jal] = { alu imm20 rd pc jal } +rv32i[jalr] = { alu rs1 rd imm12 jal } + +rv32i[lb] 
= { lsu load rs1 rd by } +rv32i[lh] = { lsu load rs1 rd half } +rv32i[lw] = { lsu load rs1 rd word } +rv32i[lbu] = { lsu load rs1 rd by unsign } +rv32i[lhu] = { lsu load rs1 rd half unsign } + +rv32i[sb] = { lsu store rs1 rs2 by } +rv32i[sh] = { lsu store rs1 rs2 half } +rv32i[sw] = { lsu store rs1 rs2 word } + + +rv32i[fence] = { alu lor fence presync} + +# fence.i has fence effect in addition to flush I$ and redirect +rv32i[fence.i] = { alu lor fence fence_i presync postsync} + +# nops for now + +rv32i[ebreak] = { alu rs1 imm12 rd lor ebreak postsync} +rv32i[ecall] = { alu rs1 imm12 rd lor ecall postsync} +rv32i[mret] = { alu rs1 imm12 rd lor mret postsync} + +rv32i[wfi] = { alu rs1 imm12 rd lor pm_alu } + +# csr means read + +# csr_read - put csr on rs2 and rs1 0's +rv32i[csrrc_ro] = { alu rd csr_read lor } + +# put csr on rs2 and make rs1 0's into alu. Save rs1 for csr_clr later +rv32i[csrrc_rw{0-4}] = { alu rd csr_read rs1 csr_clr lor presync postsync } + +rv32i[csrrci_ro] = { alu rd csr_read lor } + +rv32i[csrrci_rw{0-4}] = { alu rd csr_read rs1 csr_clr csr_imm lor presync postsync } + +rv32i[csrrs_ro] = { alu rd csr_read lor } + +rv32i[csrrs_rw{0-4}] = { alu rd csr_read rs1 csr_set lor presync postsync } + +rv32i[csrrsi_ro] = { alu rd csr_read lor } + +rv32i[csrrsi_rw{0-4}] = { alu rd csr_read rs1 csr_set csr_imm lor presync postsync } + +rv32i[csrrw{0-4}] = { alu rd csr_read rs1 csr_write lor presync postsync } + + +rv32i[csrrwi{0-4}] = { alu rd csr_read rs1 csr_write csr_imm lor presync postsync } + +# optimize csr write only - pipelined +rv32i[csrw] = { alu rd rs1 csr_write } + +rv32i[csrwi] = { alu rd csr_write csr_imm } + + +.end + diff --git a/design/dma_ctrl.sv b/design/dma_ctrl.sv new file mode 100644 index 0000000..881165a --- /dev/null +++ b/design/dma_ctrl.sv @@ -0,0 +1,697 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// Function: SWERV DMA controller +// Comments: +// +//******************************************************************************** + +module dma_ctrl ( + input logic clk, + input logic free_clk, + input logic rst_l, + input logic dma_bus_clk_en, // slave bus clock enable + input logic clk_override, + + // AXI signals + // AXI Write Channels + input logic dma_axi_awvalid, + output logic dma_axi_awready, + input logic [`RV_DMA_BUS_TAG-1:0] dma_axi_awid, + input logic [31:0] dma_axi_awaddr, + input logic [2:0] dma_axi_awsize, + input logic [2:0] dma_axi_awprot, + input logic [7:0] dma_axi_awlen, + input logic [1:0] dma_axi_awburst, + + input logic dma_axi_wvalid, + output logic dma_axi_wready, + input logic [63:0] dma_axi_wdata, + input logic [7:0] dma_axi_wstrb, + input logic dma_axi_wlast, + + output logic dma_axi_bvalid, + input logic dma_axi_bready, + output logic [1:0] dma_axi_bresp, + output logic [`RV_DMA_BUS_TAG-1:0] dma_axi_bid, + + // AXI Read Channels + input logic dma_axi_arvalid, + output logic dma_axi_arready, + input logic [`RV_DMA_BUS_TAG-1:0] dma_axi_arid, + input logic [31:0] dma_axi_araddr, + input logic [2:0] dma_axi_arsize, + input logic [2:0] dma_axi_arprot, + input logic [7:0] dma_axi_arlen, + input logic [1:0] dma_axi_arburst, + + output logic dma_axi_rvalid, + input logic
dma_axi_rready, + output logic [`RV_DMA_BUS_TAG-1:0] dma_axi_rid, + output logic [63:0] dma_axi_rdata, + output logic [1:0] dma_axi_rresp, + output logic dma_axi_rlast, + + output logic dma_slv_algn_err, + // Debug signals + input logic [31:0] dbg_cmd_addr, + input logic [31:0] dbg_cmd_wrdata, + input logic dbg_cmd_valid, + input logic dbg_cmd_write, // 1: write command, 0: read_command + input logic [1:0] dbg_cmd_type, // 0:gpr 1:csr 2: memory + input logic [1:0] dbg_cmd_size, // size of the abstract mem access debug command + + input logic dbg_dma_bubble, // Debug needs a bubble to send a valid + output logic dma_dbg_ready, // DMA is ready to accept debug request + + output logic dma_dbg_cmd_done, + output logic dma_dbg_cmd_fail, + output logic [31:0] dma_dbg_rddata, + + // Core side signals + output logic dma_dccm_req, // DMA dccm request (only one of dccm/iccm will be set) + output logic dma_iccm_req, // DMA iccm request + output logic [31:0] dma_mem_addr, // DMA request address + output logic [2:0] dma_mem_sz, // DMA request size + output logic dma_mem_write, // DMA write to dccm/iccm + output logic [63:0] dma_mem_wdata, // DMA write data + + input logic dccm_dma_rvalid, // dccm data valid for DMA read + input logic dccm_dma_ecc_error, // ECC error on DMA read + input logic [63:0] dccm_dma_rdata, // dccm data for DMA read + input logic iccm_dma_rvalid, // iccm data valid for DMA read + input logic iccm_dma_ecc_error, // ECC error on DMA read + input logic [63:0] iccm_dma_rdata, // iccm data for DMA read + + output logic dma_dccm_stall_any, // stall dccm pipe (bubble) so that DMA can proceed + output logic dma_iccm_stall_any, // stall iccm pipe (bubble) so that DMA can proceed + input logic dccm_ready, // dccm ready to accept DMA request + input logic iccm_ready, // iccm ready to accept DMA request + input logic dec_tlu_stall_dma, // stall dma accesses, tlu is attempting to enter halt/debug mode + input logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority 
coming from MFDC [18:15] + + input logic scan_mode +); + +`include "global.h" + + localparam DEPTH = DMA_BUF_DEPTH; + localparam DEPTH_PTR = $clog2(DEPTH); + localparam NACK_COUNT = 7; + + logic [DEPTH-1:0] fifo_valid; + logic [DEPTH-1:0][1:0] fifo_error; + logic [DEPTH-1:0] fifo_dccm_valid; + logic [DEPTH-1:0] fifo_iccm_valid; + logic [DEPTH-1:0] fifo_data_valid; + logic [DEPTH-1:0] fifo_data_bus_valid; + logic [DEPTH-1:0] fifo_error_bus; + logic [DEPTH-1:0] fifo_rpend; + logic [DEPTH-1:0] fifo_done; // DMA trxn is done in core + logic [DEPTH-1:0] fifo_done_bus; // DMA trxn is done in core synced to bus + logic [DEPTH-1:0] fifo_rsp_done; // DMA response sent to bus + logic [DEPTH-1:0][31:0] fifo_addr; + logic [DEPTH-1:0][2:0] fifo_sz; + logic [DEPTH-1:0] fifo_write; + logic [DEPTH-1:0] fifo_posted_write; + logic [DEPTH-1:0] fifo_dbg; + logic [DEPTH-1:0][63:0] fifo_data; + logic [DEPTH-1:0][DMA_BUS_TAG-1:0] fifo_tag; + + logic [DEPTH-1:0] fifo_cmd_en; + logic [DEPTH-1:0] fifo_valid_en; + logic [DEPTH-1:0] fifo_data_en; + logic [DEPTH-1:0] fifo_data_bus_en; + logic [DEPTH-1:0] fifo_pend_en; + logic [DEPTH-1:0] fifo_done_en; + logic [DEPTH-1:0] fifo_done_bus_en; + logic [DEPTH-1:0] fifo_error_en; + logic [DEPTH-1:0] fifo_error_bus_en; + //logic [DEPTH-1:0] fifo_rsp_done_en; + logic [DEPTH-1:0] fifo_reset; + logic [DEPTH-1:0][1:0] fifo_error_in; + logic [DEPTH-1:0][63:0] fifo_data_in; + + logic fifo_write_in; + logic fifo_posted_write_in; + logic fifo_dbg_in; + logic [31:0] fifo_addr_in; + logic [2:0] fifo_sz_in; + + logic [DEPTH_PTR-1:0] RspPtr, PrevRspPtr, NxtRspPtr; + logic [DEPTH_PTR-1:0] WrPtr, NxtWrPtr; + logic [DEPTH_PTR-1:0] RdPtr, NxtRdPtr; + logic [DEPTH_PTR-1:0] RdPtr_Q1, RdPtr_Q2, RdPtr_Q3; + logic WrPtrEn, RdPtrEn, RspPtrEn; + + logic dma_dbg_cmd_error_in; + logic dma_dbg_cmd_done_q; + + logic fifo_full, fifo_full_spec, fifo_empty; + logic dma_address_error, dma_alignment_error; + logic [3:0] num_fifo_vld; + logic dma_mem_req; + logic dma_addr_in_dccm; + 
logic dma_addr_in_iccm; + logic dma_addr_in_pic; + logic dma_addr_in_pic_region_nc; + logic dma_addr_in_dccm_region_nc; + logic dma_addr_in_iccm_region_nc; + + logic [2:0] dma_nack_count_csr; + logic [2:0] dma_nack_count, dma_nack_count_d; + + logic dma_buffer_c1_clken; + logic dma_free_clken; + logic dma_buffer_c1_clk; + logic dma_free_clk; + logic dma_bus_clk; + + + logic wrbuf_en, wrbuf_data_en; + logic wrbuf_cmd_sent, wrbuf_rst, wrbuf_data_rst; + logic wrbuf_vld; + logic wrbuf_data_vld; + logic wrbuf_posted; + logic [DMA_BUS_TAG-1:0] wrbuf_tag; + logic [2:0] wrbuf_size; + logic [31:0] wrbuf_addr; + logic [63:0] wrbuf_data; + logic [7:0] wrbuf_byteen; + + logic rdbuf_en; + logic rdbuf_cmd_sent, rdbuf_rst; + logic rdbuf_vld; + logic [DMA_BUS_TAG-1:0] rdbuf_tag; + logic [2:0] rdbuf_size; + logic [31:0] rdbuf_addr; + + + logic axi_mstr_valid, axi_mstr_valid_q; + logic axi_mstr_write; + logic axi_mstr_posted_write; + logic [DMA_BUS_TAG-1:0] axi_mstr_tag; + logic [31:0] axi_mstr_addr; + logic [2:0] axi_mstr_size; + logic [63:0] axi_mstr_wdata; + logic [7:0] axi_mstr_wstrb; + + logic axi_mstr_prty_in, axi_mstr_prty_en; + logic axi_mstr_priority; + logic axi_mstr_sel; + + logic axi_slv_valid; + logic axi_slv_sent, axi_slv_sent_q; + logic axi_slv_write; + logic axi_slv_posted_write; + logic [DMA_BUS_TAG-1:0] axi_slv_tag; + logic [1:0] axi_slv_error; + logic [63:0] axi_slv_rdata; + + logic dma_bus_clk_en_q; + logic fifo_full_spec_bus; + logic dbg_dma_bubble_bus; + logic dec_tlu_stall_dma_bus; + logic dma_fifo_ready; + + //------------------------LOGIC STARTS HERE--------------------------------- + + // FIFO inputs + assign fifo_addr_in[31:0] = dbg_cmd_valid ? dbg_cmd_addr[31:0] : axi_mstr_addr[31:0]; + assign fifo_sz_in[2:0] = dbg_cmd_valid ? {1'b0,dbg_cmd_size[1:0]} : axi_mstr_size[2:0]; + assign fifo_write_in = dbg_cmd_valid ? 
dbg_cmd_write : axi_mstr_write; + assign fifo_posted_write_in = axi_mstr_valid & axi_mstr_posted_write; + assign fifo_dbg_in = dbg_cmd_valid; + //assign fifo_error_in[1:0] = dccm_dma_rvalid ? {1'b0,dccm_dma_ecc_error} : iccm_dma_rvalid ? {1'b0,iccm_dma_ecc_error} : {(dma_address_error | dma_alignment_error | dma_dbg_cmd_error_in), dma_alignment_error}; + //assign fifo_data_in[63:0] = dccm_dma_rvalid ? dccm_dma_rdata[63:0] : (iccm_dma_rvalid ? iccm_dma_rdata[63:0] : + // (dbg_cmd_valid ? {2{dbg_cmd_wrdata[31:0]}} : axi_mstr_wdata[63:0])); + + for (genvar i=0 ;i= dma_nack_count_csr); + assign dma_iccm_stall_any = dma_mem_req & fifo_iccm_valid[RdPtr] & (dma_nack_count >= dma_nack_count_csr); + + // Used to indicate ready to debug + assign fifo_empty = ~(|(fifo_valid_en[DEPTH-1:0] | fifo_valid[DEPTH-1:0]) | axi_mstr_valid | axi_slv_sent_q); // We want RspPtr to settle before accepting debug command + + // Nack counter, stall the lsu pipe if 7 nacks + assign dma_nack_count_csr[2:0] = dec_tlu_dma_qos_prty[2:0]; + assign dma_nack_count_d[2:0] = (dma_nack_count[2:0] >= dma_nack_count_csr[2:0]) ? ({3{~(dma_dccm_req | dma_iccm_req)}} & dma_nack_count[2:0]) : + (dma_mem_req & ~(dma_dccm_req | dma_iccm_req)) ? 
(dma_nack_count[2:0] + 1'b1) : 3'b0; + + rvdffs #(3) nack_count_dff(.din(dma_nack_count_d[2:0]), .dout(dma_nack_count[2:0]), .en(dma_mem_req), .clk(dma_free_clk), .*); + + // Core outputs + assign dma_mem_req = fifo_valid[RdPtr] & ~fifo_rpend[RdPtr] & ~fifo_done[RdPtr] & ~(|fifo_error[RdPtr]) & (~fifo_write[RdPtr] | fifo_data_valid[RdPtr]); + assign dma_dccm_req = dma_mem_req & fifo_dccm_valid[RdPtr] & dccm_ready; + assign dma_iccm_req = dma_mem_req & fifo_iccm_valid[RdPtr] & iccm_ready; + assign dma_mem_addr[31:0] = fifo_addr[RdPtr]; + assign dma_mem_sz[2:0] = fifo_sz[RdPtr]; + assign dma_mem_write = fifo_write[RdPtr]; + assign dma_mem_wdata[63:0] = fifo_data[RdPtr]; + + // Address check dccm + rvrangecheck #(.CCM_SADR(`RV_DCCM_SADR), + .CCM_SIZE(`RV_DCCM_SIZE)) addr_dccm_rangecheck ( + .addr(fifo_addr_in[31:0]), + .in_range(dma_addr_in_dccm), + .in_region(dma_addr_in_dccm_region_nc) + ); + + // Address check iccm +`ifdef RV_ICCM_ENABLE + rvrangecheck #(.CCM_SADR(`RV_ICCM_SADR), + .CCM_SIZE(`RV_ICCM_SIZE)) addr_iccm_rangecheck ( + .addr(fifo_addr_in[31:0]), + .in_range(dma_addr_in_iccm), + .in_region(dma_addr_in_iccm_region_nc) + ); +`else + assign dma_addr_in_iccm = '0; + assign dma_addr_in_iccm_region_nc = '0; +`endif + + // PIC memory address check + rvrangecheck #(.CCM_SADR(`RV_PIC_BASE_ADDR), + .CCM_SIZE(`RV_PIC_SIZE)) addr_pic_rangecheck ( + .addr(fifo_addr_in[31:0]), + .in_range(dma_addr_in_pic), + .in_region(dma_addr_in_pic_region_nc) + ); + + // Inputs + rvdff #(1) ahbs_bus_clken_ff (.din(dma_bus_clk_en), .dout(dma_bus_clk_en_q), .clk(free_clk), .*); + rvdff #(1) fifo_full_bus_ff (.din(fifo_full_spec), .dout(fifo_full_spec_bus), .clk(dma_bus_clk), .*); + rvdff #(1) dbg_dma_bubble_ff (.din(dbg_dma_bubble), .dout(dbg_dma_bubble_bus), .clk(dma_bus_clk), .*); + rvdff #(1) dec_tlu_stall_dma_ff (.din(dec_tlu_stall_dma), .dout(dec_tlu_stall_dma_bus), .clk(dma_bus_clk), .*); + rvdff #(1) dma_dbg_cmd_doneff (.din(dma_dbg_cmd_done), .dout(dma_dbg_cmd_done_q), 
.clk(free_clk), .*); + + // Clock Gating logic + assign dma_buffer_c1_clken = (axi_mstr_valid & dma_bus_clk_en) | dbg_cmd_valid | dec_tlu_stall_dma | clk_override; + assign dma_free_clken = (axi_mstr_valid | axi_mstr_valid_q | axi_slv_valid | axi_slv_sent_q | dbg_cmd_valid | dma_dbg_cmd_done | dma_dbg_cmd_done_q | (|fifo_valid[DEPTH-1:0]) | wrbuf_vld | rdbuf_vld | dec_tlu_stall_dma | clk_override); + + rvclkhdr dma_buffer_c1cgc ( .en(dma_buffer_c1_clken), .l1clk(dma_buffer_c1_clk), .* ); + rvclkhdr dma_free_cgc (.en(dma_free_clken), .l1clk(dma_free_clk), .*); + rvclkhdr dma_bus_cgc (.en(dma_bus_clk_en), .l1clk(dma_bus_clk), .*); + + // Write channel buffer + assign wrbuf_en = dma_axi_awvalid & dma_axi_awready; + assign wrbuf_data_en = dma_axi_wvalid & dma_axi_wready; + assign wrbuf_cmd_sent = axi_mstr_valid & axi_mstr_write; + assign wrbuf_rst = wrbuf_cmd_sent & ~wrbuf_en; + assign wrbuf_data_rst = wrbuf_cmd_sent & ~wrbuf_data_en; + + rvdffsc #(.WIDTH(1)) wrbuf_vldff(.din(1'b1), .dout(wrbuf_vld), .en(wrbuf_en), .clear(wrbuf_rst), .clk(dma_bus_clk), .*); + rvdffsc #(.WIDTH(1)) wrbuf_data_vldff(.din(1'b1), .dout(wrbuf_data_vld), .en(wrbuf_data_en), .clear(wrbuf_data_rst), .clk(dma_bus_clk), .*); + rvdffs #(.WIDTH(1)) wrbuf_postedff(.din(1'b0), .dout(wrbuf_posted), .en(wrbuf_en), .clk(dma_bus_clk), .*); + rvdffs #(.WIDTH(DMA_BUS_TAG)) wrbuf_tagff(.din(dma_axi_awid[DMA_BUS_TAG-1:0]), .dout(wrbuf_tag[DMA_BUS_TAG-1:0]), .en(wrbuf_en), .clk(dma_bus_clk), .*); + rvdffs #(.WIDTH(3)) wrbuf_sizeff(.din(dma_axi_awsize[2:0]), .dout(wrbuf_size[2:0]), .en(wrbuf_en), .clk(dma_bus_clk), .*); + rvdffe #(.WIDTH(32)) wrbuf_addrff(.din(dma_axi_awaddr[31:0]), .dout(wrbuf_addr[31:0]), .en(wrbuf_en & dma_bus_clk_en), .*); + rvdffe #(.WIDTH(64)) wrbuf_dataff(.din(dma_axi_wdata[63:0]), .dout(wrbuf_data[63:0]), .en(wrbuf_data_en & dma_bus_clk_en), .*); + rvdffs #(.WIDTH(8)) wrbuf_byteenff(.din(dma_axi_wstrb[7:0]), .dout(wrbuf_byteen[7:0]), .en(wrbuf_data_en), .clk(dma_bus_clk), .*); + // 
Read channel buffer + assign rdbuf_en = dma_axi_arvalid & dma_axi_arready; + assign rdbuf_cmd_sent = axi_mstr_valid & ~axi_mstr_write & dma_fifo_ready; + assign rdbuf_rst = rdbuf_cmd_sent & ~rdbuf_en; + + rvdffsc #(.WIDTH(1)) rdbuf_vldff(.din(1'b1), .dout(rdbuf_vld), .en(rdbuf_en), .clear(rdbuf_rst), .clk(dma_bus_clk), .*); + rvdffs #(.WIDTH(DMA_BUS_TAG)) rdbuf_tagff(.din(dma_axi_arid[DMA_BUS_TAG-1:0]), .dout(rdbuf_tag[DMA_BUS_TAG-1:0]), .en(rdbuf_en), .clk(dma_bus_clk), .*); + rvdffs #(.WIDTH(3)) rdbuf_sizeff(.din(dma_axi_arsize[2:0]), .dout(rdbuf_size[2:0]), .en(rdbuf_en), .clk(dma_bus_clk), .*); + rvdffe #(.WIDTH(32)) rdbuf_addrff(.din(dma_axi_araddr[31:0]), .dout(rdbuf_addr[31:0]), .en(rdbuf_en & dma_bus_clk_en), .*); + + assign dma_axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent); + assign dma_axi_wready = ~(wrbuf_data_vld & ~wrbuf_cmd_sent); + assign dma_axi_arready = ~(rdbuf_vld & ~rdbuf_cmd_sent); + + //Generate a single request from read/write channel + assign axi_mstr_valid = ((wrbuf_vld & wrbuf_data_vld) | rdbuf_vld) & dma_fifo_ready; + assign axi_mstr_tag[DMA_BUS_TAG-1:0] = axi_mstr_sel ? wrbuf_tag[DMA_BUS_TAG-1:0] : rdbuf_tag[DMA_BUS_TAG-1:0]; + assign axi_mstr_write = axi_mstr_sel; + assign axi_mstr_posted_write = axi_mstr_sel & wrbuf_posted; + assign axi_mstr_addr[31:0] = axi_mstr_sel ? wrbuf_addr[31:0] : rdbuf_addr[31:0]; + assign axi_mstr_size[2:0] = axi_mstr_sel ? wrbuf_size[2:0] : rdbuf_size[2:0]; + assign axi_mstr_wdata[63:0] = wrbuf_data[63:0]; + assign axi_mstr_wstrb[7:0] = wrbuf_byteen[7:0]; + + // Sel=1 -> write has higher priority + assign axi_mstr_sel = (wrbuf_vld & wrbuf_data_vld & rdbuf_vld) ? 
axi_mstr_priority : (wrbuf_vld & wrbuf_data_vld); + assign axi_mstr_prty_in = ~axi_mstr_priority; + assign axi_mstr_prty_en = axi_mstr_valid; + rvdffs #(.WIDTH(1)) mstr_prtyff(.din(axi_mstr_prty_in), .dout(axi_mstr_priority), .en(axi_mstr_prty_en), .clk(dma_bus_clk), .*); + + rvdff #(.WIDTH(1)) axi_mstr_validff (.din(axi_mstr_valid), .dout(axi_mstr_valid_q), .clk(dma_bus_clk), .*); + rvdff #(.WIDTH(1)) axi_slv_sentff (.din(axi_slv_sent), .dout(axi_slv_sent_q), .clk(dma_bus_clk), .*); + + //assign axi_slv_valid = fifo_valid[RspPtr] & ~fifo_rsp_done[RspPtr] & ~fifo_dbg[RspPtr] & + // ((fifo_write[RspPtr] & fifo_done_bus[RspPtr]) | (~fifo_write[RspPtr] & fifo_data_bus_valid[RspPtr]) | fifo_error_bus[RspPtr]); + assign axi_slv_valid = fifo_valid[RspPtr] & ~fifo_dbg[RspPtr] & fifo_done_bus[RspPtr]; + assign axi_slv_tag[DMA_BUS_TAG-1:0] = fifo_tag[RspPtr]; + //assign axi_slv_rdata[63:0] = (|fifo_error[RspPtr]) ? {32'b0,fifo_addr[RspPtr]} : fifo_data[RspPtr]; + assign axi_slv_rdata[63:0] = fifo_data[RspPtr]; + assign axi_slv_write = fifo_write[RspPtr]; + assign axi_slv_posted_write = axi_slv_write & fifo_posted_write[RspPtr]; + assign axi_slv_error[1:0] = fifo_error[RspPtr][0] ? 2'b10 : (fifo_error[RspPtr][1] ? 
2'b11 : 2'b0); + + + // AXI response channel signals + assign dma_axi_bvalid = axi_slv_valid & axi_slv_write; + assign dma_axi_bresp[1:0] = axi_slv_error[1:0]; + assign dma_axi_bid[DMA_BUS_TAG-1:0] = axi_slv_tag[DMA_BUS_TAG-1:0]; + + assign dma_axi_rvalid = axi_slv_valid & ~axi_slv_write; + assign dma_axi_rresp[1:0] = axi_slv_error; + assign dma_axi_rid[DMA_BUS_TAG-1:0] = axi_slv_tag[DMA_BUS_TAG-1:0]; + assign dma_axi_rdata[63:0] = axi_slv_rdata[63:0]; + assign dma_axi_rlast = 1'b1; + + assign axi_slv_sent = (dma_axi_bvalid & dma_axi_bready) | (dma_axi_rvalid & dma_axi_rready); + assign dma_slv_algn_err = fifo_error[RspPtr][1]; +`ifdef ASSERT_ON + + //assert_nack_count: assert #0 (dma_nack_count[2:0] < 3'h4); + + for (genvar i=0; i ((dma_axi_awsize[2:0] == 3'h0) | + ((dma_axi_awsize[2:0] == 3'h1) & (dma_axi_awaddr[0] == 1'b0)) | + ((dma_axi_awsize[2:0] == 3'h2) & (dma_axi_awaddr[1:0] == 2'b0)) | + ((dma_axi_awsize[2:0] == 3'h3) & (dma_axi_awaddr[2:0] == 3'b0))); + endproperty + // assert_dma_write_trxn_aligned: assert property (dma_axi_write_trxn_aligned) else + // $display("Assertion dma_axi_write_trxn_aligned failed: dma_axi_awvalid=1'b%b, dma_axi_awsize=3'h%h, dma_axi_awaddr=32'h%h",dma_axi_awvalid, dma_axi_awsize[2:0], dma_axi_awaddr[31:0]); + + // Assertion to check AXI read address is aligned to size + property dma_axi_read_trxn_aligned; + @(posedge dma_bus_clk) dma_axi_arvalid |-> ((dma_axi_arsize[2:0] == 3'h0) | + ((dma_axi_arsize[2:0] == 3'h1) & (dma_axi_araddr[0] == 1'b0)) | + ((dma_axi_arsize[2:0] == 3'h2) & (dma_axi_araddr[1:0] == 2'b0)) | + ((dma_axi_arsize[2:0] == 3'h3) & (dma_axi_araddr[2:0] == 3'b0))); + endproperty + // assert_dma_read_trxn_aligned: assert property (dma_axi_read_trxn_aligned) else + // $display("Assertion dma_axi_read_trxn_aligned failed: dma_axi_arvalid=1'b%b, dma_axi_arsize=3'h%h, dma_axi_araddr=32'h%h",dma_axi_arvalid, dma_axi_arsize[2:0], dma_axi_araddr[31:0]); + + // Assertion to check write size is 8 byte or less + property 
dma_axi_awsize_check; + @(posedge dma_bus_clk) disable iff(~rst_l) (dma_axi_awvalid & dma_axi_awready) |-> (dma_axi_awsize[2] == 1'b0); + endproperty + assert_dma_axi_awsize_check: assert property (dma_axi_awsize_check) else + $display("DMA AXI awsize is illegal. Size greater than 8B not supported"); + + // Assertion to check there are no burst commands + property dma_axi_awlen_check; + @(posedge dma_bus_clk) disable iff(~rst_l) (dma_axi_awvalid & dma_axi_awready) |-> (dma_axi_awlen[7:0] == 8'b0); + endproperty + assert_dma_axi_awlen_check: assert property (dma_axi_awlen_check) else + $display("DMA AXI awlen is illegal. Length greater than 0 not supported"); + + // Assertion to check read size is 8 byte or less + property dma_axi_arsize_check; + @(posedge dma_bus_clk) disable iff(~rst_l) (dma_axi_arvalid & dma_axi_arready) |-> (dma_axi_arsize[2] == 1'b0); + endproperty + assert_dma_axi_arsize_check: assert property (dma_axi_arsize_check) else + $display("DMA AXI arsize is illegal, Size bigger than 8B not supported"); + + // Assertion to check there are no burst commands + property dma_axi_arlen_check; + @(posedge dma_bus_clk) disable iff(~rst_l) (dma_axi_arvalid & dma_axi_arready) |-> (dma_axi_arlen[7:0] == 8'b0); + endproperty + assert_dma_axi_arlen_check: assert property (dma_axi_arlen_check) else + $display("DMA AXI arlen greater than 0 not supported."); + + // Assertion to check cmd valid stays stable during entire bus clock + property dma_axi_awvalid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_awvalid != $past(dma_axi_awvalid)) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_awvalid_stable: assert property (dma_axi_awvalid_stable) else + $display("DMA AXI awvalid changed in middle of bus clock"); + + // Assertion to check cmd ready stays stable during entire bus clock + property dma_axi_awready_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_awready != $past(dma_axi_awready)) |-> $past(dma_bus_clk_en); + endproperty +
assert_dma_axi_awready_stable: assert property (dma_axi_awready_stable) else + $display("DMA AXI awready changed in middle of bus clock"); + + // Assertion to check cmd tag stays stable during entire bus clock + property dma_axi_awid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_awvalid & (dma_axi_awid[DMA_BUS_TAG-1:0] != $past(dma_axi_awid[DMA_BUS_TAG-1:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_awid_stable: assert property (dma_axi_awid_stable) else + $display("DMA AXI awid changed in middle of bus clock"); + + // Assertion to check cmd addr stays stable during entire bus clock + property dma_axi_awaddr_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_awvalid & (dma_axi_awaddr[31:0] != $past(dma_axi_awaddr[31:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_awaddr_stable: assert property (dma_axi_awaddr_stable) else + $display("DMA AXI awaddr changed in middle of bus clock"); + + // Assertion to check cmd length stays stable during entire bus clock + property dma_axi_awsize_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_awvalid & (dma_axi_awsize[2:0] != $past(dma_axi_awsize[2:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_awsize_stable: assert property (dma_axi_awsize_stable) else + $display("DMA AXI awsize changed in middle of bus clock"); + + // Assertion to check cmd valid stays stable during entire bus clock + property dma_axi_wvalid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_wvalid != $past(dma_axi_wvalid)) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_wvalid_stable: assert property (dma_axi_wvalid_stable) else + $display("DMA AXI wvalid changed in middle of bus clock"); + + // Assertion to check cmd ready stays stable during entire bus clock + property dma_axi_wready_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_wready != $past(dma_axi_wready)) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_wready_stable: assert property (dma_axi_wready_stable) 
else + $display("DMA AXI wready changed in middle of bus clock"); + + // Assertion to check cmd wbe stays stable during entire bus clock + property dma_axi_wstrb_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_wvalid & (dma_axi_wstrb[7:0] != $past(dma_axi_wstrb[7:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_wstrb_stable: assert property (dma_axi_wstrb_stable) else + $display("DMA AXI wstrb changed in middle of bus clock"); + + // Assertion to check cmd wdata stays stable during entire bus clock + property dma_axi_wdata_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_wvalid & (dma_axi_wdata[63:0] != $past(dma_axi_wdata[63:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_wdata_stable: assert property (dma_axi_wdata_stable) else + $display("DMA AXI wdata changed in middle of bus clock"); + + // Assertion to check cmd valid stays stable during entire bus clock + property dma_axi_arvalid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_arvalid != $past(dma_axi_arvalid)) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_arvalid_stable: assert property (dma_axi_arvalid_stable) else + $display("DMA AXI arvalid changed in middle of bus clock"); + + // Assertion to check cmd ready stays stable during entire bus clock + property dma_axi_arready_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_arready != $past(dma_axi_arready)) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_arready_stable: assert property (dma_axi_arready_stable) else + $display("DMA AXI arready changed in middle of bus clock"); + + // Assertion to check cmd tag stays stable during entire bus clock + property dma_axi_arid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_arvalid & (dma_axi_arid[DMA_BUS_TAG-1:0] != $past(dma_axi_arid[DMA_BUS_TAG-1:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_arid_stable: assert property (dma_axi_arid_stable) else + $display("DMA AXI arid changed in middle of bus clock"); + + // 
Assertion to check cmd addr stays stable during entire bus clock + property dma_axi_araddr_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_arvalid & (dma_axi_araddr[31:0] != $past(dma_axi_araddr[31:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_araddr_stable: assert property (dma_axi_araddr_stable) else + $display("DMA AXI araddr changed in middle of bus clock"); + + // Assertion to check cmd length stays stable during entire bus clock + property dma_axi_arsize_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_arvalid & (dma_axi_arsize[2:0] != $past(dma_axi_arsize[2:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_arsize_stable: assert property (dma_axi_arsize_stable) else + $display("DMA AXI arsize changed in middle of bus clock"); + + //Assertion to check write rsp valid stays stable during entire bus clock + property dma_axi_bvalid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_bvalid != $past(dma_axi_bvalid)) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_bvalid_stable: assert property (dma_axi_bvalid_stable) else + $display("DMA AXI bvalid changed in middle of bus clock"); + + // //Assertion to check write rsp ready stays stable during entire bus clock + // property dma_axi_bready_stable; + // @(posedge clk) (dma_axi_bready != $past(dma_axi_bready)) |-> $past(dma_bus_clk_en); + // endproperty + // assert_dma_axi_bready_stable: assert property (dma_axi_bready_stable) else + // $display("DMA AXI bready changed in middle of bus clock"); + + //Assertion to check write rsp stays stable during entire bus clock + property dma_axi_bresp_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_bvalid & (dma_axi_bresp[1:0] != $past(dma_axi_bresp[1:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_bresp_stable: assert property (dma_axi_bresp_stable) else + $display("DMA AXI bvalid changed in middle of bus clock"); + + // Assertion to check write rsp tag stays stable during entire bus clock + property 
dma_axi_bid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_bvalid & (dma_axi_bid[DMA_BUS_TAG-1:0] != $past(dma_axi_bid[DMA_BUS_TAG-1:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_bid_stable: assert property (dma_axi_bid_stable) else + $display("DMA AXI bid changed in middle of bus clock"); + + //Assertion to check write rsp valid stays stable during entire bus clock + property dma_axi_rvalid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_rvalid != $past(dma_axi_rvalid)) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_rvalid_stable: assert property (dma_axi_rvalid_stable) else + $display("DMA AXI bvalid changed in middle of bus clock"); + + // //Assertion to check write rsp ready stays stable during entire bus clock + // property dma_axi_rready_stable; + // @(posedge clk) (dma_axi_rready != $past(dma_axi_rready)) |-> $past(dma_bus_clk_en); + // endproperty + // assert_dma_axi_rready_stable: assert property (dma_axi_rready_stable) else + // $display("DMA AXI bready changed in middle of bus clock"); + + //Assertion to check write rsp stays stable during entire bus clock + property dma_axi_rresp_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_rvalid & (dma_axi_rresp[1:0] != $past(dma_axi_rresp[1:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_rresp_stable: assert property (dma_axi_rresp_stable) else + $display("DMA AXI bvalid changed in middle of bus clock"); + + // Assertion to check write rsp tag stays stable during entire bus clock + property dma_axi_rid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_rvalid & (dma_axi_rid[DMA_BUS_TAG-1:0] != $past(dma_axi_rid[DMA_BUS_TAG-1:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_rid_stable: assert property (dma_axi_rid_stable) else + $display("DMA AXI bid changed in middle of bus clock"); + +`endif + +endmodule // dma_ctrl diff --git a/design/dmi/dmi_jtag_to_core_sync.v b/design/dmi/dmi_jtag_to_core_sync.v new file mode 100644 index 
0000000..aa4c19f --- /dev/null +++ b/design/dmi/dmi_jtag_to_core_sync.v @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//------------------------------------------------------------------------------------ +// This module synchronizes the rd_en/wr_en signals from JTAG (TCK) to the +// processor clock (clk) and turns each into a pulse on its rising edge. +// +//------------------------------------------------------------------------------------- + +module dmi_jtag_to_core_sync ( + // JTAG signals + input rd_en, // 1 bit Read Enable + input wr_en, // 1 bit Write enable + + + // Processor Signals + input rst_n, // Core reset (active low, asynchronous) + input clk, // Core clock + + output reg_en, // 1 bit access enable to Processor (read or write pulse) + output reg_wr_en // 1 bit Write enable to Processor +); + + + + wire c_rd_en; + wire c_wr_en; + + + //Assign statements + + assign reg_en = c_wr_en | c_rd_en; + assign reg_wr_en = c_wr_en; + + reg [2:0] rden, wren; // shift registers sampling rd_en / wr_en on clk + +// synchronizers: shift each enable through its register on every rising clk edge +always @ ( posedge clk or negedge rst_n) begin + if(!rst_n) begin + rden <= '0; + wren <= '0; + end + else begin + rden <= {rden[1:0], rd_en}; + wren <= {wren[1:0], wr_en}; + end +end + +assign c_rd_en = rden[1] & ~rden[2]; // high when stage 1 is set and stage 2 is not yet set +assign c_wr_en = wren[1] & ~wren[2]; // same edge detect for the write enable + + + +endmodule diff --git a/design/dmi/dmi_wrapper.v b/design/dmi/dmi_wrapper.v new file mode 100644 index 0000000..01d29d6 --- /dev/null +++ b/design/dmi/dmi_wrapper.v @@ -0,0 +1,91 @@ +//
SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//------------------------------------------------------------------------------------ +// +// Copyright Western Digital, 2019 +// Owner : Anusha Narayanamoorthy +// Description: +// Wrapper module for JTAG_TAP and DMI synchronizer +// +//------------------------------------------------------------------------------------- + +module dmi_wrapper( + input scan_mode, // scan mode (not referenced inside this wrapper) + + // JTAG signals + input trst_n, // JTAG reset + input tck, // JTAG clock + input tms, // Test mode select + input tdi, // Test Data Input + output tdo, // Test Data Output + output tdoEnable, // Test Data Output enable + + // Processor Signals + input core_rst_n, // Core reset + input core_clk, // Core clock + input [31:1] jtag_id, // JTAG ID bits [31:1], passed to the TAP + input [31:0] rd_data, // 32 bit Read data from Processor + output [31:0] reg_wr_data, // 32 bit Write data to Processor + output [6:0] reg_wr_addr, // 7 bit reg address to Processor + output reg_en, // 1 bit register access enable to Processor (read or write) + output reg_wr_en, // 1 bit Write enable to Processor + output dmi_hard_reset // driven by the TAP's dmi_hard_reset output +); + + + + + + //Wire Declaration + wire rd_en; // read enable from TAP to synchronizer + wire wr_en; // write enable from TAP to synchronizer + wire dmireset; // TAP dmi_reset output; not used elsewhere in this wrapper + + + //jtag_tap instantiation + rvjtag_tap i_jtag_tap( + .trst(trst_n), // dedicated JTAG TRST (active low) pad signal or asynchronous active low power on reset + .tck(tck), // dedicated JTAG TCK pad signal
+ .tms(tms), // dedicated JTAG TMS pad signal + .tdi(tdi), // dedicated JTAG TDI pad signal + .tdo(tdo), // dedicated JTAG TDO pad signal + .tdoEnable(tdoEnable), // enable for TDO pad + .wr_data(reg_wr_data), // 32 bit Write data + .wr_addr(reg_wr_addr), // 7 bit Write address + .rd_en(rd_en), // 1 bit read enable + .wr_en(wr_en), // 1 bit Write enable + .rd_data(rd_data), // 32 bit Read data + .rd_status(2'b0), // read status tied to zero + .idle(3'h0), // no need to wait to sample data + .dmi_stat(2'b0), // no need to wait or error possible + .version(4'h1), // debug spec 0.13 compliant + .jtag_id(jtag_id), + .dmi_hard_reset(dmi_hard_reset), + .dmi_reset(dmireset) +); + + + // dmi_jtag_to_core_sync instantiation + dmi_jtag_to_core_sync i_dmi_jtag_to_core_sync( + .wr_en(wr_en), // 1 bit Write enable + .rd_en(rd_en), // 1 bit Read enable + + .rst_n(core_rst_n), + .clk(core_clk), + .reg_en(reg_en), // 1 bit access enable (read or write) + .reg_wr_en(reg_wr_en) // 1 bit Write enable + ); + +endmodule diff --git a/design/dmi/rvjtag_tap.sv b/design/dmi/rvjtag_tap.sv new file mode 100644 index 0000000..43c6e02 --- /dev/null +++ b/design/dmi/rvjtag_tap.sv @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License
+// JTAG TAP: 16-state controller clocked by tck, a 5-bit IR, and one shared shift register (sr) used for IR and every DR scan.
+module rvjtag_tap #(
+parameter AWIDTH = 7 // width of wr_addr; also sets USER_DR_LENGTH and the reported abits value
+)
+(
+input trst, // asynchronous reset, active low (used as negedge trst / !trst)
+input tck, // clock for every register in this module
+input tms, // selects the next TAP state
+input tdi, // serial data shifted into the top of the active sr field
+output reg tdo, // sr[0] registered on the falling edge of tck
+output tdoEnable, // high while the state is SHIFT_DR or SHIFT_IR
+
+output [31:0] wr_data, // dr[33:2]
+output [AWIDTH-1:0] wr_addr, // upper AWIDTH bits of dr
+output wr_en, // dr[1]; cleared on the tck after dr is loaded
+output rd_en, // dr[0]; cleared on the tck after dr is loaded
+
+input [31:0] rd_data, // captured into sr[33:2] in CAPTURE_DR when dr_en[1] is selected
+input [1:0] rd_status, // captured into sr[1:0] together with rd_data
+
+output reg dmi_reset, // one-tck pulse taken from sr[16] when UPDATE_DR occurs with dr_en[0]
+output reg dmi_hard_reset, // one-tck pulse taken from sr[17] when UPDATE_DR occurs with dr_en[0]
+
+input [2:0] idle, // captured into sr[14:12] when dr_en[0] is selected
+input [1:0] dmi_stat, // captured into sr[11:10] when dr_en[0] is selected
+/*
+-- revisionCode : 4'h0;
+-- manufacturersIdCode : 11'h45;
+-- deviceIdCode : 16'h0001;
+-- order MSB .. LSB -> [4 bit version or revision] [16 bit part number] [11 bit manufacturer id] [value of 1'b1 in LSB]
+*/
+input [31:1] jtag_id, // captured as {jtag_id, 1'b1} when devid_sel is selected
+input [3:0] version // captured into sr[3:0] when dr_en[0] is selected
+);
+
+localparam USER_DR_LENGTH = AWIDTH + 34; // AWIDTH address bits, 32 data bits, 2 low bits; matches {wr_addr, wr_data, wr_en, rd_en} unpacked from dr
+
+// sr: shared shift register, nsr: its combinational next value, dr: holding register loaded from sr.
+reg [USER_DR_LENGTH-1:0] sr, nsr, dr;
+
+///////////////////////////////////////////////////////
+// Tap controller
+///////////////////////////////////////////////////////
+logic[3:0] state, nstate; // current and next TAP state, encoded by the localparams below
+logic [4:0] ir; // instruction register
+wire jtag_reset; // state is TEST_LOGIC_RESET
+wire shift_dr;
+wire pause_dr; // decoded but not used elsewhere in this module
+wire update_dr;
+wire capture_dr;
+wire shift_ir;
+wire pause_ir ; // decoded but not used elsewhere in this module
+wire update_ir ;
+wire capture_ir;
+wire[1:0] dr_en; // dr_en[0]: ir == 5'b10000, dr_en[1]: ir == 5'b10001
+wire devid_sel; // ir == 5'b00001
+wire [5:0] abits; // AWIDTH truncated to 6 bits, reported in the dr_en[0] capture value
+
+assign abits = AWIDTH[5:0];
+
+// Encoding of the 16 TAP controller states held in the 4-bit state register.
+localparam TEST_LOGIC_RESET_STATE = 0;
+localparam RUN_TEST_IDLE_STATE = 1;
+localparam SELECT_DR_SCAN_STATE = 2;
+localparam CAPTURE_DR_STATE = 3;
+localparam SHIFT_DR_STATE = 4;
+localparam EXIT1_DR_STATE = 5;
+localparam PAUSE_DR_STATE = 6;
+localparam EXIT2_DR_STATE = 7;
+localparam UPDATE_DR_STATE = 8;
+localparam SELECT_IR_SCAN_STATE = 9;
+localparam CAPTURE_IR_STATE = 10;
+localparam SHIFT_IR_STATE = 11;
+localparam EXIT1_IR_STATE = 12;
+localparam PAUSE_IR_STATE = 13;
+localparam EXIT2_IR_STATE = 14;
+localparam UPDATE_IR_STATE = 15;
+
+always_comb begin // next-state logic: tms picks one of two successors for each state
+  nstate = state; // hold by default; every case item below overrides it
+  case(state)
+    TEST_LOGIC_RESET_STATE: nstate = tms ? TEST_LOGIC_RESET_STATE : RUN_TEST_IDLE_STATE;
+    RUN_TEST_IDLE_STATE: nstate = tms ? SELECT_DR_SCAN_STATE : RUN_TEST_IDLE_STATE;
+    SELECT_DR_SCAN_STATE: nstate = tms ? SELECT_IR_SCAN_STATE : CAPTURE_DR_STATE;
+    CAPTURE_DR_STATE: nstate = tms ? EXIT1_DR_STATE : SHIFT_DR_STATE;
+    SHIFT_DR_STATE: nstate = tms ? EXIT1_DR_STATE : SHIFT_DR_STATE;
+    EXIT1_DR_STATE: nstate = tms ? UPDATE_DR_STATE : PAUSE_DR_STATE;
+    PAUSE_DR_STATE: nstate = tms ? EXIT2_DR_STATE : PAUSE_DR_STATE;
+    EXIT2_DR_STATE: nstate = tms ? UPDATE_DR_STATE : SHIFT_DR_STATE;
+    UPDATE_DR_STATE: nstate = tms ? SELECT_DR_SCAN_STATE : RUN_TEST_IDLE_STATE;
+    SELECT_IR_SCAN_STATE: nstate = tms ? TEST_LOGIC_RESET_STATE : CAPTURE_IR_STATE;
+    CAPTURE_IR_STATE: nstate = tms ? EXIT1_IR_STATE : SHIFT_IR_STATE;
+    SHIFT_IR_STATE: nstate = tms ? EXIT1_IR_STATE : SHIFT_IR_STATE;
+    EXIT1_IR_STATE: nstate = tms ? UPDATE_IR_STATE : PAUSE_IR_STATE;
+    PAUSE_IR_STATE: nstate = tms ? EXIT2_IR_STATE : PAUSE_IR_STATE;
+    EXIT2_IR_STATE: nstate = tms ? UPDATE_IR_STATE : SHIFT_IR_STATE;
+    UPDATE_IR_STATE: nstate = tms ? SELECT_DR_SCAN_STATE : RUN_TEST_IDLE_STATE;
+    default: nstate = TEST_LOGIC_RESET_STATE; // all 16 encodings are listed above, so this only covers X/Z state values
+  endcase
+end
+
+always @ (posedge tck or negedge trst) begin // state register: rising tck, asynchronous reset to TEST_LOGIC_RESET
+  if(!trst) state <= TEST_LOGIC_RESET_STATE;
+  else state <= nstate;
+end
+
+assign jtag_reset = state == TEST_LOGIC_RESET_STATE;
+assign shift_dr = state == SHIFT_DR_STATE;
+assign pause_dr = state == PAUSE_DR_STATE;
+assign update_dr = state == UPDATE_DR_STATE;
+assign capture_dr = state == CAPTURE_DR_STATE;
+assign shift_ir = state == SHIFT_IR_STATE;
+assign pause_ir = state == PAUSE_IR_STATE;
+assign update_ir = state == UPDATE_IR_STATE;
+assign capture_ir = state == CAPTURE_IR_STATE;
+
+assign tdoEnable = shift_dr | shift_ir;
+
+///////////////////////////////////////////////////////
+// IR register
+///////////////////////////////////////////////////////
+// Updated on the falling edge of tck; reset and TEST_LOGIC_RESET both load 5'b1, which selects devid_sel.
+always @ (negedge tck or negedge trst) begin
+  if (!trst) ir <= 5'b1;
+  else begin
+    if (jtag_reset) ir <= 5'b1;
+    else if (update_ir) ir <= (sr[4:0] == '0) ? 5'h1f :sr[4:0]; // an all-zero shifted value is stored as 5'h1f
+  end
+end
+
+// NOTE(review): presumably dr_en[0]/dr_en[1] are the RISC-V debug DTMCS/DMI registers (IR 0x10/0x11); confirm against the debug spec.
+assign devid_sel = ir == 5'b00001;
+assign dr_en[0] = ir == 5'b10000;
+assign dr_en[1] = ir == 5'b10001;
+
+///////////////////////////////////////////////////////
+// Shift register
+///////////////////////////////////////////////////////
+always @ (posedge tck or negedge trst) begin // sr simply registers nsr on each rising tck
+  if(!trst)begin
+    sr <= '0;
+  end
+  else begin
+    sr <= nsr;
+  end
+end
+
+// SR next value
+always_comb begin
+  nsr = sr; // hold in every state not listed below
+  case(1) // items are tested in order; the first one that is true is taken
+    shift_dr: begin
+      case(1)
+        dr_en[1]: nsr = {tdi, sr[USER_DR_LENGTH-1:1]}; // shift across the full USER_DR_LENGTH bits
+
+        dr_en[0],
+        devid_sel: nsr = {{USER_DR_LENGTH-32{1'b0}},tdi, sr[31:1]}; // 32-bit shift, upper bits forced to zero
+        default: nsr = {{USER_DR_LENGTH-1{1'b0}},tdi}; // bypass
+      endcase
+    end
+    capture_dr: begin // no default item: any other IR value keeps nsr = sr
+      case(1)
+        dr_en[0]: nsr = {{USER_DR_LENGTH-15{1'b0}}, idle, dmi_stat, abits, version}; // 3+2+6+4 = 15 status bits
+        dr_en[1]: nsr = {{AWIDTH{1'b0}}, rd_data, rd_status}; // address field zeroed, then 32 data bits and 2 status bits
+        devid_sel: nsr = {{USER_DR_LENGTH-32{1'b0}}, jtag_id, 1'b1}; // 32-bit ID word with LSB fixed to 1
+      endcase
+    end
+    shift_ir: nsr = {{USER_DR_LENGTH-5{1'b0}},tdi, sr[4:1]}; // 5-bit shift for the IR scan
+    capture_ir: nsr = {{USER_DR_LENGTH-1{1'b0}},1'b1}; // IR capture value: only bit 0 set
+  endcase
+end
+
+// TDO retiming
+always @ (negedge tck ) tdo <= sr[0]; // falling-edge register with no reset term
+
+// DMI CS register
+always @ (posedge tck or negedge trst) begin // both outputs are pulses: cleared on any tck that is not an update of dr_en[0]
+  if(!trst) begin
+    dmi_hard_reset <= 1'b0;
+    dmi_reset <= 1'b0;
+  end
+  else if (update_dr & dr_en[0]) begin
+    dmi_hard_reset <= sr[17];
+    dmi_reset <= sr[16];
+  end
+  else begin
+    dmi_hard_reset <= 1'b0;
+    dmi_reset <= 1'b0;
+  end
+end
+
+// DR register
+always @ (posedge tck or negedge trst) begin // loaded from sr at UPDATE_DR when dr_en[1] is selected
+  if(!trst)
+    dr <= '0;
+  else begin
+    if (update_dr & dr_en[1])
+      dr <= sr;
+    else
+      dr <= {dr[USER_DR_LENGTH-1:2],2'b0}; // keep address/data, clear the two low bits so wr_en/rd_en last one tck
+  end
+end
+
+assign {wr_addr, wr_data, wr_en, rd_en} = dr; // dr[0] = rd_en, dr[1] = wr_en, dr[33:2] = wr_data, remaining upper bits = wr_addr
+
+
+
+
+endmodule
diff --git a/design/exu/exu.sv b/design/exu/exu.sv
new file mode 100644
index 0000000..20dfd2f
--- /dev/null
+++ b/design/exu/exu.sv
@@ -0,0 +1,839 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2019 Western Digital Corporation or its affiliates.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +module exu + import swerv_types::*; +( + + input logic clk, // Top level clock + input logic active_clk, // Level 1 active clock + input logic clk_override, // Override multiply clock enables + input logic rst_l, // Reset + input logic scan_mode, // Scan control + input logic lsu_freeze_dc3, // Freeze pipe from D to DC3 + + input logic dec_tlu_fast_div_disable, // Disable divide small number optimization + + input logic [4:2] dec_i0_data_en, // Slot I0 clock enable {e1, e2, e3 }, one cycle pulse + input logic [4:1] dec_i0_ctl_en, // Slot I0 clock enable {e1, e2, e3, e4}, two cycle pulse + input logic [4:2] dec_i1_data_en, // Slot I1 clock enable {e1, e2, e3 }, one cycle pulse + input logic [4:1] dec_i1_ctl_en, // Slot I1 clock enable {e1, e2, e3, e4}, two cycle pulse + + input logic dec_debug_wdata_rs1_d, // Debug select to primary I0 RS1 + + input logic [31:0] dbg_cmd_wrdata, // Debug data to primary I0 RS1 + + input logic [31:0] lsu_result_dc3, // Load result + + input predict_pkt_t i0_predict_p_d, // DEC branch predict packet + input predict_pkt_t i1_predict_p_d, // DEC branch predict packet + + input logic dec_i0_rs1_bypass_en_e2, // DEC bypass bus select for E2 stage + input logic dec_i0_rs2_bypass_en_e2, // DEC bypass bus select for E2 stage + input logic dec_i1_rs1_bypass_en_e2, // DEC bypass bus select for E2 stage + input logic dec_i1_rs2_bypass_en_e2, // DEC bypass bus select for E2 stage + 
input logic [31:0] i0_rs1_bypass_data_e2, // DEC bypass bus + input logic [31:0] i0_rs2_bypass_data_e2, // DEC bypass bus + input logic [31:0] i1_rs1_bypass_data_e2, // DEC bypass bus + input logic [31:0] i1_rs2_bypass_data_e2, // DEC bypass bus + + input logic dec_i0_rs1_bypass_en_e3, // DEC bypass bus select for E3 stage + input logic dec_i0_rs2_bypass_en_e3, // DEC bypass bus select for E3 stage + input logic dec_i1_rs1_bypass_en_e3, // DEC bypass bus select for E3 stage + input logic dec_i1_rs2_bypass_en_e3, // DEC bypass bus select for E3 stage + input logic [31:0] i0_rs1_bypass_data_e3, // DEC bypass bus + input logic [31:0] i0_rs2_bypass_data_e3, // DEC bypass bus + input logic [31:0] i1_rs1_bypass_data_e3, // DEC bypass bus + input logic [31:0] i1_rs2_bypass_data_e3, // DEC bypass bus + + input logic dec_i0_sec_decode_e3, // Secondary ALU valid + input logic dec_i1_sec_decode_e3, // Secondary ALU valid + input logic [31:1] dec_i0_pc_e3, // Secondary ALU PC + input logic [31:1] dec_i1_pc_e3, // Secondary ALU PC + + input logic [31:1] pred_correct_npc_e2, // DEC NPC for correctly predicted branch + + input logic dec_i1_valid_e1, // I1 valid E1 + + input logic dec_i0_mul_d, // Select for Multiply GPR value + input logic dec_i1_mul_d, // Select for Multiply GPR value + + input logic dec_i0_div_d, // Select for Divide GPR value + input logic dec_i1_div_d, // Select for Divide GPR value + + input logic [31:0] gpr_i0_rs1_d, // DEC data gpr + input logic [31:0] gpr_i0_rs2_d, // DEC data gpr + input logic [31:0] dec_i0_immed_d, // DEC data immediate + + input logic [31:0] gpr_i1_rs1_d, // DEC data gpr + input logic [31:0] gpr_i1_rs2_d, // DEC data gpr + input logic [31:0] dec_i1_immed_d, // DEC data immediate + + input logic [31:0] i0_rs1_bypass_data_d, // DEC bypass data + input logic [31:0] i0_rs2_bypass_data_d, // DEC bypass data + input logic [31:0] i1_rs1_bypass_data_d, // DEC bypass data + input logic [31:0] i1_rs2_bypass_data_d, // DEC bypass data + + input 
logic [12:1] dec_i0_br_immed_d, // Branch immediate + input logic [12:1] dec_i1_br_immed_d, // Branch immediate + + input alu_pkt_t i0_ap, // DEC alu {valid,predecodes} + input alu_pkt_t i1_ap, // DEC alu {valid,predecodes} + + input logic dec_i0_alu_decode_d, // Valid to Primary ALU + input logic dec_i1_alu_decode_d, // Valid to Primary ALU + + input logic dec_i0_select_pc_d, // PC select to RS1 + input logic dec_i1_select_pc_d, // PC select to RS1 + + input logic [31:1] dec_i0_pc_d, dec_i1_pc_d, // Instruction PC + + input logic dec_i0_rs1_bypass_en_d, // DEC bypass select + input logic dec_i0_rs2_bypass_en_d, // DEC bypass select + input logic dec_i1_rs1_bypass_en_d, // DEC bypass select + input logic dec_i1_rs2_bypass_en_d, // DEC bypass select + + input logic dec_tlu_flush_lower_wb, // Flush divide and secondary ALUs + input logic [31:1] dec_tlu_flush_path_wb, // Redirect target + + input logic dec_tlu_i0_valid_e4, // Valid for GHR + input logic dec_tlu_i1_valid_e4, // Valid for GHR + + output logic [31:0] exu_i0_result_e1, // Primary ALU result to DEC + output logic [31:0] exu_i1_result_e1, // Primary ALU result to DEC + output logic [31:1] exu_i0_pc_e1, // Primary PC result to DEC + output logic [31:1] exu_i1_pc_e1, // Primary PC result to DEC + + + output logic [31:0] exu_i0_result_e4, // Secondary ALU result + output logic [31:0] exu_i1_result_e4, // Secondary ALU result + + + output logic exu_i0_flush_final, // I0 flush to DEC + output logic exu_i1_flush_final, // I1 flush to DEC + + + + input mul_pkt_t mul_p, // DEC {valid, operand signs, low, operand bypass} + + input div_pkt_t div_p, // DEC {valid, unsigned, rem} + + input logic dec_i0_lsu_d, // Bypass control for LSU operand bus + input logic dec_i1_lsu_d, // Bypass control for LSU operand bus + + input logic dec_csr_ren_d, // Clear I0 RS1 primary + + output logic [31:0] exu_lsu_rs1_d, // LSU operand + output logic [31:0] exu_lsu_rs2_d, // LSU operand + + output logic [31:0] exu_csr_rs1_e1, // RS1 
source for a CSR instruction + + output logic exu_flush_final, // Pipe is being flushed this cycle + output logic [31:1] exu_flush_path_final, // Target for the oldest flush source + + output logic [31:0] exu_mul_result_e3, // Multiply result + + output logic [31:0] exu_div_result, // Divide result + output logic exu_div_finish, // Divide is finished + output logic exu_div_stall, // Divide is running + output logic [31:1] exu_npc_e4, // Divide NPC + + output logic exu_i0_flush_lower_e4, // to TLU - lower branch flush + output logic exu_i1_flush_lower_e4, // to TLU - lower branch flush + output logic [31:1] exu_i0_flush_path_e4, // to TLU - lower branch flush path + output logic [31:1] exu_i1_flush_path_e4, // to TLU - lower branch flush path + + output predict_pkt_t exu_mp_pkt, // Mispredict branch packet + + output logic [`RV_BHT_GHR_RANGE] exu_mp_eghr, // Mispredict global history + + output logic [1:0] exu_i0_br_hist_e4, // to DEC I0 branch history + output logic [1:0] exu_i0_br_bank_e4, // to DEC I0 branch bank + output logic exu_i0_br_error_e4, // to DEC I0 branch error + output logic exu_i0_br_start_error_e4, // to DEC I0 branch start error + output logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] exu_i0_br_index_e4, // to DEC I0 branch index + output logic exu_i0_br_valid_e4, // to DEC I0 branch valid + output logic exu_i0_br_mp_e4, // to DEC I0 branch mispredict +`ifdef RV_BTB_48 + output logic [1:0] exu_i0_br_way_e4, // to DEC I0 branch way +`else + output logic exu_i0_br_way_e4, // to DEC I0 branch way +`endif + output logic exu_i0_br_middle_e4, // to DEC I0 branch middle + output logic [`RV_BHT_GHR_RANGE] exu_i0_br_fghr_e4, // to DEC I0 branch fghr + output logic exu_i0_br_ret_e4, // to DEC I0 branch return + output logic exu_i0_br_call_e4, // to DEC I0 branch call + + output logic [1:0] exu_i1_br_hist_e4, // to DEC I1 branch history + output logic [1:0] exu_i1_br_bank_e4, // to DEC I1 branch bank + output logic exu_i1_br_error_e4, // to DEC I1 branch error + 
output logic exu_i1_br_start_error_e4, // to DEC I1 branch start error + output logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] exu_i1_br_index_e4, // to DEC I1 branch index + output logic exu_i1_br_valid_e4, // to DEC I1 branch valid + output logic exu_i1_br_mp_e4, // to DEC I1 branch mispredict +`ifdef RV_BTB_48 + output logic [1:0] exu_i1_br_way_e4, // to DEC I1 branch way +`else + output logic exu_i1_br_way_e4, // to DEC I1 branch way +`endif + output logic exu_i1_br_middle_e4, // to DEC I1 branch middle + output logic [`RV_BHT_GHR_RANGE] exu_i1_br_fghr_e4, // to DEC I1 branch fghr + output logic exu_i1_br_ret_e4, // to DEC I1 branch return + output logic exu_i1_br_call_e4, // to DEC I1 branch call + output logic exu_flush_upper_e2, // flush upper, either i0 or i1 + + output rets_pkt_t exu_rets_e1_pkt, // to IFU - I0+I1 {call, return, pc} + output rets_pkt_t exu_rets_e4_pkt, // to IFU - I0+I1 {call, return, pc} + + output logic exu_pmu_i0_br_misp, // to PMU - I0 E4 branch mispredict + output logic exu_pmu_i0_br_ataken, // to PMU - I0 E4 taken + output logic exu_pmu_i0_pc4, // to PMU - I0 E4 PC + output logic exu_pmu_i1_br_misp, // to PMU - I1 E4 branch mispredict + output logic exu_pmu_i1_br_ataken, // to PMU - I1 E4 taken + output logic exu_pmu_i1_pc4 // to PMU - I1 E4 PC + + ); + + + logic [31:0] i0_rs1_d,i0_rs2_d,i1_rs1_d,i1_rs2_d; + + + + logic exu_i0_flush_upper_e1; + logic [31:1] exu_i0_flush_path_e1; + + logic exu_i1_flush_upper_e1; + logic [31:1] exu_i1_flush_path_e1; + + logic [31:0] i0_rs1_final_d; + + logic [31:1] exu_flush_path_e2; + logic [31:0] mul_rs1_d, mul_rs2_d; + + logic [31:0] div_rs1_d, div_rs2_d; + + logic i1_valid_e2; + logic [31:1] npc_e4; + logic [31:1] div_npc; + + logic [31:0] i0_rs1_e1, i0_rs2_e1; + logic [31:0] i0_rs1_e2, i0_rs2_e2; + logic [31:0] i0_rs1_e3, i0_rs2_e3; + logic [12:1] i0_br_immed_e1, i0_br_immed_e2, i0_br_immed_e3; + + logic [31:0] i1_rs1_e1, i1_rs2_e1; + logic [31:0] i1_rs1_e2, i1_rs2_e2; + logic [31:0] i1_rs1_e3, 
i1_rs2_e3; + + logic [12:1] i1_br_immed_e1, i1_br_immed_e2, i1_br_immed_e3; + + logic [31:0] i0_rs1_e2_final, i0_rs2_e2_final; + logic [31:0] i1_rs1_e2_final, i1_rs2_e2_final; + logic [31:0] i0_rs1_e3_final, i0_rs2_e3_final; + logic [31:0] i1_rs1_e3_final, i1_rs2_e3_final; + logic [31:1] i0_alu_pc_nc, i1_alu_pc_nc; + logic [31:1] exu_flush_path_e1; + logic exu_i0_flush_upper_e2, exu_i1_flush_upper_e2; + logic i1_valid_e3, i1_valid_e4; + logic [31:1] pred_correct_npc_e3, pred_correct_npc_e4; + logic exu_i0_flush_upper_e3; + logic exu_i0_flush_upper_e4; + logic i1_pred_correct_upper_e1, i0_pred_correct_upper_e1; + logic i1_pred_correct_upper_e2, i0_pred_correct_upper_e2; + logic i1_pred_correct_upper_e3, i0_pred_correct_upper_e3; + logic i1_pred_correct_upper_e4, i0_pred_correct_upper_e4; + logic i1_pred_correct_lower_e4, i0_pred_correct_lower_e4; + + + logic i1_valid_e4_eff; + logic i1_sec_decode_e4, i0_sec_decode_e4; + logic i1_pred_correct_e4_eff, i0_pred_correct_e4_eff; + logic [31:1] i1_flush_path_e4_eff, i0_flush_path_e4_eff; + logic [31:0] csr_rs1_in_d; + logic [31:1] i1_flush_path_upper_e2, i0_flush_path_upper_e2; + logic [31:1] i1_flush_path_upper_e3, i0_flush_path_upper_e3; + logic [31:1] i1_flush_path_upper_e4, i0_flush_path_upper_e4; + + logic div_valid_e1; + logic div_finish_early; + logic freeze; + + + alu_pkt_t i0_ap_e1, i0_ap_e2, i0_ap_e3, i0_ap_e4; + alu_pkt_t i1_ap_e1, i1_ap_e2, i1_ap_e3, i1_ap_e4; + assign freeze = lsu_freeze_dc3; + + assign i0_rs1_d[31:0] = ({32{~dec_i0_rs1_bypass_en_d}} & ((dec_debug_wdata_rs1_d) ? 
dbg_cmd_wrdata[31:0] : gpr_i0_rs1_d[31:0])) | + ({32{~dec_i0_rs1_bypass_en_d & dec_i0_select_pc_d}} & { dec_i0_pc_d[31:1], 1'b0}) | // for jal's + ({32{ dec_i0_rs1_bypass_en_d}} & i0_rs1_bypass_data_d[31:0]); + + + assign i0_rs1_final_d[31:0] = ({32{~dec_csr_ren_d}} & i0_rs1_d[31:0]); + + assign i0_rs2_d[31:0] = ({32{~dec_i0_rs2_bypass_en_d}} & gpr_i0_rs2_d[31:0]) | + ({32{~dec_i0_rs2_bypass_en_d}} & dec_i0_immed_d[31:0]) | + ({32{ dec_i0_rs2_bypass_en_d}} & i0_rs2_bypass_data_d[31:0]); + + assign i1_rs1_d[31:0] = ({32{~dec_i1_rs1_bypass_en_d}} & gpr_i1_rs1_d[31:0]) | + ({32{~dec_i1_rs1_bypass_en_d & dec_i1_select_pc_d}} & { dec_i1_pc_d[31:1], 1'b0}) | // pc orthogonal with rs1 + ({32{ dec_i1_rs1_bypass_en_d}} & i1_rs1_bypass_data_d[31:0]); + + assign i1_rs2_d[31:0] = ({32{~dec_i1_rs2_bypass_en_d}} & gpr_i1_rs2_d[31:0]) | + ({32{~dec_i1_rs2_bypass_en_d}} & dec_i1_immed_d[31:0]) | + ({32{ dec_i1_rs2_bypass_en_d}} & i1_rs2_bypass_data_d[31:0]); + + assign exu_lsu_rs1_d[31:0] = ({32{ ~dec_i0_rs1_bypass_en_d & dec_i0_lsu_d }} & gpr_i0_rs1_d[31:0] ) | + ({32{ ~dec_i1_rs1_bypass_en_d & ~dec_i0_lsu_d & dec_i1_lsu_d}} & gpr_i1_rs1_d[31:0] ) | + ({32{ dec_i0_rs1_bypass_en_d & dec_i0_lsu_d }} & i0_rs1_bypass_data_d[31:0]) | + ({32{ dec_i1_rs1_bypass_en_d & ~dec_i0_lsu_d & dec_i1_lsu_d}} & i1_rs1_bypass_data_d[31:0]); + + assign exu_lsu_rs2_d[31:0] = ({32{ ~dec_i0_rs2_bypass_en_d & dec_i0_lsu_d }} & gpr_i0_rs2_d[31:0] ) | + ({32{ ~dec_i1_rs2_bypass_en_d & ~dec_i0_lsu_d & dec_i1_lsu_d}} & gpr_i1_rs2_d[31:0] ) | + ({32{ dec_i0_rs2_bypass_en_d & dec_i0_lsu_d }} & i0_rs2_bypass_data_d[31:0]) | + ({32{ dec_i1_rs2_bypass_en_d & ~dec_i0_lsu_d & dec_i1_lsu_d}} & i1_rs2_bypass_data_d[31:0]); + + assign mul_rs1_d[31:0] = ({32{ ~dec_i0_rs1_bypass_en_d & dec_i0_mul_d }} & gpr_i0_rs1_d[31:0] ) | + ({32{ ~dec_i1_rs1_bypass_en_d & ~dec_i0_mul_d & dec_i1_mul_d}} & gpr_i1_rs1_d[31:0] ) | + ({32{ dec_i0_rs1_bypass_en_d & dec_i0_mul_d }} & i0_rs1_bypass_data_d[31:0]) | + ({32{ 
dec_i1_rs1_bypass_en_d & ~dec_i0_mul_d & dec_i1_mul_d}} & i1_rs1_bypass_data_d[31:0]); + + assign mul_rs2_d[31:0] = ({32{ ~dec_i0_rs2_bypass_en_d & dec_i0_mul_d }} & gpr_i0_rs2_d[31:0] ) | + ({32{ ~dec_i1_rs2_bypass_en_d & ~dec_i0_mul_d & dec_i1_mul_d}} & gpr_i1_rs2_d[31:0] ) | + ({32{ dec_i0_rs2_bypass_en_d & dec_i0_mul_d }} & i0_rs2_bypass_data_d[31:0]) | + ({32{ dec_i1_rs2_bypass_en_d & ~dec_i0_mul_d & dec_i1_mul_d}} & i1_rs2_bypass_data_d[31:0]); + + + + assign div_rs1_d[31:0] = ({32{ ~dec_i0_rs1_bypass_en_d & dec_i0_div_d }} & gpr_i0_rs1_d[31:0]) | + ({32{ ~dec_i1_rs1_bypass_en_d & ~dec_i0_div_d & dec_i1_div_d}} & gpr_i1_rs1_d[31:0]) | + ({32{ dec_i0_rs1_bypass_en_d & dec_i0_div_d }} & i0_rs1_bypass_data_d[31:0]) | + ({32{ dec_i1_rs1_bypass_en_d & ~dec_i0_div_d & dec_i1_div_d}} & i1_rs1_bypass_data_d[31:0]); + + assign div_rs2_d[31:0] = ({32{ ~dec_i0_rs2_bypass_en_d & dec_i0_div_d }} & gpr_i0_rs2_d[31:0]) | + ({32{ ~dec_i1_rs2_bypass_en_d & ~dec_i0_div_d & dec_i1_div_d}} & gpr_i1_rs2_d[31:0]) | + ({32{ dec_i0_rs2_bypass_en_d & dec_i0_div_d }} & i0_rs2_bypass_data_d[31:0]) | + ({32{ dec_i1_rs2_bypass_en_d & ~dec_i0_div_d & dec_i1_div_d}} & i1_rs2_bypass_data_d[31:0]); + + + assign csr_rs1_in_d[31:0] = (dec_csr_ren_d) ? 
i0_rs1_d[31:0] : exu_csr_rs1_e1[31:0]; + + logic i0_e1_data_en, i0_e2_data_en, i0_e3_data_en; + logic i0_e1_ctl_en, i0_e2_ctl_en, i0_e3_ctl_en, i0_e4_ctl_en; + + assign {i0_e1_data_en, i0_e2_data_en, i0_e3_data_en } = dec_i0_data_en[4:2]; + assign {i0_e1_ctl_en, i0_e2_ctl_en, i0_e3_ctl_en, i0_e4_ctl_en } = dec_i0_ctl_en[4:1]; + + logic i1_e1_data_en, i1_e2_data_en, i1_e3_data_en; + logic i1_e1_ctl_en, i1_e2_ctl_en, i1_e3_ctl_en, i1_e4_ctl_en; + + assign {i1_e1_data_en, i1_e2_data_en, i1_e3_data_en} = dec_i1_data_en[4:2]; + assign {i1_e1_ctl_en, i1_e2_ctl_en, i1_e3_ctl_en, i1_e4_ctl_en} = dec_i1_ctl_en[4:1]; + + + + + rvdffe #(32) csr_rs1_ff (.*, .en(i0_e1_data_en), .din(csr_rs1_in_d[31:0]), .dout(exu_csr_rs1_e1[31:0])); + + + exu_mul_ctl mul_e1 (.*, + .clk_override ( clk_override ), // I + .freeze ( freeze ), // I + .mp ( mul_p ), // I + .a ( mul_rs1_d[31:0] ), // I + .b ( mul_rs2_d[31:0] ), // I + .out ( exu_mul_result_e3[31:0] )); // O + + + exu_div_ctl div_e1 (.*, + .flush_lower ( dec_tlu_flush_lower_wb ), // I + .dp ( div_p ), // I + .dividend ( div_rs1_d[31:0] ), // I + .divisor ( div_rs2_d[31:0] ), // I + .valid_ff_e1 ( div_valid_e1 ), // O + .div_stall ( exu_div_stall ), // O + .finish_early ( div_finish_early ), // O + .finish ( exu_div_finish ), // O + .out ( exu_div_result[31:0] )); // O + + + predict_pkt_t i0_predict_newp_d, i1_predict_newp_d; + + always_comb begin + i0_predict_newp_d = i0_predict_p_d; + i0_predict_newp_d.boffset = dec_i0_pc_d[1]; // from the start of inst + + i0_predict_newp_d.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = i0_predict_p_d.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; // from the end of inst + i0_predict_newp_d.bank[1:0] = i0_predict_p_d.bank[1:0]; + + i1_predict_newp_d = i1_predict_p_d; + i1_predict_newp_d.boffset = dec_i1_pc_d[1]; + + i1_predict_newp_d.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = i1_predict_p_d.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + i1_predict_newp_d.bank[1:0] = i1_predict_p_d.bank[1:0]; + + end + + + predict_pkt_t 
i0_predict_p_e1, i0_predict_p_e4; + predict_pkt_t i1_predict_p_e1, i1_predict_p_e4; + + assign exu_pmu_i0_br_misp = i0_predict_p_e4.misp & ~exu_div_finish; // qual with divide + assign exu_pmu_i0_br_ataken = i0_predict_p_e4.ataken & ~exu_div_finish; // qual with divide + assign exu_pmu_i0_pc4 = i0_predict_p_e4.pc4 | exu_div_finish; // divides are always 4B + assign exu_pmu_i1_br_misp = i1_predict_p_e4.misp; + assign exu_pmu_i1_br_ataken = i1_predict_p_e4.ataken; + assign exu_pmu_i1_pc4 = i1_predict_p_e4.pc4; + + + exu_alu_ctl i0_alu_e1 (.*, + .freeze ( freeze ), // I + .enable ( i0_e1_ctl_en ), // I + .predict_p ( i0_predict_newp_d ), // I + .valid ( dec_i0_alu_decode_d ), // I + .flush ( exu_flush_final ), // I + .a ( i0_rs1_final_d[31:0] ), // I + .b ( i0_rs2_d[31:0] ), // I + .pc ( dec_i0_pc_d[31:1] ), // I + .brimm ( dec_i0_br_immed_d[12:1] ), // I + .ap ( i0_ap_e1 ), // I + .out ( exu_i0_result_e1[31:0] ), // O + .flush_upper ( exu_i0_flush_upper_e1 ), // O : will be 0 if freeze this cycle + .flush_path ( exu_i0_flush_path_e1[31:1] ), // O + .predict_p_ff ( i0_predict_p_e1 ), // O + .pc_ff ( exu_i0_pc_e1[31:1] ), // O + .pred_correct ( i0_pred_correct_upper_e1 ) // O + ); + + + exu_alu_ctl i1_alu_e1 (.*, + .freeze ( freeze ), // I + .enable ( i1_e1_ctl_en ), // I + .predict_p ( i1_predict_newp_d ), // I + .valid ( dec_i1_alu_decode_d ), // I + .flush ( exu_flush_final ), // I + .a ( i1_rs1_d[31:0] ), // I + .b ( i1_rs2_d[31:0] ), // I + .pc ( dec_i1_pc_d[31:1] ), // I + .brimm ( dec_i1_br_immed_d[12:1] ), // I + .ap ( i1_ap_e1 ), // I + .out ( exu_i1_result_e1[31:0] ), // O + .flush_upper ( exu_i1_flush_upper_e1 ), // O : will be 0 if freeze this cycle + .flush_path ( exu_i1_flush_path_e1[31:1] ), // O + .predict_p_ff ( i1_predict_p_e1 ), // O + .pc_ff ( exu_i1_pc_e1[31:1] ), // O + .pred_correct ( i1_pred_correct_upper_e1 ) // O + ); + + predict_pkt_t i0_pp_e2, i0_pp_e3, i0_pp_e4_in; + + rvdffe #($bits(predict_pkt_t)) i0_pp_e2_ff (.*, .en(i0_e2_ctl_en), 
.din(i0_predict_p_e1),.dout(i0_pp_e2) ); + rvdffe #($bits(predict_pkt_t)) i0_pp_e3_ff (.*, .en(i0_e3_ctl_en), .din(i0_pp_e2),.dout(i0_pp_e3) ); + + predict_pkt_t i1_pp_e2, i1_pp_e3, i1_pp_e4_in; + + rvdffe #($bits(predict_pkt_t)) i1_pp_e2_ff (.*, .en(i1_e2_ctl_en), .din(i1_predict_p_e1),.dout(i1_pp_e2) ); + rvdffe #($bits(predict_pkt_t)) i1_pp_e3_ff (.*, .en(i1_e3_ctl_en), .din(i1_pp_e2),.dout(i1_pp_e3) ); + + // set the predict_pkt to 0's if freeze, goes to secondary alu's + assign i0_pp_e4_in = (freeze) ? '0 : i0_pp_e3; + assign i1_pp_e4_in = (freeze) ? '0 : i1_pp_e3; + + rvdffe #($bits(alu_pkt_t)) i0_ap_e1_ff (.*, .en(i0_e1_ctl_en), .din(i0_ap), .dout(i0_ap_e1) ); + rvdffe #($bits(alu_pkt_t)) i0_ap_e2_ff (.*, .en(i0_e2_ctl_en), .din(i0_ap_e1),.dout(i0_ap_e2) ); + rvdffe #($bits(alu_pkt_t)) i0_ap_e3_ff (.*, .en(i0_e3_ctl_en), .din(i0_ap_e2),.dout(i0_ap_e3) ); + rvdffe #($bits(alu_pkt_t)) i0_ap_e4_ff (.*, .en(i0_e4_ctl_en), .din(i0_ap_e3),.dout(i0_ap_e4) ); + + + rvdffe #($bits(alu_pkt_t)) i1_ap_e1_ff (.*, .en(i1_e1_ctl_en), .din(i1_ap), .dout(i1_ap_e1) ); + rvdffe #($bits(alu_pkt_t)) i1_ap_e2_ff (.*, .en(i1_e2_ctl_en), .din(i1_ap_e1),.dout(i1_ap_e2) ); + rvdffe #($bits(alu_pkt_t)) i1_ap_e3_ff (.*, .en(i1_e3_ctl_en), .din(i1_ap_e2),.dout(i1_ap_e3) ); + rvdffe #($bits(alu_pkt_t)) i1_ap_e4_ff (.*, .en(i1_e4_ctl_en), .din(i1_ap_e3),.dout(i1_ap_e4) ); + + assign exu_rets_e1_pkt.pc0_call = i0_predict_p_e1.pcall & i0_predict_p_e1.valid & ~i0_predict_p_e1.br_error; + assign exu_rets_e1_pkt.pc1_call = i1_predict_p_e1.pcall & i1_predict_p_e1.valid & ~i1_predict_p_e1.br_error; + assign exu_rets_e1_pkt.pc0_ret = i0_predict_p_e1.pret & i0_predict_p_e1.valid & ~i0_predict_p_e1.br_error; + assign exu_rets_e1_pkt.pc1_ret = i1_predict_p_e1.pret & i1_predict_p_e1.valid & ~i1_predict_p_e1.br_error; + assign exu_rets_e1_pkt.pc0_pc4 = i0_predict_p_e1.pc4; + assign exu_rets_e1_pkt.pc1_pc4 = i1_predict_p_e1.pc4; + + + + rvdffe #(76) i0_src_e1_ff (.*, + .en(i0_e1_data_en), + .din( 
{i0_rs1_d[31:0], i0_rs2_d[31:0], dec_i0_br_immed_d[12:1]}), + .dout({i0_rs1_e1[31:0], i0_rs2_e1[31:0], i0_br_immed_e1[12:1]}) + ); + + rvdffe #(76) i0_src_e2_ff (.*, + .en(i0_e2_data_en), + .din( {i0_rs1_e1[31:0], i0_rs2_e1[31:0], i0_br_immed_e1[12:1]}), + .dout({i0_rs1_e2[31:0], i0_rs2_e2[31:0], i0_br_immed_e2[12:1]}) + ); + + rvdffe #(76) i0_src_e3_ff (.*, + .en(i0_e3_data_en), + .din( {i0_rs1_e2_final[31:0], i0_rs2_e2_final[31:0], i0_br_immed_e2[12:1]}), + .dout({i0_rs1_e3[31:0], i0_rs2_e3[31:0], i0_br_immed_e3[12:1]}) + ); + + + + rvdffe #(76) i1_src_e1_ff (.*, + .en(i1_e1_data_en), + .din( {i1_rs1_d[31:0], i1_rs2_d[31:0], dec_i1_br_immed_d[12:1]}), + .dout({i1_rs1_e1[31:0], i1_rs2_e1[31:0], i1_br_immed_e1[12:1]}) + ); + + rvdffe #(76) i1_src_e2_ff (.*, + .en(i1_e2_data_en), + .din( {i1_rs1_e1[31:0], i1_rs2_e1[31:0], i1_br_immed_e1[12:1]}), + .dout({i1_rs1_e2[31:0], i1_rs2_e2[31:0], i1_br_immed_e2[12:1]}) + ); + + rvdffe #(76) i1_src_e3_ff (.*, + .en(i1_e3_data_en), + .din( {i1_rs1_e2_final[31:0], i1_rs2_e2_final[31:0], i1_br_immed_e2[12:1]}), + .dout({i1_rs1_e3[31:0], i1_rs2_e3[31:0], i1_br_immed_e3[12:1]}) + ); + + + + + assign i0_rs1_e2_final[31:0] = (dec_i0_rs1_bypass_en_e2) ? i0_rs1_bypass_data_e2[31:0] : i0_rs1_e2[31:0]; + assign i0_rs2_e2_final[31:0] = (dec_i0_rs2_bypass_en_e2) ? i0_rs2_bypass_data_e2[31:0] : i0_rs2_e2[31:0]; + assign i1_rs1_e2_final[31:0] = (dec_i1_rs1_bypass_en_e2) ? i1_rs1_bypass_data_e2[31:0] : i1_rs1_e2[31:0]; + assign i1_rs2_e2_final[31:0] = (dec_i1_rs2_bypass_en_e2) ? i1_rs2_bypass_data_e2[31:0] : i1_rs2_e2[31:0]; + + + assign i0_rs1_e3_final[31:0] = (dec_i0_rs1_bypass_en_e3) ? i0_rs1_bypass_data_e3[31:0] : i0_rs1_e3[31:0]; + assign i0_rs2_e3_final[31:0] = (dec_i0_rs2_bypass_en_e3) ? i0_rs2_bypass_data_e3[31:0] : i0_rs2_e3[31:0]; + assign i1_rs1_e3_final[31:0] = (dec_i1_rs1_bypass_en_e3) ? i1_rs1_bypass_data_e3[31:0] : i1_rs1_e3[31:0]; + assign i1_rs2_e3_final[31:0] = (dec_i1_rs2_bypass_en_e3) ? 
i1_rs2_bypass_data_e3[31:0] : i1_rs2_e3[31:0]; + + // E1 GHR + // fill in the ptaken for secondary branches. + + logic [`RV_BHT_GHR_RANGE] ghr_e4_ns, ghr_e4; + logic [`RV_BHT_GHR_RANGE] ghr_e1_ns, ghr_e1; + logic i0_taken_e1, i1_taken_e1, dec_i0_alu_decode_e1, dec_i1_alu_decode_e1, i0_valid_e1, i1_valid_e1, i0_ataken_e1, i1_ataken_e1, exu_flush_final_f; + assign i0_valid_e1 = ~exu_flush_final & ~exu_flush_final_f & (i0_predict_p_e1.valid | i0_predict_p_e1.misp); + assign i1_valid_e1 = ~exu_flush_final & ~exu_flush_final_f & (i1_predict_p_e1.valid | i1_predict_p_e1.misp) & ~exu_i0_flush_upper_e1; + assign i0_ataken_e1 = i0_predict_p_e1.ataken; + assign i1_ataken_e1 = i1_predict_p_e1.ataken; + + assign i0_taken_e1 = (i0_ataken_e1 & dec_i0_alu_decode_e1) | (i0_predict_p_e1.hist[1] & ~dec_i0_alu_decode_e1); + assign i1_taken_e1= (i1_ataken_e1 & dec_i1_alu_decode_e1) | (i1_predict_p_e1.hist[1] & ~dec_i1_alu_decode_e1); + + assign ghr_e1_ns[`RV_BHT_GHR_RANGE] = ( ({`RV_BHT_GHR_SIZE{~dec_tlu_flush_lower_wb & i0_valid_e1 & (i0_predict_p_e1.misp | ~i1_valid_e1)}} & {ghr_e1[`RV_BHT_GHR_SIZE-2:0], i0_taken_e1}) | +`ifdef RV_BHT_GHR_SIZE_2 + ({`RV_BHT_GHR_SIZE{~dec_tlu_flush_lower_wb & i0_valid_e1 & ~i0_predict_p_e1.misp & i1_valid_e1}} & { i0_taken_e1, i1_taken_e1}) | +`else + ({`RV_BHT_GHR_SIZE{~dec_tlu_flush_lower_wb & i0_valid_e1 & ~i0_predict_p_e1.misp & i1_valid_e1}} & {ghr_e1[`RV_BHT_GHR_SIZE-3:0], i0_taken_e1, i1_taken_e1}) | +`endif + ({`RV_BHT_GHR_SIZE{~dec_tlu_flush_lower_wb & ~i0_valid_e1 & ~i0_predict_p_e1.br_error & i1_valid_e1}} & {ghr_e1[`RV_BHT_GHR_SIZE-2:0], i1_taken_e1}) | + ({`RV_BHT_GHR_SIZE{dec_tlu_flush_lower_wb}} & ghr_e4[`RV_BHT_GHR_RANGE]) | + ({`RV_BHT_GHR_SIZE{~dec_tlu_flush_lower_wb & ~i0_valid_e1 & ~i1_valid_e1}} & ghr_e1[`RV_BHT_GHR_RANGE]) ); + + rvdffs #(`RV_BHT_GHR_SIZE) e1ghrff (.*, .clk(active_clk), .en(~freeze), .din({ghr_e1_ns[`RV_BHT_GHR_RANGE]}), .dout({ghr_e1[`RV_BHT_GHR_RANGE]})); + rvdffs #(2) e1ghrdecff (.*, .clk(active_clk), 
.en(~freeze), .din({dec_i0_alu_decode_d, dec_i1_alu_decode_d}), .dout({dec_i0_alu_decode_e1, dec_i1_alu_decode_e1})); + + // E4 GHR + // the ataken is filled in by e1 stage if e1 stage executes the branch, otherwise by e4 stage. + logic i0_valid_e4, i1_pred_valid_e4; + assign i0_valid_e4 = dec_tlu_i0_valid_e4 & ((i0_predict_p_e4.valid) | i0_predict_p_e4.misp); + assign i1_pred_valid_e4 = dec_tlu_i1_valid_e4 & ((i1_predict_p_e4.valid) | i1_predict_p_e4.misp) & ~exu_i0_flush_upper_e4; + assign ghr_e4_ns[`RV_BHT_GHR_RANGE] = ( ({`RV_BHT_GHR_SIZE{i0_valid_e4 & (i0_predict_p_e4.misp | ~i1_pred_valid_e4)}} & {ghr_e4[`RV_BHT_GHR_SIZE-2:0], i0_predict_p_e4.ataken}) | +`ifdef RV_BHT_GHR_SIZE_2 + ({`RV_BHT_GHR_SIZE{i0_valid_e4 & ~i0_predict_p_e4.misp & i1_pred_valid_e4}} & { i0_predict_p_e4.ataken, i1_predict_p_e4.ataken}) | +`else + ({`RV_BHT_GHR_SIZE{i0_valid_e4 & ~i0_predict_p_e4.misp & i1_pred_valid_e4}} & {ghr_e4[`RV_BHT_GHR_SIZE-3:0], i0_predict_p_e4.ataken, i1_predict_p_e4.ataken}) | +`endif + ({`RV_BHT_GHR_SIZE{~i0_valid_e4 & ~i0_predict_p_e4.br_error & i1_pred_valid_e4}} & {ghr_e4[`RV_BHT_GHR_SIZE-2:0], i1_predict_p_e4.ataken}) | + ({`RV_BHT_GHR_SIZE{~i0_valid_e4 & ~i1_pred_valid_e4}} & ghr_e4[`RV_BHT_GHR_RANGE]) ); + + rvdff #(`RV_BHT_GHR_SIZE) e4ghrff (.*, .clk(active_clk), .din({ghr_e4_ns[`RV_BHT_GHR_RANGE]}), + .dout({ghr_e4[`RV_BHT_GHR_RANGE]})); + rvdff #(1) e4ghrflushff (.*, .clk(active_clk), .din({exu_flush_final}), + .dout({exu_flush_final_f})); + +// RV_NO_SECONDARY_ALU {{ +`ifdef RV_NO_SECONDARY_ALU + + rvdffe #($bits(predict_pkt_t)) i0_pp_e4_ff (.*, .en(i0_e4_ctl_en), .din(i0_pp_e4_in),.dout(i0_predict_p_e4) ); + rvdffe #($bits(predict_pkt_t)) i1_pp_e4_ff (.*, .en(i1_e4_ctl_en), .din(i1_pp_e4_in),.dout(i1_predict_p_e4) ); + + assign exu_i0_result_e4[31:0] = '0; + assign exu_i0_flush_lower_e4 = '0; + assign exu_i0_flush_path_e4[31:1] = '0; + assign i0_alu_pc_nc[31:1] = '0; + assign i0_pred_correct_lower_e4 = '0; + + assign exu_i1_result_e4[31:0] = '0; + 
assign exu_i1_flush_lower_e4 = '0; + assign exu_i1_flush_path_e4[31:1] = '0; + assign i1_alu_pc_nc[31:1] = '0; + assign i1_pred_correct_lower_e4 = '0; + +`else + + exu_alu_ctl i0_alu_e4 (.*, + .freeze ( 1'b0 ), // I + .enable ( i0_e4_ctl_en ), // I + .predict_p ( i0_pp_e4_in ), // I + .valid ( dec_i0_sec_decode_e3 ), // I + .flush ( dec_tlu_flush_lower_wb ), // I + .a ( i0_rs1_e3_final[31:0] ), // I + .b ( i0_rs2_e3_final[31:0] ), // I + .pc ( dec_i0_pc_e3[31:1] ), // I + .brimm ( i0_br_immed_e3[12:1] ), // I + .ap ( i0_ap_e4 ), // I + .out ( exu_i0_result_e4[31:0] ), // O + .flush_upper ( exu_i0_flush_lower_e4 ), // O + .flush_path ( exu_i0_flush_path_e4[31:1] ), // O + .predict_p_ff ( i0_predict_p_e4 ), // O + .pc_ff ( i0_alu_pc_nc[31:1] ), // O + .pred_correct ( i0_pred_correct_lower_e4 ) // O + ); + + + exu_alu_ctl i1_alu_e4 (.*, + .freeze ( 1'b0 ), // I + .enable ( i1_e4_ctl_en ), // I + .predict_p ( i1_pp_e4_in ), // I + .valid ( dec_i1_sec_decode_e3 ), // I + .flush ( dec_tlu_flush_lower_wb ), // I + .a ( i1_rs1_e3_final[31:0] ), // I + .b ( i1_rs2_e3_final[31:0] ), // I + .pc ( dec_i1_pc_e3[31:1] ), // I + .brimm ( i1_br_immed_e3[12:1] ), // I + .ap ( i1_ap_e4 ), // I + .out ( exu_i1_result_e4[31:0] ), // O + .flush_upper ( exu_i1_flush_lower_e4 ), // O + .flush_path ( exu_i1_flush_path_e4[31:1] ), // O + .predict_p_ff ( i1_predict_p_e4 ), // O + .pc_ff ( i1_alu_pc_nc[31:1] ), // O + .pred_correct ( i1_pred_correct_lower_e4 ) // O + ); + +`endif // RV_NO_SECONDARY_ALU }} + + + assign exu_i0_br_hist_e4[1:0] = i0_predict_p_e4.hist[1:0]; + assign exu_i0_br_bank_e4[1:0] = i0_predict_p_e4.bank[1:0]; + assign exu_i0_br_error_e4 = i0_predict_p_e4.br_error; + assign exu_i0_br_fghr_e4[`RV_BHT_GHR_RANGE] = i0_predict_p_e4.fghr[`RV_BHT_GHR_RANGE]; + assign exu_i0_br_middle_e4 = i0_predict_p_e4.pc4 ^ i0_predict_p_e4.boffset; + assign exu_i0_br_start_error_e4 = i0_predict_p_e4.br_start_error; + assign exu_i0_br_index_e4[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = 
i0_predict_p_e4.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + + assign exu_i0_br_valid_e4 = i0_predict_p_e4.valid; + assign exu_i0_br_mp_e4 = i0_predict_p_e4.misp; // needed to squash i1 error + assign exu_i0_br_ret_e4 = i0_predict_p_e4.pret; + assign exu_i0_br_call_e4 = i0_predict_p_e4.pcall; + assign exu_i0_br_way_e4 = i0_predict_p_e4.way; + + assign exu_i1_br_hist_e4[1:0] = i1_predict_p_e4.hist[1:0]; + assign exu_i1_br_bank_e4[1:0] = i1_predict_p_e4.bank[1:0]; + assign exu_i1_br_fghr_e4[`RV_BHT_GHR_RANGE] = i1_predict_p_e4.fghr[`RV_BHT_GHR_RANGE]; + assign exu_i1_br_middle_e4 = i1_predict_p_e4.pc4 ^ i1_predict_p_e4.boffset; + assign exu_i1_br_error_e4 = i1_predict_p_e4.br_error; + assign exu_i1_br_index_e4[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = i1_predict_p_e4.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + + assign exu_i1_br_start_error_e4 = i1_predict_p_e4.br_start_error; + assign exu_i1_br_valid_e4 = i1_predict_p_e4.valid; + assign exu_i1_br_mp_e4 = i1_predict_p_e4.misp; + assign exu_i1_br_way_e4 = i1_predict_p_e4.way; + + assign exu_i1_br_ret_e4 = i1_predict_p_e4.pret; + assign exu_i1_br_call_e4 = i1_predict_p_e4.pcall; + + assign exu_rets_e4_pkt.pc0_call = i0_predict_p_e4.pcall & i0_predict_p_e4.valid & ~i0_predict_p_e4.br_error; + assign exu_rets_e4_pkt.pc1_call = i1_predict_p_e4.pcall & i1_predict_p_e4.valid & ~i1_predict_p_e4.br_error; + assign exu_rets_e4_pkt.pc0_ret = i0_predict_p_e4.pret & i0_predict_p_e4.valid & ~i0_predict_p_e4.br_error; + assign exu_rets_e4_pkt.pc1_ret = i1_predict_p_e4.pret & i1_predict_p_e4.valid & ~i1_predict_p_e4.br_error; + assign exu_rets_e4_pkt.pc0_pc4 = i0_predict_p_e4.pc4; + assign exu_rets_e4_pkt.pc1_pc4 = i1_predict_p_e4.pc4; + + predict_pkt_t final_predict_mp, final_predict_mp_ff; + + logic fp_enable, fp_enable_ff; + + assign fp_enable = exu_i0_flush_lower_e4 | exu_i1_flush_lower_e4 | + exu_i0_flush_upper_e1 | exu_i1_flush_upper_e1; + + rvdff #(1) final_predict_ff (.*, .clk(active_clk), .din(fp_enable), .dout(fp_enable_ff)); + + + // 
flush_upper_e1's below take freeze into account + assign final_predict_mp = (exu_i0_flush_lower_e4) ? i0_predict_p_e4 : + (exu_i1_flush_lower_e4) ? i1_predict_p_e4 : + (exu_i0_flush_upper_e1) ? i0_predict_p_e1 : + (exu_i1_flush_upper_e1) ? i1_predict_p_e1 : '0; + + rvdffe #($bits(predict_pkt_t)) predict_mp_ff (.*, .en(fp_enable | fp_enable_ff), .din(final_predict_mp), .dout(final_predict_mp_ff)); + + logic [`RV_BHT_GHR_RANGE] final_eghr, after_flush_eghr; + assign final_eghr[`RV_BHT_GHR_RANGE] = ((exu_i0_flush_upper_e1 | exu_i1_flush_upper_e1) & ~dec_tlu_flush_lower_wb & ~exu_i0_flush_lower_e4 & ~exu_i1_flush_lower_e4 ) ? ghr_e1[`RV_BHT_GHR_RANGE] : ghr_e4[`RV_BHT_GHR_RANGE]; + + assign after_flush_eghr[`RV_BHT_GHR_RANGE] = ((exu_i0_flush_upper_e2 | exu_i1_flush_upper_e2) & ~dec_tlu_flush_lower_wb) ? ghr_e1[`RV_BHT_GHR_RANGE] : ghr_e4[`RV_BHT_GHR_RANGE]; + + + assign exu_mp_pkt.way = final_predict_mp_ff.way; + assign exu_mp_pkt.misp = final_predict_mp_ff.misp; + assign exu_mp_pkt.pcall = final_predict_mp_ff.pcall; + assign exu_mp_pkt.pja = final_predict_mp_ff.pja; + assign exu_mp_pkt.pret = final_predict_mp_ff.pret; + assign exu_mp_pkt.ataken = final_predict_mp_ff.ataken; + assign exu_mp_pkt.boffset = final_predict_mp_ff.boffset; + assign exu_mp_pkt.pc4 = final_predict_mp_ff.pc4; + assign exu_mp_pkt.hist[1:0] = final_predict_mp_ff.hist[1:0]; + assign exu_mp_pkt.toffset[11:0] = final_predict_mp_ff.toffset[11:0]; + assign exu_mp_pkt.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = final_predict_mp_ff.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + assign exu_mp_pkt.bank[1:0] = final_predict_mp_ff.bank[1:0]; + assign exu_mp_pkt.btag[`RV_BTB_BTAG_SIZE-1:0] = final_predict_mp_ff.btag[`RV_BTB_BTAG_SIZE-1:0]; + assign exu_mp_pkt.fghr[`RV_BHT_GHR_RANGE] = after_flush_eghr[`RV_BHT_GHR_RANGE]; // fghr repair value + + assign exu_mp_eghr[`RV_BHT_GHR_RANGE] = final_predict_mp_ff.fghr[`RV_BHT_GHR_RANGE]; // mp ghr for bht write + + + + rvdffe #(32) i0_upper_flush_e2_ff (.*, + 
.en(i0_e2_ctl_en), + .din({ + exu_i0_flush_path_e1[31:1], + exu_i0_flush_upper_e1}), + + .dout({ + i0_flush_path_upper_e2[31:1], + exu_i0_flush_upper_e2}) + ); + + rvdffe #(33) i1_upper_flush_e2_ff (.*, + .en(i1_e2_ctl_en), + .din({dec_i1_valid_e1, + exu_i1_flush_path_e1[31:1], + exu_i1_flush_upper_e1}), + .dout({i1_valid_e2, + i1_flush_path_upper_e2[31:1], + exu_i1_flush_upper_e2}) + ); + + assign exu_flush_path_e2[31:1] = (exu_i0_flush_upper_e2) ? i0_flush_path_upper_e2[31:1] : i1_flush_path_upper_e2[31:1]; + + assign exu_i0_flush_final = dec_tlu_flush_lower_wb | (exu_i0_flush_upper_e2 & ~freeze); + + assign exu_i1_flush_final = dec_tlu_flush_lower_wb | (exu_i1_flush_upper_e2 & ~freeze); + + assign exu_flush_upper_e2 = (exu_i0_flush_upper_e2 | exu_i1_flush_upper_e2) & ~freeze; + + assign exu_flush_final = dec_tlu_flush_lower_wb | exu_flush_upper_e2; + + assign exu_flush_path_final[31:1] = (dec_tlu_flush_lower_wb) ? dec_tlu_flush_path_wb[31:1] : exu_flush_path_e2[31:1]; + + + rvdffe #(63) i0_upper_flush_e3_ff (.*, + .en(i0_e3_ctl_en), + .din({i0_flush_path_upper_e2[31:1], + pred_correct_npc_e2[31:1], + exu_i0_flush_upper_e2}), + .dout({ + i0_flush_path_upper_e3[31:1], + pred_correct_npc_e3[31:1], + exu_i0_flush_upper_e3}) + ); + + rvdffe #(32) i1_upper_flush_e3_ff (.*, + .en(i1_e3_ctl_en), + .din({i1_valid_e2, + i1_flush_path_upper_e2[31:1] + }), + .dout({i1_valid_e3, + i1_flush_path_upper_e3[31:1]}) + ); + + rvdffe #(63) i0_upper_flush_e4_ff (.*, + .en(i0_e4_ctl_en), + .din({ + i0_flush_path_upper_e3[31:1], + pred_correct_npc_e3[31:1], + exu_i0_flush_upper_e3 & ~freeze}), + .dout({ + i0_flush_path_upper_e4[31:1], + pred_correct_npc_e4[31:1], + exu_i0_flush_upper_e4}) + ); + + rvdffe #(32) i1_upper_flush_e4_ff (.*, + .en(i1_e4_ctl_en), + .din({i1_valid_e3 & ~freeze, + i1_flush_path_upper_e3[31:1]}), + .dout({i1_valid_e4, + i1_flush_path_upper_e4[31:1]}) + ); + + + // npc logic for commit + + rvdffs #(2) pred_correct_upper_e2_ff (.*, + .clk(active_clk), + 
.en(~freeze), + .din({i1_pred_correct_upper_e1,i0_pred_correct_upper_e1}), + .dout({i1_pred_correct_upper_e2,i0_pred_correct_upper_e2}) + ); + + rvdffs #(2) pred_correct_upper_e3_ff (.*, + .clk(active_clk), + .en(~freeze), + .din({i1_pred_correct_upper_e2,i0_pred_correct_upper_e2}), + .dout({i1_pred_correct_upper_e3,i0_pred_correct_upper_e3}) + ); + + rvdff #(2) pred_correct_upper_e4_ff (.*, + .clk(active_clk), + .din({i1_pred_correct_upper_e3,i0_pred_correct_upper_e3}), + .dout({i1_pred_correct_upper_e4,i0_pred_correct_upper_e4}) + ); + + rvdff #(2) sec_decode_e4_ff (.*, + .clk(active_clk), + .din({dec_i0_sec_decode_e3,dec_i1_sec_decode_e3}), + .dout({i0_sec_decode_e4,i1_sec_decode_e4}) + ); + + + + assign i1_valid_e4_eff = i1_valid_e4 & ~((i0_sec_decode_e4) ? exu_i0_flush_lower_e4 : exu_i0_flush_upper_e4); + + assign i1_pred_correct_e4_eff = (i1_sec_decode_e4) ? i1_pred_correct_lower_e4 : i1_pred_correct_upper_e4; + assign i0_pred_correct_e4_eff = (i0_sec_decode_e4) ? i0_pred_correct_lower_e4 : i0_pred_correct_upper_e4; + + assign i1_flush_path_e4_eff[31:1] = (i1_sec_decode_e4) ? exu_i1_flush_path_e4[31:1] : i1_flush_path_upper_e4[31:1]; + assign i0_flush_path_e4_eff[31:1] = (i0_sec_decode_e4) ? exu_i0_flush_path_e4[31:1] : i0_flush_path_upper_e4[31:1]; + + + assign npc_e4[31:1] = (i1_valid_e4_eff) ? ((i1_pred_correct_e4_eff) ? pred_correct_npc_e4[31:1] : i1_flush_path_e4_eff[31:1]) : + ((i0_pred_correct_e4_eff) ? pred_correct_npc_e4[31:1] : i0_flush_path_e4_eff[31:1]); + + + assign exu_npc_e4[31:1] = (div_finish_early) ? exu_i0_flush_path_e1[31:1] : + (exu_div_finish) ? 
div_npc[31:1] : + npc_e4[31:1]; + + // remember the npc of the divide + rvdffe #(31) npc_any_ff (.*, .en(div_valid_e1), .din(exu_i0_flush_path_e1[31:1]), .dout(div_npc[31:1])); + + +endmodule // exu diff --git a/design/exu/exu_alu_ctl.sv b/design/exu/exu_alu_ctl.sv new file mode 100644 index 0000000..3ca0d70 --- /dev/null +++ b/design/exu/exu_alu_ctl.sv @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// exu_alu_ctl: one ALU / branch-resolution stage. a, b, pc, brimm and predict_p are flopped on
+// entry; ap, flush and freeze are consumed combinationally together with the flopped values.
+module exu_alu_ctl
+   import swerv_types::*;
+(
+   input  logic                  clk,                // Top level clock
+   input  logic                  active_clk,         // Level 1 free clock
+   input  logic                  rst_l,              // Reset
+   input  logic                  scan_mode,          // Scan control
+
+   input  predict_pkt_t          predict_p,          // Predicted branch structure
+
+   input  logic                  freeze,             // Clock enable for valid
+
+   input  logic [31:0]           a,                  // A operand
+   input  logic [31:0]           b,                  // B operand
+   input  logic [31:1]           pc,                 // for pc=pc+2,4 calculations
+
+   input  logic                  valid,              // Valid
+   input  logic                  flush,              // Flush pipeline
+
+   input  logic [12:1]           brimm,              // Branch offset
+
+   input  alu_pkt_t              ap,                 // {valid,predecodes}
+
+   input  logic                  enable,             // Clock enable
+
+
+   output logic [31:0]           out,                // final result
+
+   output logic                  flush_upper,        // Branch flush
+   output logic [31:1]           flush_path,         // Branch flush PC
+
+   output logic [31:1]           pc_ff,              // flopped PC
+
+   output logic                  pred_correct,       // NPC control
+   output predict_pkt_t          predict_p_ff        // Predicted branch structure
+
+   );
+
+
+
+   // Internal nets: adder (aout/cout/ov/neg), logic unit (lout), shifter (sout), compare flags.
+   logic        [31:0]  aout,bm;
+   logic                cout,ov,neg;
+
+   logic        [3:1]   logic_sel;
+
+   logic        [31:0]  lout;
+   logic        [31:0]  sout;
+   logic                sel_logic,sel_shift,sel_adder;
+
+   logic                slt_one;
+
+   logic                actual_taken;
+
+   logic signed [31:0]  a_ff;               // declared signed so that ">>>" below shifts arithmetically
+
+   logic        [31:0]  b_ff;
+
+   logic        [12:1]  brimm_ff;
+
+   logic        [31:1]  pcout;
+
+   logic                valid_ff;
+
+   logic        [31:0]  ashift;
+   logic                cond_mispredict;
+   logic                target_mispredict;
+
+   logic                eq, ne, lt, ge;
+
+   // Entry flops. valid is qualified by ~flush; a and b are only captured when enable & valid,
+   // whereas pc, brimm and predict_p are captured on enable alone.
+   rvdffs #(1)  validff (.*, .clk(active_clk), .en(~freeze), .din(valid & ~flush), .dout(valid_ff));
+
+   rvdffe #(32) aff (.*, .en(enable & valid), .din(a[31:0]), .dout(a_ff[31:0]));
+
+   rvdffe #(32) bff (.*, .en(enable & valid), .din(b[31:0]), .dout(b_ff[31:0]));
+
+   // any PC is run through here - doesn't have to be alu
+   rvdffe #(31) pcff (.*, .en(enable), .din(pc[31:1]), .dout(pc_ff[31:1]));
+
+   rvdffe #(12) brimmff (.*, .en(enable), .din(brimm[12:1]), .dout(brimm_ff[12:1]));
+
+   predict_pkt_t pp_ff;
+
+   rvdffe #($bits(predict_pkt_t)) predictpacketff (.*,
+                                                   .en(enable),
+                                                   .din(predict_p),
+                                                   .dout(pp_ff)
+                                                   );
+
+   // Operation encoding notes (lctl/sctl/bctl are decode-side names, not signals of this module):
+   // immediates are just muxed into rs2
+
+   // add =>  add=1;
+   // sub =>  add=1; sub=1;
+
+   // and =>  lctl=3
+   // or  =>  lctl=2
+   // xor =>  lctl=1
+
+   // sll =>  sctl=3
+   // srl =>  sctl=2
+   // sra =>  sctl=1
+
+   // slt =>  slt
+
+   // lui   => lctl=2; or x0, imm20 previously << 12
+   // auipc => add;    add pc, imm20 previously << 12
+
+   // beq  => bctl=4; add; add x0, pc, sext(offset[12:1])
+   // bne  => bctl=3; add; add x0, pc, sext(offset[12:1])
+   // blt  => bctl=2; add; add x0, pc, sext(offset[12:1])
+   // bge  => bctl=1; add; add x0, pc, sext(offset[12:1])
+
+   // jal  => rs1=pc {pc[31:1],1'b0},  rs2=sext(offset[20:1]);   rd=pc+[2,4]
+   // jalr => rs1=rs1,                 rs2=sext(offset[20:1]);   rd=pc+[2,4]
+
+   // Adder: for subtract, b is inverted here and ap.sub is injected as the carry-in below,
+   // giving the two's-complement a_ff - b_ff.
+   assign bm[31:0] = ( ap.sub )  ?  ~b_ff[31:0]  :  b_ff[31:0];
+
+
+   assign {cout, aout[31:0]} = {1'b0, a_ff[31:0]} + {1'b0, bm[31:0]} + {32'b0, ap.sub};
+
+   assign ov = (~a_ff[31] & ~bm[31] &  aout[31]) |   // signed overflow: addends share a sign ...
+               ( a_ff[31] &  bm[31] & ~aout[31] );   // ... that differs from the sign of the sum
+
+   assign neg = aout[31];
+
+   assign eq = a_ff[31:0] == b_ff[31:0];
+
+   assign ne = ~eq;
+
+   assign logic_sel[3] = ap.land | ap.lor;    // enables the  a &  b minterm
+   assign logic_sel[2] = ap.lor  | ap.lxor;   // enables the  a & ~b minterm
+   assign logic_sel[1] = ap.lor  | ap.lxor;   // enables the ~a &  b minterm
+
+
+   // Logic unit as a sum of minterms: and -> a&b; xor -> a&~b | ~a&b; or -> all three.
+   assign lout[31:0] = (  a_ff[31:0] &  b_ff[31:0] & {32{logic_sel[3]}} ) |
+                       (  a_ff[31:0] & ~b_ff[31:0] & {32{logic_sel[2]}} ) |
+                       ( ~a_ff[31:0] &  b_ff[31:0] & {32{logic_sel[1]}} );
+
+
+   // Shifter: shift amount is b_ff[4:0]; sra uses the signed a_ff via ashift.
+   assign ashift[31:0] = a_ff >>> b_ff[4:0];
+
+   assign sout[31:0] = ( {32{ap.sll}} & (a_ff[31:0] <<  b_ff[4:0]) ) |
+                       ( {32{ap.srl}} & (a_ff[31:0] >>  b_ff[4:0]) ) |
+                       ( {32{ap.sra}} &  ashift[31:0]              );
+
+
+   assign sel_logic = |{ap.land,ap.lor,ap.lxor};
+
+   assign sel_shift = |{ap.sll,ap.srl,ap.sra};
+
+   assign sel_adder = (ap.add | ap.sub) & ~ap.slt;    // slt does not forward the raw adder result
+
+
+
+   // Less-than from the adder flags: signed uses neg ^ ov, unsigned uses the missing carry-out.
+   // NOTE(review): this presumes ap.sub is set for compares (decode is outside this module) - confirm.
+   assign lt = (~ap.unsign & (neg ^ ov)) |
+               ( ap.unsign & ~cout);
+
+   assign ge = ~lt;
+
+
+   assign slt_one = (ap.slt & lt);
+
+   // Result OR-mux; presumably at most one select is active per operation - TODO confirm in decode.
+   assign out[31:0] = ({32{sel_logic}} & lout[31:0]) |
+                      ({32{sel_shift}} & sout[31:0]) |
+                      ({32{sel_adder}} & aout[31:0]) |
+                      ({32{ap.jal | pp_ff.pcall | pp_ff.pja | pp_ff.pret}} & {pcout[31:1],1'b0}) |
+                      ({32{ap.csr_write}} & ((ap.csr_imm) ? b_ff[31:0] : a_ff[31:0])) |   // csr_write: if csr_imm rs2 else rs1
+                      ({31'b0, slt_one});
+
+   // branch handling
+
+   logic                any_jal;
+
+   assign any_jal = ap.jal |
+                    pp_ff.pcall |
+                    pp_ff.pja   |
+                    pp_ff.pret;
+
+   // Actual outcome: the selected condition holds, or the op is any kind of jump.
+   assign actual_taken = (ap.beq & eq) |
+                         (ap.bne & ne) |
+                         (ap.blt & lt) |
+                         (ap.bge & ge) |
+                         (any_jal);
+
+   // for a conditional br pcout[] will be the opposite of the branch prediction
+   // for jal or pcall, it will be the link address pc+2 or pc+4
+
+   rvbradder ibradder (
+                     .pc(pc_ff[31:1]),
+                     .offset(brimm_ff[12:1]),
+                     .dout(pcout[31:1])
+                      );
+
+   // pred_correct is for the npc logic
+   // pred_correct indicates not to use the flush_path
+   // for any_jal pred_correct==0
+
+   assign pred_correct = ((ap.predict_nt & ~actual_taken) |
+                          (ap.predict_t  &  actual_taken)) & ~any_jal;
+
+
+   // for any_jal adder output is the flush path
+   assign flush_path[31:1] = (any_jal) ? aout[31:1] : pcout[31:1];
+
+
+   // pcall and pret are included here
+   assign cond_mispredict = (ap.predict_t  & ~actual_taken) |
+                            (ap.predict_nt &  actual_taken);
+
+   // target mispredicts on ret's
+
+   assign target_mispredict = pp_ff.pret & (pp_ff.prett[31:1] != aout[31:1]);
+
+   assign flush_upper = ( ap.jal | cond_mispredict | target_mispredict) & valid_ff & ~flush & ~freeze;
+
+   // 2-bit branch history update; the espresso-style truth table below matches the two
+   // newhist equations that follow it (columns: hist[1:0], taken -> newhist[1:0]).
+   // .i 3
+   // .o 2
+   // .ilb hist[1] hist[0] taken
+   // .ob newhist[1] newhist[0]
+   // .type fd
+   //
+   // 00 0 01
+   // 01 0 01
+   // 10 0 00
+   // 11 0 10
+   // 00 1 10
+   // 01 1 00
+   // 10 1 11
+   // 11 1 11
+
+   logic [1:0]          newhist;
+
+   assign newhist[1] = (pp_ff.hist[1]&pp_ff.hist[0]) | (!pp_ff.hist[0]&actual_taken);
+
+   assign newhist[0] = (!pp_ff.hist[1]&!actual_taken) | (pp_ff.hist[1]&actual_taken);
+
+
+   // Outgoing predict packet: the flopped packet passes through unchanged, except that misp,
+   // ataken and hist are replaced with this stage's results when valid_ff is set.
+   always_comb begin
+      predict_p_ff = pp_ff;
+
+      predict_p_ff.misp    = (valid_ff) ? (cond_mispredict | target_mispredict) & ~flush : pp_ff.misp;
+      predict_p_ff.ataken  = (valid_ff) ? actual_taken : pp_ff.ataken;
+      predict_p_ff.hist[1] = (valid_ff) ? newhist[1] : pp_ff.hist[1];
+      predict_p_ff.hist[0] = (valid_ff) ? newhist[0] : pp_ff.hist[0];
+
+   end
+
+
+
+endmodule // exu_alu_ctl
diff --git a/design/exu/exu_div_ctl.sv b/design/exu/exu_div_ctl.sv
new file mode 100644
index 0000000..1ae951b
--- /dev/null
+++ b/design/exu/exu_div_ctl.sv
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2019 Western Digital Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// exu_div_ctl: iterative divide / remainder unit. q_ff holds the dividend and then the quotient,
+// a_ff the partial remainder, m_ff the divisor; count tracks progress and finish ends the run.
+module exu_div_ctl
+   import swerv_types::*;
+(
+   input  logic                 clk,                       // Top level clock
+   input  logic                 active_clk,                // Level 1 active clock
+   input  logic                 rst_l,                     // Reset
+   input  logic                 scan_mode,                 // Scan mode
+
+   input  logic                 dec_tlu_fast_div_disable,  // Disable small number optimization
+
+   input  logic  [31:0]         dividend,                  // Numerator
+   input  logic  [31:0]         divisor,                   // Denominator
+
+   input  div_pkt_t             dp,                        // valid, sign, rem
+
+   input  logic                 flush_lower,               // Flush pipeline
+
+
+   output logic                 valid_ff_e1,               // Valid E1 stage
+   output logic                 finish_early,              // Finish smallnum
+   output logic                 finish,                    // Finish smallnum or normal divide
+   output logic                 div_stall,                 // Divide is running
+
+   output logic  [31:0]         out                        // Result
+  );
+
+
+   logic         run_in, run_state;
+   logic [5:0]   count_in, count;
+   logic [32:0]  m_ff;
+   logic         qff_enable;
+   logic         aff_enable;
+   logic [32:0]  q_in, q_ff;
+   logic [32:0]  a_in, a_ff;
+   logic [32:0]  m_eff;
+   logic [32:0]  a_shift;
+   logic         dividend_neg_ff, divisor_neg_ff;
+   logic [31:0]  dividend_comp;
+   logic [31:0]  dividend_eff;
+   logic [31:0]  q_ff_comp;
+   logic [31:0]  q_ff_eff;
+   logic [31:0]  a_ff_comp;
+   logic [31:0]  a_ff_eff;
+   logic         sign_ff, sign_eff;
+   logic         rem_ff;
+   logic         add;
+   logic [32:0]  a_eff;
+   logic [64:0]  a_eff_shift;
+   logic         rem_correct;
+   logic         flush_lower_ff;
+   logic         valid_e1;
+
+   logic         smallnum_case, smallnum_case_ff;
+   logic [3:0]   smallnum, smallnum_ff;
+   logic         m_already_comp;
+
+
+   // State flops. Operand signs, sign_eff and dp.rem are captured when dp.valid; m_ff is the
+   // divisor extended by one bit, which is divisor[31] for signed ops and 0 when dp.unsign.
+   rvdff  #(1)  flush_any_ff     (.*, .clk(active_clk), .din(flush_lower),                                .dout(flush_lower_ff));
+   rvdff  #(1)  e1val_ff         (.*, .clk(active_clk), .din(dp.valid & ~flush_lower_ff),                 .dout(valid_ff_e1));
+   rvdff  #(1)  runff            (.*, .clk(active_clk), .din(run_in),                                     .dout(run_state));
+   rvdff  #(6)  countff          (.*, .clk(active_clk), .din(count_in[5:0]),                              .dout(count[5:0]));
+   rvdffs #(4)  miscf            (.*, .clk(active_clk), .din({dividend[31],divisor[31],sign_eff,dp.rem}), .dout({dividend_neg_ff,divisor_neg_ff,sign_ff,rem_ff}), .en(dp.valid));
+   rvdff  #(5)  smallnumff       (.*, .clk(active_clk), .din({smallnum_case,smallnum[3:0]}),              .dout({smallnum_case_ff,smallnum_ff[3:0]}));
+   rvdffe #(33) mff              (.*, .en(dp.valid),    .din({ ~dp.unsign & divisor[31], divisor[31:0]}), .dout(m_ff[32:0]));
+   rvdffe #(33) qff              (.*, .en(qff_enable),  .din(q_in[32:0]),                                 .dout(q_ff[32:0]));
+   rvdffe #(33) aff              (.*, .en(aff_enable),  .din(a_in[32:0]),                                 .dout(a_ff[32:0]));
+
+   rvtwoscomp #(32) dividend_c   (.din(q_ff[31:0]), .dout(dividend_comp[31:0]));   // NOTE(review): same input as q_ff_c below
+   rvtwoscomp #(32) q_ff_c       (.din(q_ff[31:0]), .dout(q_ff_comp[31:0]));
+   rvtwoscomp #(32) a_ff_c       (.din(a_ff[31:0]), .dout(a_ff_comp[31:0]));
+
+
+   assign valid_e1 = valid_ff_e1 & ~flush_lower_ff;
+
+
+   // START - short circuit logic for small numbers {{
+
+   // small number divides - any 4b / 4b is done in 1 cycle (divisor != 0)
+   // to generate espresso equations:
+   // 1) smalldiv > smalldiv.e
+   // 2) espresso -Dso -oeqntott smalldiv.e | addassign > smalldiv
+
+   // smallnum case does not cover divide by 0
+   assign smallnum_case = ((q_ff[31:4] == 28'b0) & (m_ff[31:4] == 28'b0) & (m_ff[31:0] != 32'b0) & ~rem_ff & valid_e1 & ~dec_tlu_fast_div_disable) |
+                          ((q_ff[31:0] == 32'b0) &                         (m_ff[31:0] != 32'b0) & ~rem_ff & valid_e1 & ~dec_tlu_fast_div_disable);
+
+   // smallnum[3:0]: quotient bits of q_ff[3:0] / m_ff[3:0] as two-level logic (tool-generated, see above).
+   assign smallnum[3] = ( q_ff[3] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] );
+
+
+   assign smallnum[2] = ( q_ff[3] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) |
+                        ( q_ff[2] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ) |
+                        ( q_ff[3] & q_ff[2] & ~m_ff[3] & ~m_ff[2] );
+
+
+   assign smallnum[1] = ( q_ff[2] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) |
+                        ( q_ff[1] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ) |
+                        ( q_ff[3] & ~m_ff[3] & ~m_ff[1] & ~m_ff[0]) |
+                        ( q_ff[3] & ~q_ff[2] & ~m_ff[3] & ~m_ff[2] & m_ff[1] & m_ff[0]) |
+                        (~q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & ~m_ff[2] ) |
+                        ( q_ff[3] & q_ff[2] & ~m_ff[3] & ~m_ff[0]) |
+                        ( q_ff[3] & q_ff[2] & ~m_ff[3] & m_ff[2] & ~m_ff[1] ) |
+                        ( q_ff[3] & q_ff[1] & ~m_ff[3] & ~m_ff[1] ) |
+                        ( q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & m_ff[2] );
+
+
+   assign smallnum[0] = ( q_ff[2] & q_ff[1] & q_ff[0] & ~m_ff[3] & ~m_ff[1] ) |
+                        ( q_ff[3] & ~q_ff[2] & q_ff[0] & ~m_ff[3] & m_ff[1] & m_ff[0]) |
+                        ( q_ff[2] & ~m_ff[3] & ~m_ff[1] & ~m_ff[0]) |
+                        ( q_ff[1] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) |
+                        ( q_ff[0] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ) |
+                        (~q_ff[3] & q_ff[2] & ~q_ff[1] & ~m_ff[3] & ~m_ff[2] & m_ff[1] & m_ff[0]) |
+                        (~q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & ~m_ff[0]) |
+                        ( q_ff[3] & ~m_ff[2] & ~m_ff[1] & ~m_ff[0]) |
+                        ( q_ff[3] & ~q_ff[2] & ~m_ff[3] & m_ff[2] & m_ff[1] ) |
+                        (~q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & m_ff[2] & ~m_ff[1] ) |
+                        (~q_ff[3] & q_ff[2] & q_ff[0] & ~m_ff[3] & ~m_ff[1] ) |
+                        ( q_ff[3] & ~q_ff[2] & ~q_ff[1] & ~m_ff[3] & m_ff[2] & m_ff[0]) |
+                        ( ~q_ff[2] & q_ff[1] & q_ff[0] & ~m_ff[3] & ~m_ff[2] ) |
+                        ( q_ff[3] & q_ff[2] & ~m_ff[1] & ~m_ff[0]) |
+                        ( q_ff[3] & q_ff[1] & ~m_ff[2] & ~m_ff[0]) |
+                        (~q_ff[3] & q_ff[2] & q_ff[1] & q_ff[0] & ~m_ff[3] & m_ff[2] ) |
+                        ( q_ff[3] & q_ff[2] & m_ff[3] & ~m_ff[2] ) |
+                        ( q_ff[3] & q_ff[1] & m_ff[3] & ~m_ff[2] & ~m_ff[1] ) |
+                        ( q_ff[3] & q_ff[0] & ~m_ff[2] & ~m_ff[1] ) |
+                        ( q_ff[3] & ~q_ff[1] & ~m_ff[3] & m_ff[2] & m_ff[1] & m_ff[0]) |
+                        ( q_ff[3] & q_ff[2] & q_ff[1] & m_ff[3] & ~m_ff[0]) |
+                        ( q_ff[3] & q_ff[2] & q_ff[1] & m_ff[3] & ~m_ff[1] ) |
+                        ( q_ff[3] & q_ff[2] & q_ff[0] & m_ff[3] & ~m_ff[1] ) |
+                        ( q_ff[3] & ~q_ff[2] & q_ff[1] & ~m_ff[3] & m_ff[1] ) |
+                        ( q_ff[3] & q_ff[1] & q_ff[0] & ~m_ff[2] ) |
+                        ( q_ff[3] & q_ff[2] & q_ff[1] & q_ff[0] & m_ff[3] );
+
+
+
+   // END   - short circuit logic for small numbers }}
+
+
+   // *** Start Short Q *** {{
+   // Skips leading iterations in byte-sized steps, based on which upper bytes of the operands are "empty".
+   logic [4:0]   a_cls;
+   logic [4:0]   b_cls;
+   logic [5:0]   shortq;                 // NOTE(review): never driven or read in this module
+   logic [5:0]   shortq_shift;           // NOTE(review): only bits [3:0] are driven below
+   logic [5:0]   shortq_shift_ff;
+   logic         shortq_enable;
+   logic         shortq_enable_ff;
+   logic [32:0]  short_dividend;
+
+   assign short_dividend[31:0] = q_ff[31:0];
+   assign short_dividend[32]   = sign_ff & q_ff[31];
+
+   // Shift lookup: A = a_cls[2:0] (dividend), B = b_cls[2:0] (divisor), SH = iterations skipped.
+   // NOTE(review): SH=32 rows are encoded as 31 (6'b01_1111) below and count_in adds a further +1 - confirm.
+   //    A       B
+   //   210     210    SH
+   //   ---     ---    --
+   //   1xx     000     0
+   //   1xx     001     8
+   //   1xx     01x    16
+   //   1xx     1xx    24
+   //   01x     000     8
+   //   01x     001    16
+   //   01x     01x    24
+   //   01x     1xx    32
+   //   001     000    16
+   //   001     001    24
+   //   001     01x    32
+   //   001     1xx    32
+   //   000     000    24
+   //   000     001    32
+   //   000     01x    32
+   //   000     1xx    32
+
+   logic [3:0]   shortq_raw;
+   logic [3:0]   shortq_shift_xx;
+
+   assign a_cls[4:3] = 2'b0;
+   assign a_cls[2]   = (~short_dividend[32] & (short_dividend[31:24] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[31:23] != {9{1'b1}}));
+   assign a_cls[1]   = (~short_dividend[32] & (short_dividend[23:16] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[22:15] != {8{1'b1}}));
+   assign a_cls[0]   = (~short_dividend[32] & (short_dividend[15:08] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[14:07] != {8{1'b1}}));
+
+   assign b_cls[4:3] = 2'b0;
+   assign b_cls[2]   = (~m_ff[32] & ( m_ff[31:24] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[31:24] != {8{1'b1}}));
+   assign b_cls[1]   = (~m_ff[32] & ( m_ff[23:16] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[23:16] != {8{1'b1}}));
+   assign b_cls[0]   = (~m_ff[32] & ( m_ff[15:08] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[15:08] != {8{1'b1}}));
+
+   assign shortq_raw[3] = ( (a_cls[2:1] == 2'b01 ) & (b_cls[2] == 1'b1 ) ) |   // Shift by 32
+                          ( (a_cls[2:0] == 3'b001) & (b_cls[2] == 1'b1 ) ) |
+                          ( (a_cls[2:0] == 3'b000) & (b_cls[2] == 1'b1 ) ) |
+                          ( (a_cls[2:0] == 3'b001) & (b_cls[2:1] == 2'b01 ) ) |
+                          ( (a_cls[2:0] == 3'b000) & (b_cls[2:1] == 2'b01 ) ) |
+                          ( (a_cls[2:0] == 3'b000) & (b_cls[2:0] == 3'b001) );
+
+   assign shortq_raw[2] = ( (a_cls[2] == 1'b1 ) & (b_cls[2] == 1'b1 ) ) |   // Shift by 24
+                          ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:1] == 2'b01 ) ) |
+                          ( (a_cls[2:0] == 3'b001) & (b_cls[2:0] == 3'b001) ) |
+                          ( (a_cls[2:0] == 3'b000) & (b_cls[2:0] == 3'b000) );
+
+   assign shortq_raw[1] = ( (a_cls[2] == 1'b1 ) & (b_cls[2:1] == 2'b01 ) ) |   // Shift by 16
+                          ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:0] == 3'b001) ) |
+                          ( (a_cls[2:0] == 3'b001) & (b_cls[2:0] == 3'b000) );
+
+   assign shortq_raw[0] = ( (a_cls[2] == 1'b1 ) & (b_cls[2:0] == 3'b001) ) |   // Shift by 8
+                          ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:0] == 3'b000) );
+
+   // The short-Q skip is only taken in the E1 cycle (valid_ff_e1), for a non-zero divisor,
+   // and when some shift applies.
+   assign shortq_enable = valid_ff_e1 & (m_ff[31:0] != 32'b0) & (shortq_raw[3:0] != 4'b0);
+
+   assign shortq_shift[3:0] = ({4{shortq_enable}} & shortq_raw[3:0]);
+
+   rvdff  #(5)  i_shortq_ff      (.*, .clk(active_clk), .din({shortq_enable,shortq_shift[3:0]}), .dout({shortq_enable_ff,shortq_shift_xx[3:0]}));
+
+   assign shortq_shift_ff[5:0] = ({6{shortq_shift_xx[3]}} & 6'b01_1111) |   // 31
+                                 ({6{shortq_shift_xx[2]}} & 6'b01_1000) |   // 24
+                                 ({6{shortq_shift_xx[1]}} & 6'b01_0000) |   // 16
+                                 ({6{shortq_shift_xx[0]}} & 6'b00_1000);    //  8
+
+`ifdef ASSERT_ON
+   // Flags the case where more than one shortq_shift_xx bit is set at once.
+   logic  div_assert_fail;
+
+   assign div_assert_fail = (shortq_shift_xx[3] & shortq_shift_xx[2]) |
+                            (shortq_shift_xx[3] & shortq_shift_xx[1]) |
+                            (shortq_shift_xx[3] & shortq_shift_xx[0]) |
+                            (shortq_shift_xx[2] & shortq_shift_xx[1]) |
+                            (shortq_shift_xx[2] & shortq_shift_xx[0]) |
+                            (shortq_shift_xx[1] & shortq_shift_xx[0]);
+
+   assert_exu_div_shortq_shift_error: assert #0 (~div_assert_fail) else $display("ERROR: SHORTQ_SHIFT_XX with multiple shifts ON!");
+
+`endif
+
+   // *** End   Short Q *** }}
+
+
+
+   // Run control: run_state is set by dp.valid and cleared by finish or a (delayed) flush.
+   // count advances by 1 per cycle plus any pending short-Q skip, and is held at 0 otherwise.
+   assign div_stall = run_state;
+
+   assign run_in = (dp.valid | run_state) & ~finish & ~flush_lower_ff;
+
+   assign count_in[5:0] = {6{run_state & ~finish & ~flush_lower_ff & ~shortq_enable}} & (count[5:0] + shortq_shift_ff[5:0] + 6'd1);
+
+   // Completion: immediately for the small-number case, else at count 32 (divide) or 33 (remainder).
+   assign finish_early = smallnum_case;
+
+   assign finish = (smallnum_case | ((~rem_ff) ? (count[5:0] == 6'd32) : (count[5:0] == 6'd33))) & ~flush_lower & ~flush_lower_ff;
+
+   assign sign_eff = ~dp.unsign & (divisor[31:0] != 32'b0);    // signed handling is dropped for a zero divisor
+
+   // q register next value: load the dividend when idle, otherwise shift in the new quotient bit ~a_in[32].
+   assign q_in[32:0] = ({33{~run_state }} & {1'b0,dividend[31:0]}) |
+                       ({33{ run_state & (valid_ff_e1 | shortq_enable_ff)}} & ({dividend_eff[31:0], ~a_in[32]} << shortq_shift_ff[5:0])) |
+                       ({33{ run_state & ~(valid_ff_e1 | shortq_enable_ff)}} & {q_ff[31:0], ~a_in[32]});
+
+   assign qff_enable = dp.valid | (run_state & ~shortq_enable);
+
+
+
+   // A negative dividend of a signed operation is replaced by its two's complement from dividend_c.
+   assign dividend_eff[31:0] = (sign_ff & dividend_neg_ff) ? dividend_comp[31:0] : q_ff[31:0];
+
+   // Each step adds m_ff or its inverse; the inverse path gets +1 via ~add in a_in below (i.e. subtract).
+   assign m_eff[32:0] = (add) ? m_ff[32:0] : ~m_ff[32:0];
+
+   assign a_eff_shift[64:0] = {33'b0, dividend_eff[31:0]} << shortq_shift_ff[5:0];
+
+   assign a_eff[32:0] = ({33{ rem_correct }} & a_ff[32:0] ) |
+                        ({33{~rem_correct & ~shortq_enable_ff}} & {a_ff[31:0], q_ff[32]}) |
+                        ({33{~rem_correct & shortq_enable_ff}} & a_eff_shift[64:32] );
+
+   assign a_shift[32:0] = {33{run_state}} & a_eff[32:0];
+
+   assign a_in[32:0] = {33{run_state}} & (a_shift[32:0] + m_eff[32:0] + {32'b0,~add});
+
+   assign aff_enable = dp.valid | (run_state & ~shortq_enable & (count[5:0]!=6'd33)) | rem_correct;
+
+
+   assign m_already_comp = (divisor_neg_ff & sign_ff);
+
+   // if m already complemented, then invert operation add->sub, sub->add
+   assign add = (a_ff[32] | rem_correct) ^ m_already_comp;
+
+   assign rem_correct = (count[5:0] == 6'd33) & rem_ff & a_ff[32];
+
+
+   // Result sign fix-up: the quotient is negated when the operand signs differ, the remainder
+   // when the dividend was negative (signed operations only).
+   assign q_ff_eff[31:0] = (sign_ff & (dividend_neg_ff ^ divisor_neg_ff)) ? q_ff_comp[31:0] : q_ff[31:0];
+
+   assign a_ff_eff[31:0] = (sign_ff & dividend_neg_ff) ? a_ff_comp[31:0] : a_ff[31:0];
+
+   assign out[31:0] = ({32{ smallnum_case_ff }} & {28'b0, smallnum_ff[3:0]}) |
+                      ({32{ rem_ff}} & a_ff_eff[31:0] ) |
+                      ({32{~smallnum_case_ff & ~rem_ff}} & q_ff_eff[31:0] );
+
+
+endmodule // exu_div_ctl
diff --git a/design/exu/exu_mul_ctl.sv b/design/exu/exu_mul_ctl.sv
new file mode 100644
index 0000000..3c7a0d0
--- /dev/null
+++ b/design/exu/exu_mul_ctl.sv
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2019 Western Digital Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// exu_mul_ctl: three-stage multiplier. E1 flops the operands (with optional load bypass),
+// E2 forms the signed 33x33 product, E3 selects the low or high 32 bits of the flopped product.
+module exu_mul_ctl
+   import swerv_types::*;
+(
+   input  logic          clk,              // Top level clock
+   input  logic          active_clk,       // Level 1 active clock
+   input  logic          clk_override,     // Override clock enables
+   input  logic          rst_l,            // Reset
+   input  logic          scan_mode,        // Scan mode
+
+   input  logic [31:0]   a,                // A operand
+   input  logic [31:0]   b,                // B operand
+
+   input  logic [31:0]   lsu_result_dc3,   // Load result used in E1 bypass
+
+   input  logic          freeze,           // Pipeline freeze
+
+   input  mul_pkt_t      mp,               // valid, rs1_sign, rs2_sign, low, load_mul_rs1_bypass_e1, load_mul_rs2_bypass_e1
+
+
+   output logic [31:0]   out               // Result
+
+   );
+
+
+   logic                valid_e1, valid_e2;
+   logic                mul_c1_e1_clken,   mul_c1_e2_clken,   mul_c1_e3_clken;
+   logic                exu_mul_c1_e1_clk, exu_mul_c1_e2_clk, exu_mul_c1_e3_clk;
+
+   logic        [31:0]  a_ff_e1, a_e1;
+   logic        [31:0]  b_ff_e1, b_e1;
+   logic                load_mul_rs1_bypass_e1, load_mul_rs2_bypass_e1;
+   logic                rs1_sign_e1, rs1_neg_e1;
+   logic                rs2_sign_e1, rs2_neg_e1;
+   logic signed [32:0]  a_ff_e2, b_ff_e2;       // 33-bit signed operands: {neg bit, 32-bit value}
+   logic        [63:0]  prod_e3;
+   logic                low_e1, low_e2, low_e3;
+
+
+   // --------------------------- Clock gating ----------------------------------
+
+   // C1 clock enables
+   assign mul_c1_e1_clken        = (mp.valid | clk_override) & ~freeze;   // a multiply is entering E1
+   assign mul_c1_e2_clken        = (valid_e1 | clk_override) & ~freeze;   // a multiply is moving E1 -> E2
+   assign mul_c1_e3_clken        = (valid_e2 | clk_override) & ~freeze;   // a multiply is moving E2 -> E3
+
+   // C1 - 1 clock pulse for data
+   rvclkhdr exu_mul_c1e1_cgc     (.*, .en(mul_c1_e1_clken),   .l1clk(exu_mul_c1_e1_clk));
+   rvclkhdr exu_mul_c1e2_cgc     (.*, .en(mul_c1_e2_clken),   .l1clk(exu_mul_c1_e2_clk));
+   rvclkhdr exu_mul_c1e3_cgc     (.*, .en(mul_c1_e3_clken),   .l1clk(exu_mul_c1_e3_clk));
+
+
+   // --------------------------- Input flops ----------------------------------
+   // The valid bit rides on active_clk with en(~freeze); all other E1 state uses the gated E1 clock.
+   rvdffs #(1)  valid_e1_ff      (.*, .din(mp.valid),                  .dout(valid_e1),               .clk(active_clk),        .en(~freeze));
+   rvdff  #(1)  rs1_sign_e1_ff   (.*, .din(mp.rs1_sign),               .dout(rs1_sign_e1),            .clk(exu_mul_c1_e1_clk));
+   rvdff  #(1)  rs2_sign_e1_ff   (.*, .din(mp.rs2_sign),               .dout(rs2_sign_e1),            .clk(exu_mul_c1_e1_clk));
+   rvdff  #(1)  low_e1_ff        (.*, .din(mp.low),                    .dout(low_e1),                 .clk(exu_mul_c1_e1_clk));
+   rvdff  #(1)  ld_rs1_byp_e1_ff (.*, .din(mp.load_mul_rs1_bypass_e1), .dout(load_mul_rs1_bypass_e1), .clk(exu_mul_c1_e1_clk));
+   rvdff  #(1)  ld_rs2_byp_e1_ff (.*, .din(mp.load_mul_rs2_bypass_e1), .dout(load_mul_rs2_bypass_e1), .clk(exu_mul_c1_e1_clk));
+
+   rvdff  #(32) a_e1_ff          (.*, .din(a[31:0]),                   .dout(a_ff_e1[31:0]),          .clk(exu_mul_c1_e1_clk));
+   rvdff  #(32) b_e1_ff          (.*, .din(b[31:0]),                   .dout(b_ff_e1[31:0]),          .clk(exu_mul_c1_e1_clk));
+
+
+
+   // --------------------------- E1 Logic Stage ----------------------------------
+   // Either operand can be replaced in E1 by the load result lsu_result_dc3.
+   assign a_e1[31:0]             = (load_mul_rs1_bypass_e1)  ?  lsu_result_dc3[31:0]  :  a_ff_e1[31:0];
+   assign b_e1[31:0]             = (load_mul_rs2_bypass_e1)  ?  lsu_result_dc3[31:0]  :  b_ff_e1[31:0];
+
+   assign rs1_neg_e1             =  rs1_sign_e1 & a_e1[31];   // 33rd bit: set only for a negative signed operand
+   assign rs2_neg_e1             =  rs2_sign_e1 & b_e1[31];
+
+
+   rvdffs #(1)  valid_e2_ff      (.*, .din(valid_e1),                  .dout(valid_e2),               .clk(active_clk),        .en(~freeze));
+   rvdff  #(1)  low_e2_ff        (.*, .din(low_e1),                    .dout(low_e2),                 .clk(exu_mul_c1_e2_clk));
+
+   rvdff  #(33) a_e2_ff          (.*, .din({rs1_neg_e1, a_e1[31:0]}),  .dout(a_ff_e2[32:0]),          .clk(exu_mul_c1_e2_clk));
+   rvdff  #(33) b_e2_ff          (.*, .din({rs2_neg_e1, b_e1[31:0]}),  .dout(b_ff_e2[32:0]),          .clk(exu_mul_c1_e2_clk));
+
+
+
+   // ---------------------- E2 Logic Stage --------------------------
+
+   // Signed 33x33 multiply; only product bits [63:0] are carried into E3.
+   logic signed [65:0]  prod_e2;
+
+   assign prod_e2[65:0]          =  a_ff_e2  *  b_ff_e2;
+
+
+   rvdff  #(1)  low_e3_ff        (.*, .din(low_e2),                    .dout(low_e3),                 .clk(exu_mul_c1_e3_clk));
+   rvdff  #(64) prod_e3_ff       (.*, .din(prod_e2[63:0]),             .dout(prod_e3[63:0]),          .clk(exu_mul_c1_e3_clk));
+
+
+
+   // ----------------------- E3 Logic Stage -------------------------
+
+   // low_e3 selects product bits [31:0]; otherwise the upper word [63:32] is returned.
+   assign out[31:0]            = low_e3  ?  prod_e3[31:0]  :  prod_e3[63:32];
+
+
+endmodule // exu_mul_ctl
diff --git a/design/flist.questa b/design/flist.questa
new file mode 100644
index 0000000..afbd089
--- /dev/null
+++ b/design/flist.questa
@@ -0,0 +1,52 @@
+$RV_ROOT/workspace/work/snapshots/default/common_defines.vh
+$RV_ROOT/design/include/def.sv
++incdir+$RV_ROOT/workspace/work/snapshots/default
++incdir+$RV_ROOT/design/lib
++incdir+$RV_ROOT/design/include
++incdir+$RV_ROOT/design/dmi
+$RV_ROOT/design/swerv_wrapper.sv
+$RV_ROOT/design/mem.sv
+$RV_ROOT/design/pic_ctrl.sv
+$RV_ROOT/design/swerv.sv
+$RV_ROOT/design/dma_ctrl.sv
+$RV_ROOT/design/ifu/ifu_aln_ctl.sv
+$RV_ROOT/design/ifu/ifu_compress_ctl.sv
+$RV_ROOT/design/ifu/ifu_ifc_ctl.sv
+$RV_ROOT/design/ifu/ifu_bp_ctl.sv
+$RV_ROOT/design/ifu/ifu_ic_mem.sv
+$RV_ROOT/design/ifu/ifu_mem_ctl.sv
+$RV_ROOT/design/ifu/ifu_iccm_mem.sv
+$RV_ROOT/design/ifu/ifu.sv
+$RV_ROOT/design/dec/dec_decode_ctl.sv
+$RV_ROOT/design/dec/dec_gpr_ctl.sv
+$RV_ROOT/design/dec/dec_ib_ctl.sv
+$RV_ROOT/design/dec/dec_tlu_ctl.sv
+$RV_ROOT/design/dec/dec_trigger.sv
+$RV_ROOT/design/dec/dec.sv
+$RV_ROOT/design/exu/exu_alu_ctl.sv
+$RV_ROOT/design/exu/exu_mul_ctl.sv
+$RV_ROOT/design/exu/exu_div_ctl.sv
+$RV_ROOT/design/exu/exu.sv
+$RV_ROOT/design/lsu/lsu.sv
+$RV_ROOT/design/lsu/lsu_clkdomain.sv
+$RV_ROOT/design/lsu/lsu_addrcheck.sv
+$RV_ROOT/design/lsu/lsu_lsc_ctl.sv
+$RV_ROOT/design/lsu/lsu_stbuf.sv
+$RV_ROOT/design/lsu/lsu_bus_buffer.sv
+$RV_ROOT/design/lsu/lsu_bus_intf.sv
+$RV_ROOT/design/lsu/lsu_ecc.sv
+$RV_ROOT/design/lsu/lsu_dccm_mem.sv
+$RV_ROOT/design/lsu/lsu_dccm_ctl.sv
+$RV_ROOT/design/lsu/lsu_trigger.sv
+$RV_ROOT/design/dbg/dbg.sv
+$RV_ROOT/design/dmi/dmi_wrapper.v
+$RV_ROOT/design/dmi/dmi_jtag_to_core_sync.v
+$RV_ROOT/design/dmi/rvjtag_tap.sv
+$RV_ROOT/design/lib/beh_lib.sv
+$RV_ROOT/design/lib/mem_lib.sv
+$RV_ROOT/design/lib/svci_to_ahb.sv
+$RV_ROOT/design/lib/ahb_to_svci.sv
+$RV_ROOT/design/lib/svci_to_axi4.sv
+$RV_ROOT/design/lib/axi4_to_svci.sv
+$RV_ROOT/design/lib/ahb_to_axi4.sv +$RV_ROOT/design/lib/axi4_to_ahb.sv diff --git a/design/ifu/ifu.sv b/design/ifu/ifu.sv new file mode 100644 index 0000000..931f022 --- /dev/null +++ b/design/ifu/ifu.sv @@ -0,0 +1,406 @@ +//******************************************************************************** +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//******************************************************************************** +//******************************************************************************** +// Function: Top level file for Icache, Fetch, Branch prediction & Aligner +// BFF -> F1 -> F2 -> A +//******************************************************************************** + +module ifu + import swerv_types::*; +( + input logic free_clk, + input logic active_clk, + input logic clk, + input logic clk_override, + input logic rst_l, + + input logic dec_ib3_valid_d, dec_ib2_valid_d, // mass balance for decode buffer + + input logic dec_ib0_valid_eff_d, // effective valid taking decode into account + input logic dec_ib1_valid_eff_d, // effective valid taking decode into account + + input logic exu_i0_br_ret_e4, // i0 branch commit is a ret + input logic exu_i1_br_ret_e4, // i1 branch commit is a ret + input logic exu_i0_br_call_e4, // i0 branch commit is a call + input logic exu_i1_br_call_e4, // i1 branch commit is a call + + input 
logic exu_flush_final, // flush, includes upper and lower + input logic dec_tlu_flush_err_wb , // flush due to parity error. + input logic dec_tlu_flush_noredir_wb, // don't fetch, validated with exu_flush_final + input logic dec_tlu_dbg_halted, // halted, used for leaving IDLE state + input logic dec_tlu_pmu_fw_halted, // Core is halted + input logic [31:1] exu_flush_path_final, // flush fetch address + input logic exu_flush_upper_e2, // flush upper, either i0 or i1 + + input logic [31:0] dec_tlu_mrac_ff ,// Side_effect , cacheable for each region + input logic dec_tlu_fence_i_wb, // fence.i, invalidate icache, validated with exu_flush_final + input logic dec_tlu_flush_leak_one_wb, // ignore bp for leak one fetches + + input logic dec_tlu_bpred_disable, // disable all branch prediction + input logic dec_tlu_core_ecc_disable, // disable ecc checking and flagging + + // AXI Write Channels - IFU never writes. So, 0 out mostly + output logic ifu_axi_awvalid, + input logic ifu_axi_awready, + output logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_awid, + output logic [31:0] ifu_axi_awaddr, + output logic [3:0] ifu_axi_awregion, + output logic [7:0] ifu_axi_awlen, + output logic [2:0] ifu_axi_awsize, + output logic [1:0] ifu_axi_awburst, + output logic ifu_axi_awlock, + output logic [3:0] ifu_axi_awcache, + output logic [2:0] ifu_axi_awprot, + output logic [3:0] ifu_axi_awqos, + + output logic ifu_axi_wvalid, + input logic ifu_axi_wready, + output logic [63:0] ifu_axi_wdata, + output logic [7:0] ifu_axi_wstrb, + output logic ifu_axi_wlast, + + input logic ifu_axi_bvalid, + output logic ifu_axi_bready, + input logic [1:0] ifu_axi_bresp, + input logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_bid, + + // AXI Read Channels + output logic ifu_axi_arvalid, + input logic ifu_axi_arready, + output logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_arid, + output logic [31:0] ifu_axi_araddr, + output logic [3:0] ifu_axi_arregion, + output logic [7:0] ifu_axi_arlen, + output logic [2:0] ifu_axi_arsize, + output logic 
[1:0] ifu_axi_arburst, + output logic ifu_axi_arlock, + output logic [3:0] ifu_axi_arcache, + output logic [2:0] ifu_axi_arprot, + output logic [3:0] ifu_axi_arqos, + + input logic ifu_axi_rvalid, + output logic ifu_axi_rready, + input logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_rid, + input logic [63:0] ifu_axi_rdata, + input logic [1:0] ifu_axi_rresp, + input logic ifu_axi_rlast, + + //// AHB LITE BUS +//`ifdef RV_BUILD_AHB_LITE + input logic ifu_bus_clk_en, + + + input logic dma_iccm_req, + input logic dma_iccm_stall_any, + input logic [31:0] dma_mem_addr, + input logic [2:0] dma_mem_sz, + input logic dma_mem_write, + input logic [63:0] dma_mem_wdata, + + output logic iccm_dma_ecc_error, + output logic iccm_dma_rvalid, + output logic [63:0] iccm_dma_rdata, + output logic iccm_ready, + +//`endif + + output logic [1:0] ifu_pmu_instr_aligned, + output logic ifu_pmu_align_stall, + output logic ifu_pmu_fetch_stall, + +// I$ & ITAG Ports + output logic [31:3] ic_rw_addr, // Read/Write address to the Icache. + output logic [3:0] ic_wr_en, // Icache write enable, when filling the Icache. + output logic ic_rd_en, // Icache read enable. +`ifdef RV_ICACHE_ECC + output logic [83:0] ic_wr_data, // Data to fill to the Icache. With ECC + input logic [167:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [24:0] ictag_debug_rd_data,// Debug icache tag. + output logic [41:0] ic_debug_wr_data, // Debug wr cache. + output logic [41:0] ifu_ic_debug_rd_data, +`else + output logic [67:0] ic_wr_data, // Data to fill to the Icache. With Parity + input logic [135:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With Parity + input logic [20:0] ictag_debug_rd_data,// Debug icache tag. + output logic [33:0] ic_debug_wr_data, // Debug wr cache. + output logic [33:0] ifu_ic_debug_rd_data, +`endif + + + output logic [127:0] ic_premux_data, // Premux data to be muxed with each way of the Icache.
+ output logic ic_sel_premux_data, // Select the premux data. + + output logic [15:2] ic_debug_addr, // Read/Write address to the Icache. + output logic ic_debug_rd_en, // Icache debug rd + output logic ic_debug_wr_en, // Icache debug wr + output logic ic_debug_tag_array, // Debug tag array + output logic [3:0] ic_debug_way, // Debug way. Rd or Wr. + + + output logic [3:0] ic_tag_valid, // Valid bits when accessing the Icache. One valid bit per way. F2 stage + + input logic [3:0] ic_rd_hit, // Compare hits from Icache tags. Per way. F2 stage + input logic ic_tag_perr, // Icache Tag parity error + + +`ifdef RV_ICCM_ENABLE + // ICCM ports + output logic [`RV_ICCM_BITS-1:2] iccm_rw_addr, // ICCM read/write address. + output logic iccm_wren, // ICCM write enable (through the DMA) + output logic iccm_rden, // ICCM read enable. + output logic [77:0] iccm_wr_data, // ICCM write data. + output logic [2:0] iccm_wr_size, // ICCM write location within DW. + + input logic [155:0] iccm_rd_data, // Data read from ICCM. +`endif + +// Perf counter sigs + output logic ifu_pmu_ic_miss, // ic miss + output logic ifu_pmu_ic_hit, // ic hit + output logic ifu_pmu_bus_error, // iside bus error + output logic ifu_pmu_bus_busy, // iside bus busy + output logic ifu_pmu_bus_trxn, // iside bus transactions + + + output logic ifu_i0_valid, // Instruction 0 valid. From Aligner to Decode + output logic ifu_i1_valid, // Instruction 1 valid. From Aligner to Decode + output logic ifu_i0_icaf, // Instruction 0 access fault. From Aligner to Decode + output logic ifu_i1_icaf, // Instruction 1 access fault. From Aligner to Decode + output logic ifu_i0_icaf_f1, // Instruction 0 has access fault on second fetch group + output logic ifu_i1_icaf_f1, // Instruction 1 has access fault on second fetch group + output logic ifu_i0_perr, // Instruction 0 parity error. From Aligner to Decode + output logic ifu_i1_perr, // Instruction 1 parity error.
From Aligner to Decode + output logic ifu_i0_sbecc, // Instruction 0 has single bit ecc error + output logic ifu_i1_sbecc, // Instruction 1 has single bit ecc error + output logic ifu_i0_dbecc, // Instruction 0 has double bit ecc error + output logic ifu_i1_dbecc, // Instruction 1 has double bit ecc error + output logic iccm_dma_sb_error, // Single Bit ECC error from a DMA access + output logic[31:0] ifu_i0_instr, // Instruction 0 . From Aligner to Decode + output logic[31:0] ifu_i1_instr, // Instruction 1 . From Aligner to Decode + output logic[31:1] ifu_i0_pc, // Instruction 0 pc. From Aligner to Decode + output logic[31:1] ifu_i1_pc, // Instruction 1 pc. From Aligner to Decode + output logic ifu_i0_pc4, // Instruction 0 is 4 byte. From Aligner to Decode + output logic ifu_i1_pc4, // Instruction 1 is 4 byte. From Aligner to Decode + output logic [15:0] ifu_illegal_inst, // Illegal instruction. + + output logic ifu_miss_state_idle, // There is no outstanding miss. Cache miss state is idle. + + + output br_pkt_t i0_brp, // Instruction 0 branch packet. From Aligner to Decode + output br_pkt_t i1_brp, // Instruction 1 branch packet. 
From Aligner to Decode + + input predict_pkt_t exu_mp_pkt, // mispredict packet + input logic [`RV_BHT_GHR_RANGE] exu_mp_eghr, // execute ghr + + input br_tlu_pkt_t dec_tlu_br0_wb_pkt, // slot0 update/error pkt + input br_tlu_pkt_t dec_tlu_br1_wb_pkt, // slot1 update/error pkt + input dec_tlu_flush_lower_wb, + + input rets_pkt_t exu_rets_e1_pkt, // E1 return stack packet + input rets_pkt_t exu_rets_e4_pkt, // E4 return stack packet + + // pc's used to maintain and update the BP RET stacks +`ifdef REAL_COMM_RS + input logic [31:1] exu_i0_pc_e1, + input logic [31:1] exu_i1_pc_e1, + input logic [31:1] dec_tlu_i0_pc_e4, + input logic [31:1] dec_tlu_i1_pc_e4, +`endif + + output logic [15:0] ifu_i0_cinst, + output logic [15:0] ifu_i1_cinst, + + +/// Icache debug + input cache_debug_pkt_t dec_tlu_ic_diag_pkt , + output logic ifu_ic_debug_rd_data_valid, + + + + input logic scan_mode + ); + + localparam TAGWIDTH = 2 ; + localparam IDWIDTH = 2 ; + + logic ifu_fb_consume1, ifu_fb_consume2; + logic [31:1] ifc_fetch_addr_f2; + logic ifc_fetch_uncacheable_f1; + + logic [7:0] ifu_fetch_val; // valids on a 2B boundary, left justified [7] implies valid fetch + logic [31:1] ifu_fetch_pc; // starting pc of fetch + + logic [31:1] ifc_fetch_addr_f1; + + logic ic_crit_wd_rdy; + logic ic_write_stall; + logic ic_dma_active; + logic ifc_dma_access_ok; + logic ifc_iccm_access_f1; + logic ifc_region_acc_fault_f1; + logic ic_access_fault_f2; + logic ifu_ic_mb_empty; + + + logic ic_hit_f2; + + // fetch control + ifu_ifc_ctl ifc (.* + ); + + +`ifdef RV_BTB_48 + logic [7:0][1:0] ifu_bp_way_f2; // way indication; right justified +`else + logic [7:0] ifu_bp_way_f2; // way indication; right justified +`endif + logic ifu_bp_kill_next_f2; // kill next fetch; taken target found + logic [31:1] ifu_bp_btb_target_f2; // predicted target PC + logic [7:1] ifu_bp_inst_mask_f2; // tell ic which valids to kill because of a taken branch; right justified + logic [7:0] ifu_bp_hist1_f2; // history counters for 
all 4 potential branches; right justified + logic [7:0] ifu_bp_hist0_f2; // history counters for all 4 potential branches; right justified + logic [11:0] ifu_bp_poffset_f2; // predicted target + logic [7:0] ifu_bp_ret_f2; // predicted ret ; right justified + logic [7:0] ifu_bp_pc4_f2; // pc4 indication; right justified + logic [7:0] ifu_bp_valid_f2; // branch valid, right justified + logic [`RV_BHT_GHR_RANGE] ifu_bp_fghr_f2; + + // branch predictor + ifu_bp_ctl bp (.*); + + + logic [7:0] ic_fetch_val_f2; + logic [127:0] ic_data_f2; + logic [127:0] ifu_fetch_data; + logic ifc_fetch_req_f1_raw, ifc_fetch_req_f1, ifc_fetch_req_f2; + logic ic_rd_parity_final_err; // This fetch has a data_cache or tag parity error. + logic iccm_rd_ecc_single_err; // This fetch has an iccm single error. + logic iccm_rd_ecc_double_err; // This fetch has an iccm double error. + + icache_err_pkt_t ic_error_f2; + + logic ifu_icache_fetch_f2 ; + logic [16:2] ifu_icache_error_index; // Index with parity error + logic ifu_icache_error_val; // Parity error + logic ifu_icache_sb_error_val; + + assign ifu_fetch_data[127:0] = ic_data_f2[127:0]; + assign ifu_fetch_val[7:0] = ic_fetch_val_f2[7:0]; + assign ifu_fetch_pc[31:1] = ifc_fetch_addr_f2[31:1]; + + // aligner + ifu_aln_ctl aln (.*); + + // icache + ifu_mem_ctl mem_ctl + (.*, + .fetch_addr_f1(ifc_fetch_addr_f1), + .ifu_icache_error_index(ifu_icache_error_index[16:6]), + .ic_hit_f2(ic_hit_f2), + .ic_data_f2(ic_data_f2[127:0]) + ); + + + + // Performance debug info + // + // +`ifdef DUMP_BTB_ON + logic exu_mp_valid; // conditional branch mispredict + logic exu_mp_way; // way field of the mispredict packet + logic exu_mp_ataken; // direction is actual taken + logic exu_mp_boffset; // branch offset + logic exu_mp_pc4; // branch is a 4B inst + logic exu_mp_call; // branch is a call inst + logic exu_mp_ret; // branch is a ret inst + logic exu_mp_ja; // branch is a jump always + logic [1:0] exu_mp_hist; // new history + logic [11:0] exu_mp_tgt; // target
offset + logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address + logic [1:0] exu_mp_bank; // write bank; based on branch PC[3:2] + logic [`RV_BTB_BTAG_SIZE-1:0] exu_mp_btag; // branch tag + logic [`RV_BHT_GHR_RANGE] exu_mp_fghr; // original fetch ghr (for correcting dir) + + assign exu_mp_valid = exu_mp_pkt.misp; // conditional branch mispredict + assign exu_mp_ataken = exu_mp_pkt.ataken; // direction is actual taken + assign exu_mp_boffset = exu_mp_pkt.boffset; // branch offset + assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst + assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst + assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst + assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always + assign exu_mp_way = exu_mp_pkt.way; // way field of the mispredict packet + assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history + assign exu_mp_tgt[11:0] = exu_mp_pkt.toffset[11:0] ; // target offset + assign exu_mp_addr[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = exu_mp_pkt.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] ; // BTB/BHT address + assign exu_mp_bank[1:0] = exu_mp_pkt.bank[1:0] ; // write bank; based on branch PC[3:2] + assign exu_mp_btag = exu_mp_pkt.btag[`RV_BTB_BTAG_SIZE-1:0] ; // branch tag + assign exu_mp_fghr[`RV_BHT_GHR_RANGE] = exu_mp_pkt.fghr[`RV_BHT_GHR_RANGE] ; // original fetch ghr (for correcting dir) + + logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] btb_rd_addr_f2; + `define DEC `CPU_TOP.dec + `define EXU `CPU_TOP.exu + rvbtb_addr_hash f2hash(.pc(ifc_fetch_addr_f2[31:1]), .hash(btb_rd_addr_f2[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO])); + logic [31:0] mppc_ns, mppc; + assign mppc_ns[31:1] = `EXU.exu_i0_flush_upper_e1 ? `DEC.decode.i0_pc_e1[31:1] : (`EXU.exu_i1_flush_upper_e1 ? `DEC.decode.i1_pc_e1[31:1] : (`EXU.exu_i0_flush_lower_e4 ?
`DEC.decode.i0_pc_e4[31:1] : `DEC.decode.i1_pc_e4[31:1])); + assign mppc_ns[0] = 1'b0; + logic [3:0] ic_rd_hit_f2; + rvdff #(36) mdseal_ff (.*, .din({mppc_ns[31:0], mem_ctl.ic_rd_hit[3:0]}), .dout({mppc[31:0],ic_rd_hit_f2[3:0]})); +logic [2:0] tmp_bnk; +assign tmp_bnk[2:0] = encode8_3(bp.btb_sel_f2[7:0]); + always @(negedge clk) begin + if(`DEC.tlu.mcyclel[31:0] == 32'h0000_0010) begin + $display("BTB_CONFIG: %d",`RV_BTB_ARRAY_DEPTH*4); + `ifndef BP_NOGSHARE + $display("BHT_CONFIG: %d gshare: 1",`RV_BHT_ARRAY_DEPTH*4); + `else + $display("BHT_CONFIG: %d gshare: 0",`RV_BHT_ARRAY_DEPTH*4); + `endif + $display("RS_CONFIG: %d", `RV_RET_STACK_SIZE); + end + if(exu_flush_final & ~(dec_tlu_br0_wb_pkt.br_error | dec_tlu_br0_wb_pkt.br_start_error | dec_tlu_br1_wb_pkt.br_error | dec_tlu_br1_wb_pkt.br_start_error) & (exu_mp_pkt.misp | exu_mp_pkt.ataken)) + $display("%7d BTB_MP : index: %0h bank: %0h call: %b ret: %b ataken: %b hist: %h valid: %b tag: %h targ: %h eghr: %b pred: %b ghr_index: %h brpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha, exu_mp_addr[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO], exu_mp_bank[1:0], exu_mp_call, exu_mp_ret, exu_mp_ataken, exu_mp_hist[1:0], exu_mp_valid, exu_mp_pkt.btag[`RV_BTB_BTAG_SIZE-1:0], {exu_flush_path_final[31:1], 1'b0}, exu_mp_eghr[`RV_BHT_GHR_RANGE], exu_mp_valid, bp.bht_wr_addr0, mppc[31:0], exu_mp_pkt.way); + for(int i = 0; i < 8; i++) begin + if(ifu_bp_valid_f2[i] & ifc_fetch_req_f2) + $display("%7d BTB_HIT : index: %0h bank: %0h call: %b ret: %b taken: %b strength: %b tag: %h targ: %h ghr: %4b ghr_index: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,btb_rd_addr_f2[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO],encode8_3(bp.btb_sel_f2[7:0]), bp.btb_rd_call_f2, bp.btb_rd_ret_f2, ifu_bp_hist1_f2[tmp_bnk], ifu_bp_hist0_f2[tmp_bnk], bp.fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0], {ifu_bp_btb_target_f2[31:1], 1'b0}, bp.fghr[`RV_BHT_GHR_RANGE], bp.bht_rd_addr_f1, ifu_bp_way_f2[tmp_bnk]); + end +`ifdef RV_BTB_48 + for(int y = 0; y < 4; y++) begin + for(int z = 0; z < 4; 
z++) begin + if(bp.lru_bank_sel[y][z]) + $display("%7d BTB_LRU: index: %0h bank: %0h newlru %h", `DEC.tlu.mcyclel[31:0]+32'ha, z,y,bp.lru_bank_wr_data[y][z]); + end + end +`endif + if(dec_tlu_br0_wb_pkt.valid & ~(dec_tlu_br0_wb_pkt.br_error | dec_tlu_br0_wb_pkt.br_start_error)) + $display("%7d BTB_UPD0: ghr_index: %0h bank: %0h hist: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,bp.br0_hashed_wb[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO],{dec_tlu_br0_wb_pkt.bank[1:0],dec_tlu_br0_wb_pkt.middle}, dec_tlu_br0_wb_pkt.hist, dec_tlu_br0_wb_pkt.way); + if(dec_tlu_br1_wb_pkt.valid & ~(dec_tlu_br1_wb_pkt.br_error | dec_tlu_br1_wb_pkt.br_start_error)) + $display("%7d BTB_UPD1: ghr_index: %0h bank: %0h hist: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,bp.br1_hashed_wb[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO],{dec_tlu_br1_wb_pkt.bank[1:0],dec_tlu_br1_wb_pkt.middle}, dec_tlu_br1_wb_pkt.hist, dec_tlu_br1_wb_pkt.way); + if(dec_tlu_br0_wb_pkt.br_error | dec_tlu_br0_wb_pkt.br_start_error) + $display("%7d BTB_ERR0: index: %0h bank: %0h start: %b rfpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,dec_tlu_br0_wb_pkt.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO],dec_tlu_br0_wb_pkt.bank[1:0], dec_tlu_br0_wb_pkt.br_start_error, {exu_flush_path_final[31:1], 1'b0}, dec_tlu_br0_wb_pkt.way); + if(dec_tlu_br1_wb_pkt.br_error | dec_tlu_br1_wb_pkt.br_start_error) + $display("%7d BTB_ERR1: index: %0h bank: %0h start: %b rfpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,dec_tlu_br1_wb_pkt.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO],dec_tlu_br1_wb_pkt.bank[1:0], dec_tlu_br1_wb_pkt.br_start_error, {exu_flush_path_final[31:1], 1'b0}, dec_tlu_br1_wb_pkt.way); + end // always @ (negedge clk) + function [2:0] encode8_3; + input [7:0] in; + + encode8_3[2] = |in[7:4]; + encode8_3[1] = in[7] | in[6] | in[3] | in[2]; + encode8_3[0] = in[7] | in[5] | in[3] | in[1]; + + endfunction +`endif +endmodule // ifu diff --git a/design/ifu/ifu_aln_ctl.sv b/design/ifu/ifu_aln_ctl.sv new file mode 100644 index 0000000..bccf893 --- /dev/null +++ 
b/design/ifu/ifu_aln_ctl.sv @@ -0,0 +1,1245 @@ +//******************************************************************************** +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//******************************************************************************** + +//******************************************************************************** +// Function: Instruction aligner +//******************************************************************************** +module ifu_aln_ctl + import swerv_types::*; +( + + input logic active_clk, + + input logic iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc error. + input logic iccm_rd_ecc_double_err, // This fetch has a double ICCM ecc error. + input logic ic_rd_parity_final_err, // for tag parity errors + + input logic ifu_icache_fetch_f2, + + input logic ic_access_fault_f2, // Instruction access fault for the current fetch. 
+ input logic [`RV_BHT_GHR_RANGE] ifu_bp_fghr_f2, // fetch GHR + input logic [31:1] ifu_bp_btb_target_f2, // predicted RET target + input logic [11:0] ifu_bp_poffset_f2, // predicted target offset + + input logic [7:0] ifu_bp_hist0_f2, // history counters for all 4 potential branches, bit 1, right justified + input logic [7:0] ifu_bp_hist1_f2, // history counters for all 4 potential branches, bit 1, right justified + input logic [7:0] ifu_bp_pc4_f2, // pc4 indication, right justified +`ifdef RV_BTB_48 + input logic [7:0][1:0] ifu_bp_way_f2, // way indication, right justified +`else + input logic [7:0] ifu_bp_way_f2, // way indication, right justified +`endif + input logic [7:0] ifu_bp_valid_f2, // branch valid, right justified + input logic [7:0] ifu_bp_ret_f2, // predicted ret indication, right justified + + input logic exu_flush_final, // Flush from the pipeline. + + input logic dec_ib3_valid_d, // valids for top 2 instruction buffers at decode + input logic dec_ib2_valid_d, + + input logic dec_ib0_valid_eff_d, // effective valid taking decode into account + input logic dec_ib1_valid_eff_d, + + + input logic [127:0] ifu_fetch_data, // fetch data in memory format - not right justified + + input icache_err_pkt_t ic_error_f2, // based on configuration: either parity or ecc + + + input logic [7:0] ifu_fetch_val, // valids on a 2B boundary, right justified + input logic [31:1] ifu_fetch_pc, // starting pc of fetch + + + input logic rst_l, + input logic clk, + input logic dec_tlu_core_ecc_disable, // disable ecc checking and flagging + + output logic ifu_i0_valid, // Instruction 0 is valid + output logic ifu_i1_valid, // Instruction 1 is valid + output logic ifu_i0_icaf, // Instruction 0 has access fault + output logic ifu_i1_icaf, // Instruction 1 has access fault + output logic ifu_i0_icaf_f1, // Instruction 0 has access fault on second fetch group + output logic ifu_i1_icaf_f1, // Instruction 1 has access fault on second fetch group + output logic ifu_i0_perr, // 
Instruction 0 has parity error + output logic ifu_i1_perr, // Instruction 1 has parity error + output logic ifu_i0_sbecc, // Instruction 0 has single bit ecc error + output logic ifu_i1_sbecc, // Instruction 1 has single bit ecc error + output logic ifu_i0_dbecc, // Instruction 0 has double bit ecc error + output logic ifu_i1_dbecc, // Instruction 1 has double bit ecc error + output logic [31:0] ifu_i0_instr, // Instruction 0 + output logic [31:0] ifu_i1_instr, // Instruction 1 + output logic [31:1] ifu_i0_pc, // Instruction 0 PC + output logic [31:1] ifu_i1_pc, // Instruction 1 PC + output logic ifu_i0_pc4, + output logic ifu_i1_pc4, + + output logic ifu_fb_consume1, // Consumed one buffer. To fetch control fetch for buffer mass balance + output logic ifu_fb_consume2, // Consumed two buffers.To fetch control fetch for buffer mass balance + output logic [15:0] ifu_illegal_inst, // Illegal Instruction. + + output br_pkt_t i0_brp, // Branch packet for I0. + output br_pkt_t i1_brp, // Branch packet for I1. 
+ + output logic [1:0] ifu_pmu_instr_aligned, // number of inst aligned this cycle + output logic ifu_pmu_align_stall, // aligner stalled this cycle + + output logic [16:2] ifu_icache_error_index, // Icache Error address index + output logic ifu_icache_error_val, // Icache error valid + output logic ifu_icache_sb_error_val, + + output logic [15:0] ifu_i0_cinst, // 16b compress inst for i0 + output logic [15:0] ifu_i1_cinst, // 16b compress inst for i1 + + input logic scan_mode + + + ); + +`include "global.h" + + logic ifvalid; + logic shift_f1_f0, shift_f2_f0, shift_f2_f1; + logic fetch_to_f0, fetch_to_f1, fetch_to_f2; + + logic [7:0] f2val_in, f2val; + logic [7:0] f1val_in, f1val; + logic [7:0] f0val_in, f0val; + + logic [7:0] sf1val, sf0val; + + logic [31:1] f2pc_in, f2pc; + logic [31:1] f1pc_in, f1pc; + logic [31:1] f0pc_in, f0pc; + logic [31:1] sf1pc, sf0pc; + + logic [63:0] aligndata; + logic first4B, first2B; + logic second4B, second2B; + + logic third4B, third2B; + logic [31:0] uncompress0, uncompress1, uncompress2; + logic ibuffer_room1_more; + logic ibuffer_room2_more; + logic i0_shift, i1_shift; + logic shift_2B, shift_4B, shift_6B, shift_8B; + logic f1_shift_2B, f1_shift_4B, f1_shift_6B; + logic f2_valid, sf1_valid, sf0_valid; + + logic [31:0] ifirst, isecond, ithird; + logic [31:1] f0pc_plus1, f0pc_plus2, f0pc_plus3, f0pc_plus4; + logic [31:1] f1pc_plus1, f1pc_plus2, f1pc_plus3; + logic [3:0] alignval; + logic [31:1] firstpc, secondpc, thirdpc, fourthpc; + + logic [11:0] f1poffset; + logic [11:0] f0poffset; + logic [`RV_BHT_GHR_RANGE] f1fghr; + logic [`RV_BHT_GHR_RANGE] f0fghr; + logic [7:0] f1hist1; + logic [7:0] f0hist1; + logic [7:0] f1hist0; + logic [7:0] f0hist0; + logic [7:0] f1pc4; + logic [7:0] f0pc4; + + logic [7:0] f1ret; + logic [7:0] f0ret; +`ifdef RV_BTB_48 + logic [7:0][1:0] f1way; + logic [7:0][1:0] f0way; +`else + logic [7:0] f1way; + logic [7:0] f0way; +`endif + + logic [7:0] f1brend; + logic [7:0] f0brend; + + logic [3:0] alignbrend; + 
logic [3:0] alignpc4; +`ifdef RV_ICACHE_ECC + logic [19:0] alignecc; +`else + logic [3:0] alignparity; +`endif + logic [3:0] alignret; + logic [3:0] alignway; + logic [3:0] alignhist1; + + logic [3:0] alignhist0; + logic [3:1] alignfromf1; + logic i0_ends_f1, i1_ends_f1; + logic i0_br_start_error, i1_br_start_error; + + logic [31:1] f1prett; + logic [31:1] f0prett; + logic f1dbecc; + logic f0dbecc; + logic f1sbecc; + logic f0sbecc; + logic f1perr; + logic f0perr; + logic f1icfetch; + logic f0icfetch; + logic f1icaf; + logic f0icaf; + + logic [3:0] alignicfetch; + logic [3:0] aligntagperr; + logic [3:0] aligndataperr; + logic [3:0] alignsbecc; + logic [3:0] aligndbecc; + logic [3:0] alignicaf; + logic i0_brp_pc4, i1_brp_pc4; + + logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] firstpc_hash, secondpc_hash, thirdpc_hash, fourthpc_hash; + + logic i0_illegal, i1_illegal; + logic shift_illegal; + logic first_legal, second_legal, third_legal; + logic [15:0] illegal_inst; + logic illegal_inst_en; + logic illegal_lockout_in, illegal_lockout; + + logic [3:0] alignfinalperr; + + logic f2_wr_en; + + assign f2_wr_en = fetch_to_f2; + + logic f0_shift_wr_en; + + assign f0_shift_wr_en = (fetch_to_f0 | shift_f2_f0 | shift_f1_f0 | shift_2B | shift_4B | shift_6B | shift_8B); + + logic f1_shift_wr_en; + + assign f1_shift_wr_en = (fetch_to_f1 | shift_f2_f1 | f1_shift_2B | f1_shift_4B | f1_shift_6B); + + logic [1:0] wrptr, wrptr_in; + logic [1:0] rdptr, rdptr_in; + logic [2:0] qwen; + logic [127:0] q2,q1,q0; + logic [2:0] first_offset, second_offset; + logic [2:0] q2off_eff, q2off_in, q2off; + logic [2:0] q1off_eff, q1off_in, q1off; + logic [2:0] q0off_eff, q0off_in, q0off; + logic f0_shift_2B, f0_shift_4B, f0_shift_6B, f0_shift_8B; + + logic [127:0] q0eff; + logic [127:0] q0final; + logic [2:0] q0ptr; + logic [7:0] q0sel; + + logic [127:0] q1eff; + logic [127:0] q1final; + logic [2:0] q1ptr; + logic [7:0] q1sel; + + logic [2:0] qren; + + logic consume_fb1, consume_fb0; + logic [3:1] icaf_eff; + 
+`ifdef RV_ICACHE_ECC + logic [39:0] q0ecc, q1ecc, q2ecc; + logic [39:0] q0ecceff, q1ecceff; + logic [39:0] q0eccfinal, q1eccfinal; +`else + logic [7:0] q0parity, q1parity, q2parity; + logic [7:0] q0parityeff, q1parityeff; + logic [7:0] q0parityfinal, q1parityfinal; +`endif + + // new queue control logic + + assign wrptr_in[1:0] = (({2{wrptr[1:0]==2'b00 & ifvalid}} & 2'b01) | + ({2{wrptr[1:0]==2'b01 & ifvalid}} & 2'b10) | + ({2{wrptr[1:0]==2'b10 & ifvalid}} & 2'b00) | + ({2{~ifvalid}} & wrptr[1:0])) & ~{2{exu_flush_final}}; + + rvdff #(2) wrpff (.*, .clk(active_clk), .din(wrptr_in[1:0]), .dout(wrptr[1:0])); + + assign rdptr_in[1:0] = (({2{rdptr[1:0]==2'b00 & ifu_fb_consume1}} & 2'b01) | + ({2{rdptr[1:0]==2'b01 & ifu_fb_consume1}} & 2'b10) | + ({2{rdptr[1:0]==2'b10 & ifu_fb_consume1}} & 2'b00) | + ({2{rdptr[1:0]==2'b00 & ifu_fb_consume2}} & 2'b10) | + ({2{rdptr[1:0]==2'b01 & ifu_fb_consume2}} & 2'b00) | + ({2{rdptr[1:0]==2'b10 & ifu_fb_consume2}} & 2'b01) | + ({2{~ifu_fb_consume1&~ifu_fb_consume2}} & rdptr[1:0])) & ~{2{exu_flush_final}}; + + rvdff #(2) rdpff (.*, .clk(active_clk), .din(rdptr_in[1:0]), .dout(rdptr[1:0])); + + assign qren[2:0] = { rdptr[1:0]==2'b10, + rdptr[1:0]==2'b01, + rdptr[1:0]==2'b00 + }; + + assign qwen[2:0] = { wrptr[1:0]==2'b10 & ifvalid, + wrptr[1:0]==2'b01 & ifvalid, + wrptr[1:0]==2'b00 & ifvalid + }; + + + assign first_offset[2:0] = {f0_shift_8B, f0_shift_6B|f0_shift_4B, f0_shift_6B|f0_shift_2B }; + + assign second_offset[2:0] = {1'b0, f1_shift_6B|f1_shift_4B, f1_shift_6B|f1_shift_2B }; + + + assign q2off_eff[2:0] = (rdptr[1:0]==2'd2) ? (q2off[2:0] + first_offset[2:0]) : + (rdptr[1:0]==2'd1) ? (q2off[2:0] + second_offset[2:0]) : + q2off[2:0]; + + assign q2off_in[2:0] = (qwen[2]) ? ifu_fetch_pc[3:1] : q2off_eff[2:0]; + + rvdff #(3) q2offsetff (.*, .clk(active_clk), .din(q2off_in[2:0]), .dout(q2off[2:0])); + + assign q1off_eff[2:0] = (rdptr[1:0]==2'd1) ? (q1off[2:0] + first_offset[2:0]) : + (rdptr[1:0]==2'd0) ? 
(q1off[2:0] + second_offset[2:0]) : + q1off[2:0]; + + + assign q1off_in[2:0] = (qwen[1]) ? ifu_fetch_pc[3:1] : q1off_eff[2:0]; + + rvdff #(3) q1offsetff (.*, .clk(active_clk), .din(q1off_in[2:0]), .dout(q1off[2:0])); + + + assign q0off_eff[2:0] = (rdptr[1:0]==2'd0) ? (q0off[2:0] + first_offset[2:0]) : + (rdptr[1:0]==2'd2) ? (q0off[2:0] + second_offset[2:0]) : + q0off[2:0]; + + + assign q0off_in[2:0] = (qwen[0]) ? ifu_fetch_pc[3:1] : q0off_eff[2:0]; + + + rvdff #(3) q0offsetff (.*, .clk(active_clk), .din(q0off_in[2:0]), .dout(q0off[2:0])); + + assign q0ptr[2:0] = (({3{rdptr[1:0]==2'b00}} & q0off[2:0]) | + ({3{rdptr[1:0]==2'b01}} & q1off[2:0]) | + ({3{rdptr[1:0]==2'b10}} & q2off[2:0])); + + assign q1ptr[2:0] = (({3{rdptr[1:0]==2'b00}} & q1off[2:0]) | + ({3{rdptr[1:0]==2'b01}} & q2off[2:0]) | + ({3{rdptr[1:0]==2'b10}} & q0off[2:0])); + + assign q0sel[7:0] = { q0ptr[2:0]==3'b111, + q0ptr[2:0]==3'b110, + q0ptr[2:0]==3'b101, + q0ptr[2:0]==3'b100, + q0ptr[2:0]==3'b011, + q0ptr[2:0]==3'b010, + q0ptr[2:0]==3'b001, + q0ptr[2:0]==3'b000 + }; + + assign q1sel[7:0] = { q1ptr[2:0]==3'b111, + q1ptr[2:0]==3'b110, + q1ptr[2:0]==3'b101, + q1ptr[2:0]==3'b100, + q1ptr[2:0]==3'b011, + q1ptr[2:0]==3'b010, + q1ptr[2:0]==3'b001, + q1ptr[2:0]==3'b000 + }; + + // end new queue control logic + + + // misc data that is associated with each fetch buffer + + localparam MHI = 47+`RV_BHT_GHR_SIZE; + localparam MSIZE = 48+`RV_BHT_GHR_SIZE; + + logic [MHI:0] misc_data_in, misc2, misc1, misc0; + logic [MHI:0] misc1eff, misc0eff; + + assign misc_data_in[MHI:0] = { iccm_rd_ecc_double_err, + iccm_rd_ecc_single_err, + ifu_icache_fetch_f2, + ic_rd_parity_final_err, + ic_access_fault_f2, + ifu_bp_btb_target_f2[31:1], + ifu_bp_poffset_f2[11:0], + ifu_bp_fghr_f2[`RV_BHT_GHR_RANGE] + }; + + rvdffe #(MSIZE) misc2ff (.*, .en(qwen[2]), .din(misc_data_in[MHI:0]), .dout(misc2[MHI:0])); + rvdffe #(MSIZE) misc1ff (.*, .en(qwen[1]), .din(misc_data_in[MHI:0]), .dout(misc1[MHI:0])); + rvdffe #(MSIZE) misc0ff (.*, 
.en(qwen[0]), .din(misc_data_in[MHI:0]), .dout(misc0[MHI:0])); + + + assign {misc1eff[MHI:0],misc0eff[MHI:0]} = (({MSIZE*2{qren[0]}} & {misc1[MHI:0],misc0[MHI:0]}) | + ({MSIZE*2{qren[1]}} & {misc2[MHI:0],misc1[MHI:0]}) | + ({MSIZE*2{qren[2]}} & {misc0[MHI:0],misc2[MHI:0]})); + assign { f1dbecc, + f1sbecc, + f1icfetch, + f1perr, + f1icaf, + f1prett[31:1], + f1poffset[11:0], + f1fghr[`RV_BHT_GHR_RANGE] + } = misc1eff[MHI:0]; + + assign { f0dbecc, + f0sbecc, + f0icfetch, + f0perr, + f0icaf, + f0prett[31:1], + f0poffset[11:0], + f0fghr[`RV_BHT_GHR_RANGE] + } = misc0eff[MHI:0]; + + +`ifdef RV_BTB_48 + localparam BRDATA_SIZE=56; + localparam BRDATA_WIDTH = 7; +`else + localparam BRDATA_SIZE=48; + localparam BRDATA_WIDTH = 6; +`endif + logic [BRDATA_SIZE-1:0] brdata_in, brdata2, brdata1, brdata0; + logic [BRDATA_SIZE-1:0] brdata1eff, brdata0eff; + logic [BRDATA_SIZE-1:0] brdata1final, brdata0final; + assign brdata_in[BRDATA_SIZE-1:0] = { + ifu_bp_hist1_f2[7],ifu_bp_hist0_f2[7],ifu_bp_pc4_f2[7],ifu_bp_way_f2[7],ifu_bp_valid_f2[7],ifu_bp_ret_f2[7], + ifu_bp_hist1_f2[6],ifu_bp_hist0_f2[6],ifu_bp_pc4_f2[6],ifu_bp_way_f2[6],ifu_bp_valid_f2[6],ifu_bp_ret_f2[6], + ifu_bp_hist1_f2[5],ifu_bp_hist0_f2[5],ifu_bp_pc4_f2[5],ifu_bp_way_f2[5],ifu_bp_valid_f2[5],ifu_bp_ret_f2[5], + ifu_bp_hist1_f2[4],ifu_bp_hist0_f2[4],ifu_bp_pc4_f2[4],ifu_bp_way_f2[4],ifu_bp_valid_f2[4],ifu_bp_ret_f2[4], + ifu_bp_hist1_f2[3],ifu_bp_hist0_f2[3],ifu_bp_pc4_f2[3],ifu_bp_way_f2[3],ifu_bp_valid_f2[3],ifu_bp_ret_f2[3], + ifu_bp_hist1_f2[2],ifu_bp_hist0_f2[2],ifu_bp_pc4_f2[2],ifu_bp_way_f2[2],ifu_bp_valid_f2[2],ifu_bp_ret_f2[2], + ifu_bp_hist1_f2[1],ifu_bp_hist0_f2[1],ifu_bp_pc4_f2[1],ifu_bp_way_f2[1],ifu_bp_valid_f2[1],ifu_bp_ret_f2[1], + ifu_bp_hist1_f2[0],ifu_bp_hist0_f2[0],ifu_bp_pc4_f2[0],ifu_bp_way_f2[0],ifu_bp_valid_f2[0],ifu_bp_ret_f2[0] + }; +// + rvdffe #(BRDATA_SIZE) brdata2ff (.*, .en(qwen[2]), .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata2[BRDATA_SIZE-1:0])); + rvdffe #(BRDATA_SIZE) brdata1ff 
(.*, .en(qwen[1]), .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata1[BRDATA_SIZE-1:0]));
+   rvdffe #(BRDATA_SIZE) brdata0ff (.*, .en(qwen[0]), .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata0[BRDATA_SIZE-1:0]));
+   // Rotate the three stored entries by the one-hot read pointer decode (qren):
+   // brdata0eff ends up unpacked into the f0* branch bits, brdata1eff into the f1* branch bits.
+   assign {brdata1eff[BRDATA_SIZE-1:0],brdata0eff[BRDATA_SIZE-1:0]} = (({BRDATA_SIZE*2{qren[0]}} & {brdata1[BRDATA_SIZE-1:0],brdata0[BRDATA_SIZE-1:0]}) |
+                                                                        ({BRDATA_SIZE*2{qren[1]}} & {brdata2[BRDATA_SIZE-1:0],brdata1[BRDATA_SIZE-1:0]}) |
+                                                                        ({BRDATA_SIZE*2{qren[2]}} & {brdata0[BRDATA_SIZE-1:0],brdata2[BRDATA_SIZE-1:0]}));
+   // Shift the selected entry down by q0ptr halfword records (BRDATA_WIDTH bits each), zero-filling the top; q0sel is the one-hot decode of q0ptr.
+   assign brdata0final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q0sel[0]}} & {                            brdata0eff[BRDATA_SIZE-1:0]}) |   // full width; the former [8*6-1:0*6] slice dropped bits 55:48 when RV_BTB_48 sets BRDATA_SIZE to 56
+                                           ({BRDATA_SIZE{q0sel[1]}} & {{1*BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:1*BRDATA_WIDTH]}) |
+                                           ({BRDATA_SIZE{q0sel[2]}} & {{2*BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:2*BRDATA_WIDTH]}) |
+                                           ({BRDATA_SIZE{q0sel[3]}} & {{3*BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:3*BRDATA_WIDTH]}) |
+                                           ({BRDATA_SIZE{q0sel[4]}} & {{4*BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:4*BRDATA_WIDTH]}) |
+                                           ({BRDATA_SIZE{q0sel[5]}} & {{5*BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:5*BRDATA_WIDTH]}) |
+                                           ({BRDATA_SIZE{q0sel[6]}} & {{6*BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:6*BRDATA_WIDTH]}) |
+                                           ({BRDATA_SIZE{q0sel[7]}} & {{7*BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:7*BRDATA_WIDTH]}));
+   // Same shift for the following entry, selected by q1sel (one-hot decode of q1ptr).
+   assign brdata1final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q1sel[0]}} & {                            brdata1eff[BRDATA_SIZE-1:0]}) |   // full width, matching the q1sel[1..7] legs in both BTB configurations
+                                           ({BRDATA_SIZE{q1sel[1]}} & {{1*BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:1*BRDATA_WIDTH]}) |
+                                           ({BRDATA_SIZE{q1sel[2]}} & {{2*BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:2*BRDATA_WIDTH]}) |
+                                           ({BRDATA_SIZE{q1sel[3]}} & {{3*BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:3*BRDATA_WIDTH]}) |
+                                           ({BRDATA_SIZE{q1sel[4]}} & {{4*BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:4*BRDATA_WIDTH]}) |
+                                           ({BRDATA_SIZE{q1sel[5]}} & {{5*BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:5*BRDATA_WIDTH]}) |
+                                           ({BRDATA_SIZE{q1sel[6]}} &
{{6*BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:6*BRDATA_WIDTH]}) | + ({BRDATA_SIZE{q1sel[7]}} & {{7*BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:7*BRDATA_WIDTH]})); + + assign { + f0hist1[7],f0hist0[7],f0pc4[7],f0way[7],f0brend[7],f0ret[7], + f0hist1[6],f0hist0[6],f0pc4[6],f0way[6],f0brend[6],f0ret[6], + f0hist1[5],f0hist0[5],f0pc4[5],f0way[5],f0brend[5],f0ret[5], + f0hist1[4],f0hist0[4],f0pc4[4],f0way[4],f0brend[4],f0ret[4], + f0hist1[3],f0hist0[3],f0pc4[3],f0way[3],f0brend[3],f0ret[3], + f0hist1[2],f0hist0[2],f0pc4[2],f0way[2],f0brend[2],f0ret[2], + f0hist1[1],f0hist0[1],f0pc4[1],f0way[1],f0brend[1],f0ret[1], + f0hist1[0],f0hist0[0],f0pc4[0],f0way[0],f0brend[0],f0ret[0] + } = brdata0final[BRDATA_SIZE-1:0]; + + assign { + f1hist1[7],f1hist0[7],f1pc4[7],f1way[7],f1brend[7],f1ret[7], + f1hist1[6],f1hist0[6],f1pc4[6],f1way[6],f1brend[6],f1ret[6], + f1hist1[5],f1hist0[5],f1pc4[5],f1way[5],f1brend[5],f1ret[5], + f1hist1[4],f1hist0[4],f1pc4[4],f1way[4],f1brend[4],f1ret[4], + f1hist1[3],f1hist0[3],f1pc4[3],f1way[3],f1brend[3],f1ret[3], + f1hist1[2],f1hist0[2],f1pc4[2],f1way[2],f1brend[2],f1ret[2], + f1hist1[1],f1hist0[1],f1pc4[1],f1way[1],f1brend[1],f1ret[1], + f1hist1[0],f1hist0[0],f1pc4[0],f1way[0],f1brend[0],f1ret[0] + } = brdata1final[BRDATA_SIZE-1:0]; + + + // possible states of { sf0_valid, sf1_valid, f2_valid } + + // 000 if->f0 + + // 100 if->f1 + + // 101 illegal + + // 010 f1->f0, if->f1 + + // 110 if->f2 + + // 001 if->f1, f2->f0 + + // 011 f1->f0, f2->f1, if->f2 + + // 111 !if, no shift + + assign f2_valid = f2val[0]; + + assign sf1_valid = sf1val[0]; + + assign sf0_valid = sf0val[0]; + + // interface to fetch + + assign consume_fb0 = ~sf0val[0] & f0val[0]; + + assign consume_fb1 = ~sf1val[0] & f1val[0]; + + assign ifu_fb_consume1 = consume_fb0 & ~consume_fb1 & ~exu_flush_final; + + assign ifu_fb_consume2 = consume_fb0 & consume_fb1 & ~exu_flush_final; + + assign ifvalid = ifu_fetch_val[0]; + + assign shift_f1_f0 = ~sf0_valid & sf1_valid; + + assign 
shift_f2_f0 = ~sf0_valid & ~sf1_valid & f2_valid; + + assign shift_f2_f1 = ~sf0_valid & sf1_valid & f2_valid; + + assign fetch_to_f0 = ~sf0_valid & ~sf1_valid & ~f2_valid & ifvalid; + + assign fetch_to_f1 = (~sf0_valid & ~sf1_valid & f2_valid & ifvalid) | + (~sf0_valid & sf1_valid & ~f2_valid & ifvalid) | + ( sf0_valid & ~sf1_valid & ~f2_valid & ifvalid); + + assign fetch_to_f2 = (~sf0_valid & sf1_valid & f2_valid & ifvalid) | + ( sf0_valid & sf1_valid & ~f2_valid & ifvalid); + + // f0 valid states + // + // 11111111 + // 11111110 + // 11111100 + // 11111000 + // 11110000 + + // 11100000 + // 11000000 + // 10000000 + // 00000000 + + + + // make this two incrementors with some logic on the lower bits + + assign f0pc_plus1[31:1] = f0pc[31:1] + 31'd1; + assign f0pc_plus2[31:1] = f0pc[31:1] + 31'd2; + assign f0pc_plus3[31:1] = f0pc[31:1] + 31'd3; + assign f0pc_plus4[31:1] = f0pc[31:1] + 31'd4; + + assign f1pc_plus1[31:1] = f1pc[31:1] + 31'd1; + assign f1pc_plus2[31:1] = f1pc[31:1] + 31'd2; + assign f1pc_plus3[31:1] = f1pc[31:1] + 31'd3; + + assign f2pc_in[31:1] = ifu_fetch_pc[31:1]; + + rvdffe #(31) f2pcff (.*, .en(f2_wr_en), .din(f2pc_in[31:1]), .dout(f2pc[31:1])); + + assign sf1pc[31:1] = ({31{f1_shift_2B}} & (f1pc_plus1[31:1])) | + ({31{f1_shift_4B}} & (f1pc_plus2[31:1])) | + ({31{f1_shift_6B}} & (f1pc_plus3[31:1])) | + ({31{~f1_shift_2B&~f1_shift_4B&~f1_shift_6B}} & f1pc[31:1]); + + assign f1pc_in[31:1] = ({31{fetch_to_f1}} & ifu_fetch_pc[31:1]) | + ({31{shift_f2_f1}} & f2pc[31:1]) | + ({31{~fetch_to_f1&~shift_f2_f1}} & sf1pc[31:1]); + + rvdffe #(31) f1pcff (.*, .en(f1_shift_wr_en), .din(f1pc_in[31:1]), .dout(f1pc[31:1])); + + assign sf0pc[31:1] = ({31{shift_2B}} & (f0pc_plus1[31:1])) | + ({31{shift_4B}} & (f0pc_plus2[31:1])) | + ({31{shift_6B}} & (f0pc_plus3[31:1])) | + ({31{shift_8B}} & (f0pc_plus4[31:1])); + + assign f0pc_in[31:1] = ({31{fetch_to_f0}} & ifu_fetch_pc[31:1]) | + ({31{shift_f2_f0}} & f2pc[31:1]) | + ({31{shift_f1_f0}} & sf1pc[31:1]) | + 
({31{~fetch_to_f0&~shift_f2_f0&~shift_f1_f0}} & sf0pc[31:1]); + + rvdffe #(31) f0pcff (.*, .en(f0_shift_wr_en), .din(f0pc_in[31:1]), .dout(f0pc[31:1])); + + // on flush_final all valids go to 0 + + // no clock-gating on the valids + + assign f2val_in[7:0] = (({8{fetch_to_f2}} & ifu_fetch_val[7:0]) | + ({8{~fetch_to_f2&~shift_f2_f1&~shift_f2_f0}} & f2val[7:0])) & ~{8{exu_flush_final}}; + + rvdff #(8) f2valff (.*, .clk(active_clk), .din(f2val_in[7:0]), .dout(f2val[7:0])); + + assign sf1val[7:0] = ({8{f1_shift_2B}} & {1'b0,f1val[7:1]}) | + ({8{f1_shift_4B}} & {2'b0,f1val[7:2]}) | + ({8{f1_shift_6B}} & {3'b0,f1val[7:3]}) | + ({8{~f1_shift_2B&~f1_shift_4B&~f1_shift_6B}} & f1val[7:0]); + + assign f1val_in[7:0] = (({8{fetch_to_f1}} & ifu_fetch_val[7:0]) | + ({8{shift_f2_f1}} & f2val[7:0]) | + ({8{~fetch_to_f1&~shift_f2_f1&~shift_f1_f0}} & sf1val[7:0])) & ~{8{exu_flush_final}}; + + rvdff #(8) f1valff (.*, .clk(active_clk), .din(f1val_in[7:0]), .dout(f1val[7:0])); + + + assign sf0val[7:0] = ({8{shift_2B}} & {1'b0,f0val[7:1]}) | + ({8{shift_4B}} & {2'b0,f0val[7:2]}) | + ({8{shift_6B}} & {3'b0,f0val[7:3]}) | + ({8{shift_8B}} & {4'b0,f0val[7:4]}) | + ({8{~shift_2B&~shift_4B&~shift_6B&~shift_8B}} & f0val[7:0]); + + assign f0val_in[7:0] = (({8{fetch_to_f0}} & ifu_fetch_val[7:0]) | + ({8{shift_f2_f0}} & f2val[7:0]) | + ({8{shift_f1_f0}} & sf1val[7:0]) | + ({8{~fetch_to_f0&~shift_f2_f0&~shift_f1_f0}} & sf0val[7:0])) & ~{8{exu_flush_final}}; + + rvdff #(8) f0valff (.*, .clk(active_clk), .din(f0val_in[7:0]), .dout(f0val[7:0])); + +// parity + +`ifdef RV_ICACHE_ECC + rvdffe #(40) q2eccff (.*, .en(qwen[2]), .din(ic_error_f2.ecc[39:0]), .dout(q2ecc[39:0])); + rvdffe #(40) q1eccff (.*, .en(qwen[1]), .din(ic_error_f2.ecc[39:0]), .dout(q1ecc[39:0])); + rvdffe #(40) q0eccff (.*, .en(qwen[0]), .din(ic_error_f2.ecc[39:0]), .dout(q0ecc[39:0])); + + + assign {q1ecceff[39:0],q0ecceff[39:0]} = (({80{qren[0]}} & {q1ecc[39:0],q0ecc[39:0]}) | + ({80{qren[1]}} & {q2ecc[39:0],q1ecc[39:0]}) | + 
({80{qren[2]}} & {q0ecc[39:0],q2ecc[39:0]})); + + assign q0eccfinal[39:0] = (({40{q0sel[0]}} & { q0ecceff[8*5-1:0*5]}) | + ({40{q0sel[1]}} & { 5'b0,q0ecceff[8*5-1:1*5]}) | + ({40{q0sel[2]}} & {10'b0,q0ecceff[8*5-1:2*5]}) | + ({40{q0sel[3]}} & {15'b0,q0ecceff[8*5-1:3*5]}) | + ({40{q0sel[4]}} & {20'b0,q0ecceff[8*5-1:4*5]}) | + ({40{q0sel[5]}} & {25'b0,q0ecceff[8*5-1:5*5]}) | + ({40{q0sel[6]}} & {30'b0,q0ecceff[8*5-1:6*5]}) | + ({40{q0sel[7]}} & {35'b0,q0ecceff[8*5-1:7*5]})); + + assign q1eccfinal[39:0] = (({40{q1sel[0]}} & { q1ecceff[8*5-1:0*5]}) | + ({40{q1sel[1]}} & { 5'b0,q1ecceff[8*5-1:1*5]}) | + ({40{q1sel[2]}} & {10'b0,q1ecceff[8*5-1:2*5]}) | + ({40{q1sel[3]}} & {15'b0,q1ecceff[8*5-1:3*5]}) | + ({40{q1sel[4]}} & {20'b0,q1ecceff[8*5-1:4*5]}) | + ({40{q1sel[5]}} & {25'b0,q1ecceff[8*5-1:5*5]}) | + ({40{q1sel[6]}} & {30'b0,q1ecceff[8*5-1:6*5]}) | + ({40{q1sel[7]}} & {35'b0,q1ecceff[8*5-1:7*5]})); + +`else + rvdffe #(8) q2parityff (.*, .en(qwen[2]), .din(ic_error_f2.parity[7:0]), .dout(q2parity[7:0])); + rvdffe #(8) q1parityff (.*, .en(qwen[1]), .din(ic_error_f2.parity[7:0]), .dout(q1parity[7:0])); + rvdffe #(8) q0parityff (.*, .en(qwen[0]), .din(ic_error_f2.parity[7:0]), .dout(q0parity[7:0])); + + + assign {q1parityeff[7:0],q0parityeff[7:0]} = (({16{qren[0]}} & {q1parity[7:0],q0parity[7:0]}) | + ({16{qren[1]}} & {q2parity[7:0],q1parity[7:0]}) | + ({16{qren[2]}} & {q0parity[7:0],q2parity[7:0]})); + + assign q0parityfinal[7:0] = (({8{q0sel[0]}} & { q0parityeff[7:0]}) | + ({8{q0sel[1]}} & {1'b0,q0parityeff[7:1]}) | + ({8{q0sel[2]}} & {2'b0,q0parityeff[7:2]}) | + ({8{q0sel[3]}} & {3'b0,q0parityeff[7:3]}) | + ({8{q0sel[4]}} & {4'b0,q0parityeff[7:4]}) | + ({8{q0sel[5]}} & {5'b0,q0parityeff[7:5]}) | + ({8{q0sel[6]}} & {6'b0,q0parityeff[7:6]}) | + ({8{q0sel[7]}} & {7'b0,q0parityeff[7]})); + + assign q1parityfinal[7:0] = (({8{q1sel[0]}} & { q1parityeff[7:0]}) | + ({8{q1sel[1]}} & {1'b0,q1parityeff[7:1]}) | + ({8{q1sel[2]}} & {2'b0,q1parityeff[7:2]}) | + ({8{q1sel[3]}} & 
{3'b0,q1parityeff[7:3]}) | + ({8{q1sel[4]}} & {4'b0,q1parityeff[7:4]}) | + ({8{q1sel[5]}} & {5'b0,q1parityeff[7:5]}) | + ({8{q1sel[6]}} & {6'b0,q1parityeff[7:6]}) | + ({8{q1sel[7]}} & {7'b0,q1parityeff[7]})); +`endif // !`ifdef RV_ICACHE_ECC + + rvdffe #(128) q2ff (.*, .en(qwen[2]), .din(ifu_fetch_data[127:0]), .dout(q2[127:0])); + rvdffe #(128) q1ff (.*, .en(qwen[1]), .din(ifu_fetch_data[127:0]), .dout(q1[127:0])); + rvdffe #(128) q0ff (.*, .en(qwen[0]), .din(ifu_fetch_data[127:0]), .dout(q0[127:0])); + + + assign {q1eff[127:0],q0eff[127:0]} = (({256{qren[0]}} & {q1[127:0],q0[127:0]}) | + ({256{qren[1]}} & {q2[127:0],q1[127:0]}) | + ({256{qren[2]}} & {q0[127:0],q2[127:0]})); + + assign q0final[127:0] = (({128{q0sel[0]}} & { q0eff[8*16-1:16*0]}) | + ({128{q0sel[1]}} & {{16*1{1'b0}},q0eff[8*16-1:16*1]}) | + ({128{q0sel[2]}} & {{16*2{1'b0}},q0eff[8*16-1:16*2]}) | + ({128{q0sel[3]}} & {{16*3{1'b0}},q0eff[8*16-1:16*3]}) | + ({128{q0sel[4]}} & {{16*4{1'b0}},q0eff[8*16-1:16*4]}) | + ({128{q0sel[5]}} & {{16*5{1'b0}},q0eff[8*16-1:16*5]}) | + ({128{q0sel[6]}} & {{16*6{1'b0}},q0eff[8*16-1:16*6]}) | + ({128{q0sel[7]}} & {{16*7{1'b0}},q0eff[8*16-1:16*7]})); + + assign q1final[127:0] = (({128{q1sel[0]}} & { q1eff[8*16-1:16*0]}) | + ({128{q1sel[1]}} & {{16*1{1'b0}},q1eff[8*16-1:16*1]}) | + ({128{q1sel[2]}} & {{16*2{1'b0}},q1eff[8*16-1:16*2]}) | + ({128{q1sel[3]}} & {{16*3{1'b0}},q1eff[8*16-1:16*3]}) | + ({128{q1sel[4]}} & {{16*4{1'b0}},q1eff[8*16-1:16*4]}) | + ({128{q1sel[5]}} & {{16*5{1'b0}},q1eff[8*16-1:16*5]}) | + ({128{q1sel[6]}} & {{16*6{1'b0}},q1eff[8*16-1:16*6]}) | + ({128{q1sel[7]}} & {{16*7{1'b0}},q1eff[8*16-1:16*7]})); + + + assign aligndata[63:0] = ({64{(f0val[3])}} & {q0final[4*16-1:0]}) | + ({64{(f0val[2]&~f0val[3])}} & {q1final[1*16-1:0],q0final[3*16-1:0]}) | + ({64{(f0val[1]&~f0val[2])}} & {q1final[2*16-1:0],q0final[2*16-1:0]}) | + ({64{(f0val[0]&~f0val[1])}} & {q1final[3*16-1:0],q0final[1*16-1:0]}); + + assign alignval[3:0] = ({4{(f0val[3])}} & 4'b1111) | + 
({4{(f0val[2]&~f0val[3])}} & {f1val[0],3'b111}) | + ({4{(f0val[1]&~f0val[2])}} & {f1val[1:0],2'b11}) | + ({4{(f0val[0]&~f0val[1])}} & {f1val[2:0],1'b1}); + + assign alignicaf[3:0] = ({4{(f0val[3])}} & {4{f0icaf}}) | + ({4{(f0val[2]&~f0val[3])}} & {{1{f1icaf}},{3{f0icaf}}}) | + ({4{(f0val[1]&~f0val[2])}} & {{2{f1icaf}},{2{f0icaf}}}) | + ({4{(f0val[0]&~f0val[1])}} & {{3{f1icaf}},{1{f0icaf}}}); + + + assign alignsbecc[3:0] = ({4{(f0val[3])}} & {4{f0sbecc}}) | + ({4{(f0val[2]&~f0val[3])}} & {{1{f1sbecc}},{3{f0sbecc}}}) | + ({4{(f0val[1]&~f0val[2])}} & {{2{f1sbecc}},{2{f0sbecc}}}) | + ({4{(f0val[0]&~f0val[1])}} & {{3{f1sbecc}},{1{f0sbecc}}}); + + + assign aligndbecc[3:0] = ({4{(f0val[3])}} & {4{f0dbecc}}) | + ({4{(f0val[2]&~f0val[3])}} & {{1{f1dbecc}},{3{f0dbecc}}}) | + ({4{(f0val[1]&~f0val[2])}} & {{2{f1dbecc}},{2{f0dbecc}}}) | + ({4{(f0val[0]&~f0val[1])}} & {{3{f1dbecc}},{1{f0dbecc}}}); + + // for branch prediction + assign alignbrend[3:0] = ({4{(f0val[3])}} & f0brend[3:0]) | + ({4{(f0val[2]&~f0val[3])}} & {f1brend[0],f0brend[2:0]}) | + ({4{(f0val[1]&~f0val[2])}} & {f1brend[1:0],f0brend[1:0]}) | + ({4{(f0val[0]&~f0val[1])}} & {f1brend[2:0],f0brend[0]}); + + assign alignpc4[3:0] = ({4{(f0val[3])}} & f0pc4[3:0]) | + ({4{(f0val[2]&~f0val[3])}} & {f1pc4[0],f0pc4[2:0]}) | + ({4{(f0val[1]&~f0val[2])}} & {f1pc4[1:0],f0pc4[1:0]}) | + ({4{(f0val[0]&~f0val[1])}} & {f1pc4[2:0],f0pc4[0]}); + +`ifdef RV_ICACHE_ECC + assign alignecc[19:0] = ({20{(f0val[3])}} & q0eccfinal[19:0]) | + ({20{(f0val[2]&~f0val[3])}} & {q1eccfinal[4:0], q0eccfinal[14:0]}) | + ({20{(f0val[1]&~f0val[2])}} & {q1eccfinal[9:0], q0eccfinal[9:0]}) | + ({20{(f0val[0]&~f0val[1])}} & {q1eccfinal[14:0],q0eccfinal[4:0]}); +`else + assign alignparity[3:0] = ({4{(f0val[3])}} & q0parityfinal[3:0]) | + ({4{(f0val[2]&~f0val[3])}} & {q1parityfinal[0], q0parityfinal[2:0]}) | + ({4{(f0val[1]&~f0val[2])}} & {q1parityfinal[1:0],q0parityfinal[1:0]}) | + ({4{(f0val[0]&~f0val[1])}} & {q1parityfinal[2:0],q0parityfinal[0]}); +`endif 
+ + assign aligntagperr[3:0] = ({4{(f0val[3])}} & {4{f0perr}}) | + ({4{(f0val[2]&~f0val[3])}} & {{1{f1perr}},{3{f0perr}}}) | + ({4{(f0val[1]&~f0val[2])}} & {{2{f1perr}},{2{f0perr}}}) | + ({4{(f0val[0]&~f0val[1])}} & {{3{f1perr}},{1{f0perr}}}); + + assign alignicfetch[3:0] = ({4{(f0val[3])}} & {4{f0icfetch}}) | + ({4{(f0val[2]&~f0val[3])}} & {{1{f1icfetch}},{3{f0icfetch}}}) | + ({4{(f0val[1]&~f0val[2])}} & {{2{f1icfetch}},{2{f0icfetch}}}) | + ({4{(f0val[0]&~f0val[1])}} & {{3{f1icfetch}},{1{f0icfetch}}}); + + + assign alignret[3:0] = ({4{(f0val[3])}} & f0ret[3:0]) | + ({4{(f0val[2]&~f0val[3])}} & {f1ret[0],f0ret[2:0]}) | + ({4{(f0val[1]&~f0val[2])}} & {f1ret[1:0],f0ret[1:0]}) | + ({4{(f0val[0]&~f0val[1])}} & {f1ret[2:0],f0ret[0]}); + +`ifdef RV_BTB_48 + + logic [3:0] f0way_b0, f0way_b1, alignway_b0, alignway_b1; + logic [2:0] f1way_b0, f1way_b1; + assign f0way_b0[3:0] = {f0way[3][0], f0way[2][0], f0way[1][0], f0way[0][0]}; + assign f0way_b1[3:0] = {f0way[3][1], f0way[2][1], f0way[1][1], f0way[0][1]}; + assign f1way_b0[2:0] = {f1way[2][0], f1way[1][0], f1way[0][0]}; + assign f1way_b1[2:0] = {f1way[2][1], f1way[1][1], f1way[0][1]}; + + assign alignway_b0[3:0] = ({4{(f0val[3])}} & f0way_b0[3:0]) | + ({4{(f0val[2]&~f0val[3])}} & {f1way_b0[0], f0way_b0[2:0]}) | + ({4{(f0val[1]&~f0val[2])}} & {f1way_b0[1:0],f0way_b0[1:0]}) | + ({4{(f0val[0]&~f0val[1])}} & {f1way_b0[2:0],f0way_b0[0]}); + assign alignway_b1[3:0] = ({4{(f0val[3])}} & f0way_b1[3:0]) | + ({4{(f0val[2]&~f0val[3])}} & {f1way_b1[0], f0way_b1[2:0]}) | + ({4{(f0val[1]&~f0val[2])}} & {f1way_b1[1:0],f0way_b1[1:0]}) | + ({4{(f0val[0]&~f0val[1])}} & {f1way_b1[2:0],f0way_b1[0]}); +`else + assign alignway[3:0] = ({4{(f0val[3])}} & f0way[3:0]) | + ({4{(f0val[2]&~f0val[3])}} & {f1way[0],f0way[2:0]}) | + ({4{(f0val[1]&~f0val[2])}} & {f1way[1:0],f0way[1:0]}) | + ({4{(f0val[0]&~f0val[1])}} & {f1way[2:0],f0way[0]}); +`endif + assign alignhist1[3:0] = ({4{(f0val[3])}} & f0hist1[3:0]) | + ({4{(f0val[2]&~f0val[3])}} & 
{f1hist1[0],f0hist1[2:0]}) | + ({4{(f0val[1]&~f0val[2])}} & {f1hist1[1:0],f0hist1[1:0]}) | + ({4{(f0val[0]&~f0val[1])}} & {f1hist1[2:0],f0hist1[0]}); + + assign alignhist0[3:0] = ({4{(f0val[3])}} & f0hist0[3:0]) | + ({4{(f0val[2]&~f0val[3])}} & {f1hist0[0],f0hist0[2:0]}) | + ({4{(f0val[1]&~f0val[2])}} & {f1hist0[1:0],f0hist0[1:0]}) | + ({4{(f0val[0]&~f0val[1])}} & {f1hist0[2:0],f0hist0[0]}); + + assign alignfromf1[3:1] = ({3{(f0val[3])}} & 3'b0) | + ({3{(f0val[2]&~f0val[3])}} & {1'b1,2'b0}) | + ({3{(f0val[1]&~f0val[2])}} & {2'b11,1'b0}) | + ({3{(f0val[0]&~f0val[1])}} & {3'b111}); + + + assign { secondpc[31:1], + thirdpc[31:1], + fourthpc[31:1] } = ({3*31{(f0val[3])}} & {f0pc_plus1[31:1], f0pc_plus2[31:1], f0pc_plus3[31:1]}) | + ({3*31{(f0val[2]&~f0val[3])}} & {f0pc_plus1[31:1], f0pc_plus2[31:1], f1pc[31:1]}) | + ({3*31{(f0val[1]&~f0val[2])}} & {f0pc_plus1[31:1], f1pc[31:1], f1pc_plus1[31:1]}) | + ({3*31{(f0val[0]&~f0val[1])}} & {f1pc[31:1], f1pc_plus1[31:1], f1pc_plus2[31:1]}); + + + assign ifu_i0_pc[31:1] = f0pc[31:1]; + + assign firstpc[31:1] = f0pc[31:1]; + + assign ifu_i1_pc[31:1] = (first2B) ? 
secondpc[31:1] : thirdpc[31:1]; + + + assign ifu_i0_pc4 = first4B; + + assign ifu_i1_pc4 = (first2B & second4B) | + (first4B & third4B); + + // parity checking + +`ifdef RV_ICACHE_ECC + + logic [3:0] [31:0] ic_corrected_data_nc; + logic [3:0] [6:0] ic_corrected_ecc_nc; + logic [3:0] ic_single_ecc_error; + logic [3:0] ic_double_ecc_error; + logic [3:0] aligneccerr; + + for (genvar i=0; i < 4 ; i++) begin : ic_ecc_error + rvecc_decode ecc_decode ( + .en(~dec_tlu_core_ecc_disable), + .sed_ded(1'b1), + .din({16'b0, aligndata[16*(i+1)-1: (16*i)]}), + //.ecc_in({alignecc[(i*6)+5], 1'b0, alignecc[(i*6)+4:(i*6)]}), + .ecc_in({2'b0, alignecc[(i*5)+4:(i*5)]}), + .dout(ic_corrected_data_nc[i][31:0]), + .ecc_out(ic_corrected_ecc_nc[i][6:0]), + .single_ecc_error(ic_single_ecc_error[i]), + .double_ecc_error(ic_double_ecc_error[i])); + + // or the sb and db error detects into 1 signal called aligndataperr[i] where i corresponds to 2B position + assign aligneccerr[i] = ic_single_ecc_error[i] | ic_double_ecc_error[i]; + assign aligndataperr[i] = aligneccerr[i] ; + end // block: ic_ecc_error + +`else // !`ifdef RV_ICACHE_ECC + + for (genvar i=0; i<4 ; i++) begin : ic_par_error + rveven_paritycheck pchk ( + .data_in(aligndata[16*(i+1)-1: 16*i]), + .parity_in(alignparity[i]), + .parity_err(aligndataperr[i]) + ); + end + + `endif // !`ifdef RV_ICACHE_ECC + + + // logic for trace + assign ifu_i0_cinst[15:0] = aligndata[15:0]; + assign ifu_i1_cinst[15:0] = (first4B) ? 
aligndata[47:32] : aligndata[31:16];
+   // end trace
+
+   // check on 16-bit (2B) slot boundaries
+   // low two bits of a slot == 2'b11 marks a 4B instruction starting there; anything else is taken as 2B
+   assign first4B = aligndata[16*0+1:16*0] == 2'b11;
+   assign first2B = ~first4B;
+
+   assign second4B = aligndata[16*1+1:16*1] == 2'b11;
+   assign second2B = ~second4B;
+
+   assign third4B = aligndata[16*2+1:16*2] == 2'b11;
+   assign third2B = ~third4B;
+   // an instruction is valid when the slot holding its last halfword is valid; forced low by exu_flush_final
+   assign ifu_i0_valid = ((first4B & alignval[1]) |
+                          (first2B & alignval[0])) & ~exu_flush_final;
+
+   assign ifu_i1_valid = ((first4B & third4B & alignval[3]) |
+                          (first4B & third2B & alignval[2]) |
+                          (first2B & second4B & alignval[2]) |
+                          (first2B & second2B & alignval[1])) & ~exu_flush_final;
+
+   // inst access fault on any byte of inst results in access fault for the inst
+   assign ifu_i0_icaf = ((first4B & (|alignicaf[1:0])) |
+                         (first2B & alignicaf[0])) & ~exu_flush_final;
+
+   // icaf_eff: access fault or double-bit ECC error, slots 1..3 only
+   // the *_icaf_f1 outputs flag a 4B instruction whose first faulting halfword was supplied by f1
+   assign icaf_eff[3:1] = alignicaf[3:1] | aligndbecc[3:1];
+
+   assign ifu_i0_icaf_f1 = first4B & icaf_eff[1] & alignfromf1[1];
+
+   assign ifu_i1_icaf = ((first4B & third4B & (|alignicaf[3:2])) |
+                         (first4B & third2B & alignicaf[2]) |
+                         (first2B & second4B & (|alignicaf[2:1])) |
+                         (first2B & second2B & alignicaf[1])) & ~exu_flush_final;
+
+   assign ifu_i1_icaf_f1 = (first4B & third4B & icaf_eff[2] & alignfromf1[2]) |
+                           (first4B & third4B & icaf_eff[3] & alignfromf1[3] & ~icaf_eff[2]) |
+                           (first2B & second4B & icaf_eff[1] & alignfromf1[1]) |
+                           (first2B & second4B & icaf_eff[2] & alignfromf1[2] & ~icaf_eff[1]);
+
+   // inst parity error on any byte of inst results in parity error for the inst
+
+   // a slot reports an error only when its tag or data check failed and alignicfetch is set for that slot
+   assign alignfinalperr[3:0] = (aligntagperr[3:0] | aligndataperr[3:0]) & alignicfetch[3:0];
+
+   assign ifu_i0_perr = ((first4B & (|alignfinalperr[1:0])) |
+                         (first2B & alignfinalperr[0])) & ~exu_flush_final;
+
+   assign ifu_i1_perr = ((first4B & third4B & (|alignfinalperr[3:2])) |
+                         (first4B & third2B & alignfinalperr[2]) |
+                         (first2B & second4B & (|alignfinalperr[2:1])) |
+                         (first2B & second2B & alignfinalperr[1])) & ~exu_flush_final;
+
+   assign ifu_i0_sbecc =
((first4B & (|alignsbecc[1:0])) | + (first2B & alignsbecc[0])) & ~exu_flush_final; + + assign ifu_i1_sbecc = ((first4B & third4B & (|alignsbecc[3:2])) | + (first4B & third2B & alignsbecc[2]) | + (first2B & second4B & (|alignsbecc[2:1])) | + (first2B & second2B & alignsbecc[1])) & ~exu_flush_final; + + assign ifu_i0_dbecc = ((first4B & (|aligndbecc[1:0])) | + (first2B & aligndbecc[0])) & ~exu_flush_final; + + assign ifu_i1_dbecc = ((first4B & third4B & (|aligndbecc[3:2])) | + (first4B & third2B & aligndbecc[2]) | + (first2B & second4B & (|aligndbecc[2:1])) | + (first2B & second2B & aligndbecc[1])) & ~exu_flush_final; + + // send index information to the icache on a parity or single-bit ecc error + // parity error is orthogonal to single-bit ecc error; icache vs iccm + + logic [2:0] alignicerr; + + assign alignicerr[2:0] = alignfinalperr[2:0] | alignsbecc[2:0]; + + assign ifu_icache_error_index[16:2] = (alignicerr[0]) ? firstpc[16:2] : + (alignicerr[1]) ? secondpc[16:2] : + (alignicerr[2]) ? 
thirdpc[16:2] : + fourthpc[16:2]; + + assign ifu_icache_error_val = (i0_shift & ifu_i0_perr) | + (i1_shift & ifu_i1_perr & ~ifu_i0_sbecc); + + assign ifu_icache_sb_error_val = (i0_shift & ifu_i0_sbecc) | + (i1_shift & ifu_i1_sbecc & ~ifu_i0_perr); + +`ifdef ASSERT_ON + assert_ifu_icache_parity_with_sbecc_error: assert #0 ($onehot0({ifu_icache_error_val,ifu_icache_sb_error_val})); +`endif + // big endian 4B instructions + + + assign ifirst[31:0] = aligndata[2*16-1:0*16]; + + assign isecond[31:0] = aligndata[3*16-1:1*16]; + + assign ithird[31:0] = aligndata[4*16-1:2*16]; + + + + assign ifu_i0_instr[31:0] = ({32{first4B}} & ifirst[31:0]) | + ({32{first2B}} & uncompress0[31:0]); + + + assign ifu_i1_instr[31:0] = ({32{first4B & third4B}} & ithird[31:0]) | + ({32{first4B & third2B}} & uncompress2[31:0]) | + ({32{first2B & second4B}} & isecond[31:0]) | + ({32{first2B & second2B}} & uncompress1[31:0]); + + // if you detect br does not start on instruction boundary + + rvbtb_addr_hash firsthash(.pc(firstpc[31:1]), .hash(firstpc_hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO])); + rvbtb_addr_hash secondhash(.pc(secondpc[31:1]), .hash(secondpc_hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO])); + rvbtb_addr_hash thirdhash(.pc(thirdpc[31:1]), .hash(thirdpc_hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO])); + rvbtb_addr_hash fourthhash(.pc(fourthpc[31:1]), .hash(fourthpc_hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO])); + + logic [`RV_BTB_BTAG_SIZE-1:0] firstbrtag_hash, secondbrtag_hash, thirdbrtag_hash, fourthbrtag_hash; + + rvbtb_tag_hash first_brhash(.pc(firstpc[31:1]), .hash(firstbrtag_hash[`RV_BTB_BTAG_SIZE-1:0])); + rvbtb_tag_hash second_brhash(.pc(secondpc[31:1]), .hash(secondbrtag_hash[`RV_BTB_BTAG_SIZE-1:0])); + rvbtb_tag_hash third_brhash(.pc(thirdpc[31:1]), .hash(thirdbrtag_hash[`RV_BTB_BTAG_SIZE-1:0])); + rvbtb_tag_hash fourth_brhash(.pc(fourthpc[31:1]), .hash(fourthbrtag_hash[`RV_BTB_BTAG_SIZE-1:0])); + + // start_indexing - you want pc to be based on where the end of branch is prediction + // normal 
indexing pc based that's incorrect now for pc4 cases it's pc4 + 2 + + always_comb begin + + i0_brp = '0; + + i0_br_start_error = (first4B & alignval[1] & alignbrend[0]); + + i0_brp.valid = (first2B & alignbrend[0]) | + (first4B & alignbrend[1]) | + i0_br_start_error; + + i0_brp_pc4 = (first2B & alignpc4[0]) | + (first4B & alignpc4[1]); + + i0_brp.ret = (first2B & alignret[0]) | + (first4B & alignret[1]); + +`ifdef RV_BTB_48 + i0_brp.way = (first2B | alignbrend[0]) ? {alignway_b1[0], alignway_b0[0]} : {alignway_b1[1], alignway_b0[1]}; +`else + i0_brp.way = (first2B | alignbrend[0]) ? alignway[0] : alignway[1]; +`endif + i0_brp.hist[1] = (first2B & alignhist1[0]) | + (first4B & alignhist1[1]); + + i0_brp.hist[0] = (first2B & alignhist0[0]) | + (first4B & alignhist0[1]); + + i0_ends_f1 = (first4B & alignfromf1[1]); + + i0_brp.toffset[11:0] = (i0_ends_f1) ? f1poffset[11:0] : f0poffset[11:0]; + + i0_brp.fghr[`RV_BHT_GHR_RANGE] = (i0_ends_f1) ? f1fghr[`RV_BHT_GHR_RANGE] : f0fghr[`RV_BHT_GHR_RANGE]; + + i0_brp.prett[31:1] = (i0_ends_f1) ? f1prett[31:1] : f0prett[31:1]; + + i0_brp.br_start_error = i0_br_start_error; + + i0_brp.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = (first2B | alignbrend[0]) ? firstpc_hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]: + secondpc_hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + + i0_brp.btag[`RV_BTB_BTAG_SIZE-1:0] = (first2B | alignbrend[0]) ? firstbrtag_hash[`RV_BTB_BTAG_SIZE-1:0]: + secondbrtag_hash[`RV_BTB_BTAG_SIZE-1:0]; + + i0_brp.bank[1:0] = (first2B | alignbrend[0]) ? 
firstpc[3:2] : + secondpc[3:2]; + + + i0_brp.br_error = (i0_brp.valid & i0_brp_pc4 & first2B) | + (i0_brp.valid & ~i0_brp_pc4 & first4B); + + i1_brp = '0; + + i1_br_start_error = (first2B & second4B & alignval[2] & alignbrend[1]) | + (first4B & third4B & alignval[3] & alignbrend[2]); + + i1_brp.valid = (first4B & third2B & alignbrend[2]) | + (first4B & third4B & alignbrend[3]) | + (first2B & second2B & alignbrend[1]) | + (first2B & second4B & alignbrend[2]) | + i1_br_start_error; + + i1_brp_pc4 = (first4B & third2B & alignpc4[2]) | + (first4B & third4B & alignpc4[3]) | + (first2B & second2B & alignpc4[1]) | + (first2B & second4B & alignpc4[2]); + + i1_brp.ret = (first4B & third2B & alignret[2]) | + (first4B & third4B & alignret[3]) | + (first2B & second2B & alignret[1]) | + (first2B & second4B & alignret[2]); +`ifdef RV_BTB_48 + i1_brp.way = ({2{first4B & third2B }} & {alignway_b1[2], alignway_b0[2]} ) | + ({2{first4B & third4B & alignbrend[2] }} & {alignway_b1[2], alignway_b0[2]} ) | + ({2{first4B & third4B & ~alignbrend[2] }} & {alignway_b1[3], alignway_b0[3]} ) | + ({2{first2B & second2B }} & {alignway_b1[1], alignway_b0[1]} ) | + ({2{first2B & second4B & alignbrend[1]}} & {alignway_b1[1], alignway_b0[1]} ) | + ({2{first2B & second4B & ~alignbrend[1]}} & {alignway_b1[2], alignway_b0[2]} ); +`else + i1_brp.way = (first4B & third2B & alignway[2] ) | + (first4B & third4B & alignbrend[2] & alignway[2] ) | + (first4B & third4B & ~alignbrend[2] & alignway[3] ) | + (first2B & second2B & alignway[1] ) | + (first2B & second4B & alignbrend[1] & alignway[1] ) | + (first2B & second4B & ~alignbrend[1] & alignway[2] ); +`endif + i1_brp.hist[1] = (first4B & third2B & alignhist1[2]) | + (first4B & third4B & alignhist1[3]) | + (first2B & second2B & alignhist1[1]) | + (first2B & second4B & alignhist1[2]); + + i1_brp.hist[0] = (first4B & third2B & alignhist0[2]) | + (first4B & third4B & alignhist0[3]) | + (first2B & second2B & alignhist0[1]) | + (first2B & second4B & 
alignhist0[2]); + + i1_ends_f1 = (first4B & third2B & alignfromf1[2]) | + (first4B & third4B & alignfromf1[3]) | + (first2B & second2B & alignfromf1[1]) | + (first2B & second4B & alignfromf1[2]); + + i1_brp.toffset[11:0] = (i1_ends_f1) ? f1poffset[11:0] : f0poffset[11:0]; + + i1_brp.fghr[`RV_BHT_GHR_RANGE] = (i1_ends_f1) ? f1fghr[`RV_BHT_GHR_RANGE] : f0fghr[`RV_BHT_GHR_RANGE]; + + i1_brp.prett[31:1] = (i1_ends_f1) ? f1prett[31:1] : f0prett[31:1]; + + i1_brp.br_start_error = i1_br_start_error; + +`define RV_BTB_RANGE `RV_BTB_ADDR_HI-`RV_BTB_ADDR_LO+1 + + i1_brp.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = ({`RV_BTB_RANGE{first4B & third2B }} & thirdpc_hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] ) | + ({`RV_BTB_RANGE{first4B & third4B & alignbrend[2] }} & thirdpc_hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] ) | + ({`RV_BTB_RANGE{first4B & third4B & ~alignbrend[2] }} & fourthpc_hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] ) | + ({`RV_BTB_RANGE{first2B & second2B}} & secondpc_hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] ) | + ({`RV_BTB_RANGE{first2B & second4B & alignbrend[1]}} & secondpc_hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] ) | + ({`RV_BTB_RANGE{first2B & second4B & ~alignbrend[1]}} & thirdpc_hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] ); + + i1_brp.btag[`RV_BTB_BTAG_SIZE-1:0] = ({`RV_BTB_BTAG_SIZE{first4B & third2B }} & thirdbrtag_hash[`RV_BTB_BTAG_SIZE-1:0] ) | + ({`RV_BTB_BTAG_SIZE{first4B & third4B & alignbrend[2] }} & thirdbrtag_hash[`RV_BTB_BTAG_SIZE-1:0] ) | + ({`RV_BTB_BTAG_SIZE{first4B & third4B & ~alignbrend[2] }} & fourthbrtag_hash[`RV_BTB_BTAG_SIZE-1:0] ) | + ({`RV_BTB_BTAG_SIZE{first2B & second2B}} & secondbrtag_hash[`RV_BTB_BTAG_SIZE-1:0] ) | + ({`RV_BTB_BTAG_SIZE{first2B & second4B & alignbrend[1]}} & secondbrtag_hash[`RV_BTB_BTAG_SIZE-1:0] ) | + ({`RV_BTB_BTAG_SIZE{first2B & second4B & ~alignbrend[1]}} & thirdbrtag_hash[`RV_BTB_BTAG_SIZE-1:0] ); + + i1_brp.bank[1:0] = ({2{first4B & third2B }} & thirdpc[3:2] ) | + ({2{first4B & third4B & alignbrend[2] }} & thirdpc[3:2] ) | + ({2{first4B & 
third4B & ~alignbrend[2] }} & fourthpc[3:2] ) | + ({2{first2B & second2B}} & secondpc[3:2] ) | + ({2{first2B & second4B & alignbrend[1]}} & secondpc[3:2] ) | + ({2{first2B & second4B & ~alignbrend[1]}} & thirdpc[3:2] ); + + i1_brp.br_error = (i1_brp.valid & i1_brp_pc4 & first4B & third2B ) | + (i1_brp.valid & ~i1_brp_pc4 & first4B & third4B ) | + (i1_brp.valid & i1_brp_pc4 & first2B & second2B) | + (i1_brp.valid & ~i1_brp_pc4 & first2B & second4B); + end + +// figure out 2B illegal insts + + assign i0_illegal = (first2B & ~first_legal); + + assign i1_illegal = (first2B & second2B & ~second_legal) | + (first4B & third2B & ~third_legal); + + assign shift_illegal = (i0_shift & i0_illegal) | + (i1_shift & i1_illegal); + + assign illegal_inst[15:0] = (first2B & ~first_legal) ? aligndata[1*16-1:0*16] : + ((first2B & second2B & ~second_legal) ? aligndata[2*16-1:1*16] : aligndata[3*16-1:2*16]); + + assign illegal_inst_en = shift_illegal & ~illegal_lockout; + + rvdffe #(16) illegal_any_ff (.*, .en(illegal_inst_en), .din(illegal_inst[15:0]), .dout(ifu_illegal_inst[15:0])); + + assign illegal_lockout_in = (shift_illegal | illegal_lockout) & ~exu_flush_final; + + rvdff #(1) illegal_lockout_any_ff (.*, .clk(active_clk), .din(illegal_lockout_in), .dout(illegal_lockout)); + + + // decompress + + ifu_compress_ctl compress0 (.din(aligndata[16*1-1:0*16]), .dout(uncompress0[31:0]), .legal(first_legal) ); + + ifu_compress_ctl compress1 (.din(aligndata[16*2-1:1*16]), .dout(uncompress1[31:0]), .legal(second_legal) ); + + ifu_compress_ctl compress2 (.din(aligndata[16*3-1:2*16]), .dout(uncompress2[31:0]), .legal(third_legal) ); + + + + assign i0_shift = ifu_i0_valid & ibuffer_room1_more; + + assign i1_shift = ifu_i1_valid & ibuffer_room2_more; + + if (DEC_INSTBUF_DEPTH==4) begin + assign ibuffer_room1_more = ~dec_ib3_valid_d; + assign ibuffer_room2_more = ~dec_ib2_valid_d; + end + else begin + assign ibuffer_room1_more = ~dec_ib0_valid_eff_d | ~dec_ib1_valid_eff_d; + assign 
ibuffer_room2_more = ~dec_ib0_valid_eff_d & ~dec_ib1_valid_eff_d; + end + + + + assign ifu_pmu_instr_aligned[1:0] = { i1_shift, i0_shift }; + + assign ifu_pmu_align_stall = ifu_i0_valid & ~ibuffer_room1_more; + + // compute how many bytes are being shifted from f0 + + // assign shift_0B = ~i0_shift; + + assign shift_2B = i0_shift & ~i1_shift & first2B; + + + assign shift_4B = (i0_shift & ~i1_shift & first4B) | + (i0_shift & i1_shift & first2B & second2B); + + assign shift_6B = (i0_shift & i1_shift & first2B & second4B) | + (i0_shift & i1_shift & first4B & third2B); + + assign shift_8B = i0_shift & i1_shift & first4B & third4B; + + // exact equations for the queue logic: f0_shift_NB is set when N bytes are shifted and f0val shows them present, or when more than N bytes are shifted but f0val shows only N bytes present (shift_6B and shift_8B are mutually exclusive, so they are ORed, not ANDed) + assign f0_shift_2B = (shift_2B & f0val[0]) | + ((shift_4B | shift_6B | shift_8B) & f0val[0] & ~f0val[1]); + + assign f0_shift_4B = (shift_4B & f0val[1]) | + ((shift_6B | shift_8B) & f0val[1] & ~f0val[2]); + + + assign f0_shift_6B = (shift_6B & f0val[2]) | + (shift_8B & f0val[2] & ~f0val[3]); + + assign f0_shift_8B = shift_8B & f0val[3]; + + + + // f0 valid states + // + // 11111111 + // 11111110 + // 11111100 + // 11111000 + // 11110000 + + // 11100000 + // 11000000 + // 10000000 + // 00000000 + + // assign f1_shift_0B = shift_0B; + + assign f1_shift_2B = (f0val[2] & ~f0val[3] & shift_8B) | + (f0val[1] & ~f0val[2] & shift_6B) | + (f0val[0] & ~f0val[1] & shift_4B); + + assign f1_shift_4B = (f0val[1] & ~f0val[2] & shift_8B) | + (f0val[0] & ~f0val[1] & shift_6B); + + assign f1_shift_6B = (f0val[0] & ~f0val[1] & shift_8B); + + + +endmodule + diff --git a/design/ifu/ifu_bp_ctl.sv b/design/ifu/ifu_bp_ctl.sv new file mode 100644 index 0000000..216452c --- /dev/null +++ b/design/ifu/ifu_bp_ctl.sv @@ -0,0 +1,1777 @@ +//******************************************************************************** +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//******************************************************************************** + +//******************************************************************************** +// Function: Branch predictor +// Comments: +// +// +// Bank3 : Bank2 : Bank1 : Bank0 +// FA C 8 4 0 +//******************************************************************************** + +module ifu_bp_ctl + import swerv_types::*; +( + + input logic clk, + input logic active_clk, + input logic clk_override, + input logic rst_l, + + input logic ic_hit_f2, // Icache hit, enables F2 address capture + + input logic [31:1] ifc_fetch_addr_f1, // look up btb address + input logic [31:1] ifc_fetch_addr_f2, // to tgt calc + input logic ifc_fetch_req_f1, // F1 valid + input logic ifc_fetch_req_f2, // F2 valid + + input br_tlu_pkt_t dec_tlu_br0_wb_pkt, // BP commit update packet, includes errors + input br_tlu_pkt_t dec_tlu_br1_wb_pkt, // BP commit update packet, includes errors + + input logic dec_tlu_flush_lower_wb, // used to move EX4 RS to EX1 and F + input logic dec_tlu_flush_leak_one_wb, // don't hit for leak one fetches + + input logic dec_tlu_bpred_disable, // disable all branch prediction + + input logic exu_i0_br_ret_e4, // EX4 ret stack update + input logic exu_i1_br_ret_e4, // EX4 ret stack update + input logic exu_i0_br_call_e4, // EX4 ret stack update + input logic exu_i1_br_call_e4, // EX4 ret stack update + + input predict_pkt_t 
exu_mp_pkt, // mispredict packet + + input rets_pkt_t exu_rets_e1_pkt, // EX1 rets packet + input rets_pkt_t exu_rets_e4_pkt, // EX4 rets packet + +`ifdef REAL_COMM_RS + input logic [31:1] exu_i0_pc_e1, // Used for RS computation + input logic [31:1] exu_i1_pc_e1, // Used for RS computation + input logic [31:1] dec_tlu_i0_pc_e4, // Used for RS computation + input logic [31:1] dec_tlu_i1_pc_e4, // Used for RS computation +`endif + + input logic [`RV_BHT_GHR_RANGE] exu_mp_eghr, // execute ghr (for patching fghr) + + input logic exu_flush_final, // all flushes + input logic exu_flush_upper_e2, // flush upper, either i0 or i1, cp EX1 RS to F RS + + output logic ifu_bp_kill_next_f2, // kill next fetch, taken target found + output logic [31:1] ifu_bp_btb_target_f2, // predicted target PC + output logic [7:1] ifu_bp_inst_mask_f2, // tell ic which valids to kill because of a taken branch, right justified + + output logic [`RV_BHT_GHR_RANGE] ifu_bp_fghr_f2, // fetch ghr + +`ifdef RV_BTB_48 + output logic [7:0][1:0] ifu_bp_way_f2, // way +`else + output logic [7:0] ifu_bp_way_f2, // way +`endif + output logic [7:0] ifu_bp_ret_f2, // predicted ret + output logic [7:0] ifu_bp_hist1_f2, // history counters for all 4 potential branches, bit 1, right justified + output logic [7:0] ifu_bp_hist0_f2, // history counters for all 4 potential branches, bit 0, right justified + output logic [11:0] ifu_bp_poffset_f2, // predicted target + output logic [7:0] ifu_bp_pc4_f2, // pc4 indication, right justified + output logic [7:0] ifu_bp_valid_f2, // branch valid, right justified + + input logic scan_mode + ); + +`define TAG 16+`RV_BTB_BTAG_SIZE:17 + + localparam PC4=4; + localparam BOFF=3; + localparam CALL=2; + localparam RET=1; + localparam BV=0; + + localparam LRU_SIZE=`RV_BTB_ARRAY_DEPTH; + localparam NUM_BHT_LOOP = (`RV_BHT_ARRAY_DEPTH > 16 ) ? 
16 : `RV_BHT_ARRAY_DEPTH; + localparam NUM_BHT_LOOP_INNER_HI = (`RV_BHT_ARRAY_DEPTH > 16 ) ?`RV_BHT_ADDR_LO+3 : `RV_BHT_ADDR_HI; + localparam NUM_BHT_LOOP_OUTER_LO = (`RV_BHT_ARRAY_DEPTH > 16 ) ?`RV_BHT_ADDR_LO+4 : `RV_BHT_ADDR_LO; + localparam BHT_NO_ADDR_MATCH = ( `RV_BHT_ARRAY_DEPTH <= 16 ); + + logic exu_mp_valid_write; + logic exu_mp_ataken; + logic exu_mp_valid; // conditional branch mispredict + logic exu_mp_boffset; // branch offsett + logic exu_mp_pc4; // branch is a 4B inst + logic exu_mp_call; // branch is a call inst + logic exu_mp_ret; // branch is a ret inst + logic exu_mp_ja; // branch is a jump always + logic [1:0] exu_mp_hist; // new history + logic [11:0] exu_mp_tgt; // target offset + logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address + logic [1:0] exu_mp_bank; // write bank; based on branch PC[3:2] + logic [`RV_BTB_BTAG_SIZE-1:0] exu_mp_btag; // branch tag + logic [`RV_BHT_GHR_RANGE] exu_mp_fghr; // original fetch ghr (for bht update) + logic dec_tlu_br0_v_wb; // WB stage history update + logic [1:0] dec_tlu_br0_hist_wb; // new history + logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] dec_tlu_br0_addr_wb; // addr + logic [1:0] dec_tlu_br0_bank_wb; // write bank; based on branch PC[3:2] + logic dec_tlu_br0_error_wb; // error; invalidate bank + logic dec_tlu_br0_start_error_wb; // error; invalidate all 4 banks in fg + logic [`RV_BHT_GHR_RANGE] dec_tlu_br0_fghr_wb; + + logic dec_tlu_br1_v_wb; // WB stage history update + logic [1:0] dec_tlu_br1_hist_wb; // new history + logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] dec_tlu_br1_addr_wb; // addr + logic [1:0] dec_tlu_br1_bank_wb; // write bank; based on branch PC[3:2] + logic dec_tlu_br1_error_wb; // error + logic dec_tlu_br1_start_error_wb; // error; invalidate all 4 banks in fg + logic [`RV_BHT_GHR_RANGE] dec_tlu_br1_fghr_wb; + + logic [3:0] use_mp_way; + logic [`RV_RET_STACK_SIZE-1:0][31:1] rets_out, rets_in, e1_rets_out, e1_rets_in, e4_rets_out, e4_rets_in; + logic [`RV_RET_STACK_SIZE-1:0] 
rsenable; + + + logic [11:0] btb_rd_tgt_f2; + logic btb_rd_pc4_f2, btb_rd_boffset_f2, btb_rd_call_f2, btb_rd_ret_f2; + logic [3:1] bp_total_branch_offset_f2; + + logic [31:1] bp_btb_target_adder_f2; + logic [31:1] bp_rs_call_target_f2; + logic rs_push, rs_pop, rs_hold; + logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] btb_rd_addr_f1, btb_wr_addr, btb_rd_addr_f2; + logic [`RV_BTB_BTAG_SIZE-1:0] btb_wr_tag, fetch_rd_tag_f1, fetch_rd_tag_f2; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_wr_data; + logic [3:0] btb_wr_en_way0, btb_wr_en_way1; + + + logic dec_tlu_error_wb, dec_tlu_all_banks_error_wb, btb_valid, dec_tlu_br0_middle_wb, dec_tlu_br1_middle_wb; + logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] btb_error_addr_wb; + logic [1:0] dec_tlu_error_bank_wb; + logic branch_error_collision_f1, fetch_mp_collision_f1, fetch_mp_collision_f2; + + logic [6:0] fgmask_f2; + logic [3:0] branch_error_bank_conflict_f1, branch_error_bank_conflict_f2; + logic [`RV_BHT_GHR_RANGE] merged_ghr, fghr_ns, fghr; + logic [3:0] num_valids; + logic [LRU_SIZE-1:0] btb_lru_b0_f, btb_lru_b0_hold, btb_lru_b0_ns, btb_lru_b1_f, btb_lru_b1_hold, btb_lru_b1_ns, + btb_lru_b2_f, btb_lru_b2_hold, btb_lru_b2_ns, btb_lru_b3_f, btb_lru_b3_hold, btb_lru_b3_ns, + fetch_wrindex_dec, fetch_wrlru_b0, fetch_wrlru_b1, fetch_wrlru_b2, fetch_wrlru_b3, + mp_wrindex_dec, mp_wrlru_b0, mp_wrlru_b1, mp_wrlru_b2, mp_wrlru_b3; + logic [3:0] btb_lru_rd_f2, mp_bank_decoded, mp_bank_decoded_f, lru_update_valid_f2; + logic [3:0] tag_match_way0_f2, tag_match_way1_f2; + logic [7:0] way_raw, bht_dir_f2, btb_sel_f2, wayhit_f2; + logic [7:0] btb_sel_mask_f2, bht_valid_f2, bht_force_taken_f2; + + logic leak_one_f1, leak_one_f2, ifc_fetch_req_f2_raw; + + logic [LRU_SIZE-1:0][16+`RV_BTB_BTAG_SIZE:0] btb_bank0_rd_data_way0_out ; + logic [LRU_SIZE-1:0][16+`RV_BTB_BTAG_SIZE:0] btb_bank1_rd_data_way0_out ; + logic [LRU_SIZE-1:0][16+`RV_BTB_BTAG_SIZE:0] btb_bank2_rd_data_way0_out ; + logic [LRU_SIZE-1:0][16+`RV_BTB_BTAG_SIZE:0] btb_bank3_rd_data_way0_out ; + + 
logic [LRU_SIZE-1:0][16+`RV_BTB_BTAG_SIZE:0] btb_bank0_rd_data_way1_out ; + logic [LRU_SIZE-1:0][16+`RV_BTB_BTAG_SIZE:0] btb_bank1_rd_data_way1_out ; + logic [LRU_SIZE-1:0][16+`RV_BTB_BTAG_SIZE:0] btb_bank2_rd_data_way1_out ; + logic [LRU_SIZE-1:0][16+`RV_BTB_BTAG_SIZE:0] btb_bank3_rd_data_way1_out ; + + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank0_rd_data_way0_f2_in ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank1_rd_data_way0_f2_in ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank2_rd_data_way0_f2_in ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank3_rd_data_way0_f2_in ; + + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank0_rd_data_way1_f2_in ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank1_rd_data_way1_f2_in ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank2_rd_data_way1_f2_in ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank3_rd_data_way1_f2_in ; + + + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank0_rd_data_way0_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank1_rd_data_way0_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank2_rd_data_way0_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank3_rd_data_way0_f2 ; + + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank0_rd_data_way1_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank1_rd_data_way1_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank2_rd_data_way1_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank3_rd_data_way1_f2 ; + + logic final_h; + logic btb_fg_crossing_f2; + logic rs_correct; + logic middle_of_bank; + +`ifdef RV_BTB_48 + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank0_rd_data_way2_f2_in ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank1_rd_data_way2_f2_in ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank2_rd_data_way2_f2_in ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank3_rd_data_way2_f2_in ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank0_rd_data_way2_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank1_rd_data_way2_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank2_rd_data_way2_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank3_rd_data_way2_f2 ; + logic [LRU_SIZE-1:0][16+`RV_BTB_BTAG_SIZE:0] btb_bank0_rd_data_way2_out ; 
+ logic [LRU_SIZE-1:0][16+`RV_BTB_BTAG_SIZE:0] btb_bank1_rd_data_way2_out ; + logic [LRU_SIZE-1:0][16+`RV_BTB_BTAG_SIZE:0] btb_bank2_rd_data_way2_out ; + logic [LRU_SIZE-1:0][16+`RV_BTB_BTAG_SIZE:0] btb_bank3_rd_data_way2_out ; + logic [3:0] btb_wr_en_way2, tag_match_way2_f2, fetch_lru_bank_hit_f2; + logic [7:0] tag_match_way2_expanded_f2; + + logic [1:0] exu_mp_way, exu_mp_way_f, dec_tlu_br0_way_wb, dec_tlu_br1_way_wb, dec_tlu_way_wb, dec_tlu_way_wb_f; + +`else // !`ifdef RV_BTB_48 + logic exu_mp_way, exu_mp_way_f, dec_tlu_br0_way_wb, dec_tlu_br1_way_wb, dec_tlu_way_wb, dec_tlu_way_wb_f; + +`endif + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank0e_rd_data_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank1e_rd_data_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank2e_rd_data_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank3e_rd_data_f2 ; + + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank0o_rd_data_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank1o_rd_data_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank2o_rd_data_f2 ; + logic [16+`RV_BTB_BTAG_SIZE:0] btb_bank3o_rd_data_f2 ; + + logic [7:0] tag_match_way0_expanded_f2, tag_match_way1_expanded_f2; + + + logic [1:0] bht_bank0_rd_data_f2 ; + logic [1:0] bht_bank1_rd_data_f2 ; + logic [1:0] bht_bank2_rd_data_f2 ; + logic [1:0] bht_bank3_rd_data_f2 ; + logic [1:0] bht_bank4_rd_data_f2 ; + logic [1:0] bht_bank5_rd_data_f2 ; + logic [1:0] bht_bank6_rd_data_f2 ; + logic [1:0] bht_bank7_rd_data_f2 ; + + assign exu_mp_valid = exu_mp_pkt.misp & ~leak_one_f2; // conditional branch mispredict + assign exu_mp_boffset = exu_mp_pkt.boffset; // branch offset + assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst + assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst + assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst + assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always + assign exu_mp_way = exu_mp_pkt.way; // repl way + assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history + assign exu_mp_tgt[11:0] = 
exu_mp_pkt.toffset[11:0] ; // target offset + assign exu_mp_addr[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = exu_mp_pkt.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] ; // BTB/BHT address + assign exu_mp_bank[1:0] = exu_mp_pkt.bank[1:0] ; // write bank = exu_mp_pkt.; based on branch PC[3:2] + assign exu_mp_btag[`RV_BTB_BTAG_SIZE-1:0] = exu_mp_pkt.btag[`RV_BTB_BTAG_SIZE-1:0] ; // branch tag + assign exu_mp_fghr[`RV_BHT_GHR_RANGE] = exu_mp_pkt.fghr[`RV_BHT_GHR_RANGE] ; // original fetch ghr (for bht update) + assign exu_mp_ataken = exu_mp_pkt.ataken; + + assign dec_tlu_br0_v_wb = dec_tlu_br0_wb_pkt.valid; + assign dec_tlu_br0_hist_wb[1:0] = dec_tlu_br0_wb_pkt.hist[1:0]; + assign dec_tlu_br0_addr_wb[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = dec_tlu_br0_wb_pkt.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + assign dec_tlu_br0_bank_wb[1:0] = dec_tlu_br0_wb_pkt.bank[1:0]; + assign dec_tlu_br0_error_wb = dec_tlu_br0_wb_pkt.br_error; + assign dec_tlu_br0_middle_wb = dec_tlu_br0_wb_pkt.middle; + assign dec_tlu_br0_way_wb = dec_tlu_br0_wb_pkt.way; + assign dec_tlu_br0_start_error_wb = dec_tlu_br0_wb_pkt.br_start_error; + assign dec_tlu_br0_fghr_wb[`RV_BHT_GHR_RANGE] = dec_tlu_br0_wb_pkt.fghr[`RV_BHT_GHR_RANGE]; + + assign dec_tlu_br1_v_wb = dec_tlu_br1_wb_pkt.valid; + assign dec_tlu_br1_hist_wb[1:0] = dec_tlu_br1_wb_pkt.hist[1:0]; + assign dec_tlu_br1_addr_wb[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = dec_tlu_br1_wb_pkt.index[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + assign dec_tlu_br1_bank_wb[1:0] = dec_tlu_br1_wb_pkt.bank[1:0]; + assign dec_tlu_br1_middle_wb = dec_tlu_br1_wb_pkt.middle; + assign dec_tlu_br1_error_wb = dec_tlu_br1_wb_pkt.br_error; + assign dec_tlu_br1_way_wb = dec_tlu_br1_wb_pkt.way; + assign dec_tlu_br1_start_error_wb = dec_tlu_br1_wb_pkt.br_start_error; + assign dec_tlu_br1_fghr_wb[`RV_BHT_GHR_RANGE] = dec_tlu_br1_wb_pkt.fghr[`RV_BHT_GHR_RANGE]; + + + + // ---------------------------------------------------------------------- + // READ + // 
---------------------------------------------------------------------- + + + // hash the incoming fetch PC, first guess at hashing algorithm + rvbtb_addr_hash f1hash(.pc(ifc_fetch_addr_f1[31:1]), .hash(btb_rd_addr_f1[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO])); + rvbtb_addr_hash f2hash(.pc(ifc_fetch_addr_f2[31:1]), .hash(btb_rd_addr_f2[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO])); + + + // based on the fetch group offset(PC[3:2]) and direction bits, findfirst from fetchPC + // this sel is zero/onehot + // Put the table below in a file and run espresso to generate the btb_sel_f2 and btb_vmask_raw_f2 equations + // espresso -oeqntott -eeat | addassign +// +// .i 11 +// .o 15 +// .ilb ifc_fetch_addr_f2[3] ifc_fetch_addr_f2[2] ifc_fetch_addr_f2[1] bht_dir_f2[7] bht_dir_f2[6] bht_dir_f2[5] bht_dir_f2[4] bht_dir_f2[3] bht_dir_f2[2] bht_dir_f2[1] bht_dir_f2[0] +// .ob btb_sel_f2[7] btb_sel_f2[6] btb_sel_f2[5] btb_sel_f2[4] btb_sel_f2[3] btb_sel_f2[2] btb_sel_f2[1] btb_sel_f2[0] btb_vmask_raw_f2[7] btb_vmask_raw_f2[6] btb_vmask_raw_f2[5] btb_vmask_raw_f2[4] btb_vmask_raw_f2[3] btb_vmask_raw_f2[2] btb_vmask_raw_f2[1] +// .type fr +// ##faddress[3:1] dir[7:0] sel[7:0] mask[7:1] +// 000 -------1 00000001 0000000 +// 000 ------10 00000010 0000001 +// 000 -----100 00000100 0000010 +// 000 ----1000 00001000 0000100 +// 000 ---10000 00010000 0001000 +// 000 --100000 00100000 0010000 +// 000 -1000000 01000000 0100000 +// 000 10000000 10000000 1000000 +// +// 001 ------1- 00000010 0000000 +// 001 -----10- 00000100 0000001 +// 001 ----100- 00001000 0000010 +// 001 ---1000- 00010000 0000100 +// 001 --10000- 00100000 0001000 +// 001 -100000- 01000000 0010000 +// 001 1000000- 10000000 0110000 +// +// 010 -----1-- 00000100 0000000 +// 010 ----10-- 00001000 0000001 +// 010 ---100-- 00010000 0000010 +// 010 --1000-- 00100000 0000100 +// 010 -10000-- 01000000 0001000 +// 010 100000-- 10000000 0010000 +// +// 011 ----1--- 00001000 0000000 +// 011 ---10--- 00010000 0000001 +// 011 --100--- 00100000 0000010 +// 
011 -1000--- 01000000 0000100 +// 011 10000--- 10000000 0001000 +// +// 100 ---1---- 00010000 0000000 +// 100 --10---- 00100000 0000001 +// 100 -100---- 01000000 0000010 +// 100 1000---- 10000000 0000100 +// +// 101 --1----- 00100000 0000000 +// 101 -10----- 01000000 0000001 +// 101 100----- 10000000 0000010 +// +// 110 -1------ 01000000 0000000 +// 110 10------ 10000000 0000001 +// +// 111 1------- 10000000 0000000 + + + +assign btb_sel_f2[7] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & ~bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] + & ~bht_dir_f2[3] & ~bht_dir_f2[2] & ~bht_dir_f2[1] & ~bht_dir_f2[0]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & ~bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] + & ~bht_dir_f2[2] & ~bht_dir_f2[1]) | (~ifc_fetch_addr_f2[3] + & ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] & ~bht_dir_f2[6] + & ~bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] & ~bht_dir_f2[2]) | ( + ~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & ~bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3]) | ( + ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & ~bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4]) | ( + ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & ~bht_dir_f2[6] & ~bht_dir_f2[5]) | (ifc_fetch_addr_f2[3] + & ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] & ~bht_dir_f2[6]) | ( + ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1]); +assign btb_sel_f2[6] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ifc_fetch_addr_f2[1] & bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] + & ~bht_dir_f2[3] & ~bht_dir_f2[2] & ~bht_dir_f2[1]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] + & ~bht_dir_f2[2] & ~bht_dir_f2[1] & ~bht_dir_f2[0]) | ( + ~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] & 
~ifc_fetch_addr_f2[1] + & bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] + & ~bht_dir_f2[2]) | (~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] + & ifc_fetch_addr_f2[1] & bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] + & ~bht_dir_f2[3]) | (ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4]) | ( + ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & bht_dir_f2[6] & ~bht_dir_f2[5]) | (ifc_fetch_addr_f2[3] + & ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] & bht_dir_f2[6]); +assign btb_sel_f2[5] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] + & ~bht_dir_f2[2] & ~bht_dir_f2[1] & ~bht_dir_f2[0]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] & ~bht_dir_f2[2] + & ~bht_dir_f2[1]) | (~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] + & ~bht_dir_f2[2]) | (~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] + & ifc_fetch_addr_f2[1] & bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3]) | ( + ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & bht_dir_f2[5] & ~bht_dir_f2[4]) | (ifc_fetch_addr_f2[3] + & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] & bht_dir_f2[5]); +assign btb_sel_f2[4] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & bht_dir_f2[4] & ~bht_dir_f2[3] & ~bht_dir_f2[2] + & ~bht_dir_f2[1] & ~bht_dir_f2[0]) | (~ifc_fetch_addr_f2[3] + & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] & bht_dir_f2[4] + & ~bht_dir_f2[3] & ~bht_dir_f2[2] & ~bht_dir_f2[1]) | ( + ~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & bht_dir_f2[4] & ~bht_dir_f2[3] & ~bht_dir_f2[2]) | ( + ~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & bht_dir_f2[4] & ~bht_dir_f2[3]) | 
(ifc_fetch_addr_f2[3] + & ~ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] & bht_dir_f2[4]); +assign btb_sel_f2[3] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & bht_dir_f2[3] & ~bht_dir_f2[2] & ~bht_dir_f2[1] + & ~bht_dir_f2[0]) | (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ifc_fetch_addr_f2[1] & bht_dir_f2[3] & ~bht_dir_f2[2] & ~bht_dir_f2[1]) | ( + ~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & bht_dir_f2[3] & ~bht_dir_f2[2]) | (~ifc_fetch_addr_f2[3] + & ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] & bht_dir_f2[3]); +assign btb_sel_f2[2] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & bht_dir_f2[2] & ~bht_dir_f2[1] & ~bht_dir_f2[0]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & bht_dir_f2[2] & ~bht_dir_f2[1]) | (~ifc_fetch_addr_f2[3] + & ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] & bht_dir_f2[2]); +assign btb_sel_f2[1] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & bht_dir_f2[1] & ~bht_dir_f2[0]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & bht_dir_f2[1]); +assign btb_sel_f2[0] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & bht_dir_f2[0]); + + + + // vmask[0] is always 1 +logic [7:0] btb_vmask_raw_f2; +assign btb_vmask_raw_f2[7] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & ~bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] + & ~bht_dir_f2[3] & ~bht_dir_f2[2] & ~bht_dir_f2[1] & ~bht_dir_f2[0]); +assign btb_vmask_raw_f2[6] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ifc_fetch_addr_f2[1] & ~bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] + & ~bht_dir_f2[3] & ~bht_dir_f2[2] & ~bht_dir_f2[1]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] + & ~bht_dir_f2[2] & ~bht_dir_f2[1] & ~bht_dir_f2[0]); +assign btb_vmask_raw_f2[5] 
= (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ifc_fetch_addr_f2[1] & bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] + & ~bht_dir_f2[3] & ~bht_dir_f2[2] & ~bht_dir_f2[1]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & ~bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] + & ~bht_dir_f2[2] & ~bht_dir_f2[1]) | (~ifc_fetch_addr_f2[3] + & ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] & ~bht_dir_f2[6] + & ~bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] & ~bht_dir_f2[2]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] & ~bht_dir_f2[2] + & ~bht_dir_f2[1] & ~bht_dir_f2[0]); +assign btb_vmask_raw_f2[4] = (~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] + & ifc_fetch_addr_f2[1] & ~bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] + & ~bht_dir_f2[3]) | (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & bht_dir_f2[4] & ~bht_dir_f2[3] & ~bht_dir_f2[2] + & ~bht_dir_f2[1] & ~bht_dir_f2[0]) | (~ifc_fetch_addr_f2[3] + & ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] & bht_dir_f2[6] + & ~bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] & ~bht_dir_f2[2]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] & ~bht_dir_f2[2] + & ~bht_dir_f2[1]); +assign btb_vmask_raw_f2[3] = (ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & ~bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & bht_dir_f2[3] & ~bht_dir_f2[2] & ~bht_dir_f2[1] & ~bht_dir_f2[0]) | ( + ~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3]) | ( + ~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3] & ~bht_dir_f2[2]) | ( + ~ifc_fetch_addr_f2[3] & 
~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & bht_dir_f2[4] & ~bht_dir_f2[3] & ~bht_dir_f2[2] & ~bht_dir_f2[1]); +assign btb_vmask_raw_f2[2] = (ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ifc_fetch_addr_f2[1] & ~bht_dir_f2[6] & ~bht_dir_f2[5]) | ( + ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & bht_dir_f2[6] & ~bht_dir_f2[5] & ~bht_dir_f2[4]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & bht_dir_f2[2] & ~bht_dir_f2[1] & ~bht_dir_f2[0]) | ( + ~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & bht_dir_f2[5] & ~bht_dir_f2[4] & ~bht_dir_f2[3]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & bht_dir_f2[3] & ~bht_dir_f2[2] & ~bht_dir_f2[1]) | ( + ~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & bht_dir_f2[4] & ~bht_dir_f2[3] & ~bht_dir_f2[2]); +assign btb_vmask_raw_f2[1] = (ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & ~bht_dir_f2[6]) | (ifc_fetch_addr_f2[3] + & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] & bht_dir_f2[6] + & ~bht_dir_f2[5]) | (ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & bht_dir_f2[5] & ~bht_dir_f2[4]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1] + & bht_dir_f2[1] & ~bht_dir_f2[0]) | (~ifc_fetch_addr_f2[3] + & ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] & bht_dir_f2[4] + & ~bht_dir_f2[3]) | (~ifc_fetch_addr_f2[3] & ifc_fetch_addr_f2[2] + & ~ifc_fetch_addr_f2[1] & bht_dir_f2[3] & ~bht_dir_f2[2]) | ( + ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] & ifc_fetch_addr_f2[1] + & bht_dir_f2[2] & ~bht_dir_f2[1]); + + // end of espresso generated equations + + + logic[7:1] btb_vmask_f2; + assign btb_vmask_f2[7:1] = {btb_vmask_raw_f2[7], + |btb_vmask_raw_f2[7:6], + |btb_vmask_raw_f2[7:5], + |btb_vmask_raw_f2[7:4], + |btb_vmask_raw_f2[7:3], + |btb_vmask_raw_f2[7:2], + |btb_vmask_raw_f2[7:1]}; + + + // Errors colliding with fetches must 
kill the btb/bht hit. + + assign branch_error_collision_f1 = dec_tlu_error_wb & (btb_error_addr_wb[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] == btb_rd_addr_f1[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]); + assign branch_error_bank_conflict_f1[3:0] = {4{branch_error_collision_f1}} & (decode2_4(dec_tlu_error_bank_wb[1:0]) | {4{dec_tlu_all_banks_error_wb}}); + + assign fetch_mp_collision_f1 = ( (exu_mp_btag[`RV_BTB_BTAG_SIZE-1:0] == fetch_rd_tag_f1[`RV_BTB_BTAG_SIZE-1:0]) & + exu_mp_valid & ifc_fetch_req_f1 & + (exu_mp_addr[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] == btb_rd_addr_f1[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]) + ); + // set on leak one, hold until next flush without leak one + assign leak_one_f1 = (dec_tlu_flush_leak_one_wb & dec_tlu_flush_lower_wb) | (leak_one_f2 & ~dec_tlu_flush_lower_wb); + +`ifdef RV_BTB_48 + rvdff #(15) coll_ff (.*, .clk(active_clk), +`else + rvdff #(13) coll_ff (.*, .clk(active_clk), +`endif + .din({branch_error_bank_conflict_f1[3:0], fetch_mp_collision_f1, mp_bank_decoded[3:0], exu_mp_way, dec_tlu_way_wb, leak_one_f1, ifc_fetch_req_f1}), + .dout({branch_error_bank_conflict_f2[3:0], fetch_mp_collision_f2, mp_bank_decoded_f[3:0], exu_mp_way_f, dec_tlu_way_wb_f, leak_one_f2, ifc_fetch_req_f2_raw})); +`ifdef RV_BTB_48 + + // 3-way SA in the RV_BTB_48 configuration (way0, way1, way2), figure out the way hit and mux accordingly + assign tag_match_way0_f2[3:0] = {btb_bank3_rd_data_way0_f2[BV] & (btb_bank3_rd_data_way0_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank2_rd_data_way0_f2[BV] & (btb_bank2_rd_data_way0_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank1_rd_data_way0_f2[BV] & (btb_bank1_rd_data_way0_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank0_rd_data_way0_f2[BV] & (btb_bank0_rd_data_way0_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0])} & + ~({4{dec_tlu_way_wb_f==2'b0}} & branch_error_bank_conflict_f2[3:0]) & {4{ifc_fetch_req_f2_raw & ~leak_one_f2}}; + + assign tag_match_way1_f2[3:0] = {btb_bank3_rd_data_way1_f2[BV] & (btb_bank3_rd_data_way1_f2[`TAG] == 
fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank2_rd_data_way1_f2[BV] & (btb_bank2_rd_data_way1_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank1_rd_data_way1_f2[BV] & (btb_bank1_rd_data_way1_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank0_rd_data_way1_f2[BV] & (btb_bank0_rd_data_way1_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0])} & + ~({4{dec_tlu_way_wb_f[0]}} & branch_error_bank_conflict_f2[3:0]) & {4{ifc_fetch_req_f2_raw & ~leak_one_f2}}; + + assign tag_match_way2_f2[3:0] = {btb_bank3_rd_data_way2_f2[BV] & (btb_bank3_rd_data_way2_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank2_rd_data_way2_f2[BV] & (btb_bank2_rd_data_way2_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank1_rd_data_way2_f2[BV] & (btb_bank1_rd_data_way2_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank0_rd_data_way2_f2[BV] & (btb_bank0_rd_data_way2_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0])} & + ~({4{dec_tlu_way_wb_f[1]}} & branch_error_bank_conflict_f2[3:0]) & {4{ifc_fetch_req_f2_raw & ~leak_one_f2}}; + +`else + // 2 -way SA, figure out the way hit and mux accordingly + assign tag_match_way0_f2[3:0] = {btb_bank3_rd_data_way0_f2[BV] & (btb_bank3_rd_data_way0_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank2_rd_data_way0_f2[BV] & (btb_bank2_rd_data_way0_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank1_rd_data_way0_f2[BV] & (btb_bank1_rd_data_way0_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank0_rd_data_way0_f2[BV] & (btb_bank0_rd_data_way0_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0])} & + ~({4{~dec_tlu_way_wb_f}} & branch_error_bank_conflict_f2[3:0]) & {4{ifc_fetch_req_f2_raw & ~leak_one_f2}}; + + assign tag_match_way1_f2[3:0] = {btb_bank3_rd_data_way1_f2[BV] & (btb_bank3_rd_data_way1_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank2_rd_data_way1_f2[BV] & (btb_bank2_rd_data_way1_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + 
btb_bank1_rd_data_way1_f2[BV] & (btb_bank1_rd_data_way1_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]), + btb_bank0_rd_data_way1_f2[BV] & (btb_bank0_rd_data_way1_f2[`TAG] == fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0])} & + ~({4{dec_tlu_way_wb_f}} & branch_error_bank_conflict_f2[3:0]) & {4{ifc_fetch_req_f2_raw & ~leak_one_f2}}; + +`endif + + // Both ways could hit, use the offset bit to reorder + + assign tag_match_way0_expanded_f2[7:0] = {tag_match_way0_f2[3] & (btb_bank3_rd_data_way0_f2[BOFF] ^ btb_bank3_rd_data_way0_f2[PC4]), + tag_match_way0_f2[3] & ~(btb_bank3_rd_data_way0_f2[BOFF] ^ btb_bank3_rd_data_way0_f2[PC4]), + tag_match_way0_f2[2] & (btb_bank2_rd_data_way0_f2[BOFF] ^ btb_bank2_rd_data_way0_f2[PC4]), + tag_match_way0_f2[2] & ~(btb_bank2_rd_data_way0_f2[BOFF] ^ btb_bank2_rd_data_way0_f2[PC4]), + tag_match_way0_f2[1] & (btb_bank1_rd_data_way0_f2[BOFF] ^ btb_bank1_rd_data_way0_f2[PC4]), + tag_match_way0_f2[1] & ~(btb_bank1_rd_data_way0_f2[BOFF] ^ btb_bank1_rd_data_way0_f2[PC4]), + tag_match_way0_f2[0] & (btb_bank0_rd_data_way0_f2[BOFF] ^ btb_bank0_rd_data_way0_f2[PC4]), + tag_match_way0_f2[0] & ~(btb_bank0_rd_data_way0_f2[BOFF] ^ btb_bank0_rd_data_way0_f2[PC4])}; + + assign tag_match_way1_expanded_f2[7:0] = {tag_match_way1_f2[3] & (btb_bank3_rd_data_way1_f2[BOFF] ^ btb_bank3_rd_data_way1_f2[PC4]), + tag_match_way1_f2[3] & ~(btb_bank3_rd_data_way1_f2[BOFF] ^ btb_bank3_rd_data_way1_f2[PC4]), + tag_match_way1_f2[2] & (btb_bank2_rd_data_way1_f2[BOFF] ^ btb_bank2_rd_data_way1_f2[PC4]), + tag_match_way1_f2[2] & ~(btb_bank2_rd_data_way1_f2[BOFF] ^ btb_bank2_rd_data_way1_f2[PC4]), + tag_match_way1_f2[1] & (btb_bank1_rd_data_way1_f2[BOFF] ^ btb_bank1_rd_data_way1_f2[PC4]), + tag_match_way1_f2[1] & ~(btb_bank1_rd_data_way1_f2[BOFF] ^ btb_bank1_rd_data_way1_f2[PC4]), + tag_match_way1_f2[0] & (btb_bank0_rd_data_way1_f2[BOFF] ^ btb_bank0_rd_data_way1_f2[PC4]), + tag_match_way1_f2[0] & ~(btb_bank0_rd_data_way1_f2[BOFF] ^ btb_bank0_rd_data_way1_f2[PC4])}; + +`ifdef 
RV_BTB_48 + assign tag_match_way2_expanded_f2[7:0] = {tag_match_way2_f2[3] & (btb_bank3_rd_data_way2_f2[BOFF] ^ btb_bank3_rd_data_way2_f2[PC4]), + tag_match_way2_f2[3] & ~(btb_bank3_rd_data_way2_f2[BOFF] ^ btb_bank3_rd_data_way2_f2[PC4]), + tag_match_way2_f2[2] & (btb_bank2_rd_data_way2_f2[BOFF] ^ btb_bank2_rd_data_way2_f2[PC4]), + tag_match_way2_f2[2] & ~(btb_bank2_rd_data_way2_f2[BOFF] ^ btb_bank2_rd_data_way2_f2[PC4]), + tag_match_way2_f2[1] & (btb_bank1_rd_data_way2_f2[BOFF] ^ btb_bank1_rd_data_way2_f2[PC4]), + tag_match_way2_f2[1] & ~(btb_bank1_rd_data_way2_f2[BOFF] ^ btb_bank1_rd_data_way2_f2[PC4]), + tag_match_way2_f2[0] & (btb_bank0_rd_data_way2_f2[BOFF] ^ btb_bank0_rd_data_way2_f2[PC4]), + tag_match_way2_f2[0] & ~(btb_bank0_rd_data_way2_f2[BOFF] ^ btb_bank0_rd_data_way2_f2[PC4])}; + + assign wayhit_f2[7:0] = tag_match_way0_expanded_f2[7:0] | tag_match_way1_expanded_f2[7:0] | tag_match_way2_expanded_f2[7:0]; + + assign btb_bank3o_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[7]}} & btb_bank3_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[7]}} & btb_bank3_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way2_expanded_f2[7]}} & btb_bank3_rd_data_way2_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + assign btb_bank3e_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[6]}} & btb_bank3_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[6]}} & btb_bank3_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way2_expanded_f2[6]}} & btb_bank3_rd_data_way2_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + + assign btb_bank2o_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[5]}} & btb_bank2_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[5]}} & 
btb_bank2_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way2_expanded_f2[5]}} & btb_bank2_rd_data_way2_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + assign btb_bank2e_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[4]}} & btb_bank2_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[4]}} & btb_bank2_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way2_expanded_f2[4]}} & btb_bank2_rd_data_way2_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + + assign btb_bank1o_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[3]}} & btb_bank1_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[3]}} & btb_bank1_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way2_expanded_f2[3]}} & btb_bank1_rd_data_way2_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + assign btb_bank1e_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[2]}} & btb_bank1_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[2]}} & btb_bank1_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way2_expanded_f2[2]}} & btb_bank1_rd_data_way2_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + + assign btb_bank0o_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[1]}} & btb_bank0_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[1]}} & btb_bank0_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way2_expanded_f2[1]}} & btb_bank0_rd_data_way2_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + assign btb_bank0e_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[0]}} & btb_bank0_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[0]}} & 
btb_bank0_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way2_expanded_f2[0]}} & btb_bank0_rd_data_way2_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + + +`else // !`ifdef RV_BTB_48 + + assign wayhit_f2[7:0] = tag_match_way0_expanded_f2[7:0] | tag_match_way1_expanded_f2[7:0]; + assign btb_bank3o_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[7]}} & btb_bank3_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[7]}} & btb_bank3_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + assign btb_bank3e_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[6]}} & btb_bank3_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[6]}} & btb_bank3_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + + assign btb_bank2o_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[5]}} & btb_bank2_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[5]}} & btb_bank2_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + assign btb_bank2e_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[4]}} & btb_bank2_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[4]}} & btb_bank2_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + + assign btb_bank1o_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[3]}} & btb_bank1_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[3]}} & btb_bank1_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + assign btb_bank1e_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[2]}} & btb_bank1_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) | + ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[2]}} & btb_bank1_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) ); + + assign 
btb_bank0o_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[1]}} & btb_bank0_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) |
+                                                            ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[1]}} & btb_bank0_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) );
+   assign btb_bank0e_rd_data_f2[16+`RV_BTB_BTAG_SIZE:0] = ( ({17+`RV_BTB_BTAG_SIZE{tag_match_way0_expanded_f2[0]}} & btb_bank0_rd_data_way0_f2[16+`RV_BTB_BTAG_SIZE:0]) |
+                                                            ({17+`RV_BTB_BTAG_SIZE{tag_match_way1_expanded_f2[0]}} & btb_bank0_rd_data_way1_f2[16+`RV_BTB_BTAG_SIZE:0]) );
+
+`endif
+
+
+
+   // --------------------------------------------------------------------------------
+   // --------------------------------------------------------------------------------
+   // update lru
+   // mp
+
+   // One-hot bank select for the mispredict update.
+   assign mp_bank_decoded[3:0] = decode2_4(exu_mp_bank[1:0]);
+   // create a onehot lru write vector: a single 1 shifted to the mispredict BTB index
+   assign mp_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << exu_mp_addr[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO];
+
+   // fetch: same one-hot encoding, indexed by the F2 read address
+   assign fetch_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << btb_rd_addr_f2[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO];
+
+   // Per-bank mispredict LRU write strobes: the one-hot index, gated by bank select and exu_mp_valid.
+   assign mp_wrlru_b0[LRU_SIZE-1:0] = mp_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{mp_bank_decoded[0] & exu_mp_valid}};
+   assign mp_wrlru_b1[LRU_SIZE-1:0] = mp_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{mp_bank_decoded[1] & exu_mp_valid}};
+   assign mp_wrlru_b2[LRU_SIZE-1:0] = mp_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{mp_bank_decoded[2] & exu_mp_valid}};
+   assign mp_wrlru_b3[LRU_SIZE-1:0] = mp_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{mp_bank_decoded[3] & exu_mp_valid}};
+
+   genvar j, i;
+
+
+`ifdef BTB_ROUND_ROBIN
+   // Round-robin build: fetch hits never write the LRU, so the fetch strobes are tied off.
+   // The zero fill spans the full LRU_SIZE width of the target; a replication of LRU_SIZE-1
+   // bits only worked through implicit zero-extension and is an illegal zero replication
+   // if LRU_SIZE is ever 1.
+   assign fetch_wrlru_b0[LRU_SIZE-1:0] = {LRU_SIZE{1'b0}};
+   assign fetch_wrlru_b1[LRU_SIZE-1:0] = {LRU_SIZE{1'b0}};
+   assign fetch_wrlru_b2[LRU_SIZE-1:0] = {LRU_SIZE{1'b0}};
+   assign fetch_wrlru_b3[LRU_SIZE-1:0] = {LRU_SIZE{1'b0}};
+
+   assign lru_update_valid_f2[3:0] = 4'b0;
+
+`else
+
+   // Bit n is bank n: a fetch updates that bank's LRU when either of its two lanes is valid and selected.
+   assign lru_update_valid_f2[3:0] = {((bht_valid_f2[6] & btb_sel_mask_f2[6]) |
(bht_valid_f2[7] & btb_sel_mask_f2[7])) & ifc_fetch_req_f2 & ~leak_one_f2, + ((bht_valid_f2[4] & btb_sel_mask_f2[4]) | (bht_valid_f2[5] & btb_sel_mask_f2[5])) & ifc_fetch_req_f2 & ~leak_one_f2, + ((bht_valid_f2[2] & btb_sel_mask_f2[2]) | (bht_valid_f2[3] & btb_sel_mask_f2[3])) & ifc_fetch_req_f2 & ~leak_one_f2, + ((bht_valid_f2[0] & btb_sel_mask_f2[0]) | (bht_valid_f2[1] & btb_sel_mask_f2[1])) & ifc_fetch_req_f2 & ~leak_one_f2}; + + assign fetch_wrlru_b0[LRU_SIZE-1:0] = fetch_wrindex_dec[LRU_SIZE-1:0] & + {LRU_SIZE{lru_update_valid_f2[0]}}; + assign fetch_wrlru_b1[LRU_SIZE-1:0] = fetch_wrindex_dec[LRU_SIZE-1:0] & + {LRU_SIZE{lru_update_valid_f2[1]}}; + assign fetch_wrlru_b2[LRU_SIZE-1:0] = fetch_wrindex_dec[LRU_SIZE-1:0] & + {LRU_SIZE{lru_update_valid_f2[2]}}; + assign fetch_wrlru_b3[LRU_SIZE-1:0] = fetch_wrindex_dec[LRU_SIZE-1:0] & + {LRU_SIZE{lru_update_valid_f2[3]}}; + +`endif + + assign btb_lru_b0_hold[LRU_SIZE-1:0] = ~mp_wrlru_b0[LRU_SIZE-1:0] & ~fetch_wrlru_b0[LRU_SIZE-1:0]; + assign btb_lru_b1_hold[LRU_SIZE-1:0] = ~mp_wrlru_b1[LRU_SIZE-1:0] & ~fetch_wrlru_b1[LRU_SIZE-1:0]; + assign btb_lru_b2_hold[LRU_SIZE-1:0] = ~mp_wrlru_b2[LRU_SIZE-1:0] & ~fetch_wrlru_b2[LRU_SIZE-1:0]; + assign btb_lru_b3_hold[LRU_SIZE-1:0] = ~mp_wrlru_b3[LRU_SIZE-1:0] & ~fetch_wrlru_b3[LRU_SIZE-1:0]; + + // Forward the mp lru information to the fetch, avoids multiple way hits later + assign use_mp_way[3:0] = {4{fetch_mp_collision_f2}} & mp_bank_decoded_f[3:0]; + + + + +`ifdef RV_BTB_48 + logic [3:0][3:0] [2:0] lru_bank_wr_data ; + logic [3:0][3:0] lru_bank_sel ; + logic [3:0] [1:0] hitway_enc; + logic [3:0] [2:0] fetch_new_lru; + logic [2:0] lru_bank0_rd_data_f2_in, lru_bank1_rd_data_f2_in, lru_bank2_rd_data_f2_in, lru_bank3_rd_data_f2_in; + logic [2:0] lru_bank0_rd_data_f2, lru_bank1_rd_data_f2, lru_bank2_rd_data_f2, lru_bank3_rd_data_f2; + logic [1:0] lru_bank0_next_way, lru_bank1_next_way, lru_bank2_next_way, lru_bank3_next_way, + fetch_replway_bank0_enc, fetch_replway_bank1_enc, 
fetch_replway_bank2_enc, fetch_replway_bank3_enc, + fetch_replway_bank4_enc, fetch_replway_bank5_enc, fetch_replway_bank6_enc, fetch_replway_bank7_enc; + logic [3:0][3:0] [2:0] lru_bank_rd_data_out; + +// // could have 2 ways hit for case where same bank, different offset hit. Update LRU accordingly +logic [3:0] two_hits; + assign two_hits[3:0] = (tag_match_way0_f2[3:0] & tag_match_way1_f2[3:0]) | + (tag_match_way0_f2[3:0] & tag_match_way2_f2[3:0]) | + (tag_match_way1_f2[3:0] & tag_match_way2_f2[3:0]) ; + + logic [2:0] mp_new_lru; + assign mp_new_lru[2:0] = newlru(lru_bank_rd_data_out[exu_mp_bank[1:0]][exu_mp_addr[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]], exu_mp_way[1:0]); + + + assign fetch_lru_bank_hit_f2[3:0] = lru_update_valid_f2[3:0] & (tag_match_way0_f2[3:0] | tag_match_way1_f2[3:0] | tag_match_way2_f2[3:0]); + + // banks + for ( i=0; i<4; i++) begin : LRUBANKS + // only 4 indices here + // encode the hit way in case the fetch hits + assign hitway_enc[i] = tag_match_way1_f2[i] ? 2'b01 : tag_match_way2_f2[i] ? 2'b10 : 'b0; + // update the lru assuming a hit + assign fetch_new_lru[i] = newlru(lru_bank_rd_data_out[i][btb_rd_addr_f2[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]], hitway_enc[i][1:0]); + + // index + for (j=0 ; j<4 ; j++) begin : LRUFLOPS + + // mux the write data + assign lru_bank_wr_data[i][j] = (exu_mp_valid & mp_bank_decoded[i]) ? mp_new_lru[2:0] : fetch_lru_bank_hit_f2[i] ? 
fetch_new_lru[i] : 'b0; + + // bank enable if there was a fetch hit or a mispredict + // simul mp and fetch, mp has priority + assign lru_bank_sel[i][j] = (~exu_mp_valid & fetch_lru_bank_hit_f2[i] & (btb_rd_addr_f2[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] == j)) | + ( exu_mp_valid & mp_bank_decoded[i] & (exu_mp_addr[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] == j)); + + + rvdffs #(3) lru_bank (.*, + .clk (active_clk), + .en (lru_bank_sel[i][j]), + .din (lru_bank_wr_data[i][j]), + .dout (lru_bank_rd_data_out[i][j])); + + end // block: LRUFLOPS + end // block: LRUBANKS + +always_comb begin : LRU_rd_mux + lru_bank0_rd_data_f2_in[2:0] = '0 ; + lru_bank1_rd_data_f2_in[2:0] = '0 ; + lru_bank2_rd_data_f2_in[2:0] = '0 ; + lru_bank3_rd_data_f2_in[2:0] = '0 ; + for (int j=0; j<4; j++) begin + if (btb_rd_addr_f1[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] == j) begin + lru_bank0_rd_data_f2_in[2:0] = lru_bank_rd_data_out[0][j]; + lru_bank1_rd_data_f2_in[2:0] = lru_bank_rd_data_out[1][j]; + lru_bank2_rd_data_f2_in[2:0] = lru_bank_rd_data_out[2][j]; + lru_bank3_rd_data_f2_in[2:0] = lru_bank_rd_data_out[3][j]; + end + end +end // block: LRU_rd_mux + + + rvdffe #(12) lru_dataoutf (.*, .en (ifc_fetch_req_f1), + .din ({lru_bank0_rd_data_f2_in[2:0], + lru_bank1_rd_data_f2_in[2:0], + lru_bank2_rd_data_f2_in[2:0], + lru_bank3_rd_data_f2_in[2:0] + }), + .dout ({lru_bank0_rd_data_f2 [2:0], + lru_bank1_rd_data_f2 [2:0], + lru_bank2_rd_data_f2 [2:0], + lru_bank3_rd_data_f2 [2:0] + })); + + // Create the replacement way to send down the pipe. First is hitway, then consider invalid ways first, then lru way + assign lru_bank0_next_way[1:0] = use_mp_way[0] ? exu_mp_way_f[1:0] : lru2way(lru_bank0_rd_data_f2[2:0], {btb_bank0_rd_data_way2_f2[BV],btb_bank0_rd_data_way1_f2[BV],btb_bank0_rd_data_way0_f2[BV]}); + assign lru_bank1_next_way[1:0] = use_mp_way[1] ? 
exu_mp_way_f[1:0] : lru2way(lru_bank1_rd_data_f2[2:0], {btb_bank1_rd_data_way2_f2[BV],btb_bank1_rd_data_way1_f2[BV],btb_bank1_rd_data_way0_f2[BV]}); + assign lru_bank2_next_way[1:0] = use_mp_way[2] ? exu_mp_way_f[1:0] : lru2way(lru_bank2_rd_data_f2[2:0], {btb_bank2_rd_data_way2_f2[BV],btb_bank2_rd_data_way1_f2[BV],btb_bank2_rd_data_way0_f2[BV]}); + assign lru_bank3_next_way[1:0] = use_mp_way[3] ? exu_mp_way_f[1:0] : lru2way(lru_bank3_rd_data_f2[2:0], {btb_bank3_rd_data_way2_f2[BV],btb_bank3_rd_data_way1_f2[BV],btb_bank3_rd_data_way0_f2[BV]}); + + assign fetch_replway_bank0_enc[1:0] = ( ({2{tag_match_way0_expanded_f2[0]}} & 2'b00) | + ({2{tag_match_way1_expanded_f2[0] & ~tag_match_way0_expanded_f2[0]}} & 2'b01) | + ({2{tag_match_way2_expanded_f2[0] & ~tag_match_way1_expanded_f2[0] & ~tag_match_way0_expanded_f2[0]}} & 2'b10) | + ({2{~tag_match_way2_expanded_f2[0] & ~tag_match_way1_expanded_f2[0] & ~tag_match_way0_expanded_f2[0]}} & lru_bank0_next_way[1:0])); + assign fetch_replway_bank1_enc[1:0] = ( ({2{tag_match_way0_expanded_f2[1]}} & 2'b00) | + ({2{tag_match_way1_expanded_f2[1] & ~tag_match_way0_expanded_f2[1]}} & 2'b01) | + ({2{tag_match_way2_expanded_f2[1] & ~tag_match_way1_expanded_f2[1] & ~tag_match_way0_expanded_f2[1]}} & 2'b10) | + ({2{~tag_match_way2_expanded_f2[1] & ~tag_match_way1_expanded_f2[1] & ~tag_match_way0_expanded_f2[1]}} & lru_bank0_next_way[1:0])); + assign fetch_replway_bank2_enc[1:0] = ( ({2{tag_match_way0_expanded_f2[2]}} & 2'b00) | + ({2{tag_match_way1_expanded_f2[2] & ~tag_match_way0_expanded_f2[2]}} & 2'b01) | + ({2{tag_match_way2_expanded_f2[2] & ~tag_match_way1_expanded_f2[2] & ~tag_match_way0_expanded_f2[2]}} & 2'b10) | + ({2{~tag_match_way2_expanded_f2[2] & ~tag_match_way1_expanded_f2[2] & ~tag_match_way0_expanded_f2[2]}} & lru_bank1_next_way[1:0])); + assign fetch_replway_bank3_enc[1:0] = ( ({2{tag_match_way0_expanded_f2[3]}} & 2'b00) | + ({2{tag_match_way1_expanded_f2[3] & ~tag_match_way0_expanded_f2[3]}} & 2'b01) | + 
({2{tag_match_way2_expanded_f2[3] & ~tag_match_way1_expanded_f2[3] & ~tag_match_way0_expanded_f2[3]}} & 2'b10) | + ({2{~tag_match_way2_expanded_f2[3] & ~tag_match_way1_expanded_f2[3] & ~tag_match_way0_expanded_f2[3]}} & lru_bank1_next_way[1:0])); + assign fetch_replway_bank4_enc[1:0] = ( ({2{tag_match_way0_expanded_f2[4]}} & 2'b00) | + ({2{tag_match_way1_expanded_f2[4] & ~tag_match_way0_expanded_f2[4]}} & 2'b01) | + ({2{tag_match_way2_expanded_f2[4] & ~tag_match_way1_expanded_f2[4] & ~tag_match_way0_expanded_f2[4]}} & 2'b10) | + ({2{~tag_match_way2_expanded_f2[4] & ~tag_match_way1_expanded_f2[4] & ~tag_match_way0_expanded_f2[4]}} & lru_bank2_next_way[1:0])); + assign fetch_replway_bank5_enc[1:0] = ( ({2{tag_match_way0_expanded_f2[5]}} & 2'b00) | + ({2{tag_match_way1_expanded_f2[5] & ~tag_match_way0_expanded_f2[5]}} & 2'b01) | + ({2{tag_match_way2_expanded_f2[5] & ~tag_match_way1_expanded_f2[5] & ~tag_match_way0_expanded_f2[5]}} & 2'b10) | + ({2{~tag_match_way2_expanded_f2[5] & ~tag_match_way1_expanded_f2[5] & ~tag_match_way0_expanded_f2[5]}} & lru_bank2_next_way[1:0])); + assign fetch_replway_bank6_enc[1:0] = ( ({2{tag_match_way0_expanded_f2[6]}} & 2'b00) | + ({2{tag_match_way1_expanded_f2[6] & ~tag_match_way0_expanded_f2[6]}} & 2'b01) | + ({2{tag_match_way2_expanded_f2[6] & ~tag_match_way1_expanded_f2[6] & ~tag_match_way0_expanded_f2[6]}} & 2'b10) | + ({2{~tag_match_way2_expanded_f2[6] & ~tag_match_way1_expanded_f2[6] & ~tag_match_way0_expanded_f2[6]}} & lru_bank3_next_way[1:0])); + assign fetch_replway_bank7_enc[1:0] = ( ({2{tag_match_way0_expanded_f2[7]}} & 2'b00) | + ({2{tag_match_way1_expanded_f2[7] & ~tag_match_way0_expanded_f2[7]}} & 2'b01) | + ({2{tag_match_way2_expanded_f2[7] & ~tag_match_way1_expanded_f2[7] & ~tag_match_way0_expanded_f2[7]}} & 2'b10) | + ({2{~tag_match_way2_expanded_f2[7] & ~tag_match_way1_expanded_f2[7] & ~tag_match_way0_expanded_f2[7]}} & lru_bank3_next_way[1:0])); + +`else + + + + assign btb_lru_b0_ns[LRU_SIZE-1:0] = ( 
(btb_lru_b0_hold[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]) | + (mp_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{~exu_mp_way}}) | + (fetch_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_f2[0]}}) ); + + assign btb_lru_b1_ns[LRU_SIZE-1:0] = ( (btb_lru_b1_hold[LRU_SIZE-1:0] & btb_lru_b1_f[LRU_SIZE-1:0]) | + (mp_wrlru_b1[LRU_SIZE-1:0] & {LRU_SIZE{~exu_mp_way}}) | + (fetch_wrlru_b1[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_f2[1]}}) ); + + assign btb_lru_b2_ns[LRU_SIZE-1:0] = ( (btb_lru_b2_hold[LRU_SIZE-1:0] & btb_lru_b2_f[LRU_SIZE-1:0]) | + (mp_wrlru_b2[LRU_SIZE-1:0] & {LRU_SIZE{~exu_mp_way}}) | + (fetch_wrlru_b2[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_f2[2]}}) ); + + assign btb_lru_b3_ns[LRU_SIZE-1:0] = ( (btb_lru_b3_hold[LRU_SIZE-1:0] & btb_lru_b3_f[LRU_SIZE-1:0]) | + (mp_wrlru_b3[LRU_SIZE-1:0] & {LRU_SIZE{~exu_mp_way}}) | + (fetch_wrlru_b3[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_f2[3]}}) ); + + assign btb_lru_rd_f2[0] = use_mp_way[0] ? exu_mp_way_f : |(fetch_wrindex_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]); + assign btb_lru_rd_f2[1] = use_mp_way[1] ? exu_mp_way_f : |(fetch_wrindex_dec[LRU_SIZE-1:0] & btb_lru_b1_f[LRU_SIZE-1:0]); + assign btb_lru_rd_f2[2] = use_mp_way[2] ? exu_mp_way_f : |(fetch_wrindex_dec[LRU_SIZE-1:0] & btb_lru_b2_f[LRU_SIZE-1:0]); + assign btb_lru_rd_f2[3] = use_mp_way[3] ? 
exu_mp_way_f : |(fetch_wrindex_dec[LRU_SIZE-1:0] & btb_lru_b3_f[LRU_SIZE-1:0]); + + assign way_raw[7:0] = tag_match_way1_expanded_f2[7:0] | (~wayhit_f2[7:0] & {{2{btb_lru_rd_f2[3]}}, {2{btb_lru_rd_f2[2]}}, {2{btb_lru_rd_f2[1]}}, {2{btb_lru_rd_f2[0]}}}); + + rvdffe #(LRU_SIZE*4) btb_lru_ff (.*, .en(ifc_fetch_req_f2 | exu_mp_valid), + .din({btb_lru_b0_ns[(LRU_SIZE)-1:0], + btb_lru_b1_ns[(LRU_SIZE)-1:0], + btb_lru_b2_ns[(LRU_SIZE)-1:0], + btb_lru_b3_ns[(LRU_SIZE)-1:0]}), + .dout({btb_lru_b0_f[(LRU_SIZE)-1:0], + btb_lru_b1_f[(LRU_SIZE)-1:0], + btb_lru_b2_f[(LRU_SIZE)-1:0], + btb_lru_b3_f[(LRU_SIZE)-1:0]})); +`endif // !`ifdef RV_BTB_48 + + + // -------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------- + + // mux out critical hit bank for pc computation + // This is only useful for the first taken branch in the fetch group + logic [16:1] btb_sel_data_f2; + assign { + btb_rd_tgt_f2[11:0], + btb_rd_pc4_f2, + btb_rd_boffset_f2, + btb_rd_call_f2, + btb_rd_ret_f2} = btb_sel_data_f2[16:1]; + + assign btb_sel_data_f2[16:1] = ( ({16{btb_sel_f2[7]}} & btb_bank3o_rd_data_f2[16:1]) | + ({16{btb_sel_f2[6]}} & btb_bank3e_rd_data_f2[16:1]) | + ({16{btb_sel_f2[5]}} & btb_bank2o_rd_data_f2[16:1]) | + ({16{btb_sel_f2[4]}} & btb_bank2e_rd_data_f2[16:1]) | + ({16{btb_sel_f2[3]}} & btb_bank1o_rd_data_f2[16:1]) | + ({16{btb_sel_f2[2]}} & btb_bank1e_rd_data_f2[16:1]) | + ({16{btb_sel_f2[1]}} & btb_bank0o_rd_data_f2[16:1]) | + ({16{btb_sel_f2[0]}} & btb_bank0e_rd_data_f2[16:1]) ); + + + logic [7:0] bp_valid_f2, bp_hist1_f2; + + // a valid taken target needs to kill the next fetch as we compute the target address + assign ifu_bp_kill_next_f2 = |(bp_valid_f2[7:0] & bp_hist1_f2[7:0]) & ifc_fetch_req_f2 & ~leak_one_f2 & ~dec_tlu_bpred_disable; + + + // Don't put calls/rets/ja in the predictor, force the bht taken instead + assign bht_force_taken_f2[7:0] = {(btb_bank3o_rd_data_f2[CALL] | 
btb_bank3o_rd_data_f2[RET]), + (btb_bank3e_rd_data_f2[CALL] | btb_bank3e_rd_data_f2[RET]), + (btb_bank2o_rd_data_f2[CALL] | btb_bank2o_rd_data_f2[RET]), + (btb_bank2e_rd_data_f2[CALL] | btb_bank2e_rd_data_f2[RET]), + (btb_bank1o_rd_data_f2[CALL] | btb_bank1o_rd_data_f2[RET]), + (btb_bank1e_rd_data_f2[CALL] | btb_bank1e_rd_data_f2[RET]), + (btb_bank0o_rd_data_f2[CALL] | btb_bank0o_rd_data_f2[RET]), + (btb_bank0e_rd_data_f2[CALL] | btb_bank0e_rd_data_f2[RET])}; + + + // taken and valid, otherwise, branch errors must clear the bht + assign bht_valid_f2[7:0] = wayhit_f2[7:0]; + + assign bht_dir_f2[7:0] = {(bht_force_taken_f2[7] | bht_bank7_rd_data_f2[1]) & bht_valid_f2[7], + (bht_force_taken_f2[6] | bht_bank6_rd_data_f2[1]) & bht_valid_f2[6], + (bht_force_taken_f2[5] | bht_bank5_rd_data_f2[1]) & bht_valid_f2[5], + (bht_force_taken_f2[4] | bht_bank4_rd_data_f2[1]) & bht_valid_f2[4], + (bht_force_taken_f2[3] | bht_bank3_rd_data_f2[1]) & bht_valid_f2[3], + (bht_force_taken_f2[2] | bht_bank2_rd_data_f2[1]) & bht_valid_f2[2], + (bht_force_taken_f2[1] | bht_bank1_rd_data_f2[1]) & bht_valid_f2[1], + (bht_force_taken_f2[0] | bht_bank0_rd_data_f2[1]) & bht_valid_f2[0]}; + + // final inst_valid_mask. + // vmask[7] is a 0, vmask[0] is a 1, initially + // (assumes pc2 with boffset 0) + // + logic minus1, plus1; + + assign plus1 = ( (~btb_rd_pc4_f2 & btb_rd_boffset_f2 & ~ifc_fetch_addr_f2[1]) | + ( btb_rd_pc4_f2 & ~btb_rd_boffset_f2 & ~ifc_fetch_addr_f2[1]) ); + + assign minus1 = ( (~btb_rd_pc4_f2 & ~btb_rd_boffset_f2 & ifc_fetch_addr_f2[1]) | + ( btb_rd_pc4_f2 & btb_rd_boffset_f2 & ifc_fetch_addr_f2[1]) ); + + assign ifu_bp_inst_mask_f2[7:1] = ( ({7{ ifu_bp_kill_next_f2}} & btb_vmask_f2[7:1]) | + ({7{~ifu_bp_kill_next_f2}} & 7'b1111111) ); + + logic [7:0] hist0_raw, hist1_raw, pc4_raw, pret_raw; + + + // Branch prediction info is sent with the 2byte lane associated with the end of the branch. 
+   // Cases
+   //        BANK1           BANK0
+   // -------------------------------
+   // |      :       |      :       |
+   // -------------------------------
+   //        <------------>              : PC4 branch, offset, should be in B1 (indicated on [2])
+   //               <------------>       : PC4 branch, no offset, indicate PC4, VALID, HIST on [1]
+   //                      <------------>  : PC4 branch, offset, indicate PC4, VALID, HIST on [0]
+   //               <------>             : PC2 branch, offset, indicate VALID, HIST on [1]
+   //                      <------>      : PC2 branch, no offset, indicate VALID, HIST on [0]
+   //
+
+   // Per-lane history bit 1: bit [1] of each BHT bank's read data, forced to 1 where bht_force_taken_f2 is set.
+   assign hist1_raw[7:0] = bht_force_taken_f2[7:0] | {bht_bank7_rd_data_f2[1],
+                                                      bht_bank6_rd_data_f2[1],
+                                                      bht_bank5_rd_data_f2[1],
+                                                      bht_bank4_rd_data_f2[1],
+                                                      bht_bank3_rd_data_f2[1],
+                                                      bht_bank2_rd_data_f2[1],
+                                                      bht_bank1_rd_data_f2[1],
+                                                      bht_bank0_rd_data_f2[1]};
+
+   // Per-lane history bit 0: bit [0] of each BHT bank's read data, unmodified.
+   assign hist0_raw[7:0] = {bht_bank7_rd_data_f2[0],
+                            bht_bank6_rd_data_f2[0],
+                            bht_bank5_rd_data_f2[0],
+                            bht_bank4_rd_data_f2[0],
+                            bht_bank3_rd_data_f2[0],
+                            bht_bank2_rd_data_f2[0],
+                            bht_bank1_rd_data_f2[0],
+                            bht_bank0_rd_data_f2[0]};
+
+
+   // Per-lane PC4 flag. Lane 2b+1 is BTB bank b "o" data, lane 2b is bank b "e" data; each lane
+   // is qualified by its own wayhit_f2 bit.
+   assign pc4_raw[7:0] = {wayhit_f2[7] & btb_bank3o_rd_data_f2[PC4],
+                          wayhit_f2[6] & btb_bank3e_rd_data_f2[PC4],
+                          wayhit_f2[5] & btb_bank2o_rd_data_f2[PC4],
+                          wayhit_f2[4] & btb_bank2e_rd_data_f2[PC4],
+                          wayhit_f2[3] & btb_bank1o_rd_data_f2[PC4],
+                          wayhit_f2[2] & btb_bank1e_rd_data_f2[PC4],
+                          wayhit_f2[1] & btb_bank0o_rd_data_f2[PC4],
+                          wayhit_f2[0] & btb_bank0e_rd_data_f2[PC4]};
+
+   // Per-lane predicted-return flag: the lane hit, RET is set and CALL is clear.
+   // wayhit_f2 is an 8-bit per-lane vector, so lane n must be qualified by wayhit_f2[n] exactly
+   // as pc4_raw does. Indexing it by bank number (3,3,2,2,1,1,0,0) made a return in lanes 4..7
+   // depend on an unrelated hit in lanes 0..3.
+   // NOTE(review): confirm no downstream consumer relied on the old masking.
+   assign pret_raw[7:0] = {wayhit_f2[7] & ~btb_bank3o_rd_data_f2[CALL] & btb_bank3o_rd_data_f2[RET],
+                           wayhit_f2[6] & ~btb_bank3e_rd_data_f2[CALL] & btb_bank3e_rd_data_f2[RET],
+                           wayhit_f2[5] & ~btb_bank2o_rd_data_f2[CALL] & btb_bank2o_rd_data_f2[RET],
+                           wayhit_f2[4] & ~btb_bank2e_rd_data_f2[CALL] & btb_bank2e_rd_data_f2[RET],
+                           wayhit_f2[3] & ~btb_bank1o_rd_data_f2[CALL] & btb_bank1o_rd_data_f2[RET],
+                           wayhit_f2[2] & ~btb_bank1e_rd_data_f2[CALL] & btb_bank1e_rd_data_f2[RET],
+                           wayhit_f2[1] & ~btb_bank0o_rd_data_f2[CALL] & btb_bank0o_rd_data_f2[RET],
+                           wayhit_f2[0] & ~btb_bank0e_rd_data_f2[CALL] & btb_bank0e_rd_data_f2[RET]};
+
+   // GHR
+
+   // Figure out how many valid branches are in the fetch group
+   // The equations below reduce to: fgmask_f2[n] = (ifc_fetch_addr_f2[3:1] <= n), for n = 0..6.
+assign fgmask_f2[6] = (~ifc_fetch_addr_f2[1]) | (~ifc_fetch_addr_f2[2]) | (
+    ~ifc_fetch_addr_f2[3]);
+assign fgmask_f2[5] = (~ifc_fetch_addr_f2[2]) | (~ifc_fetch_addr_f2[3]);
+assign fgmask_f2[4] = (~ifc_fetch_addr_f2[2] & ~ifc_fetch_addr_f2[1]) | (
+    ~ifc_fetch_addr_f2[3]);
+assign fgmask_f2[3] = (~ifc_fetch_addr_f2[3]);
+assign fgmask_f2[2] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[1]) | (
+    ~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2]);
+assign fgmask_f2[1] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2]);
+assign fgmask_f2[0] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2]
+    & ~ifc_fetch_addr_f2[1]);
+
+   // Bit n is set when some selected lane at or above n exists (reduction-OR of btb_sel_f2[7:n],
+   // which binds tighter than the &) and fgmask_f2[n] allows lane n; bit 7 has no fgmask term.
+   assign btb_sel_mask_f2[7:0] = {btb_sel_f2[7],
+                                  |btb_sel_f2[7:6] & fgmask_f2[6],
+                                  |btb_sel_f2[7:5] & fgmask_f2[5],
+                                  |btb_sel_f2[7:4] & fgmask_f2[4],
+                                  |btb_sel_f2[7:3] & fgmask_f2[3],
+                                  |btb_sel_f2[7:2] & fgmask_f2[2],
+                                  |btb_sel_f2[7:1] & fgmask_f2[1],
+                                  |btb_sel_f2[7:0] & fgmask_f2[0]};
+
+   // count the valids with masking based on first taken
+   assign num_valids[3:0] = countones(bht_valid_f2[7:0] & btb_sel_mask_f2[7:0]);
+
+   // Note that the following property holds
+   // P: prior ghr, H: history bit of last valid branch in line (could be 1 or 0)
+   // Num valid branches      What new GHR must be
+   // >=4                     000H
+   // 3                       P00H
+   // 2                       PP0H
+   // 1                       PPPH
+   // 0                       PPPP
+
+   // H: direction of the selected branch (any selected lane whose bht_dir_f2 bit is set).
+   assign final_h = |(btb_sel_f2[7:0] & bht_dir_f2[7:0]);
+
+   assign merged_ghr[`RV_BHT_GHR_RANGE] = ( ({`RV_BHT_GHR_SIZE{num_valids[3:0] >= 4'h4}} & {`RV_BHT_GHR_PAD, final_h }) | // 000H
+                                            ({`RV_BHT_GHR_SIZE{num_valids[3:0] == 4'h3}} & {`RV_BHT_GHR_PAD2, final_h}) | // P00H
+`ifdef RV_BHT_GHR_SIZE_2
+                                            ({`RV_BHT_GHR_SIZE{num_valids[3:0] == 4'h2}} & { 1'b0, final_h}) | // PP0H
+`else
+                                            ({`RV_BHT_GHR_SIZE{num_valids[3:0] == 4'h2}} & {fghr[`RV_BHT_GHR_SIZE-3:0], 1'b0, final_h}) | // PP0H
+`endif
+                                            ({`RV_BHT_GHR_SIZE{num_valids[3:0] == 4'h1}} & {fghr[`RV_BHT_GHR_SIZE-2:0], final_h}) | // PPPH
+
({`RV_BHT_GHR_SIZE{num_valids[3:0] == 4'h0}} & {fghr[`RV_BHT_GHR_RANGE]}) ); // PPPP
+
+   // GHR value loaded on a flush: taken directly from exu_mp_fghr.
+   logic [`RV_BHT_GHR_RANGE] exu_flush_ghr;
+   assign exu_flush_ghr[`RV_BHT_GHR_RANGE] = exu_mp_fghr[`RV_BHT_GHR_RANGE];
+
+   // Next fetch GHR, in priority order:
+   //   1. exu_flush_final                              -> flush GHR
+   //   2. F2 fetch request that is not a leak-one      -> merged GHR
+   //   3. otherwise                                    -> hold
+   assign fghr_ns[`RV_BHT_GHR_RANGE] = ( ({`RV_BHT_GHR_SIZE{exu_flush_final}} & exu_flush_ghr[`RV_BHT_GHR_RANGE]) |
+                                         ({`RV_BHT_GHR_SIZE{~exu_flush_final & ifc_fetch_req_f2_raw & ~leak_one_f2}} & merged_ghr[`RV_BHT_GHR_RANGE]) |
+                                         ({`RV_BHT_GHR_SIZE{~exu_flush_final & ~(ifc_fetch_req_f2_raw & ~leak_one_f2)}} & fghr[`RV_BHT_GHR_RANGE]));
+
+   rvdff #(`RV_BHT_GHR_SIZE) fetchghr (.*, .clk(active_clk), .din(fghr_ns[`RV_BHT_GHR_RANGE]), .dout(fghr[`RV_BHT_GHR_RANGE]));
+   assign ifu_bp_fghr_f2[`RV_BHT_GHR_RANGE] = fghr[`RV_BHT_GHR_RANGE];
+
+
+   // Way information sent with the fetch: a 2-bit encoded way per lane in the RV_BTB_48 build,
+   // one bit per lane otherwise.
+`ifdef RV_BTB_48
+   assign ifu_bp_way_f2 = {fetch_replway_bank7_enc[1:0],
+                           fetch_replway_bank6_enc[1:0],
+                           fetch_replway_bank5_enc[1:0],
+                           fetch_replway_bank4_enc[1:0],
+                           fetch_replway_bank3_enc[1:0],
+                           fetch_replway_bank2_enc[1:0],
+                           fetch_replway_bank1_enc[1:0],
+                           fetch_replway_bank0_enc[1:0]};
+
+`else
+   assign ifu_bp_way_f2[7:0] = way_raw[7:0];
+`endif
+   assign ifu_bp_hist1_f2[7:0] = hist1_raw[7:0];
+   assign ifu_bp_hist0_f2[7:0] = hist0_raw[7:0];
+   assign ifu_bp_pc4_f2[7:0] = pc4_raw[7:0];
+   // Valid bits are suppressed entirely while branch prediction is disabled.
+   assign ifu_bp_valid_f2[7:0] = wayhit_f2[7:0] & ~{8{dec_tlu_bpred_disable}};
+   assign ifu_bp_ret_f2[7:0] = pret_raw[7:0];
+
+
+   // Truncate taken and valid, used for detecting a taken branch in the fetch group
+   // Lanes below the fetch start are dropped by shifting right by ifc_fetch_addr_f2[3:1] with
+   // zero fill, so bit 0 of each result is the first lane of this fetch. For 0/1 selects this
+   // equals the explicit eight-way case it replaces ({k'b0, raw[7:k]} for select k); an X on
+   // the select now yields X instead of silently passing the unshifted vectors.
+   always_comb begin
+      bp_hist1_f2[7:0] = hist1_raw[7:0] >> ifc_fetch_addr_f2[3:1];
+      bp_valid_f2[7:0] = wayhit_f2[7:0] >> ifc_fetch_addr_f2[3:1];
+   end
+   // compute target
+   // Form the fetch group offset based on the btb hit location and the location of the branch within the 4 byte chunk
+   // Crossing: lane 0 is selected and holds a PC4 entry.
+   assign btb_fg_crossing_f2 = btb_sel_f2[0] & btb_rd_pc4_f2;
+
+   wire [2:0] btb_sel_f2_enc, btb_sel_f2_enc_shift;
+   assign btb_sel_f2_enc[2:0] = encode8_3(btb_sel_f2[7:0]);
+   // Same encoding with the select vector moved down one lane (used for PC4 entries).
+   assign btb_sel_f2_enc_shift[2:0] = encode8_3({1'b0,btb_sel_f2[7:1]});
+
+   // PC4 entries use the shifted lane number, PC2 entries the lane itself; a crossing forces 3'b111.
+   assign bp_total_branch_offset_f2[3:1] = (({3{ btb_rd_pc4_f2}} & btb_sel_f2_enc_shift[2:0]) |
+                                            ({3{~btb_rd_pc4_f2}} & btb_sel_f2_enc[2:0]) |
+                                            ({3{btb_fg_crossing_f2}}));
+
+
+   // Fetch address bits [31:4] captured on an F2 request that hit the icache and was not killed
+   // by a predicted-taken branch; used as the adder base when the branch crosses fetch groups.
+   logic [31:4] adder_pc_in_f2, ifc_fetch_adder_prior;
+   rvdffe #(28) faddrf2_ff (.*, .en(ifc_fetch_req_f2 & ~ifu_bp_kill_next_f2 & ic_hit_f2), .din(ifc_fetch_addr_f2[31:4]), .dout(ifc_fetch_adder_prior[31:4]));
+
+   assign ifu_bp_poffset_f2[11:0] = btb_rd_tgt_f2[11:0];
+
+   assign adder_pc_in_f2[31:4] = ( ({28{ btb_fg_crossing_f2}} & ifc_fetch_adder_prior[31:4]) |
+                                   ({28{~btb_fg_crossing_f2}} & ifc_fetch_addr_f2[31:4]));
+
+   // Predicted target from the branch PC and the BTB offset (rvbradder is defined elsewhere;
+   // presumably pc + sign-extended offset in halfword units -- confirm).
+   rvbradder predtgt_addr (.pc({adder_pc_in_f2[31:4], bp_total_branch_offset_f2[3:1]}),
+                           .offset(btb_rd_tgt_f2[11:0]),
+                           .dout(bp_btb_target_adder_f2[31:1])
+                           );
+   // mux in the return stack address here for a predicted return
+   // (& binds tighter than ?:, so the select is "ret and not call")
+   assign ifu_bp_btb_target_f2[31:1] = btb_rd_ret_f2 & ~btb_rd_call_f2 ?
rets_out[0][31:1] : bp_btb_target_adder_f2[31:1]; + + + // ---------------------------------------------------------------------- + // Return Stack + // ---------------------------------------------------------------------- + + rvbradder rs_addr (.pc({adder_pc_in_f2[31:4], bp_total_branch_offset_f2[3:1]}), + .offset({10'b0, btb_rd_pc4_f2, ~btb_rd_pc4_f2}), + .dout(bp_rs_call_target_f2[31:1]) + ); + + // Calls/Rets are always taken, so there shouldn't be a push and pop in the same fetch group + logic rs_overpop_correct, rsoverpop_valid_ns, rsoverpop_valid_f; + logic [31:1] rsoverpop_ns, rsoverpop_f; + logic rsunderpop_valid_ns, rsunderpop_valid_f, rs_underpop_correct; +`ifdef RS_COMMIT_EN + assign rs_overpop_correct = rsoverpop_valid_f & exu_flush_final & ~exu_mp_ret; + assign rs_underpop_correct = rsunderpop_valid_f & exu_flush_final & ~exu_mp_call; + + assign rsunderpop_valid_ns = (rs_push | (rsunderpop_valid_f & ~(exu_i0_br_call_e4 | exu_i1_br_call_e4))) & ~exu_flush_final; + assign rsoverpop_valid_ns = (rs_pop | (rsoverpop_valid_f & ~(exu_i0_br_ret_e4 | exu_i1_br_ret_e4))) & ~exu_flush_final; + assign rsoverpop_ns[31:1] = ( ({31{rs_pop}} & rets_out[0][31:1]) | + ({31{~rs_pop}} & rsoverpop_f[31:1]) ); + + rvdff #(33) retoverpop_ff (.*, .clk(active_clk), .din({rsunderpop_valid_ns, rsoverpop_valid_ns, rsoverpop_ns[31:1]}), .dout({rsunderpop_valid_f, rsoverpop_valid_f, rsoverpop_f[31:1]})); +`else + assign rs_overpop_correct = 1'b0; + assign rs_underpop_correct = 1'b0; + assign rsoverpop_f[31:1] = 'b0; +`endif // !`ifdef RS_COMMIT_EN + + logic e4_rs_correct; +`ifdef REAL_COMM_RS + assign rs_correct = exu_flush_upper_e2 & ~e4_rs_correct; +`else + assign e4_rs_correct = 1'b0; + assign rs_correct = 1'b0; +`endif + + assign rs_push = ((btb_rd_call_f2 & ~btb_rd_ret_f2 & ifu_bp_kill_next_f2) | (rs_overpop_correct & ~rs_underpop_correct)) & ~rs_correct & ~e4_rs_correct; + assign rs_pop = ((btb_rd_ret_f2 & ~btb_rd_call_f2 & ifu_bp_kill_next_f2) | (rs_underpop_correct & 
~rs_overpop_correct)) & ~rs_correct & ~e4_rs_correct; + assign rs_hold = ~rs_push & ~rs_pop & ~rs_overpop_correct & ~rs_underpop_correct & ~rs_correct & ~e4_rs_correct; + + + + // Fetch based + assign rets_in[0][31:1] = ( ({31{rs_overpop_correct & rs_underpop_correct}} & rsoverpop_f[31:1]) | + ({31{rs_push & rs_overpop_correct}} & rsoverpop_f[31:1]) | + ({31{rs_push & ~rs_overpop_correct}} & bp_rs_call_target_f2[31:1]) | +`ifdef REAL_COMM_RS + ({31{rs_correct}} & e1_rets_out[0][31:1]) | + ({31{e4_rs_correct}} & e4_rets_out[0][31:1]) | +`endif + ({31{rs_pop}} & rets_out[1][31:1]) ); + + assign rsenable[0] = ~rs_hold; + + for (i=0; i<`RV_RET_STACK_SIZE; i++) begin : retstack + + // for the last entry in the stack, we don't have a pop position + if(i==`RV_RET_STACK_SIZE-1) begin +`ifdef REAL_COMM_RS + assign rets_in[i][31:1] = ( ({31{rs_push}} & rets_out[i-1][31:1]) | + ({31{rs_correct}} & e1_rets_out[i][31:1]) | + ({31{e4_rs_correct}} & e4_rets_out[i][31:1]) ); +`else + assign rets_in[i][31:1] = rets_out[i-1][31:1]; +`endif + assign rsenable[i] = rs_push | rs_correct | e4_rs_correct; + end + else if(i>0) begin +`ifdef REAL_COMM_RS + assign rets_in[i][31:1] = ( ({31{rs_push}} & rets_out[i-1][31:1]) | + ({31{rs_pop}} & rets_out[i+1][31:1]) | + ({31{rs_correct}} & e1_rets_out[i][31:1]) | + ({31{e4_rs_correct}} & e4_rets_out[i][31:1]) ); +`else + assign rets_in[i][31:1] = ( ({31{rs_push}} & rets_out[i-1][31:1]) | + ({31{rs_pop}} & rets_out[i+1][31:1]) ); +`endif + assign rsenable[i] = rs_push | rs_pop | rs_correct | e4_rs_correct; + end + rvdffe #(31) rets_ff (.*, .en(rsenable[i]), .din(rets_in[i][31:1]), .dout(rets_out[i][31:1])); + + end : retstack + + +`ifdef REAL_COMM_RS + logic [31:1] e1_rs_call0_target_f2, e1_rs_call1_target_f2, e1_rs_call_target_f2, e4_rs_call0_target_f2, e4_rs_call1_target_f2, e4_rs_call_target_f2; + logic e1_null, e1_rs_push1, e1_rs_push2, e1_rs_pop1, e1_rs_pop2, e1_rs_hold; + logic e4_null, e4_rs_push1, e4_rs_push2, e4_rs_pop1, e4_rs_pop2, 
e4_rs_hold; + // E1 based + assign e4_rs_correct = dec_tlu_flush_lower_wb; + assign e1_null = exu_rets_e1_pkt.pc0_call & exu_rets_e1_pkt.pc1_ret; + assign e1_rs_push1 = (exu_rets_e1_pkt.pc0_call ^ exu_rets_e1_pkt.pc1_call) & ~e1_null & ~e4_rs_correct; + assign e1_rs_push2 = (exu_rets_e1_pkt.pc0_call & exu_rets_e1_pkt.pc1_call) & ~e4_rs_correct; + assign e1_rs_pop1 = (exu_rets_e1_pkt.pc0_ret ^ exu_rets_e1_pkt.pc1_ret) & ~e4_rs_correct; + assign e1_rs_pop2 = (exu_rets_e1_pkt.pc0_ret & exu_rets_e1_pkt.pc1_ret) & ~e4_rs_correct; + assign e1_rs_hold = (~e1_rs_push1 & ~e1_rs_push2 & ~e1_rs_pop1 & ~e1_rs_pop2 & ~e4_rs_correct); + + rvbradder e1_rs_addr0 (.pc({exu_i0_pc_e1[31:1]}), + .offset({10'b0, exu_rets_e1_pkt.pc0_pc4, ~exu_rets_e1_pkt.pc0_pc4}), + .dout(e1_rs_call0_target_f2[31:1]) + ); + rvbradder e1_rs_addr1 (.pc({exu_i1_pc_e1[31:1]}), + .offset({10'b0, exu_rets_e1_pkt.pc1_pc4, ~exu_rets_e1_pkt.pc1_pc4}), + .dout(e1_rs_call1_target_f2[31:1]) + ); + + assign e1_rs_call_target_f2[31:1] = exu_rets_e1_pkt.pc0_call ? 
e1_rs_call0_target_f2[31:1] : e1_rs_call1_target_f2[31:1]; + + assign e1_rets_in[0][31:1] = ( ({31{e1_rs_push1}} & e1_rs_call_target_f2[31:1]) | + ({31{e1_rs_push2}} & e1_rs_call1_target_f2[31:1]) | + ({31{e1_rs_pop1}} & e1_rets_out[1][31:1]) | + ({31{e1_rs_pop2}} & e1_rets_out[2][31:1]) | + ({31{e4_rs_correct}} & e4_rets_out[0][31:1]) | + ({31{e1_rs_hold}} & e1_rets_out[0][31:1]) ); + + // Next state of E1 stack entry 1. e1_rets_ff has no enable, so on hold the entry must recirculate its own value e1_rets_out[1], the same way the generate loop below does for entries i>1 (selecting e1_rets_out[0] here would overwrite entry 1 with a copy of entry 0). + assign e1_rets_in[1][31:1] = ( ({31{e1_rs_push1}} & e1_rets_out[0][31:1]) | + ({31{e1_rs_push2}} & e1_rs_call0_target_f2[31:1]) | + ({31{e1_rs_pop1}} & e1_rets_out[2][31:1]) | + ({31{e1_rs_pop2}} & e1_rets_out[3][31:1]) | + ({31{e4_rs_correct}} & e4_rets_out[1][31:1]) | + ({31{e1_rs_hold}} & e1_rets_out[1][31:1]) ); + + + for (i=0; i<`RV_RET_STACK_SIZE; i++) begin : e1_retstack + + // for the last entry in the stack, we don't have a pop position + if(i==`RV_RET_STACK_SIZE-1) + assign e1_rets_in[i][31:1] = ( ({31{e1_rs_push1}} & e1_rets_out[i-1][31:1]) | + ({31{e1_rs_push2}} & e1_rets_out[i-2][31:1]) | + ({31{e4_rs_correct}} & e4_rets_out[i][31:1]) | + ({31{e1_rs_hold}} & e1_rets_out[i][31:1]) ); + else if(i==`RV_RET_STACK_SIZE-2) + assign e1_rets_in[i][31:1] = ( ({31{e1_rs_push1}} & e1_rets_out[i-1][31:1]) | + ({31{e1_rs_push2}} & e1_rets_out[i-2][31:1]) | + ({31{e1_rs_pop1}} & e1_rets_out[i+1][31:1]) | + ({31{e4_rs_correct}} & e4_rets_out[i][31:1]) | + ({31{e1_rs_hold}} & e1_rets_out[i][31:1]) ); + + else if(i>1) + assign e1_rets_in[i][31:1] = ( ({31{e1_rs_push1}} & e1_rets_out[i-1][31:1]) | + ({31{e1_rs_push2}} & e1_rets_out[i-2][31:1]) | + ({31{e1_rs_pop1}} & e1_rets_out[i+1][31:1]) | + ({31{e1_rs_pop2}} & e1_rets_out[i+2][31:1]) | + ({31{e4_rs_correct}} & e4_rets_out[i][31:1]) | + ({31{e1_rs_hold}} & e1_rets_out[i][31:1]) ); + + + rvdff #(31) e1_rets_ff (.*, .din(e1_rets_in[i][31:1]), .dout(e1_rets_out[i][31:1])); + + end : e1_retstack + + // E4 based + assign e4_null = exu_rets_e4_pkt.pc0_call & exu_rets_e4_pkt.pc1_ret; + assign e4_rs_push1 = (exu_rets_e4_pkt.pc0_call ^ 
exu_rets_e4_pkt.pc1_call) & ~e4_null; + assign e4_rs_push2 = (exu_rets_e4_pkt.pc0_call & exu_rets_e4_pkt.pc1_call); + assign e4_rs_pop1 = (exu_rets_e4_pkt.pc0_ret ^ exu_rets_e4_pkt.pc1_ret); + assign e4_rs_pop2 = (exu_rets_e4_pkt.pc0_ret & exu_rets_e4_pkt.pc1_ret); + assign e4_rs_hold = (~e4_rs_push1 & ~e4_rs_push2 & ~e4_rs_pop1 & ~e4_rs_pop2); + + rvbradder e4_rs_addr0 (.pc({dec_tlu_i0_pc_e4[31:1]}), + .offset({10'b0, exu_rets_e4_pkt.pc0_pc4, ~exu_rets_e4_pkt.pc0_pc4}), + .dout(e4_rs_call0_target_f2[31:1]) + ); + rvbradder e4_rs_addr1 (.pc({dec_tlu_i1_pc_e4[31:1]}), + .offset({10'b0, exu_rets_e4_pkt.pc1_pc4, ~exu_rets_e4_pkt.pc1_pc4}), + .dout(e4_rs_call1_target_f2[31:1]) + ); + + assign e4_rs_call_target_f2[31:1] = exu_rets_e4_pkt.pc0_call ? e4_rs_call0_target_f2[31:1] : e4_rs_call1_target_f2[31:1]; + + assign e4_rets_in[0][31:1] = ( ({31{e4_rs_push1}} & e4_rs_call_target_f2[31:1]) | + ({31{e4_rs_push2}} & e4_rs_call1_target_f2[31:1]) | + ({31{e4_rs_pop1}} & e4_rets_out[1][31:1]) | + ({31{e4_rs_pop2}} & e4_rets_out[2][31:1]) | + ({31{e4_rs_hold}} & e4_rets_out[0][31:1]) ); + + // Next state of E4 stack entry 1. e4_rets_ff has no enable, so on hold the entry must recirculate its own value e4_rets_out[1], the same way the generate loop below does for entries i>1 (selecting e4_rets_out[0] here would overwrite entry 1 with a copy of entry 0). + assign e4_rets_in[1][31:1] = ( ({31{e4_rs_push1}} & e4_rets_out[0][31:1]) | + ({31{e4_rs_push2}} & e4_rs_call0_target_f2[31:1]) | + ({31{e4_rs_pop1}} & e4_rets_out[2][31:1]) | + ({31{e4_rs_pop2}} & e4_rets_out[3][31:1]) | + ({31{e4_rs_hold}} & e4_rets_out[1][31:1]) ); + + + for (i=0; i<`RV_RET_STACK_SIZE; i++) begin : e4_retstack + + // for the last entry in the stack, we don't have a pop position + if(i==`RV_RET_STACK_SIZE-1) + assign e4_rets_in[i][31:1] = ( ({31{e4_rs_push1}} & e4_rets_out[i-1][31:1]) | + ({31{e4_rs_push2}} & e4_rets_out[i-2][31:1]) | + ({31{e4_rs_hold}} & e4_rets_out[i][31:1]) ); + else if(i==`RV_RET_STACK_SIZE-2) + assign e4_rets_in[i][31:1] = ( ({31{e4_rs_push1}} & e4_rets_out[i-1][31:1]) | + ({31{e4_rs_push2}} & e4_rets_out[i-2][31:1]) | + ({31{e4_rs_pop1}} & e4_rets_out[i+1][31:1]) | + ({31{e4_rs_hold}} & e4_rets_out[i][31:1]) ); + + else if(i>1) + assign 
e4_rets_in[i][31:1] = ( ({31{e4_rs_push1}} & e4_rets_out[i-1][31:1]) | + ({31{e4_rs_push2}} & e4_rets_out[i-2][31:1]) | + ({31{e4_rs_pop1}} & e4_rets_out[i+1][31:1]) | + ({31{e4_rs_pop2}} & e4_rets_out[i+2][31:1]) | + ({31{e4_rs_hold}} & e4_rets_out[i][31:1]) ); + + + rvdff #(31) e4_rets_ff (.*, .din(e4_rets_in[i][31:1]), .dout(e4_rets_out[i][31:1])); + + end : e4_retstack + +`endif // `ifdef REAL_COMM_RS + + + + // ---------------------------------------------------------------------- + // WRITE + // ---------------------------------------------------------------------- + + + assign dec_tlu_error_wb = dec_tlu_br0_start_error_wb | dec_tlu_br0_error_wb | dec_tlu_br1_start_error_wb | dec_tlu_br1_error_wb; + assign dec_tlu_all_banks_error_wb = dec_tlu_br0_start_error_wb | (~dec_tlu_br0_error_wb & dec_tlu_br1_start_error_wb); + + assign dec_tlu_error_bank_wb[1:0] = (dec_tlu_br0_error_wb | dec_tlu_br0_start_error_wb) ? dec_tlu_br0_bank_wb[1:0] : dec_tlu_br1_bank_wb[1:0]; + assign btb_error_addr_wb[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = (dec_tlu_br0_error_wb | dec_tlu_br0_start_error_wb) ? dec_tlu_br0_addr_wb[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] : dec_tlu_br1_addr_wb[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + + assign dec_tlu_way_wb = (dec_tlu_br0_error_wb | dec_tlu_br0_start_error_wb) ? 
dec_tlu_br0_way_wb : dec_tlu_br1_way_wb; + + assign btb_valid = exu_mp_valid & ~dec_tlu_error_wb; + + assign btb_wr_tag[`RV_BTB_BTAG_SIZE-1:0] = exu_mp_btag[`RV_BTB_BTAG_SIZE-1:0]; + rvbtb_tag_hash rdtagf1(.hash(fetch_rd_tag_f1[`RV_BTB_BTAG_SIZE-1:0]), .pc({ifc_fetch_addr_f1[31:4], 3'b0})); + rvdff #(`RV_BTB_BTAG_SIZE) rdtagf (.*, .clk(active_clk), .din({fetch_rd_tag_f1[`RV_BTB_BTAG_SIZE-1:0]}), .dout({fetch_rd_tag_f2[`RV_BTB_BTAG_SIZE-1:0]})); + + assign btb_wr_data[16+`RV_BTB_BTAG_SIZE:0] = {btb_wr_tag[`RV_BTB_BTAG_SIZE-1:0], exu_mp_tgt[11:0], exu_mp_pc4, exu_mp_boffset, exu_mp_call | exu_mp_ja, exu_mp_ret | exu_mp_ja, btb_valid} ; + + assign exu_mp_valid_write = exu_mp_valid & exu_mp_ataken; +`ifdef RV_BTB_48 + + assign btb_wr_en_way0[3:0] = ( ({4{(exu_mp_way==2'b0) & exu_mp_valid_write & ~dec_tlu_error_wb}} & decode2_4(exu_mp_bank[1:0])) | + ({4{(dec_tlu_way_wb==2'b0) & dec_tlu_error_wb & ~dec_tlu_all_banks_error_wb}} & decode2_4(dec_tlu_error_bank_wb[1:0])) | + ({4{(dec_tlu_way_wb==2'b0) & dec_tlu_all_banks_error_wb}})); + + assign btb_wr_en_way1[3:0] = ( ({4{exu_mp_way[0] & exu_mp_valid_write & ~dec_tlu_error_wb}} & decode2_4(exu_mp_bank[1:0])) | + ({4{dec_tlu_way_wb[0] & dec_tlu_error_wb & ~dec_tlu_all_banks_error_wb}} & decode2_4(dec_tlu_error_bank_wb[1:0])) | + ({4{dec_tlu_way_wb[0] & dec_tlu_all_banks_error_wb}})); + + assign btb_wr_en_way2[3:0] = ( ({4{exu_mp_way[1] & exu_mp_valid_write & ~dec_tlu_error_wb}} & decode2_4(exu_mp_bank[1:0])) | + ({4{dec_tlu_way_wb[1] & dec_tlu_error_wb & ~dec_tlu_all_banks_error_wb}} & decode2_4(dec_tlu_error_bank_wb[1:0])) | + ({4{dec_tlu_way_wb[1] & dec_tlu_all_banks_error_wb}})); +`else // !`ifdef RV_BTB_48 + assign btb_wr_en_way0[3:0] = ( ({4{~exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}} & decode2_4(exu_mp_bank[1:0])) | + ({4{~dec_tlu_way_wb & dec_tlu_error_wb & ~dec_tlu_all_banks_error_wb}} & decode2_4(dec_tlu_error_bank_wb[1:0])) | + ({4{~dec_tlu_way_wb & dec_tlu_all_banks_error_wb}})); + + assign 
btb_wr_en_way1[3:0] = ( ({4{exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}} & decode2_4(exu_mp_bank[1:0])) | + ({4{dec_tlu_way_wb & dec_tlu_error_wb & ~dec_tlu_all_banks_error_wb}} & decode2_4(dec_tlu_error_bank_wb[1:0])) | + ({4{dec_tlu_way_wb & dec_tlu_all_banks_error_wb}})); + + +`endif + + assign btb_wr_addr[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = dec_tlu_error_wb ? btb_error_addr_wb[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] : exu_mp_addr[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]; + + logic [1:0] bht_wr_data0, bht_wr_data1, bht_wr_data2; + logic [7:0] bht_wr_en0, bht_wr_en1, bht_wr_en2; + + assign middle_of_bank = exu_mp_pc4 ^ exu_mp_boffset; + assign bht_wr_en0[7:0] = {8{exu_mp_valid & ~exu_mp_call & ~exu_mp_ret & ~exu_mp_ja}} & decode3_8({exu_mp_bank[1:0], middle_of_bank}); + assign bht_wr_en1[7:0] = {8{dec_tlu_br1_v_wb}} & decode3_8({dec_tlu_br1_bank_wb[1:0], dec_tlu_br1_middle_wb}); + assign bht_wr_en2[7:0] = {8{dec_tlu_br0_v_wb}} & decode3_8({dec_tlu_br0_bank_wb[1:0], dec_tlu_br0_middle_wb}); + + // Experiments show this is the best priority scheme for same bank/index writes at the same time. 
+ assign bht_wr_data0[1:0] = exu_mp_hist[1:0]; // lowest priority + assign bht_wr_data1[1:0] = dec_tlu_br1_hist_wb[1:0]; + assign bht_wr_data2[1:0] = dec_tlu_br0_hist_wb[1:0]; // highest priority + + + + logic [`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO] bht_rd_addr_f1, bht_wr_addr0, bht_wr_addr1, bht_wr_addr2; + + logic [`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO] mp_hashed, br0_hashed_wb, br1_hashed_wb, bht_rd_addr_hashed_f1; + rvbtb_ghr_hash mpghrhs (.hashin(exu_mp_addr[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]), .ghr(exu_mp_eghr[`RV_BHT_GHR_RANGE]), .hash(mp_hashed[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO])); + rvbtb_ghr_hash br0ghrhs (.hashin(dec_tlu_br0_addr_wb[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]), .ghr(dec_tlu_br0_fghr_wb[`RV_BHT_GHR_RANGE]), .hash(br0_hashed_wb[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO])); + rvbtb_ghr_hash br1ghrhs (.hashin(dec_tlu_br1_addr_wb[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]), .ghr(dec_tlu_br1_fghr_wb[`RV_BHT_GHR_RANGE]), .hash(br1_hashed_wb[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO])); + rvbtb_ghr_hash fghrhs (.hashin(btb_rd_addr_f1[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO]), .ghr(fghr_ns[`RV_BHT_GHR_RANGE]), .hash(bht_rd_addr_hashed_f1[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO])); + + assign bht_wr_addr0[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO] = mp_hashed[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO]; + assign bht_wr_addr1[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO] = br1_hashed_wb[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO]; + assign bht_wr_addr2[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO] = br0_hashed_wb[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO]; + assign bht_rd_addr_f1[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO] = bht_rd_addr_hashed_f1[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO]; + + + // ---------------------------------------------------------------------- + // Structures. 
Using FLOPS + // ---------------------------------------------------------------------- + // BTB + // Entry -> tag[`RV_BTB_BTAG_SIZE-1:0], toffset[11:0], pc4, boffset, call, ret, valid + + + for (j=0 ; j direction, strength + // + //----------------------------------------------------------------------------- + + logic [7:0] [(`RV_BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0][1:0] bht_bank_wr_data ; + logic [7:0] [`RV_BHT_ARRAY_DEPTH-1:0] [1:0] bht_bank_rd_data_out ; + logic [1:0] bht_bank0_rd_data_f2_in, bht_bank1_rd_data_f2_in, bht_bank2_rd_data_f2_in, bht_bank3_rd_data_f2_in; + logic [1:0] bht_bank4_rd_data_f2_in, bht_bank5_rd_data_f2_in, bht_bank6_rd_data_f2_in, bht_bank7_rd_data_f2_in; + logic [7:0] [(`RV_BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0] bht_bank_clken ; + logic [7:0] [(`RV_BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0] bht_bank_clk ; + logic [7:0] [(`RV_BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0] bht_bank_sel ; + + for ( i=0; i<8; i++) begin : BANKS + for (genvar k=0 ; k < (`RV_BHT_ARRAY_DEPTH)/NUM_BHT_LOOP ; k++) begin : BHT_CLK_GROUP + assign bht_bank_clken[i][k] = (bht_wr_en0[i] & ((bht_wr_addr0[`RV_BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) | + (bht_wr_en1[i] & ((bht_wr_addr1[`RV_BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) | + (bht_wr_en2[i] & ((bht_wr_addr2[`RV_BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)); + + rvclkhdr bht_bank_grp_cgc ( .en(bht_bank_clken[i][k]), .l1clk(bht_bank_clk[i][k]), .* ); + + for (j=0 ; j cdecode.e + +// 2) espresso -Dso -oeqntott cdecode.e | addassign > compress_equations + +// to generate the legal (16b compressed instruction is legal) equation below: + +// 1) coredecode -in cdecode -legal > clegal.e + +// 2) espresso -Dso -oeqntott clegal.e | addassign > clegal_equation + + + + + +// espresso decodes +assign rdrd = (!i[14]&i[6]&i[1]) | (!i[15]&i[14]&i[11]&i[0]) | (!i[14]&i[5]&i[1]) | ( + !i[15]&i[14]&i[10]&i[0]) | (!i[14]&i[4]&i[1]) | (!i[15]&i[14]&i[9] + &i[0]) | 
(!i[14]&i[3]&i[1]) | (!i[15]&i[14]&!i[8]&i[0]) | (!i[14] + &i[2]&i[1]) | (!i[15]&i[14]&i[7]&i[0]) | (!i[15]&i[1]) | (!i[15] + &!i[13]&i[0]); + +assign rdrs1 = (!i[14]&i[12]&i[11]&i[1]) | (!i[14]&i[12]&i[10]&i[1]) | (!i[14] + &i[12]&i[9]&i[1]) | (!i[14]&i[12]&i[8]&i[1]) | (!i[14]&i[12]&i[7] + &i[1]) | (!i[14]&!i[12]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14] + &i[12]&i[6]&i[1]) | (!i[14]&i[12]&i[5]&i[1]) | (!i[14]&i[12]&i[4] + &i[1]) | (!i[14]&i[12]&i[3]&i[1]) | (!i[14]&i[12]&i[2]&i[1]) | ( + !i[15]&!i[14]&!i[13]&i[0]) | (!i[15]&!i[14]&i[1]); + +assign rs2rs2 = (i[15]&i[6]&i[1]) | (i[15]&i[5]&i[1]) | (i[15]&i[4]&i[1]) | ( + i[15]&i[3]&i[1]) | (i[15]&i[2]&i[1]) | (i[15]&i[14]&i[1]); + +assign rdprd = (i[15]&!i[14]&!i[13]&i[0]); + +assign rdprs1 = (i[15]&!i[13]&i[0]) | (i[15]&i[14]&i[0]) | (i[14]&!i[1]&!i[0]); + +assign rs2prs2 = (i[15]&!i[14]&!i[13]&i[11]&i[10]&i[0]) | (i[15]&!i[1]&!i[0]); + +assign rs2prd = (!i[15]&!i[1]&!i[0]); + +assign uimm9_2 = (!i[14]&!i[1]&!i[0]); + +assign ulwimm6_2 = (!i[15]&i[14]&!i[1]&!i[0]); + +assign ulwspimm7_2 = (!i[15]&i[14]&i[1]); + +assign rdeq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]); + +assign rdeq1 = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14] + &i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9] + &!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5] + &!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3] + &!i[2]&i[1]) | (!i[15]&!i[14]&i[13]); + +assign rs1eq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]) | (i[14] + &i[1]) | (!i[14]&!i[1]&!i[0]); + +assign sbroffset8_1 = (i[15]&i[14]&i[0]); + +assign simm9_4 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]); + +assign simm5_0 = (!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (!i[15]&!i[13]&i[0]); + +assign sjaloffset11_1 = (!i[14]&i[13]); + +assign sluimm17_12 = (!i[15]&i[14]&i[13]&i[7]) | (!i[15]&i[14]&i[13]&!i[8]) | ( + !i[15]&i[14]&i[13]&i[9]) | (!i[15]&i[14]&i[13]&i[10]) | (!i[15]&i[14] 
+ &i[13]&i[11]); + +assign uimm5_0 = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (!i[15]&!i[14]&i[1]); + +assign uswimm6_2 = (i[15]&!i[1]&!i[0]); + +assign uswspimm7_2 = (i[15]&i[14]&i[1]); + +assign o[31] = 1'b0; + +assign o[30] = (i[15]&!i[14]&!i[13]&i[10]&!i[6]&!i[5]&i[0]) | (i[15]&!i[14] + &!i[13]&!i[11]&i[10]&i[0]); + +assign o[29] = 1'b0; + +assign o[28] = 1'b0; + +assign o[27] = 1'b0; + +assign o[26] = 1'b0; + +assign o[25] = 1'b0; + +assign o[24] = 1'b0; + +assign o[23] = 1'b0; + +assign o[22] = 1'b0; + +assign o[21] = 1'b0; + +assign o[20] = (!i[14]&i[12]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[6]&!i[5]&!i[4] + &!i[3]&!i[2]&i[1]); + +assign o[19] = 1'b0; + +assign o[18] = 1'b0; + +assign o[17] = 1'b0; + +assign o[16] = 1'b0; + +assign o[15] = 1'b0; + +assign o[14] = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (i[15]&!i[14]&!i[13]&!i[10] + &i[0]) | (i[15]&!i[14]&!i[13]&i[6]&i[0]) | (i[15]&!i[14]&!i[13]&i[5] + &i[0]); + +assign o[13] = (i[15]&!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (i[15]&!i[14]&!i[13] + &i[11]&i[6]&i[0]) | (i[14]&!i[0]); + +assign o[12] = (i[15]&!i[14]&!i[13]&i[6]&i[5]&i[0]) | (i[15]&!i[14]&!i[13]&!i[11] + &i[0]) | (i[15]&!i[14]&!i[13]&!i[10]&i[0]) | (!i[15]&!i[14]&i[1]) | ( + i[15]&i[14]&i[13]); + +assign o[11] = 1'b0; + +assign o[10] = 1'b0; + +assign o[9] = 1'b0; + +assign o[8] = 1'b0; + +assign o[7] = 1'b0; + +assign o[6] = (i[15]&!i[14]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&!i[0]) | (!i[14]&i[13]) | ( + i[15]&i[14]&i[0]); + +assign o[5] = (i[15]&!i[0]) | (i[15]&i[11]&i[10]) | (i[13]&!i[8]) | (i[13]&i[7]) | ( + i[13]&i[9]) | (i[13]&i[10]) | (i[13]&i[11]) | (!i[14]&i[13]) | ( + i[15]&i[14]); + +assign o[4] = (!i[14]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[0]) | (!i[15]&!i[14] + &!i[0]) | (!i[14]&i[6]&!i[0]) | (!i[15]&i[14]&i[0]) | (!i[14]&i[5] + &!i[0]) | (!i[14]&i[4]&!i[0]) | (!i[14]&!i[13]&i[0]) | (!i[14]&i[3] + &!i[0]) | (!i[14]&i[2]&!i[0]); + +assign o[3] = (!i[14]&i[13]); + +assign o[2] = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14] + 
&i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9] + &!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5] + &!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3] + &!i[2]&i[1]) | (i[15]&!i[14]&!i[12]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2] + &!i[0]) | (!i[15]&i[13]&!i[8]) | (!i[15]&i[13]&i[7]) | (!i[15]&i[13] + &i[9]) | (!i[15]&i[13]&i[10]) | (!i[15]&i[13]&i[11]) | (!i[14]&i[13]); + +// 32b instruction has lower two bits 2'b11 + +assign o[1] = 1'b1; + +assign o[0] = 1'b1; + +assign legal = (!i[13]&!i[12]&i[11]&i[1]&!i[0]) | (!i[13]&!i[12]&i[6]&i[1]&!i[0]) | ( + !i[15]&!i[13]&i[11]&!i[1]) | (!i[13]&!i[12]&i[5]&i[1]&!i[0]) | ( + !i[13]&!i[12]&i[10]&i[1]&!i[0]) | (!i[15]&!i[13]&i[6]&!i[1]) | ( + i[15]&!i[12]&!i[1]&i[0]) | (!i[13]&!i[12]&i[9]&i[1]&!i[0]) | (!i[12] + &i[6]&!i[1]&i[0]) | (!i[15]&!i[13]&i[5]&!i[1]) | (!i[13]&!i[12]&i[8] + &i[1]&!i[0]) | (!i[12]&i[5]&!i[1]&i[0]) | (!i[15]&!i[13]&i[10]&!i[1]) | ( + !i[13]&!i[12]&i[7]&i[1]&!i[0]) | (i[12]&i[11]&!i[10]&!i[1]&i[0]) | ( + !i[15]&!i[13]&i[9]&!i[1]) | (!i[13]&!i[12]&i[4]&i[1]&!i[0]) | (i[13] + &i[12]&!i[1]&i[0]) | (!i[15]&!i[13]&i[8]&!i[1]) | (!i[13]&!i[12]&i[3] + &i[1]&!i[0]) | (i[13]&i[4]&!i[1]&i[0]) | (!i[13]&!i[12]&i[2]&i[1] + &!i[0]) | (!i[15]&!i[13]&i[7]&!i[1]) | (i[13]&i[3]&!i[1]&i[0]) | ( + i[13]&i[2]&!i[1]&i[0]) | (i[14]&!i[13]&!i[1]) | (!i[14]&!i[12]&!i[1] + &i[0]) | (i[15]&!i[13]&i[12]&i[1]&!i[0]) | (!i[15]&!i[13]&!i[12]&i[1] + &!i[0]) | (!i[15]&!i[13]&i[12]&!i[1]) | (i[14]&!i[13]&!i[0]); + + + + +endmodule diff --git a/design/ifu/ifu_ic_mem.sv b/design/ifu/ifu_ic_mem.sv new file mode 100644 index 0000000..7140728 --- /dev/null +++ b/design/ifu/ifu_ic_mem.sv @@ -0,0 +1,559 @@ +//******************************************************************************** +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or it's affiliates. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//******************************************************************************** +//////////////////////////////////////////////////// +// ICACHE DATA & TAG MODULE WRAPPER // +///////////////////////////////////////////////////// +// Instantiates the tag array (IC_TAG, ic_tag_inst) and the data array (IC_DATA, ic_data_inst). Apart from ic_wr_en, ic_debug_addr and ic_rw_addr, which are sliced explicitly, every port is connected by name through .* +module ifu_ic_mem + ( + input logic clk, + input logic rst_l, + input logic clk_override, + input logic dec_tlu_core_ecc_disable, + + input logic [31:3] ic_rw_addr, + input logic [3:0] ic_wr_en , // Which way to write + input logic ic_rd_en , // Read enable + + input logic [15:2] ic_debug_addr, // Read/Write address to the Icache. + input logic ic_debug_rd_en, // Icache debug rd + input logic ic_debug_wr_en, // Icache debug wr + input logic ic_debug_tag_array, // Debug tag array + input logic [3:0] ic_debug_way, // Debug way. Rd or Wr. + input logic [127:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. + input logic ic_sel_premux_data, // Select the pre_muxed data + + + +`ifdef RV_ICACHE_ECC + input logic [83:0] ic_wr_data, // Data to fill to the Icache. With ECC + output logic [167:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + output logic [24:0] ictag_debug_rd_data,// Debug icache tag. + input logic [41:0] ic_debug_wr_data, // Debug wr cache. +`else + input logic [67:0] ic_wr_data, // Data to fill to the Icache. With Parity + output logic [135:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits.
F2 stage. With Parity + output logic [20:0] ictag_debug_rd_data,// Debug icache tag. + input logic [33:0] ic_debug_wr_data, // Debug wr cache. +`endif + + + input logic [3:0] ic_tag_valid, // Valid from the I$ tag valid outside (in flops). + + output logic [3:0] ic_rd_hit, // ic_rd_hit[3:0] + output logic ic_tag_perr, // Tag Parity error + input logic scan_mode + ) ; + +`include "global.h" + // NOTE(review): ICACHE_TAG_HIGH, ICACHE_TAG_LOW, ICACHE_TAG_DEPTH and ICACHE_IC_DEPTH are not declared in this module; presumably they come from global.h - confirm. + + // Tag array: receives the full ic_rw_addr[31:3]; the debug address is trimmed to [ICACHE_TAG_HIGH-1:2]. + IC_TAG #( .ICACHE_TAG_HIGH(ICACHE_TAG_HIGH) , + .ICACHE_TAG_LOW(ICACHE_TAG_LOW) , + .ICACHE_TAG_DEPTH(ICACHE_TAG_DEPTH) + ) ic_tag_inst + ( + .*, + .ic_wr_en (ic_wr_en[3:0]), + .ic_debug_addr(ic_debug_addr[ICACHE_TAG_HIGH-1:2]), + .ic_rw_addr (ic_rw_addr[31:3]) + ) ; + + // Data array: receives only the low-order bits ic_rw_addr[ICACHE_TAG_HIGH-1:3]; the debug address is trimmed the same way as for the tag array. + IC_DATA #( .ICACHE_TAG_HIGH(ICACHE_TAG_HIGH) , + .ICACHE_TAG_LOW(ICACHE_TAG_LOW) , + .ICACHE_IC_DEPTH(ICACHE_IC_DEPTH) + ) ic_data_inst + ( + .*, + .ic_wr_en (ic_wr_en[3:0]), + .ic_debug_addr(ic_debug_addr[ICACHE_TAG_HIGH-1:2]), + .ic_rw_addr (ic_rw_addr[ICACHE_TAG_HIGH-1:3]) + ) ; + + endmodule + + +///////////////////////////////////////////////// +////// ICACHE DATA MODULE //////////////////// +///////////////////////////////////////////////// +module IC_DATA #(parameter ICACHE_TAG_HIGH = 16 , + ICACHE_TAG_LOW=6 , + ICACHE_IC_DEPTH=1024 + ) + ( + input logic clk, + input logic rst_l, + input logic clk_override, + + input logic [ICACHE_TAG_HIGH-1:3] ic_rw_addr, + input logic [3:0] ic_wr_en, + input logic ic_rd_en, // Read enable +`ifdef RV_ICACHE_ECC + input logic [83:0] ic_wr_data, // Data to fill to the Icache. With ECC + output logic [167:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [41:0] ic_debug_wr_data, // Debug wr cache. +`else + input logic [67:0] ic_wr_data, // Data to fill to the Icache. With Parity + output logic [135:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With Parity + input logic [33:0] ic_debug_wr_data, // Debug wr cache. 
+`endif + + + input logic [ICACHE_TAG_HIGH-1:2] ic_debug_addr, // Read/Write addresss to the Icache. + input logic ic_debug_rd_en, // Icache debug rd + input logic ic_debug_wr_en, // Icache debug wr + input logic ic_debug_tag_array, // Debug tag array + input logic [3:0] ic_debug_way, // Debug way. Rd or Wr. + input logic [127:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. + input logic ic_sel_premux_data, // Select the pre_muxed data + + input logic [3:0] ic_rd_hit, + + input logic scan_mode + + ) ; + + logic [5:4] ic_rw_addr_ff; + + + logic [3:0][3:0] ic_b_sb_wren; // way, bank + + logic ic_debug_sel_sb0 ; + logic ic_debug_sel_sb1 ; + logic ic_debug_sel_sb2 ; + logic ic_debug_sel_sb3 ; + + +`ifdef RV_ICACHE_ECC + logic [3:0] [167:0] bank_set_dout; + logic [3:0][167:0] wb_dout ; // + logic [3:0][41:0] ic_sb_wr_data; +`else + logic [3:0] [135:0] bank_set_dout; + logic [3:0] [135:0] wb_dout ; // bank , way , size + logic [3:0] [33:0] ic_sb_wr_data; +`endif + + logic [3:0] ic_bank_way_clken; // bank , way + logic [3:0] ic_bank_way_clk ; // bank , way + logic ic_b_rden; + logic [3:0] ic_debug_rd_way_en; // debug wr_way + logic [3:0] ic_debug_rd_way_en_ff; // debug wr_way + logic [3:0] ic_debug_wr_way_en; // debug wr_way + logic [ICACHE_TAG_HIGH-1:4] ic_rw_addr_q; + + assign ic_debug_rd_way_en[3:0] = {4{ic_debug_rd_en & ~ic_debug_tag_array}} & ic_debug_way[3:0] ; + assign ic_debug_wr_way_en[3:0] = {4{ic_debug_wr_en & ~ic_debug_tag_array}} & ic_debug_way[3:0] ; + + assign ic_b_sb_wren[0][3:0] = (ic_wr_en[3:0] & {4{~ic_rw_addr[3]}} ) | + (ic_debug_wr_way_en[3:0] & {4{ic_debug_addr[3:2] == 2'b00}}) ; + assign ic_b_sb_wren[1][3:0] = (ic_wr_en[3:0] & {4{~ic_rw_addr[3]}} ) | + (ic_debug_wr_way_en[3:0] & {4{ic_debug_addr[3:2] == 2'b01}}) ; + assign ic_b_sb_wren[2][3:0] = (ic_wr_en[3:0] & {4{ic_rw_addr[3]}} ) | + (ic_debug_wr_way_en[3:0] & {4{ic_debug_addr[3:2] == 2'b10}}) ; + assign ic_b_sb_wren[3][3:0] = (ic_wr_en[3:0] & {4{ic_rw_addr[3]}} ) | + 
(ic_debug_wr_way_en[3:0] & {4{ic_debug_addr[3:2] == 2'b11}}) ; + + assign ic_debug_sel_sb0 = (ic_debug_addr[3:2] == 2'b00 ) ; + assign ic_debug_sel_sb1 = (ic_debug_addr[3:2] == 2'b01 ) ; + assign ic_debug_sel_sb2 = (ic_debug_addr[3:2] == 2'b10 ) ; + assign ic_debug_sel_sb3 = (ic_debug_addr[3:2] == 2'b11 ) ; + +`ifdef RV_ICACHE_ECC + + assign ic_sb_wr_data[0][41:0] = (ic_debug_sel_sb0 & ic_debug_wr_en) ? {ic_debug_wr_data[41:0]} : + ic_wr_data[41:0] ; + assign ic_sb_wr_data[1][41:0] = (ic_debug_sel_sb1 & ic_debug_wr_en) ? {ic_debug_wr_data[41:0]} : + ic_wr_data[83:42] ; + assign ic_sb_wr_data[2][41:0] = (ic_debug_sel_sb2 & ic_debug_wr_en) ? {ic_debug_wr_data[41:0]} : + ic_wr_data[41:0] ; + assign ic_sb_wr_data[3][41:0] = (ic_debug_sel_sb3 & ic_debug_wr_en) ? {ic_debug_wr_data[41:0]} : + ic_wr_data[83:42] ; +`else + assign ic_sb_wr_data[0][33:0] = (ic_debug_sel_sb0 & ic_debug_wr_en) ? ic_debug_wr_data[33:0] : + ic_wr_data[33:0] ; + assign ic_sb_wr_data[1][33:0] = (ic_debug_sel_sb1 & ic_debug_wr_en) ? ic_debug_wr_data[33:0] : + ic_wr_data[67:34] ; + assign ic_sb_wr_data[2][33:0] = (ic_debug_sel_sb2 & ic_debug_wr_en) ? ic_debug_wr_data[33:0] : + ic_wr_data[33:0] ; + assign ic_sb_wr_data[3][33:0] = (ic_debug_sel_sb3 & ic_debug_wr_en) ? ic_debug_wr_data[33:0] : + ic_wr_data[67:34] ; +`endif + + +// bank read enables + + assign ic_b_rden = (ic_rd_en | ic_debug_rd_en ); + + assign ic_bank_way_clken[3:0] = ({4{ic_b_rden | clk_override }}) | + ic_b_sb_wren[0][3:0] | + ic_b_sb_wren[1][3:0] | + ic_b_sb_wren[2][3:0] | + ic_b_sb_wren[3][3:0] ; + + + + assign ic_rw_addr_q[ICACHE_TAG_HIGH-1:4] = (ic_debug_rd_en | ic_debug_wr_en) ? 
+ ic_debug_addr[ICACHE_TAG_HIGH-1:4] : + ic_rw_addr[ICACHE_TAG_HIGH-1:4] ; + + logic ic_debug_rd_en_ff; + + rvdff #(2) adr_ff (.*, + .din ({ic_rw_addr_q[5:4]}), + .dout({ic_rw_addr_ff[5:4]})); + + rvdff #(5) debug_rd_wy_ff (.*, + .din ({ic_debug_rd_way_en[3:0], ic_debug_rd_en}), + .dout({ic_debug_rd_way_en_ff[3:0], ic_debug_rd_en_ff})); + +localparam NUM_WAYS=4 ; +localparam NUM_SUBBANKS=4 ; + + + for (genvar i=0; i F1 -> F2 -> A +//******************************************************************************** + +module ifu_mem_ctl + import swerv_types::*; +( + input logic clk, + input logic free_clk, // free clock always except during pause + input logic active_clk, // Active always except during pause + input logic rst_l, + + input logic exu_flush_final, // Flush from the pipeline. + input logic dec_tlu_flush_err_wb, // Flush from the pipeline due to perr. + + input logic [31:1] fetch_addr_f1, // Fetch Address byte aligned always. F1 stage. + input logic ifc_fetch_uncacheable_f1, // The fetch request is uncacheable space. F1 stage + input logic ifc_fetch_req_f1, // Fetch request. Comes with the address. F1 stage + input logic ifc_fetch_req_f1_raw, // Fetch request without some qualifications. Used for clock-gating. F1 stage + input logic ifc_iccm_access_f1, // This request is to the ICCM. Do not generate misses to the bus. + input logic ifc_region_acc_fault_f1, // Access fault. in ICCM region but offset is outside defined ICCM. + input logic ifc_dma_access_ok, // It is OK to give dma access to the ICCM. (ICCM is not busy this cycle). + input logic dec_tlu_fence_i_wb, // Fence.i instruction is committing. Clear all Icache valids. 
+ + + input logic [16:6] ifu_icache_error_index, // Index with parity/ecc error + input logic ifu_icache_error_val, // Parity/Ecc error + input logic ifu_icache_sb_error_val, // single bit iccm error + input logic [7:1] ifu_bp_inst_mask_f2, // tell ic which valids to kill because of a taken branch, right justified + + output logic ifu_miss_state_idle, // No icache misses are outstanding. + output logic ifu_ic_mb_empty, // Continue with normal fetching. This does not mean that miss is finished. + output logic ic_dma_active , // In the middle of servicing dma request to ICCM. Do not make any new requests. + output logic ic_write_stall, // Stall fetch the cycle we are writing the cache. + +/// PMU signals + output logic ifu_pmu_ic_miss, // IC miss event + output logic ifu_pmu_ic_hit, // IC hit event + output logic ifu_pmu_bus_error, // Bus error event + output logic ifu_pmu_bus_busy, // Bus busy event + output logic ifu_pmu_bus_trxn, // Bus transaction + + // AXI Write Channels - IFU never writes. 
So, 0 out mostly + output logic ifu_axi_awvalid, + input logic ifu_axi_awready, + output logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_awid, + output logic [31:0] ifu_axi_awaddr, + output logic [3:0] ifu_axi_awregion, + output logic [7:0] ifu_axi_awlen, + output logic [2:0] ifu_axi_awsize, + output logic [1:0] ifu_axi_awburst, + output logic ifu_axi_awlock, + output logic [3:0] ifu_axi_awcache, + output logic [2:0] ifu_axi_awprot, + output logic [3:0] ifu_axi_awqos, + + output logic ifu_axi_wvalid, + input logic ifu_axi_wready, + output logic [63:0] ifu_axi_wdata, + output logic [7:0] ifu_axi_wstrb, + output logic ifu_axi_wlast, + + input logic ifu_axi_bvalid, + output logic ifu_axi_bready, + input logic [1:0] ifu_axi_bresp, + input logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_bid, + + // AXI Read Channels + output logic ifu_axi_arvalid, + input logic ifu_axi_arready, + output logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_arid, + output logic [31:0] ifu_axi_araddr, + output logic [3:0] ifu_axi_arregion, + output logic [7:0] ifu_axi_arlen, + output logic [2:0] ifu_axi_arsize, + output logic [1:0] ifu_axi_arburst, + output logic ifu_axi_arlock, + output logic [3:0] ifu_axi_arcache, + output logic [2:0] ifu_axi_arprot, + output logic [3:0] ifu_axi_arqos, + + input logic ifu_axi_rvalid, + output logic ifu_axi_rready, + input logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_rid, + input logic [63:0] ifu_axi_rdata, + input logic [1:0] ifu_axi_rresp, + input logic ifu_axi_rlast, + + /// SCVI Bus interface + input logic ifu_bus_clk_en, + + + input logic dma_iccm_req, // dma iccm command (read or write) + input logic [31:0] dma_mem_addr, // dma address + input logic [2:0] dma_mem_sz, // size + input logic dma_mem_write, // write + input logic [63:0] dma_mem_wdata, // write data + + output logic iccm_dma_ecc_error,// Data read from iccm has an ecc error + output logic iccm_dma_rvalid, // Data read from iccm is valid + output logic [63:0] iccm_dma_rdata, // dma data read from iccm + output logic iccm_ready, // iccm 
ready to accept new command. + + +// I$ & ITAG Ports + output logic [31:3] ic_rw_addr, // Read/Write address to the Icache. + output logic [3:0] ic_wr_en, // Icache write enable, when filling the Icache. + output logic ic_rd_en, // Icache read enable. + +`ifdef RV_ICACHE_ECC + output logic [83:0] ic_wr_data, // Data to fill to the Icache. With ECC + input logic [167:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [24:0] ictag_debug_rd_data,// Debug icache tag. + output logic [41:0] ic_debug_wr_data, // Debug wr cache. + output logic [41:0] ifu_ic_debug_rd_data, // debug data read +`else + output logic [67:0] ic_wr_data, // Data to fill to the Icache. With Parity + input logic [135:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With Parity + input logic [20:0] ictag_debug_rd_data,// Debug icache tag. + output logic [33:0] ic_debug_wr_data, // Debug wr cache. + output logic [33:0] ifu_ic_debug_rd_data, // debug data read +`endif + + output logic [15:2] ic_debug_addr, // Read/Write address to the Icache. + output logic ic_debug_rd_en, // Icache debug rd + output logic ic_debug_wr_en, // Icache debug wr + output logic ic_debug_tag_array, // Debug tag array + output logic [3:0] ic_debug_way, // Debug way. Rd or Wr. + + + output logic [3:0] ic_tag_valid, // Valid bits when accessing the Icache. One valid bit per way. F2 stage + + input logic [3:0] ic_rd_hit, // Compare hits from Icache tags. Per way. F2 stage + input logic ic_tag_perr, // Icache Tag parity error + +`ifdef RV_ICCM_ENABLE + // ICCM ports + output logic [`RV_ICCM_BITS-1:2] iccm_rw_addr, // ICCM read/write address. + output logic iccm_wren, // ICCM write enable (through the DMA) + output logic iccm_rden, // ICCM read enable. + output logic [77:0] iccm_wr_data, // ICCM write data. + output logic [2:0] iccm_wr_size, // ICCM write location within DW. + + input logic [155:0] iccm_rd_data, // Data read from ICCM.
+`endif + + + // IFU control signals + output logic ic_hit_f2, // Hit in Icache(if Icache access) or ICCM access( ICCM always has ic_hit_f2) + output logic ic_crit_wd_rdy, // Critical fetch is ready to be bypassed. + output logic ic_access_fault_f2, // Access fault (bus error or ICCM access in region but out of offset range). + output logic ic_rd_parity_final_err, // This fetch has a tag parity error. + output logic iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc error. + output logic iccm_rd_ecc_double_err, // This fetch has a double ICCM ecc error. + output logic iccm_dma_sb_error, // Single Bit ECC error from a DMA access + output logic [7:0] ic_fetch_val_f2, // Fetch valid bits [7:0], derived from vaddr_f2[3:1] and the branch mask. To the Aligner. + output logic [127:0] ic_data_f2, // Data read from Icache or ICCM. To the Aligner. + output icache_err_pkt_t ic_error_f2 , // Parity or ECC bits for the Icache Data + output logic ifu_icache_fetch_f2 , // Driven by sel_ic_data: the F2 fetch is neither a critical-word bypass nor an ICCM access. + output logic [127:0] ic_premux_data, // Premuxed data to be muxed with Icache data + output logic ic_sel_premux_data, // Select premux data. + +///// Debug + input cache_debug_pkt_t dec_tlu_ic_diag_pkt , // Icache/tag debug read/write packet + input logic dec_tlu_core_ecc_disable, // disable the ecc checking and flagging + output logic ifu_ic_debug_rd_data_valid, // debug data valid.
+ + + input logic scan_mode + ); + +`include "global.h" + +// Create different defines for ICACHE and ICCM enable combinations +`ifdef RV_ICCM_ENABLE + `ifdef RV_ICACHE_ENABLE + `define ICCM_AND_ICACHE + `else + `define ICCM_AND_NOT_ICACHE + `endif +`else + `ifdef RV_ICACHE_ENABLE + `define NOT_ICCM_AND_ICACHE + `else + `define NOT_ICCM_AND_NOT_ICACHE + `endif +`endif + + + localparam NUM_OF_BEATS = 8 ; + + + + logic [31:3] ifu_ic_req_addr_f2; + logic uncacheable_miss_in ; + logic uncacheable_miss_ff; + + + logic ifu_wr_en_new ; + logic ifu_wr_en_new_q ; + logic [63:0] ifu_wr_data_new ; + + logic axi_ifu_wr_en_new ; + logic axi_ifu_wr_en_new_q ; + logic axi_ifu_wr_en_new_wo_err ; + logic [63:0] axi_ifu_wr_data_new ; + logic [3:0] axi_ic_wr_en ; + + logic reset_tag_valid_for_miss ; + + + logic [2:0] way_status; + logic [2:0] way_status_mb_in; + logic [2:0] way_status_rep_new; + logic [2:0] way_status_mb_ff; + logic [2:0] way_status_new; + logic [2:0] way_status_hit_new; + logic [2:0] way_status_new_w_debug; + logic [3:0] tagv_mb_in; + logic [3:0] tagv_mb_ff; + + + logic ifu_wr_data_comb_err ; + logic ifu_wr_data_error; + logic ifu_byp_data_err; + logic ifu_wr_cumulative_err_data; + logic ifu_wr_cumulative_err; + logic ifu_wr_data_comb_err_ff; + logic write_even_beat; + + + logic ifc_dma_access_q_ok; + logic ifc_iccm_access_f2 ; + logic ifc_region_acc_fault_f2; + logic ifc_bus_acc_fault_f2; + logic ic_act_miss_f2; + logic ic_miss_under_miss_f2; + logic ic_act_hit_f2; + logic miss_pending; + logic [31:1] imb_in , imb_ff ; + logic flush_final_f2; + logic ifc_fetch_req_f2; + logic ifc_fetch_req_f2_raw; + logic fetch_req_f2_qual ; + logic ifc_fetch_req_qual_f1 ; + logic [3:0] replace_way_mb_any; + logic last_beat; + logic reset_beat_cnt ; + logic [2:0] req_addr_count ; + logic [5:3] ic_req_addr_bits_5_3 ; + logic [5:3] ic_wr_addr_bits_5_3 ; + logic [31:1] ifu_fetch_addr_int_f2 ; + logic [31:1] ifu_ic_rw_int_addr ; + logic ic_crit_wd_rdy_in ; + logic crit_wd_byp_ok_ff ; + 
logic ic_crit_wd_rdy_ff; + logic ic_byp_hit_f2 ; + logic ic_valid ; + logic ic_valid_ff; + logic reset_all_tags; + logic ic_valid_w_debug; + + logic [3:0] ifu_tag_wren,ifu_tag_wren_ff; + logic [3:0] ic_debug_tag_wr_en; + logic [3:0] ifu_tag_wren_w_debug; + logic [3:0] ic_debug_way_ff; + logic ic_debug_rd_en_ff ; + logic write_bypass_data; + logic fetch_f1_f2_c1_clken ; + logic fetch_f1_f2_c1_clk; + logic debug_c1_clken; + logic debug_c1_clk; + + logic reset_ic_in ; + logic reset_ic_ff ; + logic [3:1] vaddr_f2 ; + logic [31:1] ifu_status_wr_addr; + logic sel_fetch_u_miss; + logic sel_fetch_u_miss_ff; + logic sel_mb_addr ; + logic sel_mb_addr_ff ; + logic [127:0] ic_final_data; + + logic [ICACHE_TAG_HIGH-1:ICACHE_TAG_LOW] ifu_ic_rw_int_addr_ff ; + logic [ICACHE_TAG_HIGH-1:ICACHE_TAG_LOW] ifu_status_wr_addr_ff ; + logic [ICACHE_TAG_HIGH-1:ICACHE_TAG_LOW] ifu_ic_rw_int_addr_w_debug ; + logic [ICACHE_TAG_HIGH-1:ICACHE_TAG_LOW] ifu_status_wr_addr_w_debug ; + + logic [2:0] way_status_new_ff ; + logic way_status_wr_en_ff ; + logic [ICACHE_TAG_DEPTH-1:0][2:0] way_status_out ; + logic [1:0] ic_debug_way_enc; + logic [127:0] ic_byp_data_only; + logic [127:0] ic_rd_data_only; + + logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_rid_ff; + + logic fetch_req_icache_f2; + logic fetch_req_iccm_f2; + logic ic_iccm_hit_f2; + logic fetch_uncacheable_ff; + logic way_status_wr_en; + logic sel_byp_data; + logic sel_ic_data; + logic sel_iccm_data; + logic ic_rd_parity_final_err_ff; + logic ic_act_miss_f2_delayed; + logic axi_ifu_wr_data_error; + logic way_status_wr_en_w_debug; + logic ic_debug_tag_val_rd_out; + logic ifu_pmu_ic_miss_in; + logic ifu_pmu_ic_hit_in; + logic ifu_pmu_bus_error_in; + logic ifu_pmu_bus_trxn_in; + logic ifu_pmu_bus_busy_in; + logic ic_debug_ict_array_sel_in; + logic ic_debug_ict_array_sel_ff; + logic debug_data_clk; + logic debug_data_clken; + logic ifc_region_acc_fault_final_f1, ifc_region_acc_fault_memory, ifc_region_acc_okay; + + +`ifdef RV_ICACHE_ECC + logic [19:0] 
ic_wr_ecc; + logic [3:0] [1:0] ic_wr_ecc0_unused; // bit 6:5 are not used for a the 16bit sedded + +`else + logic [3:0] ic_wr_parity; +`endif + + + + assign ifu_axi_awvalid = '0; + assign ifu_axi_awid[`RV_IFU_BUS_TAG-1:0] = '0; + assign ifu_axi_awaddr[31:0] = '0; + assign ifu_axi_awsize[2:0] = '0; + assign ifu_axi_awprot[2:0] = '0; + assign ifu_axi_awcache[3:0] = '0; + assign ifu_axi_awregion[3:0] = '0; + assign ifu_axi_awlen[7:0] = '0; + assign ifu_axi_awburst[1:0] = '0; + assign ifu_axi_awqos[3:0] = '0; + assign ifu_axi_awlock = '0; + // AXI Write Data Channel + assign ifu_axi_wvalid = '0; + assign ifu_axi_wdata[63:0] = '0; + assign ifu_axi_wstrb[7:0] = '0; + assign ifu_axi_wlast = '1; + // AXI Write Response Channel + assign ifu_axi_bready = '1; + + +// ---- Clock gating section ----- +// c1 clock enables + + + assign fetch_f1_f2_c1_clken = ifc_fetch_req_f1_raw | ifc_fetch_req_f2 | miss_pending | exu_flush_final ; + assign debug_c1_clken = ic_debug_rd_en | ic_debug_wr_en ; + // C1 - 1 clock pulse for data + + rvclkhdr fetch_f1_f2_c1_cgc ( .en(fetch_f1_f2_c1_clken), .l1clk(fetch_f1_f2_c1_clk), .* ); + rvclkhdr debug_c1_cgc ( .en(debug_c1_clken), .l1clk(debug_c1_clk), .* ); + +// ------ end clock gating section ------------------------ + + + logic [ICCM_BITS-1:2] iccm_ecc_corr_index_ff; + logic [38:0] iccm_ecc_corr_data_ff; + logic iccm_ecc_write_status ; + logic iccm_correct_ecc ; + logic iccm_rd_ecc_single_err_ff ; + logic perr_state_en; + logic [7:0] fetch_mask, ic_fetch_mem_val, bp_mask, ic_bp_mem_mask, ic_fetch_val_mem_f2; + + assign iccm_dma_sb_error = iccm_rd_ecc_single_err & ic_dma_active; + + + typedef enum logic [2:0] {ERR_IDLE=3'b000, PERR_WFF=3'b001 , ECC_WFF=3'b010 , ECC_CORR=3'b011, DMA_SB_ERR=3'b100} perr_state_t; + perr_state_t perr_state, perr_nxtstate; + + + assign ic_dma_active = iccm_correct_ecc | (perr_state == DMA_SB_ERR); + //////////////////////////////////// Create Miss State Machine /////////////////////// + // Create Miss State Machine 
// + // Create Miss State Machine // + // Create Miss State Machine // + //////////////////////////////////// Create Miss State Machine /////////////////////// + logic miss_state_en; // drives the en port of the miss_state register below + + typedef enum logic [2:0] {IDLE=3'b000, CRIT_BYP_OK=3'b001, HIT_U_MISS=3'b010, MISS_WAIT=3'b011,CRIT_WRD_RDY=3'b100,SCND_MISS=3'b101} miss_state_t; + miss_state_t miss_state, miss_nxtstate; + + // Miss state machine: combinational decode of the next state (miss_nxtstate) and the state-register enable (miss_state_en) + always_comb begin : MISS_SM + miss_nxtstate = IDLE; + miss_state_en = 1'b0; + case (miss_state) + IDLE: begin : idle // enabled only by ic_act_miss_f2; next state is CRIT_BYP_OK unless exu_flush_final is set, then HIT_U_MISS + miss_nxtstate = (ic_act_miss_f2 & ~exu_flush_final) ? CRIT_BYP_OK : HIT_U_MISS ; + miss_state_en = ic_act_miss_f2; + end + CRIT_BYP_OK: begin : crit_byp_ok // priority-ordered ternary chain; enabled on flush, bypass hit, or (ifu_wr_en_new & last_beat) + miss_nxtstate = ( ic_byp_hit_f2 & ~exu_flush_final & ~(ifu_wr_en_new & last_beat) & ~uncacheable_miss_ff) ? MISS_WAIT : + ( ic_byp_hit_f2 & uncacheable_miss_ff) ? IDLE : + (~ic_byp_hit_f2 & ~exu_flush_final & (ifu_wr_en_new & last_beat) & uncacheable_miss_ff) ? CRIT_WRD_RDY : + ( (ifu_wr_en_new & last_beat) & ~uncacheable_miss_ff) ? IDLE : + ( exu_flush_final & ~(ifu_wr_en_new & last_beat) ) ? HIT_U_MISS : IDLE; + miss_state_en = exu_flush_final | ic_byp_hit_f2 | (ifu_wr_en_new & last_beat) ; + end + CRIT_WRD_RDY: begin : crit_wrd_rdy // always returns to IDLE: on a flush (current or registered in flush_final_f2) or a bypass hit + miss_nxtstate = IDLE ; + miss_state_en = exu_flush_final | flush_final_f2 | ic_byp_hit_f2 ; + end + MISS_WAIT: begin : miss_wait // ends on (ifu_wr_en_new & last_beat); a flush before that moves to HIT_U_MISS + miss_nxtstate = (exu_flush_final & ~(ifu_wr_en_new & last_beat)) ? HIT_U_MISS : IDLE ; + miss_state_en = exu_flush_final | (ifu_wr_en_new & last_beat) ; + end + HIT_U_MISS: begin : hit_u_miss // ic_miss_under_miss_f2 without (ifu_wr_en_new & last_beat) -> SCND_MISS, otherwise IDLE + miss_nxtstate = ic_miss_under_miss_f2 & ~(ifu_wr_en_new & last_beat) ?
SCND_MISS : IDLE ; + miss_state_en = (ifu_wr_en_new & last_beat) | ic_miss_under_miss_f2; + end + SCND_MISS: begin : scnd_miss // back to IDLE on (ifu_wr_en_new & last_beat) + miss_nxtstate = IDLE ; + miss_state_en = (ifu_wr_en_new & last_beat) ; + end + default: begin : def_case // unused encodings: same values as the defaults above, enable stays low + miss_nxtstate = IDLE; + miss_state_en = 1'b0; + end + endcase + end + rvdffs #(($bits(miss_state_t))) miss_state_ff (.clk(free_clk), .din(miss_nxtstate), .dout({miss_state}), .en(miss_state_en), .*); // state register on free_clk + logic sel_hold_imb ; + + assign miss_pending = (miss_state != IDLE) ; + assign crit_wd_byp_ok_ff = (miss_state == CRIT_BYP_OK) | ((miss_state == CRIT_WRD_RDY) & ~flush_final_f2); + assign sel_hold_imb = (miss_pending & ~(ifu_wr_en_new & last_beat) & ~((miss_state == CRIT_WRD_RDY) & exu_flush_final)) | ic_act_miss_f2 | + (miss_pending & (miss_nxtstate == CRIT_WRD_RDY)) ; + + + + + assign ic_req_addr_bits_5_3[5:3] = req_addr_count[2:0] ; + assign ic_wr_addr_bits_5_3[5:3] = ifu_axi_rid_ff[2:0] ; + // NOTE(review): the layout sketch below assumes a 16-byte cacheline, while the beat index above spans address bits [5:3] - confirm it is current. + // Tag Index Bank Offset + // [31:16] [15:5] [4] [3:0] + + + assign fetch_req_icache_f2 = ifc_fetch_req_f2 & ~ifc_iccm_access_f2 & ~ifc_region_acc_fault_f2; + assign fetch_req_iccm_f2 = ifc_fetch_req_f2 & ifc_iccm_access_f2; + + assign ic_iccm_hit_f2 = fetch_req_iccm_f2 & (~miss_pending | (miss_state==HIT_U_MISS)); + assign ic_byp_hit_f2 = ic_crit_wd_rdy & fetch_req_icache_f2 & miss_pending ; + assign ic_act_hit_f2 = (|ic_rd_hit[3:0]) & fetch_req_icache_f2 & ~reset_all_tags & (~miss_pending | (miss_state==HIT_U_MISS)) & ~sel_mb_addr_ff; + assign ic_act_miss_f2 = (~(|ic_rd_hit[3:0]) | reset_all_tags) & fetch_req_icache_f2 & ~miss_pending & ~ifc_region_acc_fault_f2; + assign ic_miss_under_miss_f2 = (~(|ic_rd_hit[3:0]) | reset_all_tags) & fetch_req_icache_f2 & (miss_state == HIT_U_MISS) ; + assign ic_hit_f2 = ic_act_hit_f2 | ic_byp_hit_f2 | ic_iccm_hit_f2 | (ifc_region_acc_fault_f2 & ifc_fetch_req_f2); + + assign uncacheable_miss_in = sel_hold_imb ?
uncacheable_miss_ff : ifc_fetch_uncacheable_f1 ; + assign imb_in[31:1] = sel_hold_imb ? imb_ff[31:1] : {fetch_addr_f1[31:1]} ; + assign way_status_mb_in[2:0] = ( miss_pending) ? way_status_mb_ff[2:0] : {way_status[2:0]} ; + assign tagv_mb_in[3:0] = ( miss_pending) ? tagv_mb_ff[3:0] : {ic_tag_valid[3:0]} ; + + assign reset_ic_in = miss_pending & (reset_all_tags | reset_ic_ff) ; + + rvdff #(1) reset_ic_f (.*, .clk(free_clk), .din (reset_ic_in), .dout(reset_ic_ff)); + rvdff #(1) uncache_ff (.*, .clk(active_clk), .din (ifc_fetch_uncacheable_f1), .dout(fetch_uncacheable_ff)); + + + + rvdff #(31) ifu_fetch_addr_f2_ff (.*, + .clk (fetch_f1_f2_c1_clk), + .din ({fetch_addr_f1[31:1]}), + .dout({ifu_fetch_addr_int_f2[31:1]})); + + assign vaddr_f2[3:1] = ifu_fetch_addr_int_f2[3:1] ; + + rvdff #(1) unc_miss_ff (.*, .clk(fetch_f1_f2_c1_clk), .din (uncacheable_miss_in), .dout(uncacheable_miss_ff)); + rvdff #(31) imb_f2_ff (.*, .clk(fetch_f1_f2_c1_clk), .din ({imb_in[31:1]}), .dout({imb_ff[31:1]})); + rvdff #(3) mb_rep_wayf2_ff (.*, .clk(fetch_f1_f2_c1_clk), .din ({way_status_mb_in[2:0]}), .dout({way_status_mb_ff[2:0]})); + + rvdff #(4) mb_tagv_ff (.*, .clk(fetch_f1_f2_c1_clk), .din ({tagv_mb_in[3:0]}), .dout({tagv_mb_ff[3:0]})); + + assign ifc_fetch_req_qual_f1 = ifc_fetch_req_f1 & ~((miss_state == CRIT_WRD_RDY) & flush_final_f2) ;// & ~exu_flush_final ; + rvdff #(1) fetch_req_f2_ff (.*, .clk(active_clk), .din(ifc_fetch_req_qual_f1), .dout(ifc_fetch_req_f2_raw)); + + assign ifc_fetch_req_f2 = ifc_fetch_req_f2_raw & ~exu_flush_final ; + + rvdff #(1) ifu_iccm_acc_ff (.*, .clk(fetch_f1_f2_c1_clk), .din(ifc_iccm_access_f1), .dout(ifc_iccm_access_f2)); + rvdff #(1) ifu_iccm_reg_acc_ff (.*, .clk(fetch_f1_f2_c1_clk), .din(ifc_region_acc_fault_final_f1), .dout(ifc_region_acc_fault_f2)); + + + assign ifu_ic_req_addr_f2[31:3] = {imb_ff[31:6] , ic_req_addr_bits_5_3[5:3] }; + assign ifu_ic_mb_empty = ((miss_state == HIT_U_MISS) & ~(ifu_wr_en_new & last_beat)) | ~miss_pending ; + assign 
ifu_miss_state_idle = (miss_state == IDLE) ; + +// four-way set associative - three bits +// each bit represents one branch point in a binary decision tree; let 1 +// represent that the left side has been referenced more recently than the +// right side, and 0 vice-versa +// +// are all 4 ways valid? +// / \ +// | no, use an invalid way. +// | +// | +// bit_0 == 0? state | replace ref to | next state +// / \ ------+-------- -------+----------- +// y n x00 | way_0 way_0 | _11 +// / \ x10 | way_1 way_1 | _01 +// bit_1 == 0? bit_2 == 0? 0x1 | way_2 way_2 | 1_0 +// / \ / \ 1x1 | way_3 way_3 | 0_0 +// y n y n +// / \ / \ ('x' means don't care ('_' means unchanged) +// way_0 way_1 way_2 way_3 don't care) + + + + + assign replace_way_mb_any[3] = ( way_status_mb_ff[2] & way_status_mb_ff[0] & (&tagv_mb_ff[3:0])) | + (~tagv_mb_ff[3]& tagv_mb_ff[2] & tagv_mb_ff[1] & tagv_mb_ff[0]) ; + assign replace_way_mb_any[2] = (~way_status_mb_ff[2] & way_status_mb_ff[0] & (&tagv_mb_ff[3:0])) | + (~tagv_mb_ff[2]& tagv_mb_ff[1] & tagv_mb_ff[0]) ; + assign replace_way_mb_any[1] = ( way_status_mb_ff[1] & ~way_status_mb_ff[0] & (&tagv_mb_ff[3:0])) | + (~tagv_mb_ff[1]& tagv_mb_ff[0] ) ; + assign replace_way_mb_any[0] = (~way_status_mb_ff[1] & ~way_status_mb_ff[0] & (&tagv_mb_ff[3:0])) | + (~tagv_mb_ff[0] ) ; + + assign way_status_hit_new[2:0] = ({3{ic_rd_hit[0]}} & {way_status[2] , 1'b1 , 1'b1}) | + ({3{ic_rd_hit[1]}} & {way_status[2] , 1'b0 , 1'b1}) | + ({3{ic_rd_hit[2]}} & {1'b1 ,way_status[1] , 1'b0}) | + ({3{ic_rd_hit[3]}} & {1'b0 ,way_status[1] , 1'b0}) ; + + assign way_status_rep_new[2:0] = ({3{replace_way_mb_any[0]}} & {way_status_mb_ff[2] , 1'b1 , 1'b1}) | + ({3{replace_way_mb_any[1]}} & {way_status_mb_ff[2] , 1'b0 , 1'b1}) | + ({3{replace_way_mb_any[2]}} & {1'b1 ,way_status_mb_ff[1] , 1'b0}) | + ({3{replace_way_mb_any[3]}} & {1'b0 ,way_status_mb_ff[1] , 1'b0}) ; + + // Make sure to select the way_status_hit_new even when in hit_under_miss. 
+ assign way_status_new[2:0] = (ifu_wr_en_new_q ) ? way_status_rep_new[2:0] : + way_status_hit_new[2:0] ; + + + assign way_status_wr_en = (ifu_wr_en_new_q ) | ic_act_hit_f2; + + + + assign sel_fetch_u_miss = ((miss_state == HIT_U_MISS) & ifc_fetch_req_f1 ) ; + rvdff #(1) sel_f_u_m_ff (.*, .clk(free_clk), .din (sel_fetch_u_miss), .dout(sel_fetch_u_miss_ff)); + + + assign sel_mb_addr = ((miss_pending & ifu_wr_en_new ) | reset_tag_valid_for_miss) ; + assign ifu_ic_rw_int_addr[31:1] = ({31{ sel_mb_addr}} & {imb_ff[31:6] , ic_wr_addr_bits_5_3[5:3] , imb_ff[2:1]}) | + ({31{~sel_mb_addr}} & fetch_addr_f1[31:1] ) ; + + assign ifu_status_wr_addr[31:1] = ({31{ sel_mb_addr}} & {imb_ff[31:6] , ic_wr_addr_bits_5_3[5:3] , imb_ff[2:1]}) | + ({31{~sel_mb_addr}} & ifu_fetch_addr_int_f2[31:1] ) ; + + rvdff #(1) sel_mb_addr_flop (.*, .clk(free_clk), .din({sel_mb_addr}), .dout({sel_mb_addr_ff})); + + assign ic_rw_addr[31:3] = ifu_ic_rw_int_addr[31:3] ; + + +genvar i ; +for (i=0 ; i < 4 ; i++) begin : DATA_PGEN + `ifdef RV_ICACHE_ECC + rvecc_encode ic_ecc_encode0 ( + .din ({16'b0, ifu_wr_data_new[((16*i)+15):(16*i)]}), + .ecc_out({ ic_wr_ecc0_unused[i],ic_wr_ecc[i*5+4:i*5]})); + + `else + rveven_paritygen #(16) parlo (.data_in (ifu_wr_data_new[((16*i)+15):(16*i)]), + .parity_out(ic_wr_parity[i])); + + `endif +end + + assign ifu_wr_data_comb_err = ifu_wr_data_error ; + assign ifu_wr_cumulative_err = (ifu_wr_data_comb_err | ifu_wr_data_comb_err_ff) & ~reset_beat_cnt; + assign ifu_wr_cumulative_err_data = ifu_wr_data_comb_err | ifu_wr_data_comb_err_ff ; + + rvdff #(1) cumul_err_ff (.*, .clk(free_clk), .din (ifu_wr_cumulative_err), .dout(ifu_wr_data_comb_err_ff)); + +`ifdef RV_ICACHE_ECC + assign ic_rd_data_only[127:0] = {ic_rd_data [157:126], ic_rd_data [115:84] , ic_rd_data [73:42],ic_rd_data [31:0]} ; + assign ic_error_f2.ecc[39:0] = {ic_rd_data[167:158], ic_rd_data[125:116], ic_rd_data[83:74] ,ic_rd_data[41:32]}; + assign ic_wr_data[83:0] = {ic_wr_ecc[19:10], + ifu_wr_data_new[63:32], 
+ ic_wr_ecc[9:0], + ifu_wr_data_new[31:0]} ; + +`else + assign ic_rd_data_only[127:0] = {ic_rd_data [133:102], ic_rd_data [99:68] , ic_rd_data [65:34] ,ic_rd_data [31:0]} ; + assign ic_error_f2.parity[7:0] = {ic_rd_data[135:134] , ic_rd_data[101:100], ic_rd_data [67:66] ,ic_rd_data [33:32]}; + assign ic_wr_data[67:0] = {ic_wr_parity[3:2], + ifu_wr_data_new[63:32], + ic_wr_parity[1:0], + ifu_wr_data_new[31:0]} ; + + +`endif + + + assign sel_byp_data = ic_crit_wd_rdy & ~ifu_byp_data_err; + assign sel_ic_data = ~ic_crit_wd_rdy & ~fetch_req_iccm_f2 ; +`ifdef ICCM_AND_ICACHE + assign sel_iccm_data = fetch_req_iccm_f2 ; + + assign ic_final_data = ({128{sel_byp_data | sel_iccm_data | sel_ic_data}} & {ic_rd_data_only[127:0]} ) ; + + assign ic_premux_data = ({128{sel_byp_data }} & {ic_byp_data_only[127:0]} ) | + ({128{sel_iccm_data}} & {iccm_rd_data[148:117],iccm_rd_data[109:78] , iccm_rd_data[70:39],iccm_rd_data[31:0]}); + + assign ic_sel_premux_data = sel_iccm_data | sel_byp_data ; + +`endif + +`ifdef ICCM_AND_NOT_ICACHE + assign sel_iccm_data = fetch_req_iccm_f2 ; + assign ic_final_data = ({128{sel_byp_data }} & {ic_byp_data_only[127:0]} ) | + ({128{sel_iccm_data}} & {iccm_rd_data[148:117],iccm_rd_data[109:78] , iccm_rd_data[70:39],iccm_rd_data[31:0]}); + assign ic_premux_data = '0 ; + assign ic_sel_premux_data = '0 ; +`endif + +`ifdef NOT_ICCM_AND_ICACHE + + assign ic_final_data = ({128{sel_byp_data | sel_ic_data}} & {ic_rd_data_only[127:0]} ) ; + + assign ic_premux_data = ({128{sel_byp_data }} & {ic_byp_data_only[127:0]} ) ; + assign ic_sel_premux_data = sel_byp_data ; +`endif + +`ifdef NOT_ICCM_AND_NOT_ICACHE + assign ic_final_data = ({128{sel_byp_data }} & {ic_byp_data_only[127:0]} ) ; + assign ic_premux_data = 0 ; + assign ic_sel_premux_data = '0 ; +`endif + + assign ifu_icache_fetch_f2 = sel_ic_data ; + + assign ifc_bus_acc_fault_f2 = ic_byp_hit_f2 & ifu_byp_data_err ; + assign ic_data_f2[127:0] = ic_final_data[127:0]; + + +rvdff #(1) flush_final_ff (.*, 
.clk(free_clk), .din({exu_flush_final}), .dout({flush_final_f2})); +assign fetch_req_f2_qual = ic_hit_f2 & ~exu_flush_final; +assign ic_access_fault_f2 = (ifc_region_acc_fault_f2 | ifc_bus_acc_fault_f2) & ~exu_flush_final; + + // right justified +assign ic_fetch_val_f2[7] = fetch_req_f2_qual & ifu_bp_inst_mask_f2[7] & ((!vaddr_f2[3]&!vaddr_f2[2]&!vaddr_f2[1])); +assign ic_fetch_val_f2[6] = fetch_req_f2_qual & ifu_bp_inst_mask_f2[6] & ((!vaddr_f2[3]&!vaddr_f2[2])); +assign ic_fetch_val_f2[5] = fetch_req_f2_qual & ifu_bp_inst_mask_f2[5] & ((!vaddr_f2[3]&!vaddr_f2[1]) | (!vaddr_f2[3]&!vaddr_f2[2])); +assign ic_fetch_val_f2[4] = fetch_req_f2_qual & ifu_bp_inst_mask_f2[4] & ((!vaddr_f2[3])); +assign ic_fetch_val_f2[3] = fetch_req_f2_qual & ifu_bp_inst_mask_f2[3] & ((!vaddr_f2[2]&!vaddr_f2[1]) | (!vaddr_f2[3])); +assign ic_fetch_val_f2[2] = fetch_req_f2_qual & ifu_bp_inst_mask_f2[2] & ((!vaddr_f2[2]) | (!vaddr_f2[3])); +assign ic_fetch_val_f2[1] = fetch_req_f2_qual & ifu_bp_inst_mask_f2[1] & ((!vaddr_f2[1]) | (!vaddr_f2[2]) | (!vaddr_f2[3])) ; +assign ic_fetch_val_f2[0] = fetch_req_f2_qual ; + + assign fetch_mask[7:0] = {vaddr_f2[3:1]==3'b111,vaddr_f2[3:1]==3'b110,vaddr_f2[3:1]==3'b101,vaddr_f2[3:1]==3'b100,vaddr_f2[3:1]==3'b011,vaddr_f2[3:1]==3'b010,vaddr_f2[3:1]==3'b001,vaddr_f2[3:1]==3'b000}; + + assign ic_fetch_mem_val[7:0] = { 1'b1, |fetch_mask[6:0], |fetch_mask[5:0], |fetch_mask[4:0], |fetch_mask[3:0], |fetch_mask[2:0], |fetch_mask[1:0], fetch_mask[0] }; + + assign bp_mask[7:0] = {ifu_bp_inst_mask_f2[7:1], 1'b1}; + + assign ic_bp_mem_mask[7:0] = ({8{fetch_mask[0]}} & bp_mask[7:0]) | // unrotate the bpmask + ({8{fetch_mask[1]}} & {bp_mask[6:0],1'b0}) | + ({8{fetch_mask[2]}} & {bp_mask[5:0],2'b0}) | + ({8{fetch_mask[3]}} & {bp_mask[4:0],3'b0}) | + ({8{fetch_mask[4]}} & {bp_mask[3:0],4'b0}) | + ({8{fetch_mask[5]}} & {bp_mask[2:0],5'b0}) | + ({8{fetch_mask[6]}} & {bp_mask[1:0],6'b0}) | + ({8{fetch_mask[7]}} & {bp_mask[0] ,7'b0}); + + assign ic_fetch_val_mem_f2[7:0] = 
{8{fetch_req_f2_qual}} & ic_bp_mem_mask[7:0] & ic_fetch_mem_val[7:0]; + + + +///////////////////////////////////////////////////////////////////////////////////// +// New logic for bypass +///////////////////////////////////////////////////////////////////////////////////// + + logic write_byp_first_data ; + logic write_byp_second_data ; + + logic [63:0] ifu_byp_data_first_half; + logic [63:0] ifu_byp_data_second_half; + logic ifu_byp_data_error_first_half_in; + logic ifu_byp_data_error_first_half; + logic ifu_byp_data_error_second_half_in; + logic ifu_byp_data_error_second_half; + logic ifu_byp_data_first_half_valid_in ; + logic ifu_byp_data_first_half_valid ; + logic ifu_byp_data_second_half_valid_in ; + logic ifu_byp_data_second_half_valid ; + logic ic_crit_wd_complete ; + + logic [IFU_BUS_TAG-1:0] byp_tag_ff; + logic byp_data_first_c1_clken ; + logic byp_data_first_c1_clk; + logic byp_data_second_c1_clken ; + logic byp_data_second_c1_clk; + + assign byp_data_first_c1_clken = write_byp_first_data; + assign byp_data_second_c1_clken = write_byp_second_data; + + rvclkhdr byp_data_first_c1_cgc ( .en(byp_data_first_c1_clken), .l1clk(byp_data_first_c1_clk), .* ); + rvclkhdr byp_data_second_c1_cgc ( .en(byp_data_second_c1_clken), .l1clk(byp_data_second_c1_clk), .* ); + + assign byp_tag_ff[IFU_BUS_TAG-1:0] = IFU_BUS_TAG'({imb_ff[5:4] , 1'b0}); + assign write_byp_first_data = axi_ifu_wr_en_new & ({byp_tag_ff[IFU_BUS_TAG-1:1],1'b0} == ifu_axi_rid_ff[IFU_BUS_TAG-1:0]); + assign write_byp_second_data = axi_ifu_wr_en_new & ({byp_tag_ff[IFU_BUS_TAG-1:1],1'b1} == ifu_axi_rid_ff[IFU_BUS_TAG-1:0]); + + // First Half flops + rvdff #(64) byp_data_first_half (.*, + .clk(byp_data_first_c1_clk), + .din (ifu_wr_data_new[63:0]), + .dout(ifu_byp_data_first_half[63:0])); + + assign ifu_byp_data_error_first_half_in = write_byp_first_data ? 
ifu_wr_data_error : (ifu_byp_data_error_first_half & ~ic_act_miss_f2) ; + + rvdff #(1) byp_data_first_half_err (.*, + .clk(free_clk), + .din (ifu_byp_data_error_first_half_in), + .dout(ifu_byp_data_error_first_half)); + + assign ifu_byp_data_first_half_valid_in = write_byp_first_data ? 1'b1 : (ifu_byp_data_first_half_valid & ~ic_act_miss_f2) ; + rvdff #(1) byp_data_first_half_val (.*, + .clk(free_clk), + .din (ifu_byp_data_first_half_valid_in), + .dout(ifu_byp_data_first_half_valid)); + + + // Second Half flops + rvdff #(64) byp_data_second_half (.*, + .clk(byp_data_second_c1_clk), + .din (ifu_wr_data_new[63:0]), + .dout(ifu_byp_data_second_half[63:0])); + + assign ifu_byp_data_error_second_half_in = write_byp_second_data ? ifu_wr_data_error : (ifu_byp_data_error_second_half & ~ic_act_miss_f2) ; + rvdff #(1) byp_data_second_half_err (.*, + .clk(free_clk), + .din (ifu_byp_data_error_second_half_in), + .dout(ifu_byp_data_error_second_half)); + + assign ifu_byp_data_second_half_valid_in = write_byp_second_data ? 1'b1 : (ifu_byp_data_second_half_valid & ~ic_act_miss_f2) ; + rvdff #(1) byp_data_second_half_val (.*, + .clk(free_clk), + .din (ifu_byp_data_second_half_valid_in), + .dout(ifu_byp_data_second_half_valid)); + + assign ic_byp_data_only[127:0] = { ifu_byp_data_second_half[63:0] , ifu_byp_data_first_half[63:0] } ; + assign ifu_byp_data_err = ifu_byp_data_error_second_half | ifu_byp_data_error_first_half ; + + +// Critical word ready. 
+ assign ic_crit_wd_complete = (write_byp_first_data & ifu_byp_data_second_half_valid) | + (write_byp_second_data & ifu_byp_data_first_half_valid) ; + + assign ic_crit_wd_rdy_in = (ic_crit_wd_complete & crit_wd_byp_ok_ff & ~exu_flush_final ) | + (ic_crit_wd_rdy_ff & ~fetch_req_icache_f2 & crit_wd_byp_ok_ff & ~exu_flush_final) ; + + rvdff #(1) crit_wd_ff (.*, .clk(free_clk), .din(ic_crit_wd_rdy_in), .dout(ic_crit_wd_rdy_ff)); +///////////////////////////////////////////////////////////////////////////////////// +// Parity checking logic for Icache logic. // +///////////////////////////////////////////////////////////////////////////////////// + + +assign ic_rd_parity_final_err = ic_tag_perr & ifu_icache_fetch_f2 ; + +logic [16:6] ifu_ic_rw_int_addr_f2_Q ; +logic [3:0] perr_err_inv_way; +logic [16:6] perr_ic_index_ff; +logic perr_sel_invalidate; +logic perr_sb_write_status ; +logic ifu_icache_sb_error_val_ff ; + + rvdff #(11) ic_index_q (.*, + .clk(active_clk), + .din(ifu_icache_error_index[16:6]), + .dout(ifu_ic_rw_int_addr_f2_Q[16:6])); + + + + + rvdff #((1)) perr_err_ff (.clk(active_clk), .din(ifu_icache_error_val), .dout(ic_rd_parity_final_err_ff), .*); + rvdff #((1)) sbiccm_err_ff (.clk(active_clk), .din(ifu_icache_sb_error_val), .dout(ifu_icache_sb_error_val_ff), .*); + rvdffs #((11)) perr_dat_ff (.clk(active_clk), .din(ifu_ic_rw_int_addr_f2_Q[16:6]), .dout(perr_ic_index_ff), .en(perr_sb_write_status), .*); + + assign perr_err_inv_way[3:0] = {4{perr_sel_invalidate}} ; + + //////////////////////////////////// Create Parity Error State Machine /////////////////////// + // Create Parity Error State Machine // + // Create Parity Error State Machine // + // Create Parity Error State Machine // + //////////////////////////////////// Create Parity Error State Machine /////////////////////// + + assign iccm_correct_ecc = (perr_state == ECC_CORR); + + // Parity/ECC error state machine: decodes next state, state enable, error-index capture enable (perr_sb_write_status) and invalidate (perr_sel_invalidate) + always_comb begin : ERROR_SM + perr_nxtstate = ERR_IDLE; + perr_state_en = 1'b0; +
perr_sb_write_status = 1'b0; + perr_sel_invalidate = 1'b0; + //iccm_correct_ecc = 1'b0; (superseded: iccm_correct_ecc is a continuous assign on perr_state == ECC_CORR) + + case (perr_state) + ERR_IDLE: begin : err_idle // priority: DMA single-bit error, then registered parity error, else ECC_WFF; perr_sb_write_status follows the enable + perr_nxtstate = iccm_dma_sb_error ? DMA_SB_ERR : (ic_rd_parity_final_err_ff & ~exu_flush_final) ? PERR_WFF : ECC_WFF; + perr_state_en = ((ic_rd_parity_final_err_ff | ifu_icache_sb_error_val_ff) & ~exu_flush_final) | iccm_dma_sb_error; + perr_sb_write_status = perr_state_en; + end + PERR_WFF: begin : perr_wff // waits for exu_flush_final; invalidate is raised only when dec_tlu_flush_err_wb accompanies the flush + perr_nxtstate = ERR_IDLE ; + perr_state_en = exu_flush_final ; + perr_sel_invalidate = (dec_tlu_flush_err_wb & exu_flush_final); + end + ECC_WFF: begin : ecc_wff // waits for exu_flush_final; goes to ECC_CORR when dec_tlu_flush_err_wb is set, else back to ERR_IDLE + perr_nxtstate = (~dec_tlu_flush_err_wb & exu_flush_final ) ? ERR_IDLE : ECC_CORR ; + perr_state_en = exu_flush_final ; + end + DMA_SB_ERR : begin : dma_sb_ecc // unconditional step to ECC_CORR + perr_nxtstate = ECC_CORR; + perr_state_en = 1'b1; + end + ECC_CORR: begin : ecc_corr // unconditional return to ERR_IDLE; iccm_correct_ecc is high while in this state + perr_nxtstate = ERR_IDLE ; + perr_state_en = 1'b1 ; + end + default: begin : def_case // unused encodings: same values as the defaults above, enable stays low + perr_nxtstate = ERR_IDLE; + perr_state_en = 1'b0; + perr_sb_write_status = 1'b0; + perr_sel_invalidate = 1'b0; + // iccm_correct_ecc = 1'b0; + end + endcase + end + rvdffs #(($bits(perr_state_t))) perr_state_ff (.clk(free_clk), .din(perr_nxtstate), .dout({perr_state}), .en(perr_state_en), .*); // state register on free_clk + +`ifdef RV_ICCM_ENABLE +///////////////////////////////////////////////////////////////////////////////////// +// ECC checking logic for ICCM data.
// +///////////////////////////////////////////////////////////////////////////////////// + +logic [3:0] [31:0] iccm_corrected_data; +logic [3:0] [06:0] iccm_corrected_ecc; +logic [3:0] iccm_single_ecc_error; +logic [3:0] iccm_double_ecc_error; +logic [3:0] iccm_ecc_word_enable; + + +logic [ICCM_BITS-1:4] iccm_rw_addr_f2; + +logic [31:0] iccm_corrected_data_f2_mux; +logic [06:0] iccm_corrected_ecc_f2_mux; +logic [3:0] iccm_rd_err_f2_mux; +logic iccm_dma_rvalid_in; + +for (i=0; i < 4 ; i++) begin : ICCM_ECC_CHECK +assign iccm_ecc_word_enable[i] = ((|ic_fetch_val_mem_f2[(2*i+1):(2*i)] & ~exu_flush_final & sel_iccm_data) | iccm_dma_rvalid_in) & ~dec_tlu_core_ecc_disable; +rvecc_decode ecc_decode ( + .en(iccm_ecc_word_enable[i]), + .sed_ded ( 1'b0 ), // 1 : means only detection + .din(iccm_rd_data[(39*i+31):(39*i)]), + .ecc_in(iccm_rd_data[(39*i+38):(39*i+32)]), + .dout(iccm_corrected_data[i][31:0]), + .ecc_out(iccm_corrected_ecc[i][6:0]), + .single_ecc_error(iccm_single_ecc_error[i]), + .double_ecc_error(iccm_double_ecc_error[i])); +end + +assign iccm_rd_ecc_single_err = (|iccm_single_ecc_error ) & ifc_iccm_access_f2; +assign iccm_rd_ecc_double_err = (|iccm_double_ecc_error ) & ifc_iccm_access_f2; + +assign iccm_corrected_data_f2_mux[31:0] = iccm_single_ecc_error[0] ? iccm_corrected_data[0] : + iccm_single_ecc_error[1] ? iccm_corrected_data[1] : + iccm_single_ecc_error[2] ? iccm_corrected_data[2] : + iccm_corrected_data[3] ; + +assign iccm_corrected_ecc_f2_mux[06:0] = iccm_single_ecc_error[0] ? iccm_corrected_ecc[0] : + iccm_single_ecc_error[1] ? iccm_corrected_ecc[1] : + iccm_single_ecc_error[2] ? iccm_corrected_ecc[2] : + iccm_corrected_ecc[3] ; + + + rvdff #(ICCM_BITS-4) iccm_index_f2 (.*, .clk(free_clk), .din(iccm_rw_addr[ICCM_BITS-1:4]), .dout(iccm_rw_addr_f2[ICCM_BITS-1:4])); + + assign iccm_rd_err_f2_mux[1:0] = iccm_single_ecc_error[0] ? 2'b00: + iccm_single_ecc_error[1] ? 2'b01: + iccm_single_ecc_error[2] ? 
2'b10: 2'b11 ; + + + logic iccm_rd_ecc_single_err_hold_in ; + + assign iccm_ecc_write_status = ((iccm_rd_ecc_single_err & ~iccm_rd_ecc_single_err_ff) & ~exu_flush_final) | iccm_dma_sb_error; + assign iccm_rd_ecc_single_err_hold_in = (iccm_rd_ecc_single_err | iccm_rd_ecc_single_err_ff) & ~exu_flush_final; + + rvdff #((1)) ecc_rr_ff (.clk(free_clk), .din(iccm_rd_ecc_single_err_hold_in), .dout(iccm_rd_ecc_single_err_ff), .*); + rvdffs #((32)) ecc_dat0_ff (.clk(free_clk), .din(iccm_corrected_data_f2_mux[31:0]), .dout(iccm_ecc_corr_data_ff[31:0]), .en(iccm_ecc_write_status), .*); + rvdffs #((7)) ecc_dat1_ff (.clk(free_clk), .din(iccm_corrected_ecc_f2_mux[6:0]), .dout(iccm_ecc_corr_data_ff[38:32]), .en(iccm_ecc_write_status), .*); + rvdffs #((ICCM_BITS-4))ecc_ind0_ff (.clk(free_clk), .din(iccm_rw_addr_f2[ICCM_BITS-1:4]), .dout(iccm_ecc_corr_index_ff[ICCM_BITS-1:4]),.en(iccm_ecc_write_status), .*); + rvdffs #((2)) ecc_ind1_ff (.clk(free_clk), .din(iccm_rd_err_f2_mux[1:0]), .dout(iccm_ecc_corr_index_ff[3:2]), .en(iccm_ecc_write_status), .*); + +`else +assign iccm_rd_ecc_single_err = 1'b0 ; +assign iccm_rd_ecc_double_err = 1'b0 ; +assign iccm_rd_ecc_single_err_ff = 1'b0 ; + +assign iccm_ecc_corr_index_ff[ICCM_BITS-1:2] = '0; +assign iccm_ecc_corr_data_ff[38:0] = '0; +assign iccm_ecc_write_status = '0; + +`endif + + logic axiclk; + logic axiclk_reset; + logic axi_ifu_bus_clk_en_ff; + logic axi_ifu_bus_clk_en ; + + logic ifc_axi_ic_req_ff_in; + logic ifc_axi_ic_req_ff2 ; + + logic axi_inc_data_beat_cnt ; + logic axi_reset_data_beat_cnt ; + logic axi_hold_data_beat_cnt ; + + logic axi_inc_cmd_beat_cnt ; + logic axi_reset_cmd_beat_cnt_0 ; + logic axi_reset_cmd_beat_cnt_6 ; + logic axi_hold_cmd_beat_cnt ; + + logic [2:0] axi_new_data_beat_count ; + logic [2:0] axi_data_beat_count ; + + logic [2:0] axi_new_cmd_beat_count ; + logic [2:0] axi_cmd_beat_count ; + + logic axi_inc_rd_addr_cnt ; + logic axi_set_rd_addr_cnt ; + logic axi_reset_rd_addr_cnt; + logic axi_hold_rd_addr_cnt ; 
+ + logic [2:0] axi_new_rd_addr_count; + logic [2:0] axi_rd_addr_count; + + + logic axi_cmd_sent ; + logic axi_last_data_beat ; + logic axi_wrap_addr ; + + + logic ifu_axi_rvalid_ff ; + logic ifu_axi_rvalid_unq_ff ; + logic ifu_axi_arready_unq_ff ; + logic ifu_axi_arvalid_ff ; + logic ifu_axi_arready_ff ; + logic [63:0] ifu_axi_rdata_ff ; + logic [1:0] ifu_axi_rresp_ff ; + + logic axi_w0_wren ; + logic axi_w1_wren ; + logic axi_w2_wren ; + logic axi_w3_wren ; + + logic axi_w0_wren_last ; + logic axi_w1_wren_last ; + logic axi_w2_wren_last ; + logic axi_w3_wren_last ; + + logic w0_wren_reset_miss ; + logic w1_wren_reset_miss ; + logic w2_wren_reset_miss ; + logic w3_wren_reset_miss ; + + logic ifc_dma_access_ok_d; + logic ifc_dma_access_ok_prev; + +assign axi_ifu_bus_clk_en = ifu_bus_clk_en ; + + rvclkhdr axi_clk(.en(axi_ifu_bus_clk_en), + .l1clk(axiclk), .*); + + + rvdff #(1) axi_clken_ff (.*, .clk(free_clk), .din(axi_ifu_bus_clk_en), .dout(axi_ifu_bus_clk_en_ff)); + + logic axi_cmd_req_in ; + logic axi_cmd_req_hold ; + + + assign ifc_axi_ic_req_ff_in = (ic_act_miss_f2 | axi_cmd_req_hold | ifc_axi_ic_req_ff2) & ~((axi_cmd_beat_count==3'b111) & ifu_axi_arvalid & ifu_axi_arready & miss_pending); + rvdff #(1) axi_ic_req_ff2(.*, .clk(axiclk), .din(ifc_axi_ic_req_ff_in), .dout(ifc_axi_ic_req_ff2)); + + assign axi_cmd_req_in = (ic_act_miss_f2 | axi_cmd_req_hold) & ~axi_cmd_sent ; // hold until first command sent + // changes for making the axi blocking + rvdff #(1) axi_cmd_req_ff (.*, .clk(free_clk), .din(axi_cmd_req_in), .dout(axi_cmd_req_hold)); + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// logic axi_cmd_rsp_pend; +// `ifdef RV_BUILD_SVC +// rvdffsc axi_cmd_rsp_pend_ff ( .din(ifu_axi_arready), .dout(axi_cmd_rsp_pend), .en(ifu_axi_arvalid), .clear(ifu_axi_rvalid & ifu_axi_rready), .clk(axiclk), .*); +// `elsif RV_BUILD_AXI4 +// rvdffsc axi_cmd_rsp_pend_ff ( 
.din(ifu_axi_arready), .dout(axi_cmd_rsp_pend), .en(ifu_axi_arvalid), .clear(ifu_axi_rvalid & ifu_axi_rready), .clk(axiclk), .*); +// `else +// assign axi_cmd_rsp_pend = 1'b0; +// `endif +// assign ifu_axi_arvalid = ifc_axi_ic_req_ff2 & ~axi_cmd_rsp_pend; +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + assign ifu_axi_arvalid = ifc_axi_ic_req_ff2 ; + assign ifu_axi_arid[IFU_BUS_TAG-1:0] = IFU_BUS_TAG'(axi_new_rd_addr_count[2:0]); + assign ifu_axi_araddr[31:0] = {ifu_ic_req_addr_f2[31:3],3'b0} ; + assign ifu_axi_rready = 1'b1; + assign ifu_axi_arsize[2:0] = 3'b011; + assign ifu_axi_arcache[3:0] = 4'b1111; + assign ifu_axi_arprot[2:0] = 3'b100; + assign ifu_axi_arregion[3:0] = ifu_axi_araddr[31:28]; + assign ifu_axi_arlen[7:0] = '0; + assign ifu_axi_arburst[1:0] = 2'b01; + assign ifu_axi_arqos[3:0] = '0; + assign ifu_axi_arlock = '0; + + // Flop the AXI read-channel handshake, response, tag and data signals on the bus clock (axiclk) + rvdff #(1) axi_rdy_ff (.*, .clk(axiclk), .din(ifu_axi_arready), .dout(ifu_axi_arready_unq_ff)); + rvdff #(1) axi_rsp_vld_ff (.*, .clk(axiclk), .din(ifu_axi_rvalid), .dout(ifu_axi_rvalid_unq_ff)); + rvdff #(1) axi_cmd_ff (.*, .clk(axiclk), .din(ifu_axi_arvalid), .dout(ifu_axi_arvalid_ff)); + rvdff #(2) scvi_rsp_cmd_ff (.*, .clk(axiclk), .din(ifu_axi_rresp[1:0]), .dout(ifu_axi_rresp_ff[1:0])); + rvdff #(IFU_BUS_TAG) scvi_rsp_tag_ff (.*, .clk(axiclk), .din(ifu_axi_rid[IFU_BUS_TAG-1:0]), .dout(ifu_axi_rid_ff[IFU_BUS_TAG-1:0])); + rvdff #(64) axi_data_ff (.*, .clk(axiclk), .din(ifu_axi_rdata[63:0]), .dout(ifu_axi_rdata_ff[63:0])); + + + + assign ifu_axi_arready_ff = ifu_axi_arready_unq_ff & axi_ifu_bus_clk_en_ff ; + assign ifu_axi_rvalid_ff = ifu_axi_rvalid_unq_ff & axi_ifu_bus_clk_en_ff ; + assign axi_cmd_sent = ifu_axi_arvalid_ff & ifu_axi_arready_ff & miss_pending; + assign axi_inc_data_beat_cnt = (axi_ifu_wr_en_new & ~axi_last_data_beat) ; + assign axi_reset_data_beat_cnt = ic_act_miss_f2 | 
(axi_ifu_wr_en_new & axi_last_data_beat) ; + assign axi_hold_data_beat_cnt = ~axi_inc_data_beat_cnt & ~axi_reset_data_beat_cnt ; + + assign axi_new_data_beat_count[2:0] = ({3{axi_reset_data_beat_cnt}} & 3'b000 ) | + ({3{axi_inc_data_beat_cnt}} & (axi_data_beat_count[2:0] + 3'b001)) | + ({3{axi_hold_data_beat_cnt}} & axi_data_beat_count[2:0]) ; + + rvdff #(3) axi_mb_beat_count_ff (.*, .clk(free_clk), .din ({axi_new_data_beat_count[2:0]}), .dout({axi_data_beat_count[2:0]})); + +// Request Address Count + assign axi_inc_rd_addr_cnt = axi_cmd_sent; + assign axi_set_rd_addr_cnt = ic_act_miss_f2 ; + assign axi_hold_rd_addr_cnt = ~axi_inc_rd_addr_cnt & ~axi_set_rd_addr_cnt; + + assign axi_new_rd_addr_count[2:0] = ~miss_pending ? {imb_ff[5:4],1'b0} : axi_inc_rd_addr_cnt ? (axi_rd_addr_count[2:0] + 3'b001) : axi_rd_addr_count[2:0]; + + rvdffs #(3) axi_rd_addr_ff (.*, .en(~axi_hold_rd_addr_cnt), .clk(free_clk), .din ({axi_new_rd_addr_count[2:0]}), .dout({axi_rd_addr_count[2:0]})); + +// command beat Count + assign axi_inc_cmd_beat_cnt = ifu_axi_arvalid & ifu_axi_arready & miss_pending; + assign axi_reset_cmd_beat_cnt_0 = ic_act_miss_f2 & ~uncacheable_miss_in ; + assign axi_reset_cmd_beat_cnt_6 = ic_act_miss_f2 & uncacheable_miss_in ; + assign axi_hold_cmd_beat_cnt = ~axi_inc_cmd_beat_cnt & ~ic_act_miss_f2 ; + + assign axi_new_cmd_beat_count[2:0] = ({3{axi_reset_cmd_beat_cnt_0}} & 3'b000 ) | + ({3{axi_reset_cmd_beat_cnt_6}} & 3'b110 ) | + ({3{axi_inc_cmd_beat_cnt}} & (axi_cmd_beat_count[2:0] + 3'b001)) | + ({3{axi_hold_cmd_beat_cnt}} & axi_cmd_beat_count[2:0]) ; + + rvclkhdr axi_clk_reset(.en(axi_ifu_bus_clk_en | ic_act_miss_f2), + .l1clk(axiclk_reset), .*); + + + rvdff #(3) axi_cmd_beat_ff (.*, .clk(axiclk_reset), .din ({axi_new_cmd_beat_count[2:0]}), + .dout({axi_cmd_beat_count[2:0]})); + + assign req_addr_count[2:0] = axi_new_rd_addr_count[2:0] ; + + + + assign axi_last_data_beat = uncacheable_miss_ff ? 
(axi_data_beat_count[2:0] == 3'b001) : (axi_data_beat_count[2:0] == 3'b111); + assign axi_wrap_addr = (axi_rd_addr_count[2:0] == 3'b111); + + assign axi_ifu_wr_en_new = ifu_axi_rvalid_ff & miss_pending ; + assign axi_ifu_wr_en_new_q = ifu_axi_rvalid_ff & miss_pending & ~uncacheable_miss_ff & ~(|ifu_axi_rresp_ff[1:0]); // qualify with no-error conditions ; + assign axi_ifu_wr_en_new_wo_err = ifu_axi_rvalid_ff & miss_pending & ~uncacheable_miss_ff; + assign axi_ifu_wr_data_new[63:0] = ifu_axi_rdata_ff[63:0] ; + + assign axi_w0_wren = axi_ifu_wr_en_new_q & (replace_way_mb_any[3:0] == 4'b0001) & miss_pending ; + assign axi_w1_wren = axi_ifu_wr_en_new_q & (replace_way_mb_any[3:0] == 4'b0010) & miss_pending ; + assign axi_w2_wren = axi_ifu_wr_en_new_q & (replace_way_mb_any[3:0] == 4'b0100) & miss_pending ; + assign axi_w3_wren = axi_ifu_wr_en_new_q & (replace_way_mb_any[3:0] == 4'b1000) & miss_pending ; + + assign axi_ic_wr_en[3:0] = {axi_w3_wren , axi_w2_wren , axi_w1_wren , axi_w0_wren} ; + + assign axi_w0_wren_last = axi_ifu_wr_en_new_wo_err & (replace_way_mb_any[3:0] == 4'b0001) & miss_pending & axi_last_data_beat; + assign axi_w1_wren_last = axi_ifu_wr_en_new_wo_err & (replace_way_mb_any[3:0] == 4'b0010) & miss_pending & axi_last_data_beat; + assign axi_w2_wren_last = axi_ifu_wr_en_new_wo_err & (replace_way_mb_any[3:0] == 4'b0100) & miss_pending & axi_last_data_beat; + assign axi_w3_wren_last = axi_ifu_wr_en_new_wo_err & (replace_way_mb_any[3:0] == 4'b1000) & miss_pending & axi_last_data_beat; + + rvdff #(1) act_miss_ff (.*, .clk(free_clk), .din (ic_act_miss_f2), .dout(ic_act_miss_f2_delayed)); + assign reset_tag_valid_for_miss = ic_act_miss_f2_delayed & (miss_state == CRIT_BYP_OK) ; + + assign w0_wren_reset_miss = (replace_way_mb_any[3:0] == 4'b0001) & reset_tag_valid_for_miss ; + assign w1_wren_reset_miss = (replace_way_mb_any[3:0] == 4'b0010) & reset_tag_valid_for_miss ; + assign w2_wren_reset_miss = (replace_way_mb_any[3:0] == 4'b0100) & reset_tag_valid_for_miss 
; + assign w3_wren_reset_miss = (replace_way_mb_any[3:0] == 4'b1000) & reset_tag_valid_for_miss ; + + assign axi_ifu_wr_data_error = |ifu_axi_rresp_ff[1:0] & ifu_axi_rvalid_ff & miss_pending; + + rvdff #(1) dma_ok_prev_ff (.*, .clk(free_clk), .din(ifc_dma_access_ok_d), .dout(ifc_dma_access_ok_prev)); + + assign ic_crit_wd_rdy = ic_crit_wd_rdy_ff ; + assign last_beat = axi_last_data_beat ; + assign ifu_wr_data_error = axi_ifu_wr_data_error ; + assign reset_beat_cnt = axi_reset_data_beat_cnt ; + +// DMA + // Making sure that the dma_access is allowed when we have 2 back to back dma_access_ok. Also gating with current state == idle + assign ifc_dma_access_ok_d = ifc_dma_access_ok & ~iccm_correct_ecc; + assign ifc_dma_access_q_ok = ifc_dma_access_ok & ~iccm_correct_ecc & ifc_dma_access_ok_prev & (perr_state == ERR_IDLE) ; + assign iccm_ready = ifc_dma_access_q_ok ; + + `ifdef RV_ICCM_ENABLE + logic dma_select_upper ; + logic iccm_dma_rden ; + +// logic ic_dma_active_in; + logic iccm_dma_ecc_error_in; + logic [13:0] dma_mem_ecc; + logic [63:0] iccm_dma_rdata_in; + +// assign ic_dma_active_in = ifc_dma_access_q_ok & dma_iccm_req ; + assign iccm_wren = (ifc_dma_access_q_ok & dma_iccm_req & dma_mem_write) | iccm_correct_ecc; + assign iccm_rden = (ifc_dma_access_q_ok & dma_iccm_req & ~dma_mem_write) | ifc_iccm_access_f1; + assign iccm_dma_rden = (ifc_dma_access_q_ok & dma_iccm_req & ~dma_mem_write) ; + assign iccm_wr_size[2:0] = {3{dma_iccm_req & dma_mem_write}} & dma_mem_sz[2:0] ; + + rvecc_encode iccm_ecc_encode0 ( + .din(dma_mem_wdata[31:0]), + .ecc_out(dma_mem_ecc[6:0])); + + rvecc_encode iccm_ecc_encode1 ( + .din(dma_mem_wdata[63:32]), + .ecc_out(dma_mem_ecc[13:7])); + + assign iccm_wr_data[38:0] = (iccm_correct_ecc & ~(ifc_dma_access_q_ok & dma_iccm_req)) ? iccm_ecc_corr_data_ff[38:0] : + {dma_mem_ecc[ 6:0],dma_mem_wdata[31:0]}; + assign iccm_wr_data[77:39] = (iccm_correct_ecc & ~(ifc_dma_access_q_ok & dma_iccm_req)) ? 
iccm_ecc_corr_data_ff[38:0] : + {dma_mem_ecc[13:7],dma_mem_wdata[63:32]}; + + assign iccm_dma_rdata_in[63:0] = iccm_dma_ecc_error_in ? {2{dma_mem_addr[31:0]}} : dma_select_upper ? {iccm_corrected_data[3], iccm_corrected_data[2]} : {iccm_corrected_data[1],iccm_corrected_data[0]}; + assign iccm_dma_ecc_error_in = dma_select_upper ? |(iccm_double_ecc_error[3:2]) : |(iccm_double_ecc_error[1:0]); + + rvdff #(1) dma_addr_bt3_ff (.*, .clk(free_clk), .din(dma_mem_addr[3]), .dout(dma_select_upper)); + rvdff #(1) ccm_rdy_in_ff (.*, .clk(free_clk), .din(iccm_dma_rden), .dout(iccm_dma_rvalid_in)); + rvdff #(1) ccm_rdy_ff (.*, .clk(free_clk), .din(iccm_dma_rvalid_in), .dout(iccm_dma_rvalid)); + rvdff #(1) ccm_err_ff (.*, .clk(free_clk), .din(iccm_dma_ecc_error_in), .dout(iccm_dma_ecc_error)); + rvdff #(64) dma_data_ff (.*, .clk(free_clk), .din(iccm_dma_rdata_in[63:0]), .dout(iccm_dma_rdata[63:0])); + + assign iccm_rw_addr[ICCM_BITS-1:2] = ( ifc_dma_access_q_ok & dma_iccm_req & ~iccm_correct_ecc) ? dma_mem_addr[ICCM_BITS-1:2] : + (~(ifc_dma_access_q_ok & dma_iccm_req) & iccm_correct_ecc) ? 
iccm_ecc_corr_index_ff[ICCM_BITS-1:2] : fetch_addr_f1[ICCM_BITS-1:2] ; + `else + assign iccm_dma_rvalid = 1'b0 ; + assign iccm_dma_ecc_error = 1'b0 ; + assign iccm_dma_rdata[63:0] = '0 ; + `endif + + +////// ICCM signals + +assign ic_rd_en = ifc_fetch_req_f1 & ~ifc_fetch_uncacheable_f1; + +assign ifu_tag_wren[0] = axi_w0_wren_last | w0_wren_reset_miss; +assign ifu_tag_wren[1] = axi_w1_wren_last | w1_wren_reset_miss; +assign ifu_tag_wren[2] = axi_w2_wren_last | w2_wren_reset_miss; +assign ifu_tag_wren[3] = axi_w3_wren_last | w3_wren_reset_miss; +assign ifu_wr_en_new = axi_ifu_wr_en_new; +assign ifu_wr_en_new_q = axi_ifu_wr_en_new_q; +assign ifu_wr_data_new[63:0]= axi_ifu_wr_data_new[63:0]; +assign ic_wr_en[3:0] = axi_ic_wr_en[3:0]; +assign ic_write_stall = ifu_wr_en_new & ~(((miss_state== CRIT_BYP_OK) & ~(ifu_wr_en_new & last_beat))); + + rvdff #(1) reset_all_tag_ff (.*, .clk(active_clk), .din(dec_tlu_fence_i_wb), .dout(reset_all_tags)); + + + +/////////////////////////////////////////////////////////////// +// Icache status and LRU +/////////////////////////////////////////////////////////////// +`ifdef RV_ICACHE_ENABLE + assign ic_valid = ~ifu_wr_cumulative_err_data & ~(reset_ic_in | reset_ic_ff) & ~reset_tag_valid_for_miss; + + assign ifu_status_wr_addr_w_debug[ICACHE_TAG_HIGH-1:ICACHE_TAG_LOW] = ((ic_debug_rd_en | ic_debug_wr_en ) & ic_debug_tag_array) ? 
+ ic_debug_addr[ICACHE_TAG_HIGH-1:ICACHE_TAG_LOW] : + ifu_status_wr_addr[ICACHE_TAG_HIGH-1:ICACHE_TAG_LOW]; + + // status + rvdff #(ICACHE_TAG_HIGH-ICACHE_TAG_LOW) status_wr_addr_ff (.*, .clk(free_clk), .din(ifu_status_wr_addr_w_debug[ICACHE_TAG_HIGH-1:ICACHE_TAG_LOW]), + .dout(ifu_status_wr_addr_ff[ICACHE_TAG_HIGH-1:ICACHE_TAG_LOW])); + + assign way_status_wr_en_w_debug = way_status_wr_en | (ic_debug_wr_en & ic_debug_tag_array); + rvdff #(1) status_wren_ff (.*, .clk(free_clk), .din(way_status_wr_en_w_debug), .dout(way_status_wr_en_ff)); + + assign way_status_new_w_debug[2:0] = (ic_debug_wr_en & ic_debug_tag_array) ? ic_debug_wr_data[6:4] : + way_status_new[2:0] ; + rvdff #(3) status_data_ff (.*, .clk(free_clk), .din(way_status_new_w_debug[2:0]), .dout(way_status_new_ff[2:0])); + + logic [(ICACHE_TAG_DEPTH/8)-1 : 0] way_status_clken; + logic [(ICACHE_TAG_DEPTH/8)-1 : 0] way_status_clk; + + genvar j; + for (i=0 ; i ~ifc_fetch_req_f1; + endproperty + assert_fetch_stall: assert property (fetch_stall) else + $display("Assertion fetch_stall: ic_debug_rd_en=1'b%b, ic_debug_wr_en=1'b%b, ic_dma_active=1'b%b, ic_write_stall=1'b%b",ic_debug_rd_en,ic_debug_wr_en, ic_dma_active, ic_write_stall); + + + assert_perr_one_z_hot: assert #0 ($onehot0({ifu_icache_error_val,ifu_icache_sb_error_val})); + +`endif + +endmodule // ifu_mem_ctl diff --git a/design/include/build.h b/design/include/build.h new file mode 100644 index 0000000..0304711 --- /dev/null +++ b/design/include/build.h @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// BUILD_ICACHE_SIZE = { 32, 64, 128, 256 } +//`define BUILD_ICACHE_SIZE 256 + +// BUILD_ICACHE_LINE_SIZE = { 16 } +//`define BUILD_ICACHE_LINE_SIZE 64 + +//// BUILD_BTB_SIZE = {256, 512} +//`define BUILD_BTB_SIZE 512 +////`define BUILD_ICCM_SIZE 128 +// +////---------------------------------------------------------------------- +//// For configurable BTB size +//`define BTB_INDEX1_HI ((`BUILD_BTB_SIZE==256) ? 9 : 10) +//`define BTB_INDEX1_LO 4 +//`define BTB_INDEX2_HI ((`BUILD_BTB_SIZE==256) ? 15 : 17) +//`define BTB_INDEX2_LO ((`BUILD_BTB_SIZE==256) ? 10 : 11) +//`define BTB_INDEX3_HI ((`BUILD_BTB_SIZE==256) ? 21 : 24) +//`define BTB_INDEX3_LO ((`BUILD_BTB_SIZE==256) ? 16 : 18) +//`define BTB_ADDR_HI ((`BUILD_BTB_SIZE==256) ? 
9 : 10) +//`define BTB_ADDR_LO 4 +//// ---------------------------------------------------------------------- + + +// BUILD_DTCM_SADDR +//`define BUILD_DTCM_SADR 32'hf0000000 +// BUILD_DTCM_EADDR = {256, 512} +//`define BUILD_DTCM_EADR 32'hf0020000 + +// BUILD_ITCM_SADDR +//`define BUILD_ITCM_SADR 32'hee000000 +// BUILD_ITCM_EADDR = {256, 512} +//`define BUILD_ITCM_EADR 32'hee020000 + +//---------------------------------------------------------------------- +//`define TOTAL_INT 256 +//`define INTPEND_BASE_ADDR 32'hcc000400 +//`define INTENABLE_BASE_ADDR 32'hcc000800 +//`define INTPRIORITY_BASE_ADDR 32'hcc000c00 +//`define CLAIMID_ADDR 32'hcc001000 +//`define PRITHRESH_ADDR 32'hcc001010 + +//---------------------------------------------------------------------- + + + + +// Enable assertions +//`define ASSERT_ON + + diff --git a/design/include/global.h b/design/include/global.h new file mode 100644 index 0000000..4c33dad --- /dev/null +++ b/design/include/global.h @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +localparam TOTAL_INT = `RV_PIC_TOTAL_INT_PLUS1; + +localparam DCCM_BITS = `RV_DCCM_BITS; +localparam DCCM_BANK_BITS = `RV_DCCM_BANK_BITS; +localparam DCCM_NUM_BANKS = `RV_DCCM_NUM_BANKS; +localparam DCCM_DATA_WIDTH = `RV_DCCM_DATA_WIDTH; +localparam DCCM_FDATA_WIDTH = `RV_DCCM_FDATA_WIDTH; +localparam DCCM_BYTE_WIDTH = `RV_DCCM_BYTE_WIDTH; +localparam DCCM_ECC_WIDTH = `RV_DCCM_ECC_WIDTH; + +localparam LSU_RDBUF_DEPTH = `RV_LSU_NUM_NBLOAD; +localparam DMA_BUF_DEPTH = `RV_DMA_BUF_DEPTH; +localparam LSU_STBUF_DEPTH = `RV_LSU_STBUF_DEPTH; +localparam LSU_SB_BITS = `RV_LSU_SB_BITS; + +localparam DEC_INSTBUF_DEPTH = `RV_DEC_INSTBUF_DEPTH; + +localparam ICCM_SIZE = `RV_ICCM_SIZE; +localparam ICCM_BITS = `RV_ICCM_BITS; +localparam ICCM_NUM_BANKS = `RV_ICCM_NUM_BANKS; +localparam ICCM_BANK_BITS = `RV_ICCM_BANK_BITS; +localparam ICCM_INDEX_BITS = `RV_ICCM_INDEX_BITS; +localparam ICCM_BANK_HI = 4 + (`RV_ICCM_BANK_BITS/4); + +localparam ICACHE_TAG_HIGH = `RV_ICACHE_TAG_HIGH; +localparam ICACHE_TAG_LOW = `RV_ICACHE_TAG_LOW; +localparam ICACHE_IC_DEPTH = `RV_ICACHE_IC_DEPTH; +localparam ICACHE_TAG_DEPTH = `RV_ICACHE_TAG_DEPTH; + +localparam LSU_BUS_TAG = `RV_LSU_BUS_TAG; +localparam DMA_BUS_TAG = `RV_DMA_BUS_TAG; +localparam SB_BUS_TAG = `RV_SB_BUS_TAG; + +localparam IFU_BUS_TAG = `RV_IFU_BUS_TAG; + + diff --git a/design/include/swerv_types.sv b/design/include/swerv_types.sv new file mode 100644 index 0000000..483fb73 --- /dev/null +++ b/design/include/swerv_types.sv @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package swerv_types; + +// performance monitor stuff +typedef struct packed { + logic [2:0] trace_rv_i_valid_ip; + logic [95:0] trace_rv_i_insn_ip; + logic [95:0] trace_rv_i_address_ip; + logic [2:0] trace_rv_i_exception_ip; + logic [4:0] trace_rv_i_ecause_ip; + logic [2:0] trace_rv_i_interrupt_ip; + logic [31:0] trace_rv_i_tval_ip; + } trace_pkt_t; + + +typedef enum logic [3:0] { + NULL = 4'b0000, + MUL = 4'b0001, + LOAD = 4'b0010, + STORE = 4'b0011, + ALU = 4'b0100, + CSRREAD = 4'b0101, + CSRWRITE = 4'b0110, + CSRRW = 4'b0111, + EBREAK = 4'b1000, + ECALL = 4'b1001, + FENCE = 4'b1010, + FENCEI = 4'b1011, + MRET = 4'b1100, + CONDBR = 4'b1101, + JAL = 4'b1110 + } inst_t; + +typedef struct packed { +`ifdef RV_ICACHE_ECC + logic [39:0] ecc; +`else + logic [7:0] parity; +`endif + } icache_err_pkt_t; + +typedef struct packed { + logic valid; + logic wb; + logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] tag; + logic [4:0] rd; + } load_cam_pkt_t; + +typedef struct packed { + logic pc0_call; + logic pc0_ret; + logic pc0_pc4; + logic pc1_call; + logic pc1_ret; + logic pc1_pc4; + } rets_pkt_t; +typedef struct packed { + logic valid; + logic [11:0] toffset; + logic [1:0] hist; + logic br_error; + logic br_start_error; + logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] index; + logic [1:0] bank; + logic [31:1] prett; // predicted ret target + logic [`RV_BHT_GHR_RANGE] fghr; +`ifdef RV_BTB_48 + logic [1:0] way; +`else + logic way; +`endif + logic ret; + logic [`RV_BTB_BTAG_SIZE-1:0] btag; + } br_pkt_t; + +typedef struct packed { + logic valid; + logic [1:0] hist; + logic 
br_error; + logic br_start_error; + logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] index; + logic [1:0] bank; + logic [`RV_BHT_GHR_RANGE] fghr; +`ifdef RV_BTB_48 + logic [1:0] way; +`else + logic way; +`endif + logic middle; + } br_tlu_pkt_t; + +typedef struct packed { + logic misp; + logic ataken; + logic boffset; + logic pc4; + logic [1:0] hist; + logic [11:0] toffset; + logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] index; + logic [1:0] bank; + logic valid; + logic br_error; + logic br_start_error; + logic [31:1] prett; + logic pcall; + logic pret; + logic pja; + logic [`RV_BTB_BTAG_SIZE-1:0] btag; + logic [`RV_BHT_GHR_RANGE] fghr; +`ifdef RV_BTB_48 + logic [1:0] way; +`else + logic way; +`endif + } predict_pkt_t; + +typedef struct packed { + logic legal; + logic icaf; + logic icaf_f1; + logic perr; + logic sbecc; + logic fence_i; + logic [3:0] i0trigger; + logic [3:0] i1trigger; + inst_t pmu_i0_itype; // pmu - instruction type + inst_t pmu_i1_itype; // pmu - instruction type + logic pmu_i0_br_unpred; // pmu + logic pmu_i1_br_unpred; // pmu + logic pmu_divide; + logic pmu_lsu_misaligned; + } trap_pkt_t; + +typedef struct packed { + logic [4:0] i0rd; + logic i0mul; + logic i0load; + logic i0store; + logic i0div; + logic i0v; + logic i0valid; + logic i0secondary; + logic [1:0] i0rs1bype2; + logic [1:0] i0rs2bype2; + logic [3:0] i0rs1bype3; + logic [3:0] i0rs2bype3; + logic [4:0] i1rd; + logic i1mul; + logic i1load; + logic i1store; + logic i1v; + logic i1valid; + logic csrwen; + logic csrwonly; + logic [11:0] csrwaddr; + logic i1secondary; + logic [1:0] i1rs1bype2; + logic [1:0] i1rs2bype2; + logic [6:0] i1rs1bype3; + logic [6:0] i1rs2bype3; + } dest_pkt_t; + +typedef struct packed { + logic mul; + logic load; + logic sec; + logic alu; + } class_pkt_t; + +typedef struct packed { + logic [4:0] rs1; + logic [4:0] rs2; + logic [4:0] rd; + } reg_pkt_t; + + +typedef struct packed { + logic valid; + logic land; + logic lor; + logic lxor; + logic sll; + logic srl; + logic sra; + logic 
beq; + logic bne; + logic blt; + logic bge; + logic add; + logic sub; + logic slt; + logic unsign; + logic jal; + logic predict_t; + logic predict_nt; + logic csr_write; + logic csr_imm; + } alu_pkt_t; + +typedef struct packed { + logic by; + logic half; + logic word; + logic dword; // for dma + logic load; + logic store; + logic unsign; + logic dma; // dma pkt + logic store_data_bypass_c1; + logic load_ldst_bypass_c1; + logic store_data_bypass_c2; + logic store_data_bypass_i0_e2_c2; + logic [1:0] store_data_bypass_e4_c1; + logic [1:0] store_data_bypass_e4_c2; + logic [1:0] store_data_bypass_e4_c3; + logic valid; + } lsu_pkt_t; + +typedef struct packed { + logic exc_valid; + logic single_ecc_error; + logic inst_type; //0: Load, 1: Store + logic inst_pipe; //0: i0, 1: i1 + logic dma_valid; + logic exc_type; //0: MisAligned, 1: Access Fault + logic [31:0] addr; + } lsu_error_pkt_t; + +typedef struct packed { + logic alu; + logic rs1; + logic rs2; + logic imm12; + logic rd; + logic shimm5; + logic imm20; + logic pc; + logic load; + logic store; + logic lsu; + logic add; + logic sub; + logic land; + logic lor; + logic lxor; + logic sll; + logic sra; + logic srl; + logic slt; + logic unsign; + logic condbr; + logic beq; + logic bne; + logic bge; + logic blt; + logic jal; + logic by; + logic half; + logic word; + logic csr_read; + logic csr_clr; + logic csr_set; + logic csr_write; + logic csr_imm; + logic presync; + logic postsync; + logic ebreak; + logic ecall; + logic mret; + logic mul; + logic rs1_sign; + logic rs2_sign; + logic low; + logic div; + logic rem; + logic fence; + logic fence_i; + logic pm_alu; + logic legal; + } dec_pkt_t; + + +typedef struct packed { + logic valid; + logic rs1_sign; + logic rs2_sign; + logic low; + logic load_mul_rs1_bypass_e1; + logic load_mul_rs2_bypass_e1; + } mul_pkt_t; + +typedef struct packed { + logic valid; + logic unsign; + logic rem; + } div_pkt_t; + + +typedef struct packed { + logic select; + logic match; + logic store; + 
logic load; + logic execute; + logic m; + logic [31:0] tdata2; + } trigger_pkt_t; + + +typedef struct packed { +`ifdef RV_ICACHE_ECC + logic [41:0] icache_wrdata; // {dicad0[31:0], dicad1[1:0]} +`else + logic [33:0] icache_wrdata; // {dicad0[31:0], dicad1[1:0]} +`endif + logic [18:2] icache_dicawics; + logic icache_rd_valid; + logic icache_wr_valid; + } cache_debug_pkt_t; + + +endpackage // swerv_types diff --git a/design/lib/ahb_to_axi4.sv b/design/lib/ahb_to_axi4.sv new file mode 100644 index 0000000..ac36fea --- /dev/null +++ b/design/lib/ahb_to_axi4.sv @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//******************************************************************************** +// $Id$ +// +// Owner: +// Function: AHB to AXI4 Bridge +// Comments: +// +//******************************************************************************** +module ahb_to_axi4 #(parameter TAG = 1) ( + + input clk, + input rst_l, + input scan_mode, + input bus_clk_en, + input clk_override, + + // AXI signals + // AXI Write Channels + output logic axi_awvalid, + input logic axi_awready, + output logic [TAG-1:0] axi_awid, + output logic [31:0] axi_awaddr, + output logic [2:0] axi_awsize, + output logic [2:0] axi_awprot, + output logic [7:0] axi_awlen, + output logic [1:0] axi_awburst, + + output logic axi_wvalid, + input logic axi_wready, + output logic [63:0] axi_wdata, + output logic [7:0] axi_wstrb, + output logic axi_wlast, + + input logic axi_bvalid, + output logic axi_bready, + input logic [1:0] axi_bresp, + input logic [TAG-1:0] axi_bid, + + // AXI Read Channels + output logic axi_arvalid, + input logic axi_arready, + output logic [TAG-1:0] axi_arid, + output logic [31:0] axi_araddr, + output logic [2:0] axi_arsize, + output logic [2:0] axi_arprot, + output logic [7:0] axi_arlen, + output logic [1:0] axi_arburst, + + input logic axi_rvalid, + output logic axi_rready, + input logic [TAG-1:0] axi_rid, + input logic [63:0] axi_rdata, + input logic [1:0] axi_rresp, + + // AHB-Lite signals + input logic [31:0] ahb_haddr, // ahb bus address + input logic [2:0] ahb_hburst, // tied to 0 + input logic ahb_hmastlock, // tied to 0 + input logic [3:0] ahb_hprot, // tied to 4'b0011 + input logic [2:0] ahb_hsize, // size of bus transaction (possible values 0,1,2,3) + input logic [1:0] ahb_htrans, // Transaction type (possible values 0,2 only right now) + input logic ahb_hwrite, // ahb bus write + input logic [63:0] ahb_hwdata, // ahb bus write data + input logic ahb_hsel, // this slave was selected + input logic ahb_hreadyin, // previous hready was accepted or not + + output logic [63:0] 
ahb_hrdata, // ahb bus read data + output logic ahb_hreadyout, // slave ready to accept transaction + output logic ahb_hresp // slave response (high indicates erro) + +); + + logic [7:0] master_wstrb; + + typedef enum logic [1:0] { IDLE = 2'b00, // Nothing in the buffer. No commands yet recieved + WR = 2'b01, // Write Command recieved + RD = 2'b10, // Read Command recieved + PEND = 2'b11 // Waiting on Read Data from core + } state_t; + state_t buf_state, buf_nxtstate; + logic buf_state_en; + + // Buffer signals (one entry buffer) + logic buf_read_error_in, buf_read_error; + logic [63:0] buf_rdata; + + logic ahb_hready; + logic ahb_hready_q; + logic [1:0] ahb_htrans_in, ahb_htrans_q; + logic [2:0] ahb_hsize_q; + logic ahb_hwrite_q; + logic [31:0] ahb_haddr_q; + logic [63:0] ahb_hwdata_q; + logic ahb_hresp_q; + + //Miscellaneous signals + logic ahb_addr_in_dccm, ahb_addr_in_iccm, ahb_addr_in_pic; + logic ahb_addr_in_dccm_region_nc, ahb_addr_in_iccm_region_nc, ahb_addr_in_pic_region_nc; + // signals needed for the read data coming back from the core and to block any further commands as AHB is a blocking bus + logic buf_rdata_en; + + logic ahb_bus_addr_clk_en, buf_rdata_clk_en; + logic ahb_clk, ahb_addr_clk, buf_rdata_clk; + // Command buffer is the holding station where we convert to AXI and send to core + logic cmdbuf_wr_en, cmdbuf_rst; + logic cmdbuf_full; + logic cmdbuf_vld, cmdbuf_write; + logic [1:0] cmdbuf_size; + logic [7:0] cmdbuf_wstrb; + logic [31:0] cmdbuf_addr; + logic [63:0] cmdbuf_wdata; + + logic bus_clk; + +// FSM to control the bus states and when to block the hready and load the command buffer + always_comb begin + buf_nxtstate = IDLE; + buf_state_en = 1'b0; + buf_rdata_en = 1'b0; // signal to load the buffer when the core sends read data back + buf_read_error_in = 1'b0; // signal indicating that an error came back with the read from the core + cmdbuf_wr_en = 1'b0; // all clear from the gasket to load the buffer with the command for reads, 
command/data for writes + case (buf_state) + IDLE: begin // No commands received + buf_nxtstate = ahb_hwrite ? WR : RD; + buf_state_en = ahb_hready & ahb_htrans[1] & ahb_hsel; // only transition on a valid htrans + end + WR: begin // Write command received last cycle + buf_nxtstate = (ahb_hresp | (ahb_htrans[1:0] == 2'b0) | ~ahb_hsel) ? IDLE : (ahb_hwrite ? WR : RD); + buf_state_en = (~cmdbuf_full | ahb_hresp) ; + cmdbuf_wr_en = ~cmdbuf_full & ~(ahb_hresp | ((ahb_htrans[1:0] == 2'b01) & ahb_hsel)); // Don't send command to the buffer in case of an error or when the master is not ready with the data now. + end + RD: begin // Read command received last cycle. + buf_nxtstate = ahb_hresp ? IDLE :PEND; // If error go to idle, else wait for read data + buf_state_en = (~cmdbuf_full | ahb_hresp); // only when command can go, or if it's an error + cmdbuf_wr_en = ~ahb_hresp & ~cmdbuf_full; // send command only when no error + end + PEND: begin // Read Command has been sent. Waiting on Data. + buf_nxtstate = IDLE; // go back for next command and present data next cycle + buf_state_en = axi_rvalid & ~cmdbuf_write; // read data is back + buf_rdata_en = buf_state_en; // buffer the read data coming back from core + buf_read_error_in = buf_state_en & |axi_rresp[1:0]; // buffer error flag if return has Error ( ECC ) + end + endcase + end // always_comb begin + + rvdffs #($bits(state_t)) state_reg (.*, .din(buf_nxtstate), .dout({buf_state}), .en(buf_state_en), .clk(ahb_clk)); + + assign master_wstrb[7:0] = ({8{ahb_hsize_q[2:0] == 3'b0}} & (8'b1 << ahb_haddr_q[2:0])) | + ({8{ahb_hsize_q[2:0] == 3'b1}} & (8'b11 << ahb_haddr_q[2:0])) | + ({8{ahb_hsize_q[2:0] == 3'b10}} & (8'b1111 << ahb_haddr_q[2:0])) | + ({8{ahb_hsize_q[2:0] == 3'b11}} & 8'b1111_1111); + + // AHB signals + assign ahb_hreadyout = ahb_hresp ?
(ahb_hresp_q & ~ahb_hready_q) : + ((~cmdbuf_full | (buf_state == IDLE)) & ~(buf_state == RD | buf_state == PEND) & ~buf_read_error); + + assign ahb_hready = ahb_hreadyout & ahb_hreadyin; + assign ahb_htrans_in[1:0] = {2{ahb_hsel}} & ahb_htrans[1:0]; + assign ahb_hrdata[63:0] = buf_rdata[63:0]; + assign ahb_hresp = ((ahb_htrans_q[1:0] != 2'b0) & (buf_state != IDLE) & + ((~(ahb_addr_in_dccm | ahb_addr_in_iccm)) | // request not for ICCM or DCCM + ((ahb_addr_in_iccm | (ahb_addr_in_dccm & ahb_hwrite_q)) & ~((ahb_hsize_q[1:0] == 2'b10) | (ahb_hsize_q[1:0] == 2'b11))) | // ICCM Rd/Wr OR DCCM Wr not the right size + ((ahb_hsize_q[2:0] == 3'h1) & ahb_haddr_q[0]) | // HW size but unaligned + ((ahb_hsize_q[2:0] == 3'h2) & (|ahb_haddr_q[1:0])) | // W size but unaligned + ((ahb_hsize_q[2:0] == 3'h3) & (|ahb_haddr_q[2:0])))) | // DW size but unaligned + buf_read_error | // Read ECC error + (ahb_hresp_q & ~ahb_hready_q); // This is for second cycle of hresp protocol + + // Buffer signals - needed for the read data and ECC error response + rvdff #(.WIDTH(64)) buf_rdata_ff (.din(axi_rdata[63:0]), .dout(buf_rdata[63:0]), .clk(buf_rdata_clk), .*); + rvdff #(.WIDTH(1)) buf_read_error_ff(.din(buf_read_error_in), .dout(buf_read_error), .clk(ahb_clk), .*); // buf_read_error will be high only one cycle + + // All the Master signals are captured before presenting it to the command buffer. We check for Hresp before sending it to the cmd buffer. 
+ rvdff #(.WIDTH(1)) hresp_ff (.din(ahb_hresp), .dout(ahb_hresp_q), .clk(ahb_clk), .*); + rvdff #(.WIDTH(1)) hready_ff (.din(ahb_hready), .dout(ahb_hready_q), .clk(ahb_clk), .*); + rvdff #(.WIDTH(2)) htrans_ff (.din(ahb_htrans_in[1:0]), .dout(ahb_htrans_q[1:0]), .clk(ahb_clk), .*); + rvdff #(.WIDTH(3)) hsize_ff (.din(ahb_hsize[2:0]), .dout(ahb_hsize_q[2:0]), .clk(ahb_addr_clk), .*); + rvdff #(.WIDTH(1)) hwrite_ff (.din(ahb_hwrite), .dout(ahb_hwrite_q), .clk(ahb_addr_clk), .*); + rvdff #(.WIDTH(32)) haddr_ff (.din(ahb_haddr[31:0]), .dout(ahb_haddr_q[31:0]), .clk(ahb_addr_clk), .*); + + // Clock header logic + assign ahb_bus_addr_clk_en = bus_clk_en & (ahb_hready & ahb_htrans[1]); + assign buf_rdata_clk_en = bus_clk_en & buf_rdata_en; + + rvclkhdr ahb_cgc (.en(bus_clk_en), .l1clk(ahb_clk), .*); + rvclkhdr ahb_addr_cgc (.en(ahb_bus_addr_clk_en), .l1clk(ahb_addr_clk), .*); + rvclkhdr buf_rdata_cgc (.en(buf_rdata_clk_en), .l1clk(buf_rdata_clk), .*); + + // Address check dccm + rvrangecheck #(.CCM_SADR(`RV_DCCM_SADR), + .CCM_SIZE(`RV_DCCM_SIZE)) addr_dccm_rangecheck ( + .addr(ahb_haddr_q[31:0]), + .in_range(ahb_addr_in_dccm), + .in_region(ahb_addr_in_dccm_region_nc) + ); + + // Address check iccm +`ifdef RV_ICCM_ENABLE + rvrangecheck #(.CCM_SADR(`RV_ICCM_SADR), + .CCM_SIZE(`RV_ICCM_SIZE)) addr_iccm_rangecheck ( + .addr(ahb_haddr_q[31:0]), + .in_range(ahb_addr_in_iccm), + .in_region(ahb_addr_in_iccm_region_nc) + ); +`else + assign ahb_addr_in_iccm = '0; + assign ahb_addr_in_iccm_region_nc = '0; +`endif + + // PIC memory address check + rvrangecheck #(.CCM_SADR(`RV_PIC_BASE_ADDR), + .CCM_SIZE(`RV_PIC_SIZE)) addr_pic_rangecheck ( + .addr(ahb_haddr_q[31:0]), + .in_range(ahb_addr_in_pic), + .in_region(ahb_addr_in_pic_region_nc) + ); + + // Command Buffer - Holding for the commands to be sent for the AXI. It will be converted to the AXI signals. 
+ assign cmdbuf_rst = (((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)) & ~cmdbuf_wr_en) | (ahb_hresp & ~cmdbuf_write); + assign cmdbuf_full = (cmdbuf_vld & ~((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready))); + + rvdffsc #(.WIDTH(1)) cmdbuf_vldff (.din(1'b1), .dout(cmdbuf_vld), .en(cmdbuf_wr_en), .clear(cmdbuf_rst), .clk(bus_clk), .*); + rvdffs #(.WIDTH(1)) cmdbuf_writeff (.din(ahb_hwrite_q), .dout(cmdbuf_write), .en(cmdbuf_wr_en), .clk(bus_clk), .*); + rvdffs #(.WIDTH(2)) cmdbuf_sizeff (.din(ahb_hsize_q[1:0]), .dout(cmdbuf_size[1:0]), .en(cmdbuf_wr_en), .clk(bus_clk), .*); + rvdffs #(.WIDTH(8)) cmdbuf_wstrbff (.din(master_wstrb[7:0]), .dout(cmdbuf_wstrb[7:0]), .en(cmdbuf_wr_en), .clk(bus_clk), .*); + rvdffe #(.WIDTH(32)) cmdbuf_addrff (.din(ahb_haddr_q[31:0]), .dout(cmdbuf_addr[31:0]), .en(cmdbuf_wr_en), .clk(bus_clk), .*); + rvdffe #(.WIDTH(64)) cmdbuf_wdataff (.din(ahb_hwdata[63:0]), .dout(cmdbuf_wdata[63:0]), .en(cmdbuf_wr_en), .clk(bus_clk), .*); + + // AXI Write Command Channel + assign axi_awvalid = cmdbuf_vld & cmdbuf_write; + assign axi_awid[TAG-1:0] = '0; + assign axi_awaddr[31:0] = cmdbuf_addr[31:0]; + assign axi_awsize[2:0] = {1'b0, cmdbuf_size[1:0]}; + assign axi_awprot[2:0] = 3'b0; + assign axi_awlen[7:0] = '0; + assign axi_awburst[1:0] = 2'b01; + // AXI Write Data Channel - This is tied to the command channel as we only write the command buffer once we have the data. + assign axi_wvalid = cmdbuf_vld & cmdbuf_write; + assign axi_wdata[63:0] = cmdbuf_wdata[63:0]; + assign axi_wstrb[7:0] = cmdbuf_wstrb[7:0]; + assign axi_wlast = 1'b1; + // AXI Write Response - Always ready. AHB does not require a write response. 
+ assign axi_bready = 1'b1; + // AXI Read Channels + assign axi_arvalid = cmdbuf_vld & ~cmdbuf_write; + assign axi_arid[TAG-1:0] = '0; + assign axi_araddr[31:0] = cmdbuf_addr[31:0]; + assign axi_arsize[2:0] = {1'b0, cmdbuf_size[1:0]}; + assign axi_arprot = 3'b0; + assign axi_arlen[7:0] = '0; + assign axi_arburst[1:0] = 2'b01; + // AXI Read Response Channel - Always ready as AHB reads are blocking and the buffer is always available for the read data coming back. + assign axi_rready = 1'b1; + + // Clock header logic + rvclkhdr bus_cgc (.en(bus_clk_en), .l1clk(bus_clk), .*); + +`ifdef ASSERT_ON + property ahb_error_protocol; + @(posedge ahb_clk) (ahb_hready & ahb_hresp) |-> (~$past(ahb_hready) & $past(ahb_hresp)); + endproperty + assert_ahb_error_protocol: assert property (ahb_error_protocol) else + $display("Bus Error with hReady isn't preceded with Bus Error without hready"); + +`endif + +endmodule // ahb_to_axi4 \ No newline at end of file diff --git a/design/lib/axi4_to_ahb.sv b/design/lib/axi4_to_ahb.sv new file mode 100644 index 0000000..b3e2715 --- /dev/null +++ b/design/lib/axi4_to_ahb.sv @@ -0,0 +1,472 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +//******************************************************************************** +// $Id$ +// +// Owner: +// Function: AXI4 -> AHB Bridge +// Comments: +// +//******************************************************************************** +module axi4_to_ahb #(parameter TAG = 1) ( + + input clk, + input rst_l, + input scan_mode, + input bus_clk_en, + input clk_override, + + // AXI signals + // AXI Write Channels + input logic axi_awvalid, + output logic axi_awready, + input logic [TAG-1:0] axi_awid, + input logic [31:0] axi_awaddr, + input logic [2:0] axi_awsize, + input logic [2:0] axi_awprot, + + input logic axi_wvalid, + output logic axi_wready, + input logic [63:0] axi_wdata, + input logic [7:0] axi_wstrb, + input logic axi_wlast, + + output logic axi_bvalid, + input logic axi_bready, + output logic [1:0] axi_bresp, + output logic [TAG-1:0] axi_bid, + + // AXI Read Channels + input logic axi_arvalid, + output logic axi_arready, + input logic [TAG-1:0] axi_arid, + input logic [31:0] axi_araddr, + input logic [2:0] axi_arsize, + input logic [2:0] axi_arprot, + + output logic axi_rvalid, + input logic axi_rready, + output logic [TAG-1:0] axi_rid, + output logic [63:0] axi_rdata, + output logic [1:0] axi_rresp, + output logic axi_rlast, + + // AHB-Lite signals + output logic [31:0] ahb_haddr, // ahb bus address + output logic [2:0] ahb_hburst, // tied to 0 + output logic ahb_hmastlock, // tied to 0 + output logic [3:0] ahb_hprot, // driven as {3'b001, ~axi_arprot[2]} + output logic [2:0] ahb_hsize, // size of bus transaction (possible values 0,1,2,3) + output logic [1:0] ahb_htrans, // Transaction type (possible values 0,2 only right now) + output logic ahb_hwrite, // ahb bus write + output logic [63:0] ahb_hwdata, // ahb bus write data + + input logic [63:0] ahb_hrdata, // ahb bus read data + input logic ahb_hready, // slave ready to accept transaction + input logic ahb_hresp // slave response (high indicates error) + +); + + localparam ID = 1; + localparam PRTY = 1; + typedef
enum logic [2:0] {IDLE=3'b000, CMD_RD=3'b001, CMD_WR=3'b010, DATA_RD=3'b011, DATA_WR=3'b100, DONE=3'b101, STREAM_RD=3'b110, STREAM_ERR_RD=3'b111} state_t; + state_t buf_state, buf_nxtstate; + + logic slave_valid; + logic slave_ready; + logic [TAG-1:0] slave_tag; + logic [63:0] slave_rdata; + logic [3:0] slave_opc; + + logic wrbuf_en, wrbuf_data_en; + logic wrbuf_cmd_sent, wrbuf_rst; + logic wrbuf_vld; + logic wrbuf_data_vld; + logic [TAG-1:0] wrbuf_tag; + logic [2:0] wrbuf_size; + logic [31:0] wrbuf_addr; + logic [63:0] wrbuf_data; + logic [7:0] wrbuf_byteen; + + logic bus_write_clk_en; + logic bus_clk, bus_write_clk; + + logic master_valid; + logic master_ready; + logic [TAG-1:0] master_tag; + logic [31:0] master_addr; + logic [63:0] master_wdata; + logic [2:0] master_size; + logic [2:0] master_opc; + + // Buffer signals (one entry buffer) + logic [31:0] buf_addr; + logic [1:0] buf_size; + logic buf_write; + logic [7:0] buf_byteen; + logic buf_aligned; + logic [63:0] buf_data; + logic [TAG-1:0] buf_tag; + + //Miscellaneous signals + logic buf_rst; + logic [TAG-1:0] buf_tag_in; + logic [31:0] buf_addr_in; + logic [7:0] buf_byteen_in; + logic [63:0] buf_data_in; + logic buf_write_in; + logic buf_aligned_in; + logic [2:0] buf_size_in; + + logic buf_state_en; + logic buf_wr_en; + logic buf_data_wr_en; + logic slvbuf_error_en; + logic wr_cmd_vld; + + logic cmd_done_rst, cmd_done, cmd_doneQ; + logic trxn_done; + logic [2:0] buf_cmd_byte_ptr, buf_cmd_byte_ptrQ, buf_cmd_nxtbyte_ptr; + logic buf_cmd_byte_ptr_en; + logic found; + + logic slave_valid_pre; + logic ahb_hready_q; + logic ahb_hresp_q; + logic [1:0] ahb_htrans_q; + logic ahb_hwrite_q; + logic [63:0] ahb_hrdata_q; + + + logic slvbuf_write; + logic slvbuf_error; + logic [TAG-1:0] slvbuf_tag; + + logic slvbuf_error_in; + logic slvbuf_wr_en; + logic bypass_en; + logic rd_bypass_idle; + + logic last_addr_en; + logic [31:0] last_bus_addr; + + // Clocks + logic buf_clken, slvbuf_clken; + logic ahbm_addr_clken; + logic 
ahbm_data_clken; + + logic buf_clk, slvbuf_clk; + logic ahbm_clk; + logic ahbm_addr_clk; + logic ahbm_data_clk; + + // Function to get the transfer size (3=DW, 2=W, 1=HW, 0 otherwise) from an aligned byte enable + function automatic logic [1:0] get_write_size; + input logic [7:0] byteen; + + logic [1:0] size; + + size[1:0] = (2'b11 & {2{(byteen[7:0] == 8'hff)}}) | + (2'b10 & {2{((byteen[7:0] == 8'hf0) | (byteen[7:0] == 8'h0f))}}) | + (2'b01 & {2{((byteen[7:0] == 8'hc0) | (byteen[7:0] == 8'h30) | (byteen[7:0] == 8'h0c) | (byteen[7:0] == 8'h03))}}); + + return size[1:0]; + endfunction // get_write_size + + // Function to get the address offset (index of the lowest enabled byte) from an aligned byte enable; returns 0 for any other pattern + function automatic logic [2:0] get_write_addr; + input logic [7:0] byteen; + + logic [2:0] addr; + + addr[2:0] = (3'h0 & {3{((byteen[7:0] == 8'hff) | (byteen[7:0] == 8'h0f) | (byteen[7:0] == 8'h03))}}) | + (3'h2 & {3{(byteen[7:0] == 8'h0c)}}) | + (3'h4 & {3{((byteen[7:0] == 8'hf0) | (byteen[7:0] == 8'h30))}}) | + (3'h6 & {3{(byteen[7:0] == 8'hc0)}}); + + return addr[2:0]; + endfunction // get_write_addr + + // Function to get the next byte pointer + function automatic logic [2:0] get_nxtbyte_ptr (logic [2:0] current_byte_ptr, logic [7:0] byteen, logic get_next); + logic [2:0] start_ptr; + logic found; + found = '0; + //get_nxtbyte_ptr[2:0] = current_byte_ptr[2:0]; + start_ptr[2:0] = get_next ?
(current_byte_ptr[2:0] + 3'b1) : current_byte_ptr[2:0]; + for (int j=0; j<8; j++) begin + if (~found) begin + get_nxtbyte_ptr[2:0] = 3'(j); + found |= (byteen[j] & (3'(j) >= start_ptr[2:0])) ; + end + end + endfunction // get_nextbyte_ptr + + + // Write buffer + assign wrbuf_en = axi_awvalid & axi_awready & master_ready; + assign wrbuf_data_en = axi_wvalid & axi_wready & master_ready; + assign wrbuf_cmd_sent = master_valid & master_ready & (master_opc[2:1] == 2'b01); + assign wrbuf_rst = wrbuf_cmd_sent & ~wrbuf_en; + + assign axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent) & master_ready; + assign axi_wready = ~(wrbuf_data_vld & ~wrbuf_cmd_sent) & master_ready; + assign axi_arready = ~(wrbuf_vld & wrbuf_data_vld) & master_ready; + assign axi_rlast = 1'b1; + + assign wr_cmd_vld = (wrbuf_vld & wrbuf_data_vld); + assign master_valid = wr_cmd_vld | axi_arvalid; + assign master_tag[TAG-1:0] = wr_cmd_vld ? wrbuf_tag[TAG-1:0] : axi_arid[TAG-1:0]; + assign master_opc[2:0] = wr_cmd_vld ? 3'b011 : 3'b0; + assign master_addr[31:0] = wr_cmd_vld ? wrbuf_addr[31:0] : axi_araddr[31:0]; + assign master_size[2:0] = wr_cmd_vld ? wrbuf_size[2:0] : axi_arsize[2:0]; + assign master_wdata[63:0] = wrbuf_data[63:0]; + + // AXI response channel signals + assign axi_bvalid = slave_valid & slave_ready & slave_opc[3]; + assign axi_bresp[1:0] = slave_opc[0] ? 2'b10 : (slave_opc[1] ? 2'b11 : 2'b0); + assign axi_bid[TAG-1:0] = slave_tag[TAG-1:0]; + + assign axi_rvalid = slave_valid & slave_ready & (slave_opc[3:2] == 2'b0); + assign axi_rresp[1:0] = slave_opc[0] ? 2'b10 : (slave_opc[1] ? 
2'b11 : 2'b0); + assign axi_rid[TAG-1:0] = slave_tag[TAG-1:0]; + assign axi_rdata[63:0] = slave_rdata[63:0]; + assign slave_ready = axi_bready & axi_rready; + + // Clock header logic + assign bus_write_clk_en = bus_clk_en & ((axi_awvalid & axi_awready) | (axi_wvalid & axi_wready)); + + rvclkhdr bus_cgc (.en(bus_clk_en), .l1clk(bus_clk), .*); + rvclkhdr bus_write_cgc (.en(bus_write_clk_en), .l1clk(bus_write_clk), .*); + + + // FIFO state machine + always_comb begin + buf_nxtstate = IDLE; + buf_state_en = 1'b0; + buf_wr_en = 1'b0; + buf_data_wr_en = 1'b0; + slvbuf_error_in = 1'b0; + slvbuf_error_en = 1'b0; + buf_write_in = 1'b0; + cmd_done = 1'b0; + trxn_done = 1'b0; + buf_cmd_byte_ptr_en = 1'b0; + buf_cmd_byte_ptr[2:0] = '0; + slave_valid_pre = 1'b0; + master_ready = 1'b0; + ahb_htrans[1:0] = 2'b0; + slvbuf_wr_en = 1'b0; + bypass_en = 1'b0; + rd_bypass_idle = 1'b0; + + case (buf_state) + IDLE: begin + master_ready = 1'b1; + buf_write_in = (master_opc[2:1] == 2'b01); + buf_nxtstate = buf_write_in ? CMD_WR : CMD_RD; + buf_state_en = master_valid & master_ready; + buf_wr_en = buf_state_en; + buf_data_wr_en = buf_state_en & (buf_nxtstate == CMD_WR); + buf_cmd_byte_ptr_en = buf_state_en; + buf_cmd_byte_ptr[2:0] = buf_write_in ? get_nxtbyte_ptr(3'b0,buf_byteen_in[7:0],1'b0) : master_addr[2:0]; + bypass_en = buf_state_en; + rd_bypass_idle = bypass_en & (buf_nxtstate == CMD_RD); + ahb_htrans[1:0] = {2{bypass_en}} & 2'b10; + end + CMD_RD: begin + buf_nxtstate = (master_valid & (master_opc[2:0] == 3'b000))? STREAM_RD : DATA_RD; + buf_state_en = ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & ~ahb_hwrite_q; + cmd_done = buf_state_en & ~master_valid; + slvbuf_wr_en = buf_state_en; + master_ready = buf_state_en & (buf_nxtstate == STREAM_RD); + buf_wr_en = master_ready; + bypass_en = master_ready & master_valid; + buf_cmd_byte_ptr[2:0] = bypass_en ? 
master_addr[2:0] : buf_addr[2:0]; + ahb_htrans[1:0] = 2'b10 & {2{~buf_state_en | bypass_en}}; + end + STREAM_RD: begin + master_ready = (ahb_hready_q & ~ahb_hresp_q) & ~(master_valid & master_opc[2:1] == 2'b01); + buf_wr_en = (master_valid & master_ready & (master_opc[2:0] == 3'b000)); // update the fifo if we are streaming the read commands + buf_nxtstate = ahb_hresp_q ? STREAM_ERR_RD : (buf_wr_en ? STREAM_RD : DATA_RD); // assuming that the master accepts the slave response right away. + buf_state_en = (ahb_hready_q | ahb_hresp_q); + buf_data_wr_en = buf_state_en; + slvbuf_error_in = ahb_hresp_q; + slvbuf_error_en = buf_state_en; + slave_valid_pre = buf_state_en & ~ahb_hresp_q; // send a response right away if we are not going through an error response. + cmd_done = buf_state_en & ~master_valid; // last one of the stream should not send an htrans + bypass_en = master_ready & master_valid & (buf_nxtstate == STREAM_RD) & buf_state_en; + buf_cmd_byte_ptr[2:0] = bypass_en ? master_addr[2:0] : buf_addr[2:0]; + ahb_htrans[1:0] = 2'b10 & {2{~((buf_nxtstate != STREAM_RD) & buf_state_en)}}; + slvbuf_wr_en = buf_wr_en; // shifting the contents from the buf to slv_buf for streaming cases + end // case: STREAM_RD + STREAM_ERR_RD: begin + buf_nxtstate = DATA_RD; + buf_state_en = ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & ~ahb_hwrite_q; + slave_valid_pre = buf_state_en; + slvbuf_wr_en = buf_state_en; // Overwrite slvbuf with buffer + buf_cmd_byte_ptr[2:0] = buf_addr[2:0]; + ahb_htrans[1:0] = 2'b10 & {2{~buf_state_en}}; + end + DATA_RD: begin + buf_nxtstate = DONE; + buf_state_en = (ahb_hready_q | ahb_hresp_q); + buf_data_wr_en = buf_state_en; + slvbuf_error_in= ahb_hresp_q; + slvbuf_error_en= buf_state_en; + slvbuf_wr_en = buf_state_en; + end + CMD_WR: begin + buf_nxtstate = DATA_WR; + trxn_done = ahb_hready_q & ahb_hwrite_q & (ahb_htrans_q[1:0] != 2'b0); + buf_state_en = trxn_done; + buf_cmd_byte_ptr_en = buf_state_en; + slvbuf_wr_en = buf_state_en; + buf_cmd_byte_ptr =
trxn_done ? get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1) : buf_cmd_byte_ptrQ; + cmd_done = trxn_done & (buf_aligned | (buf_cmd_byte_ptrQ == 3'b111) | + (buf_byteen[get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1)] == 1'b0)); + ahb_htrans[1:0] = {2{~(cmd_done | cmd_doneQ)}} & 2'b10; + end + DATA_WR: begin + buf_state_en = (cmd_doneQ & ahb_hready_q) | ahb_hresp_q; + master_ready = buf_state_en & ~ahb_hresp_q & slave_ready; // Ready to accept new command if current command done and no error + buf_nxtstate = (ahb_hresp_q | ~slave_ready) ? DONE : + ((master_valid & master_ready) ? ((master_opc[2:1] == 2'b01) ? CMD_WR : CMD_RD) : IDLE); + slvbuf_error_in = ahb_hresp_q; + slvbuf_error_en = buf_state_en; + + buf_write_in = (master_opc[2:1] == 2'b01); + buf_wr_en = buf_state_en & ((buf_nxtstate == CMD_WR) | (buf_nxtstate == CMD_RD)); + buf_data_wr_en = buf_wr_en; + + cmd_done = (ahb_hresp_q | (ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & + ((buf_cmd_byte_ptrQ == 3'b111) | (buf_byteen[get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1)] == 1'b0)))); + bypass_en = buf_state_en & buf_write_in & (buf_nxtstate == CMD_WR); // Only bypass for writes for the time being + ahb_htrans[1:0] = {2{(~(cmd_done | cmd_doneQ) | bypass_en)}} & 2'b10; + slave_valid_pre = buf_state_en & (buf_nxtstate != DONE); + + trxn_done = ahb_hready_q & ahb_hwrite_q & (ahb_htrans_q[1:0] != 2'b0); + buf_cmd_byte_ptr_en = trxn_done | bypass_en; + buf_cmd_byte_ptr = bypass_en ? get_nxtbyte_ptr(3'b0,buf_byteen_in[7:0],1'b0) : + trxn_done ? get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1) : buf_cmd_byte_ptrQ; + end + DONE: begin + buf_nxtstate = IDLE; + buf_state_en = slave_ready; + slvbuf_error_en = 1'b1; + slave_valid_pre = 1'b1; + end + endcase + end + + assign buf_rst = 1'b0; + assign cmd_done_rst = slave_valid_pre; + assign buf_addr_in[2:0] = (buf_aligned_in & (master_opc[2:1] == 2'b01)) ? 
get_write_addr(wrbuf_byteen[7:0]) : master_addr[2:0]; + assign buf_addr_in[31:3] = master_addr[31:3]; + assign buf_tag_in[TAG-1:0] = master_tag[TAG-1:0]; + assign buf_byteen_in[7:0] = wrbuf_byteen[7:0]; + assign buf_data_in[63:0] = (buf_state == DATA_RD) ? ahb_hrdata_q[63:0] : master_wdata[63:0]; + assign buf_size_in[1:0] = (buf_aligned_in & (master_size[1:0] == 2'b11) & (master_opc[2:1] == 2'b01)) ? get_write_size(wrbuf_byteen[7:0]) : master_size[1:0]; + assign buf_aligned_in = (master_opc[2:0] == 3'b0) | // reads are always aligned since they are either DW or sideeffects + (master_size[1:0] == 2'b0) | (master_size[1:0] == 2'b01) | (master_size[1:0] == 2'b10) | // Always aligned for Byte/HW/Word since they can be only for non-idempotent. IFU/SB are always aligned + ((master_size[1:0] == 2'b11) & + ((wrbuf_byteen[7:0] == 8'h3) | (wrbuf_byteen[7:0] == 8'hc) | (wrbuf_byteen[7:0] == 8'h30) | (wrbuf_byteen[7:0] == 8'hc0) | + (wrbuf_byteen[7:0] == 8'hf) | (wrbuf_byteen[7:0] == 8'hf0) | (wrbuf_byteen[7:0] == 8'hff))); + + // Generate the ahb signals + assign ahb_haddr[31:0] = bypass_en ? {master_addr[31:3],buf_cmd_byte_ptr[2:0]} : {buf_addr[31:3],buf_cmd_byte_ptr[2:0]}; + // assign ahb_hsize[2:0] = ((buf_state == CMD_RD) | (buf_state == STREAM_RD) | (buf_state == STREAM_ERR_RD) | rd_bypass_idle) ? 3'b011 : + // bypass_en ? {1'b0, ({2{buf_aligned_in}} & buf_size_in[1:0])} : + // {1'b0, ({2{buf_aligned}} & buf_size[1:0])}; // Send the full size for aligned trxn + assign ahb_hsize[2:0] = bypass_en ? {1'b0, ({2{buf_aligned_in}} & buf_size_in[1:0])} : + {1'b0, ({2{buf_aligned}} & buf_size[1:0])}; // Send the full size for aligned trxn + assign ahb_hburst[2:0] = 3'b0; + assign ahb_hmastlock = 1'b0; + assign ahb_hprot[3:0] = {3'b001,~axi_arprot[2]}; + assign ahb_hwrite = bypass_en ? (master_opc[2:1] == 2'b01) : buf_write; + assign ahb_hwdata[63:0] = buf_data[63:0]; + + assign slave_valid = slave_valid_pre; + assign slave_opc[3:2] = slvbuf_write ? 
2'b11 : 2'b00; + assign slave_opc[1:0] = {2{slvbuf_error}} & 2'b10; + assign slave_rdata[63:0] = slvbuf_error ? {2{last_bus_addr[31:0]}} : ((buf_state == DONE) ? buf_data[63:0] : ahb_hrdata_q[63:0]); + assign slave_tag[TAG-1:0] = slvbuf_tag[TAG-1:0]; + + assign last_addr_en = (ahb_htrans[1:0] != 2'b0) & ahb_hready & ahb_hwrite ; + + + rvdffsc #(.WIDTH(1)) wrbuf_vldff (.din(1'b1), .dout(wrbuf_vld), .en(wrbuf_en), .clear(wrbuf_rst), .clk(bus_clk), .*); + rvdffsc #(.WIDTH(1)) wrbuf_data_vldff(.din(1'b1), .dout(wrbuf_data_vld), .en(wrbuf_data_en), .clear(wrbuf_rst), .clk(bus_clk), .*); + rvdffs #(.WIDTH(TAG)) wrbuf_tagff (.din(axi_awid[TAG-1:0]), .dout(wrbuf_tag[TAG-1:0]), .en(wrbuf_en), .clk(bus_clk), .*); + rvdffs #(.WIDTH(3)) wrbuf_sizeff (.din(axi_awsize[2:0]), .dout(wrbuf_size[2:0]), .en(wrbuf_en), .clk(bus_clk), .*); + rvdffe #(.WIDTH(32)) wrbuf_addrff (.din(axi_awaddr[31:0]), .dout(wrbuf_addr[31:0]), .en(wrbuf_en), .clk(bus_clk), .*); + rvdffe #(.WIDTH(64)) wrbuf_dataff (.din(axi_wdata[63:0]), .dout(wrbuf_data[63:0]), .en(wrbuf_data_en), .clk(bus_clk), .*); + rvdffs #(.WIDTH(8)) wrbuf_byteenff (.din(axi_wstrb[7:0]), .dout(wrbuf_byteen[7:0]), .en(wrbuf_data_en), .clk(bus_clk), .*); + + rvdffs #(.WIDTH(32)) last_bus_addrff (.din(ahb_haddr[31:0]), .dout(last_bus_addr[31:0]), .en(last_addr_en), .clk(ahbm_clk), .*); + + rvdffsc #(.WIDTH($bits(state_t))) buf_state_ff (.din(buf_nxtstate), .dout({buf_state}), .en(buf_state_en), .clear(buf_rst), .clk(ahbm_clk), .*); + rvdffs #(.WIDTH(1)) buf_writeff (.din(buf_write_in), .dout(buf_write), .en(buf_wr_en), .clk(buf_clk), .*); + rvdffs #(.WIDTH(TAG)) buf_tagff (.din(buf_tag_in[TAG-1:0]), .dout(buf_tag[TAG-1:0]), .en(buf_wr_en), .clk(buf_clk), .*); + rvdffe #(.WIDTH(32)) buf_addrff (.din(buf_addr_in[31:0]), .dout(buf_addr[31:0]), .en(buf_wr_en & bus_clk_en), .*); + rvdffs #(.WIDTH(2)) buf_sizeff (.din(buf_size_in[1:0]), .dout(buf_size[1:0]), .en(buf_wr_en), .clk(buf_clk), .*); + rvdffs #(.WIDTH(1)) buf_alignedff 
(.din(buf_aligned_in), .dout(buf_aligned), .en(buf_wr_en), .clk(buf_clk), .*); + rvdffs #(.WIDTH(8)) buf_byteenff (.din(buf_byteen_in[7:0]), .dout(buf_byteen[7:0]), .en(buf_wr_en), .clk(buf_clk), .*); + rvdffe #(.WIDTH(64)) buf_dataff (.din(buf_data_in[63:0]), .dout(buf_data[63:0]), .en(buf_data_wr_en & bus_clk_en), .*); + + + rvdffs #(.WIDTH(1)) slvbuf_writeff (.din(buf_write), .dout(slvbuf_write), .en(slvbuf_wr_en), .clk(buf_clk), .*); + rvdffs #(.WIDTH(TAG)) slvbuf_tagff (.din(buf_tag[TAG-1:0]), .dout(slvbuf_tag[TAG-1:0]), .en(slvbuf_wr_en), .clk(buf_clk), .*); + rvdffs #(.WIDTH(1)) slvbuf_errorff (.din(slvbuf_error_in), .dout(slvbuf_error), .en(slvbuf_error_en), .clk(ahbm_clk), .*); + + rvdffsc #(.WIDTH(1)) buf_cmd_doneff (.din(1'b1), .en(cmd_done), .dout(cmd_doneQ), .clear(cmd_done_rst), .clk(ahbm_clk), .*); + rvdffs #(.WIDTH(3)) buf_cmd_byte_ptrff (.din(buf_cmd_byte_ptr[2:0]), .dout(buf_cmd_byte_ptrQ[2:0]), .en(buf_cmd_byte_ptr_en), .clk(ahbm_clk), .*); + + rvdff #(.WIDTH(1)) hready_ff (.din(ahb_hready), .dout(ahb_hready_q), .clk(ahbm_clk), .*); + rvdff #(.WIDTH(2)) htrans_ff (.din(ahb_htrans[1:0]), .dout(ahb_htrans_q[1:0]), .clk(ahbm_clk), .*); + rvdff #(.WIDTH(1)) hwrite_ff (.din(ahb_hwrite), .dout(ahb_hwrite_q), .clk(ahbm_addr_clk), .*); + rvdff #(.WIDTH(1)) hresp_ff (.din(ahb_hresp), .dout(ahb_hresp_q), .clk(ahbm_clk), .*); + rvdff #(.WIDTH(64)) hrdata_ff (.din(ahb_hrdata[63:0]), .dout(ahb_hrdata_q[63:0]), .clk(ahbm_data_clk), .*); + + // Clock headers + // clock enables for ahbm addr/data + assign buf_clken = bus_clk_en & (buf_wr_en | slvbuf_wr_en | clk_override); + assign ahbm_addr_clken = bus_clk_en & ((ahb_hready & ahb_htrans[1]) | clk_override); + assign ahbm_data_clken = bus_clk_en & ((buf_state != IDLE) | clk_override); + + rvclkhdr buf_cgc (.en(buf_clken), .l1clk(buf_clk), .*); + rvclkhdr ahbm_cgc (.en(bus_clk_en), .l1clk(ahbm_clk), .*); + rvclkhdr ahbm_addr_cgc (.en(ahbm_addr_clken), .l1clk(ahbm_addr_clk), .*); + rvclkhdr ahbm_data_cgc 
(.en(ahbm_data_clken), .l1clk(ahbm_data_clk), .*); + +`ifdef ASSERT_ON + property ahb_trxn_aligned; + @(posedge ahbm_clk) ahb_htrans[1] |-> ((ahb_hsize[2:0] == 3'h0) | + ((ahb_hsize[2:0] == 3'h1) & (ahb_haddr[0] == 1'b0)) | + ((ahb_hsize[2:0] == 3'h2) & (ahb_haddr[1:0] == 2'b0)) | + ((ahb_hsize[2:0] == 3'h3) & (ahb_haddr[2:0] == 3'b0))); + endproperty + assert_ahb_trxn_aligned: assert property (ahb_trxn_aligned) else + $display("Assertion ahb_trxn_aligned failed: ahb_htrans=2'h%h, ahb_hsize=3'h%h, ahb_haddr=32'h%h",ahb_htrans[1:0], ahb_hsize[2:0], ahb_haddr[31:0]); + + property ahb_error_protocol; + @(posedge ahbm_clk) (ahb_hready & ahb_hresp) |-> (~$past(ahb_hready) & $past(ahb_hresp)); + endproperty + assert_ahb_error_protocol: assert property (ahb_error_protocol) else + $display("Bus Error with hReady isn't preceded with Bus Error without hready"); +`endif + +endmodule // axi4_to_ahb diff --git a/design/lib/beh_lib.sv b/design/lib/beh_lib.sv new file mode 100644 index 0000000..ba01eca --- /dev/null +++ b/design/lib/beh_lib.sv @@ -0,0 +1,452 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// all flops call the rvdff flop (posedge clk, asynchronous active-low reset to 0) + + +module rvdff #( parameter WIDTH=1 ) + ( + input logic [WIDTH-1:0] din, + input logic clk, + input logic rst_l, + + output logic [WIDTH-1:0] dout + ); + +`ifdef CLOCKGATE + always @(posedge tb_top.clk) begin + #0 $strobe("CG: %0t %m din %x dout %x clk %b width %d",$time,din,dout,clk,WIDTH); + end +`endif + + always_ff @(posedge clk or negedge rst_l) begin + if (rst_l == 0) + dout[WIDTH-1:0] <= 0; + else + dout[WIDTH-1:0] <= din[WIDTH-1:0]; + end + + +endmodule + +// rvdff with 2:1 input mux to flop din iff en==1 (otherwise dout is held) +module rvdffs #( parameter WIDTH=1 ) + ( + input logic [WIDTH-1:0] din, + input logic en, + input logic clk, + input logic rst_l, + output logic [WIDTH-1:0] dout + ); + + rvdff #(WIDTH) dffs (.din((en) ? din[WIDTH-1:0] : dout[WIDTH-1:0]), .*); + +endmodule + +// rvdff with en and clear (clear takes priority over en) +module rvdffsc #( parameter WIDTH=1 ) + ( + input logic [WIDTH-1:0] din, + input logic en, + input logic clear, + input logic clk, + input logic rst_l, + output logic [WIDTH-1:0] dout + ); + + logic [WIDTH-1:0] din_new; + assign din_new = {WIDTH{~clear}} & (en ?
din[WIDTH-1:0] : dout[WIDTH-1:0]); + rvdff #(WIDTH) dffsc (.din(din_new[WIDTH-1:0]), .*); + +endmodule + +module `TEC_RV_ICG + ( + input logic TE, E, CP, + output Q + ); + + logic en_ff; + logic enable; + + assign enable = E | TE; + +`ifdef VERILATOR + always @(negedge CP) begin + en_ff <= enable; + end +`else + always @(CP, enable) begin + if(!CP) + en_ff = enable; + end +`endif + assign Q = CP & en_ff; + +endmodule + +module rvclkhdr + ( + input logic en, + input logic clk, + input logic scan_mode, + output logic l1clk + ); + + logic TE; + assign TE = scan_mode; + + `TEC_RV_ICG rvclkhdr ( .*, .E(en), .CP(clk), .Q(l1clk)); + +endmodule + +module rvdffe #( parameter WIDTH=1 ) + ( + input logic [WIDTH-1:0] din, + input logic en, + input logic clk, + input logic rst_l, + input logic scan_mode, + output logic [WIDTH-1:0] dout + ); + + logic l1clk; + +`ifndef PHYSICAL + if (WIDTH >= 8) begin: genblock +`endif + rvclkhdr clkhdr ( .* ); + rvdff #(WIDTH) dff (.*, .clk(l1clk)); +`ifndef PHYSICAL + end + else + $error("%m: rvdffe width must be >= 8"); +`endif + +endmodule // rvdffe + +module rvsyncss #(parameter WIDTH = 251) + ( + input logic clk, + input logic rst_l, + input logic [WIDTH-1:0] din, + output logic [WIDTH-1:0] dout + ); + + logic [WIDTH-1:0] din_ff1; + + rvdff #(WIDTH) sync_ff1 (.*, .din (din[WIDTH-1:0]), .dout(din_ff1[WIDTH-1:0])); + rvdff #(WIDTH) sync_ff2 (.*, .din (din_ff1[WIDTH-1:0]), .dout(dout[WIDTH-1:0])); + +endmodule // rvsyncss + +module rvlsadder + ( + input logic [31:0] rs1, + input logic [11:0] offset, + + output logic [31:0] dout + ); + + logic cout; + logic sign; + + logic [31:12] rs1_inc; + logic [31:12] rs1_dec; + + assign {cout,dout[11:0]} = {1'b0,rs1[11:0]} + {1'b0,offset[11:0]}; + + assign rs1_inc[31:12] = rs1[31:12] + 1; + + assign rs1_dec[31:12] = rs1[31:12] - 1; + + assign sign = offset[11]; + + assign dout[31:12] = ({20{ sign ^~ cout}} & rs1[31:12]) | + ({20{ ~sign & cout}} & rs1_inc[31:12]) | + ({20{ sign & ~cout}} & rs1_dec[31:12]); 
+ +endmodule // rvlsadder + +// Adds a signed offset[12:1] to pc[31:1]; assumes we only maintain pc[31:1] in the pipe + +module rvbradder + ( + input [31:1] pc, + input [12:1] offset, + + output [31:1] dout + ); + + logic cout; + logic sign; + + logic [31:13] pc_inc; + logic [31:13] pc_dec; + + assign {cout,dout[12:1]} = {1'b0,pc[12:1]} + {1'b0,offset[12:1]}; // low 12 bits plus carry out + + assign pc_inc[31:13] = pc[31:13] + 1; + + assign pc_dec[31:13] = pc[31:13] - 1; + + assign sign = offset[12]; // sign bit of the offset + + + assign dout[31:13] = ({19{ sign ^~ cout}} & pc[31:13]) | // sign equals carry: upper bits unchanged + ({19{ ~sign & cout}} & pc_inc[31:13]) | // non-negative offset with carry: increment + ({19{ sign & ~cout}} & pc_dec[31:13]); // negative offset without carry: decrement + + +endmodule // rvbradder + + +// 2s complement circuit: every bit above the lowest set bit of din is inverted +module rvtwoscomp #( parameter WIDTH=32 ) + ( + input logic [WIDTH-1:0] din, + + output logic [WIDTH-1:0] dout + ); + + logic [WIDTH-1:1] dout_temp; // holds every bit except the LSB; the LSB of dout is always din[0] + + genvar i; + + for ( i = 1; i < WIDTH; i++ ) begin : flip_after_first_one + assign dout_temp[i] = (|din[i-1:0]) ? ~din[i] : din[i]; // invert once any lower bit is set + end : flip_after_first_one + + assign dout[WIDTH-1:0] = { dout_temp[WIDTH-1:1], din[0] }; + +endmodule // rvtwoscomp + +// find first: counts the zero bits above the highest set bit, scanning din from the MSB down to bit 1 (bit 0 is not examined) +module rvfindfirst1 #( parameter WIDTH=32, SHIFT=$clog2(WIDTH) ) + ( + input logic [WIDTH-1:0] din, + + output logic [SHIFT-1:0] dout + ); + logic done; + + always_comb begin + dout[SHIFT-1:0] = {SHIFT{1'b0}}; + done = 1'b0; + + for ( int i = WIDTH-1; i > 0; i-- ) begin : find_first_one + done |= din[i]; + dout[SHIFT-1:0] += done ? 
1'b0 : 1'b1; + end : find_first_one + end +endmodule // rvfindfirst1 + +module rvfindfirst1hot #( parameter WIDTH=32 ) + ( + input logic [WIDTH-1:0] din, + + output logic [WIDTH-1:0] dout + ); + logic done; // set once a 1 has been seen at a lower bit position + + always_comb begin + dout[WIDTH-1:0] = {WIDTH{1'b0}}; + done = 1'b0; + for ( int i = 0; i < WIDTH; i++ ) begin : find_first_one + dout[i] = ~done & din[i]; // only the lowest set bit of din is kept + done |= din[i]; + end : find_first_one + end +endmodule // rvfindfirst1hot + +// Mask-and-match: find the first 0 in mask, starting from the LSB. +// Bits up to and including that position are ignored; the remaining upper bits of mask and data must match. +module rvmaskandmatch #( parameter WIDTH=32 ) + ( + input logic [WIDTH-1:0] mask, // this will have the mask in the lower bit positions + input logic [WIDTH-1:0] data, // this is what needs to be matched on the upper bits with the mask's upper bits + input logic masken, // when 1 : do mask. 0 : full match + output logic match + ); + + logic [WIDTH-1:0] matchvec; + logic masken_or_fullmask; + + assign masken_or_fullmask = masken & ~(&mask[WIDTH-1:0]); // masking is active only when masken is set and mask is not all ones + + assign matchvec[0] = masken_or_fullmask | (mask[0] == data[0]); // bit 0 is always ignored while masking is active + genvar i; + + for ( i = 1; i < WIDTH; i++ ) begin : match_after_first_zero + assign matchvec[i] = (&mask[i-1:0] & masken_or_fullmask) ? 
1'b1 : (mask[i] == data[i]); + end : match_after_first_zero + + assign match = &matchvec[WIDTH-1:0]; // all bits either matched or were masked off + +endmodule // rvmaskandmatch + +module rvbtb_tag_hash ( + input logic [31:1] pc, + output logic [`RV_BTB_BTAG_SIZE-1:0] hash + ); +`ifndef RV_BTB_BTAG_FOLD + assign hash = {(pc[`RV_BTB_ADDR_HI+`RV_BTB_BTAG_SIZE+`RV_BTB_BTAG_SIZE+`RV_BTB_BTAG_SIZE:`RV_BTB_ADDR_HI+`RV_BTB_BTAG_SIZE+`RV_BTB_BTAG_SIZE+1] ^ + pc[`RV_BTB_ADDR_HI+`RV_BTB_BTAG_SIZE+`RV_BTB_BTAG_SIZE:`RV_BTB_ADDR_HI+`RV_BTB_BTAG_SIZE+1] ^ + pc[`RV_BTB_ADDR_HI+`RV_BTB_BTAG_SIZE:`RV_BTB_ADDR_HI+1])}; // XOR of three consecutive BTAG_SIZE-wide pc slices above ADDR_HI +`else + assign hash = {( + pc[`RV_BTB_ADDR_HI+`RV_BTB_BTAG_SIZE+`RV_BTB_BTAG_SIZE:`RV_BTB_ADDR_HI+`RV_BTB_BTAG_SIZE+1] ^ + pc[`RV_BTB_ADDR_HI+`RV_BTB_BTAG_SIZE:`RV_BTB_ADDR_HI+1])}; // folded variant: XOR of only the two lowest slices +`endif + +// assign hash = {pc[`RV_BTB_ADDR_HI+1],(pc[`RV_BTB_ADDR_HI+13:`RV_BTB_ADDR_HI+10] ^ +// pc[`RV_BTB_ADDR_HI+9:`RV_BTB_ADDR_HI+6] ^ +// pc[`RV_BTB_ADDR_HI+5:`RV_BTB_ADDR_HI+2])}; + +endmodule // rvbtb_tag_hash + +module rvbtb_addr_hash ( + input logic [31:1] pc, + output logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] hash + ); + + assign hash[`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] = pc[`RV_BTB_INDEX1_HI:`RV_BTB_INDEX1_LO] ^ + +`ifndef RV_BTB_FOLD2_INDEX_HASH + pc[`RV_BTB_INDEX2_HI:`RV_BTB_INDEX2_LO] ^ // middle slice is dropped when RV_BTB_FOLD2_INDEX_HASH is defined +`endif + + pc[`RV_BTB_INDEX3_HI:`RV_BTB_INDEX3_LO]; + +endmodule // rvbtb_addr_hash + +module rvbtb_ghr_hash ( + input logic [`RV_BTB_ADDR_HI:`RV_BTB_ADDR_LO] hashin, + input logic [`RV_BHT_GHR_RANGE] ghr, + output logic [`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO] hash + ); + + // The hash function is too complex to write in verilog for all cases. + // The config script generates the logic string based on the bp config. 
+ assign hash[`RV_BHT_ADDR_HI:`RV_BHT_ADDR_LO] = `RV_BHT_HASH_STRING; + +endmodule // rvbtb_ghr_hash + + +// Check if S_ADDR <= addr < E_ADDR for the CCM window that starts at CCM_SADR +module rvrangecheck #(CCM_SADR = 32'h0, + CCM_SIZE = 128) ( + input logic [31:0] addr, // Address to be checked for range + output logic in_range, // S_ADDR <= addr < E_ADDR + output logic in_region // addr shares its top REGION_BITS bits with CCM_SADR +); + + localparam REGION_BITS = 4; + localparam MASK_BITS = 10 + $clog2(CCM_SIZE); // number of low address bits that are not compared + + logic [31:0] start_addr; + logic [3:0] region; + + assign start_addr[31:0] = CCM_SADR; + assign region[REGION_BITS-1:0] = start_addr[31:(32-REGION_BITS)]; + + assign in_region = (addr[31:(32-REGION_BITS)] == region[REGION_BITS-1:0]); + if (CCM_SIZE == 48) + assign in_range = (addr[31:MASK_BITS] == start_addr[31:MASK_BITS]) & ~(&addr[MASK_BITS-1 : MASK_BITS-2]); // 48 is not a power of two: also reject the top quarter of the rounded-up window + else + assign in_range = (addr[31:MASK_BITS] == start_addr[31:MASK_BITS]); + +endmodule // rvrangecheck + +// Even parity generator (WIDTH defaults to 16 bits) +module rveven_paritygen #(WIDTH = 16) ( + input logic [WIDTH-1:0] data_in, // Data + output logic parity_out // generated even parity + ); + + assign parity_out = ^(data_in[WIDTH-1:0]) ; // XOR reduction of all data bits + +endmodule // rveven_paritygen + +module rveven_paritycheck #(WIDTH = 16) ( + input logic [WIDTH-1:0] data_in, // Data + input logic parity_in, // Parity bit to check against data_in + output logic parity_err // Parity error + ); + + assign parity_err = ^(data_in[WIDTH-1:0]) ^ parity_in ; // set when data parity and parity_in disagree + +endmodule // rveven_paritycheck + +module rvecc_encode ( + input [31:0] din, + output [6:0] ecc_out + ); +logic [5:0] ecc_out_temp; + + assign ecc_out_temp[0] = din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]; + assign ecc_out_temp[1] = din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]; + assign ecc_out_temp[2] = 
din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]; + assign ecc_out_temp[3] = din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]; + assign ecc_out_temp[4] = din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]; + assign ecc_out_temp[5] = din[26]^din[27]^din[28]^din[29]^din[30]^din[31]; + + assign ecc_out[6:0] = {(^din[31:0])^(^ecc_out_temp[5:0]),ecc_out_temp[5:0]}; // bit 6 is the overall parity of the data and the six check bits + +endmodule // rvecc_encode + +module rvecc_decode ( + input en, + input [31:0] din, + input [6:0] ecc_in, + input sed_ded, // only do detection and no correction. Used for the I$ + output [31:0] dout, + output [6:0] ecc_out, + output single_ecc_error, + output double_ecc_error + + ); + + logic [6:0] ecc_check; + logic [38:0] error_mask; + logic [38:0] din_plus_parity, dout_plus_parity; + + // Generate the syndrome: recompute each check bit from din and XOR it with the received ecc_in bit + assign ecc_check[0] = ecc_in[0]^din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]; + assign ecc_check[1] = ecc_in[1]^din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]; + assign ecc_check[2] = ecc_in[2]^din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]; + assign ecc_check[3] = ecc_in[3]^din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]; + assign ecc_check[4] = ecc_in[4]^din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]; + assign ecc_check[5] = ecc_in[5]^din[26]^din[27]^din[28]^din[29]^din[30]^din[31]; + + // Overall parity check across din and all seven ecc_in bits; forced to 0 when sed_ded is set + assign ecc_check[6] = ((^din[31:0])^(^ecc_in[6:0])) & 
~sed_ded; + + assign single_ecc_error = en & (ecc_check[6:0] != 0) & ecc_check[6]; // this will never be on for sed_ded + assign double_ecc_error = en & (ecc_check[6:0] != 0) & ~ecc_check[6]; // all errors in the sed_ded case will be recorded as DE + + // Generate the mask for error correction: one-hot on the 1-based codeword position named by ecc_check[5:0] + for (genvar i=1; i<40; i++) begin + assign error_mask[i-1] = (ecc_check[5:0] == i); + end + + // Generate the corrected data: check bits sit at 1-based positions 1, 2, 4, 8, 16, 32 and the overall parity is bit 38 + assign din_plus_parity[38:0] = {ecc_in[6], din[31:26], ecc_in[5], din[25:11], ecc_in[4], din[10:4], ecc_in[3], din[3:1], ecc_in[2], din[0], ecc_in[1:0]}; + + assign dout_plus_parity[38:0] = single_ecc_error ? (error_mask[38:0] ^ din_plus_parity[38:0]) : din_plus_parity[38:0]; // flip the flagged bit only for a single-bit error + assign dout[31:0] = {dout_plus_parity[37:32], dout_plus_parity[30:16], dout_plus_parity[14:8], dout_plus_parity[6:4], dout_plus_parity[2]}; // strip the check bits back out + assign ecc_out[6:0] = {(dout_plus_parity[38] ^ (ecc_check[6:0] == 7'b1000000)), dout_plus_parity[31], dout_plus_parity[15], dout_plus_parity[7], dout_plus_parity[3], dout_plus_parity[1:0]}; // parity bit is flipped when only the overall parity check failed + +endmodule // rvecc_decode diff --git a/design/lib/mem_lib.sv b/design/lib/mem_lib.sv new file mode 100644 index 0000000..daa8cab --- /dev/null +++ b/design/lib/mem_lib.sv @@ -0,0 +1,1025 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//========================================================================================================================= +//=================================== START OF CCM ======================================================================= +//============= Possible sram sizes for a 39 bit wide memory ( 4 bytes + 7 bits ECC ) ===================================== +//------------------------------------------------------------------------------------------------------------------------- +module ram_32768x39 + ( input logic CLK, + input logic [14:0] ADR, + input logic [38:0] D, + + output logic [38:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [38:0] ram_core [32767:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_32768x39 + + +module ram_16384x39 + ( input logic CLK, + input logic [13:0] ADR, + input logic [38:0] D, + + output logic [38:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [38:0] ram_core [16383:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_16384x39 + +module ram_8192x39 + ( input logic CLK, + input logic [12:0] ADR, + input logic [38:0] D, + + output logic [38:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [38:0] ram_core [8191:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_8192x39 + +module ram_4096x39 + ( input logic CLK, + input logic [11:0] ADR, + input logic [38:0] D, + + output logic [38:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg 
[38:0] ram_core [4095:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_4096x39 + +module ram_3072x39 + ( input logic CLK, + input logic [11:0] ADR, + input logic [38:0] D, + + output logic [38:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [38:0] ram_core [3071:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_3072x39 + + + +module ram_2048x39 + ( input logic CLK, + input logic [10:0] ADR, + input logic [38:0] D, + + output logic [38:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [38:0] ram_core [2047:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_2048x39 + +module ram_1536x39 // need this for the 48KB DCCM option + ( input logic CLK, + input logic [10:0] ADR, + input logic [38:0] D, + + output logic [38:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [38:0] ram_core [1535:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_1536x39 + + +module ram_1024x39 + ( input logic CLK, + input logic [9:0] ADR, + input logic [38:0] D, + + output logic [38:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [38:0] ram_core [1023:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_1024x39 + +module 
ram_768x39 + ( input logic CLK, + input logic [9:0] ADR, + input logic [38:0] D, + + output logic [38:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [38:0] ram_core [767:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_768x39 + + +module ram_512x39 + ( input logic CLK, + input logic [8:0] ADR, + input logic [38:0] D, + + output logic [38:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [38:0] ram_core [511:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_512x39 + + +module ram_256x39 + ( input logic CLK, + input logic [7:0] ADR, + input logic [38:0] D, + + output logic [38:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [38:0] ram_core [255:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_256x39 + + +module ram_128x39 + ( input logic CLK, + input logic [6:0] ADR, + input logic [38:0] D, + + output logic [38:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [38:0] ram_core [127:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_128x39 + +//========================================================================================================================= +//=================================== START OF TAGS ======================================================================= +// I CACHE TAGS +module ram_1024x20 + ( input 
logic CLK, + + input logic [9:0] ADR, + input logic [19:0] D, + + output logic [19:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [19:0] ram_core [1023:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + +endmodule // ram_1024x20 + +module ram_512x20 + ( input logic CLK, + input logic [8:0] ADR, + input logic [19:0] D, + + output logic [19:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [19:0] ram_core [511:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_512x20 + +module ram_256x20 + ( input logic CLK, + input logic [7:0] ADR, + input logic [19:0] D, + + output logic [19:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [19:0] ram_core [255:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + +endmodule // ram_256x20 + +module ram_128x20 + ( input logic CLK, + input logic [6:0] ADR, + input logic [19:0] D, + + output logic [19:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [19:0] ram_core [127:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_128x20 + +module ram_64x20 + ( input logic CLK, + input logic [5:0] ADR, + input logic [19:0] D, + + output logic [19:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [19:0] ram_core [63:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + 
ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_64x20 + +// LATEST ICACHE MEMORIES + + +// 4096 x 34 +module ram_4096x34 + ( input logic CLK, + input logic [11:0] ADR, + input logic [33:0] D, + + output logic [33:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [33:0] ram_core [4095:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_4096x34 + +// 2048x34 +module ram_2048x34 + ( input logic CLK, + input logic [10:0] ADR, + input logic [33:0] D, + + output logic [33:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [33:0] ram_core [2047:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_2048x34 + +// 1024x34 +module ram_1024x34 + ( input logic CLK, + input logic [9:0] ADR, + input logic [33:0] D, + + output logic [33:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [33:0] ram_core [1023:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_1024x34 + +// 512x34 +module ram_512x34 + ( input logic CLK, + input logic [8:0] ADR, + input logic [33:0] D, + + output logic [33:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [33:0] ram_core [511:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_512x34 + +// 256x34 +module ram_256x34 + ( input logic CLK, + input logic [7:0] ADR, + input logic [33:0] D, 
+ + output logic [33:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [33:0] ram_core [255:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_256x34 + +// 128x34 +module ram_128x34 + ( input logic CLK, + input logic [6:0] ADR, + input logic [33:0] D, + + output logic [33:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [33:0] ram_core [127:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_128x34 + +// 64x34 +module ram_64x34 + ( input logic CLK, + input logic [5:0] ADR, + input logic [33:0] D, + + output logic [33:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [33:0] ram_core [63:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_64x34 + +// New SRAMS for ECC; ECC on 16b boundaries + +// 4096x42 +module ram_4096x42 + ( input logic CLK, + input logic [11:0] ADR, + input logic [41:0] D, + + output logic [41:0] Q, + + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [41:0] ram_core [4095:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_4096x42 + + +// 2048x42 +module ram_2048x42 + ( input logic CLK, + input logic [10:0] ADR, + input logic [41:0] D, + + output logic [41:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [41:0] ram_core [2047:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active 
high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + +endmodule // ram_2048x42 + +// 1024x42 +module ram_1024x42 + ( input logic CLK, + input logic [9:0] ADR, + input logic [41:0] D, + + output logic [41:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [41:0] ram_core [1023:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + +endmodule // ram_1024x42 + + +// 512x42 +module ram_512x42 + ( input logic CLK, + input logic [8:0] ADR, + input logic [41:0] D, + + output logic [41:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [41:0] ram_core [511:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_512x42 + + +// 256x42 +module ram_256x42 + ( input logic CLK, + input logic [7:0] ADR, + input logic [41:0] D, + + output logic [41:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [41:0] ram_core [255:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_256x42 + +// 128x42 +module ram_128x42 + ( input logic CLK, + input logic [6:0] ADR, + input logic [41:0] D, + + output logic [41:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [41:0] ram_core [127:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_128x42 + +// 64x42 +module ram_64x42 + ( input logic CLK, + input logic [5:0] ADR, + input logic [41:0] D, + + 
output logic [41:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [41:0] ram_core [63:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + +endmodule // ram_64x42 + + +/// END DATA + +// START TAGS + +// 1024x21 +module ram_1024x21 + ( input logic CLK, + input logic [9:0] ADR, + input logic [20:0] D, + + output logic [20:0] Q, + + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [20:0] ram_core [1023:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + +endmodule // ram_1024x21 + +// 512x21 +module ram_512x21 + ( input logic CLK, + input logic [8:0] ADR, + input logic [20:0] D, + + output logic [20:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [20:0] ram_core [511:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_512x21 + +// 256x21 +module ram_256x21 + ( input logic CLK, + input logic [7:0] ADR, + input logic [20:0] D, + + output logic [20:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [20:0] ram_core [255:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_256x21 + +// 128x21 +module ram_128x21 + ( input logic CLK, + input logic [6:0] ADR, + input logic [20:0] D, + + output logic [20:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [20:0] ram_core [127:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be 
specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_128x21 + +// 64x21 +module ram_64x21 + ( input logic CLK, + input logic [5:0] ADR, + input logic [20:0] D, + + output logic [20:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [20:0] ram_core [63:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + +endmodule // ram_64x21 + +// New tag rams for ECC. + +// 1024x25 +module ram_1024x25 + ( input logic CLK, + input logic [9:0] ADR, + input logic [24:0] D, + + output logic [24:0] Q, + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [24:0] ram_core [1023:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_1024x25 + +// 512x25 +module ram_512x25 + ( input logic CLK, + input logic [8:0] ADR, + input logic [24:0] D, + + output logic [24:0] Q, + + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [24:0] ram_core [511:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_512x25 + +// 256x25 +module ram_256x25 + ( input logic CLK, + input logic [7:0] ADR, + input logic [24:0] D, + + output logic [24:0] Q, + + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [24:0] ram_core [255:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_256x25 + +// 128x25 +module ram_128x25 + ( input logic CLK, + input logic [6:0] ADR, + input logic [24:0] 
D, + + output logic [24:0] Q, + + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [24:0] ram_core [127:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_128x25 + +// 64x25 +module ram_64x25 + ( input logic CLK, + input logic [5:0] ADR, + input logic [24:0] D, + + output logic [24:0] Q, + + input logic WE ); + + // behavior to be replaced by actual SRAM in VLE + + reg [24:0] ram_core [63:0]; + + always_ff @(posedge CLK) begin + if (WE) begin// for active high WE - must be specified by user + ram_core[ADR] <= D; Q <= 'x; end else + Q <= ram_core[ADR]; + end + + + + + +endmodule // ram_64x25 diff --git a/design/lsu/lsu.sv b/design/lsu/lsu.sv new file mode 100644 index 0000000..20ea991 --- /dev/null +++ b/design/lsu/lsu.sv @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//******************************************************************************** +// $Id$ +// +// +// Function: Top level file for load store unit +// Comments: +// +// +// DC1 -> DC2 -> DC3 -> DC4 (Commit) +// +//******************************************************************************** + +module lsu + import swerv_types::*; +( + + input logic [31:0] i0_result_e4_eff, // I0 e4 result for e4 -> dc3 store forwarding + input logic [31:0] i1_result_e4_eff, // I1 e4 result for e4 -> dc3 store forwarding + input logic [31:0] i0_result_e2, // I0 e2 result for e2 -> dc2 store forwarding + + input logic flush_final_e3, // I0/I1 flush in e3 + input logic i0_flush_final_e3, // I0 flush in e3 + input logic dec_tlu_flush_lower_wb, // I0/I1 writeback flush. This is used to flush the old packets only + input logic dec_tlu_i0_kill_writeb_wb, // I0 is flushed, don't writeback any results to arch state + input logic dec_tlu_i1_kill_writeb_wb, // I1 is flushed, don't writeback any results to arch state + input logic dec_tlu_cancel_e4, // cancel the bus load in dc4 and reset the freeze + + // chicken signals + input logic dec_tlu_non_blocking_disable, // disable the non block + input logic dec_tlu_wb_coalescing_disable, // disable the write buffer coalesce + input logic dec_tlu_ld_miss_byp_wb_disable, // disable the miss bypass in the write buffer + input logic dec_tlu_sideeffect_posted_disable, // disable posted writes to sideeffect addr to the bus + input logic dec_tlu_core_ecc_disable, // disable the generation of the ecc + + input logic [31:0] exu_lsu_rs1_d, // address rs operand + input logic [31:0] exu_lsu_rs2_d, // store data + input logic [11:0] dec_lsu_offset_d, // address offset operand + + input lsu_pkt_t lsu_p, // lsu control packet + input logic dec_i0_lsu_decode_d, // lsu is in i0 + input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control + + output logic [31:0] lsu_result_dc3, // lsu load data + output logic [31:0] lsu_result_corr_dc4, // This is 
the ECC corrected data going to RF + output logic lsu_freeze_dc3, // lsu freeze due to load to external + output logic lsu_load_stall_any, // This is for blocking loads in the decode + output logic lsu_store_stall_any, // This is for blocking stores in the decode + output logic lsu_idle_any, // lsu buffers are empty and no instruction in the pipeline + output logic lsu_halt_idle_any, // This is used to enter halt mode. Exclude DMA + + output lsu_error_pkt_t lsu_error_pkt_dc3, // lsu exception packet + output logic lsu_freeze_external_ints_dc3, // freeze due to sideeffects loads need to suppress external interrupt + output logic lsu_imprecise_error_load_any, // bus load imprecise error + output logic lsu_imprecise_error_store_any, // bus store imprecise error + output logic [31:0] lsu_imprecise_error_addr_any, // bus store imprecise error address + + // Non-blocking loads + input logic dec_nonblock_load_freeze_dc2, // + output logic lsu_nonblock_load_valid_dc3, // there is an external load -> put in the cam + output logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_dc3, // the tag of the external non block load + output logic lsu_nonblock_load_inv_dc5, // invalidate signal for the cam entry for non block loads + output logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_dc5, // tag of the enrty which needs to be invalidated + output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam + output logic lsu_nonblock_load_data_error, // non block load has an error + output logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error + output logic [31:0] lsu_nonblock_load_data, // Data of the non block load + + output logic lsu_pmu_misaligned_dc3, // PMU : misaligned + output logic lsu_pmu_bus_trxn, // PMU : bus transaction + output logic lsu_pmu_bus_misaligned, // PMU : misaligned access going to the bus + output logic lsu_pmu_bus_error, // PMU 
: bus sending error back + output logic lsu_pmu_bus_busy, // PMU : bus is not ready + + // Trigger signals + input trigger_pkt_t [3:0] trigger_pkt_any, // Trigger info from the decode + output logic [3:0] lsu_trigger_match_dc3, // lsu trigger hit (one bit per trigger) + + // DCCM ports + output logic dccm_wren, // DCCM write enable + output logic dccm_rden, // DCCM read enable + output logic [`RV_DCCM_BITS-1:0] dccm_wr_addr, // DCCM write address (write can happen to one bank only) + output logic [`RV_DCCM_BITS-1:0] dccm_rd_addr_lo, // DCCM read address low bank + output logic [`RV_DCCM_BITS-1:0] dccm_rd_addr_hi, // DCCM read address hi bank (hi and low same if aligned read) + output logic [`RV_DCCM_FDATA_WIDTH-1:0] dccm_wr_data, // DCCM write data (this is always aligned) + + input logic [`RV_DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // DCCM read data low bank + input logic [`RV_DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // DCCM read data hi bank + + // PIC ports + output logic picm_wren, // PIC memory write enable + output logic picm_rden, // PIC memory read enable + output logic picm_mken, // Need to read the mask for stores to determine which bits to write/forward + output logic [31:0] picm_addr, // PIC memory address + output logic [31:0] picm_wr_data, // PIC memory write data + input logic [31:0] picm_rd_data, // PIC memory read/mask data + + // AXI Write Channels + output logic lsu_axi_awvalid, + input logic lsu_axi_awready, + output logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_awid, + output logic [31:0] lsu_axi_awaddr, + output logic [3:0] lsu_axi_awregion, + output logic [7:0] lsu_axi_awlen, + output logic [2:0] lsu_axi_awsize, + output logic [1:0] lsu_axi_awburst, + output logic lsu_axi_awlock, + output logic [3:0] lsu_axi_awcache, + output logic [2:0] lsu_axi_awprot, + output logic [3:0] lsu_axi_awqos, + + output logic lsu_axi_wvalid, + input logic lsu_axi_wready, + output logic [63:0] lsu_axi_wdata, + output logic [7:0] lsu_axi_wstrb, + output logic lsu_axi_wlast, + + 
input logic lsu_axi_bvalid, + output logic lsu_axi_bready, + input logic [1:0] lsu_axi_bresp, + input logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_bid, + + // AXI Read Channels + output logic lsu_axi_arvalid, + input logic lsu_axi_arready, + output logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_arid, + output logic [31:0] lsu_axi_araddr, + output logic [3:0] lsu_axi_arregion, + output logic [7:0] lsu_axi_arlen, + output logic [2:0] lsu_axi_arsize, + output logic [1:0] lsu_axi_arburst, + output logic lsu_axi_arlock, + output logic [3:0] lsu_axi_arcache, + output logic [2:0] lsu_axi_arprot, + output logic [3:0] lsu_axi_arqos, + + input logic lsu_axi_rvalid, + output logic lsu_axi_rready, + input logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_rid, + input logic [63:0] lsu_axi_rdata, + input logic [1:0] lsu_axi_rresp, + input logic lsu_axi_rlast, + + input logic lsu_bus_clk_en, // external drives a clock_en to control bus ratio + + // DMA slave + input logic dma_dccm_req, // DMA read/write to dccm + input logic [31:0] dma_mem_addr, // DMA address + input logic [2:0] dma_mem_sz, // DMA access size + input logic dma_mem_write, // DMA access is a write + input logic [63:0] dma_mem_wdata, // DMA write data + + output logic dccm_dma_rvalid, // lsu data valid for DMA dccm read + output logic dccm_dma_ecc_error, // DMA load had ecc error + output logic [63:0] dccm_dma_rdata, // lsu data for DMA dccm read + output logic dccm_ready, // lsu ready for DMA access + + input logic clk_override, // Disable clock gating + input logic scan_mode, // scan + input logic clk, + input logic free_clk, + input logic rst_l + + ); + + +`include "global.h" + + logic lsu_dccm_rden_dc3; + logic [63:0] store_data_dc2; + logic [63:0] store_data_dc3; + logic [31:0] store_data_dc4; + logic [31:0] store_data_dc5; + logic [31:0] store_ecc_datafn_hi_dc3; + logic [31:0] store_ecc_datafn_lo_dc3; + + logic single_ecc_error_hi_dc3, single_ecc_error_lo_dc3; + logic lsu_single_ecc_error_dc3, lsu_single_ecc_error_dc4, 
lsu_single_ecc_error_dc5; + logic lsu_double_ecc_error_dc3; + + logic [31:0] dccm_data_hi_dc3; + logic [31:0] dccm_data_lo_dc3; + logic [6:0] dccm_data_ecc_hi_dc3; + logic [6:0] dccm_data_ecc_lo_dc3; + + logic [31:0] lsu_ld_data_dc3; + logic [31:0] lsu_ld_data_corr_dc3; + logic [31:0] picm_mask_data_dc3; + + logic [31:0] lsu_addr_dc1, lsu_addr_dc2, lsu_addr_dc3, lsu_addr_dc4, lsu_addr_dc5; + logic [31:0] end_addr_dc1, end_addr_dc2, end_addr_dc3, end_addr_dc4, end_addr_dc5; + + lsu_pkt_t lsu_pkt_dc1, lsu_pkt_dc2, lsu_pkt_dc3, lsu_pkt_dc4, lsu_pkt_dc5; + logic lsu_i0_valid_dc1, lsu_i0_valid_dc2, lsu_i0_valid_dc3, lsu_i0_valid_dc4, lsu_i0_valid_dc5; + + // Store Buffer signals + logic isldst_dc1, dccm_ldst_dc2, dccm_ldst_dc3; + logic store_stbuf_reqvld_dc3; + logic load_stbuf_reqvld_dc3; + logic ldst_stbuf_reqvld_dc3; + logic lsu_commit_dc5; + logic lsu_exc_dc2; + + logic addr_in_dccm_dc1, addr_in_dccm_dc2, addr_in_dccm_dc3; + logic addr_in_pic_dc1, addr_in_pic_dc2, addr_in_pic_dc3; + logic addr_external_dc2, addr_external_dc3, addr_external_dc4, addr_external_dc5; + + logic stbuf_reqvld_any; + logic stbuf_reqvld_flushed_any; + logic stbuf_addr_in_pic_any; + logic [DCCM_BYTE_WIDTH-1:0] stbuf_byteen_any; + logic [LSU_SB_BITS-1:0] stbuf_addr_any; + logic [DCCM_DATA_WIDTH-1:0] stbuf_data_any; + logic [(DCCM_FDATA_WIDTH-DCCM_DATA_WIDTH-1):0] stbuf_ecc_any; + + logic lsu_cmpen_dc2; + logic [DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_dc3; + logic [DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_dc3; + logic [DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_dc3; + logic [DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_dc3; + + logic lsu_stbuf_commit_any; + logic lsu_stbuf_empty_any; + logic lsu_stbuf_nodma_empty_any; // Store Buffer is empty except dma writes + logic lsu_stbuf_full_any; + + // Bus signals + logic lsu_busreq_dc5; + logic lsu_bus_buffer_pend_any; + logic lsu_bus_buffer_empty_any; + logic lsu_bus_buffer_full_any; + logic lsu_busreq_dc2; + logic [31:0] bus_read_data_dc3; + logic 
ld_bus_error_dc3; + logic [31:0] ld_bus_error_addr_dc3; + + logic flush_dc2_up, flush_dc3, flush_dc4, flush_dc5, flush_prior_dc5; + logic is_sideeffects_dc2, is_sideeffects_dc3; + logic ldst_nodma_dc1todc3; + + + // Clocks + logic lsu_c1_dc3_clk, lsu_c1_dc4_clk, lsu_c1_dc5_clk; + logic lsu_c2_dc3_clk, lsu_c2_dc4_clk, lsu_c2_dc5_clk; + logic lsu_freeze_c1_dc1_clk, lsu_freeze_c1_dc2_clk, lsu_freeze_c1_dc3_clk; + logic lsu_store_c1_dc1_clk, lsu_store_c1_dc2_clk, lsu_store_c1_dc3_clk, lsu_store_c1_dc4_clk, lsu_store_c1_dc5_clk; + + logic lsu_freeze_c2_dc1_clk, lsu_freeze_c2_dc2_clk, lsu_freeze_c2_dc3_clk, lsu_freeze_c2_dc4_clk; + logic lsu_stbuf_c1_clk; + logic lsu_bus_ibuf_c1_clk, lsu_bus_obuf_c1_clk, lsu_bus_buf_c1_clk; + logic lsu_dccm_c1_dc3_clk, lsu_pic_c1_dc3_clk; + logic lsu_busm_clk; + logic lsu_free_c2_clk; + + + lsu_lsc_ctl lsu_lsc_ctl(.*); + + // block stores in decode - for either bus or stbuf reasons + assign lsu_store_stall_any = lsu_stbuf_full_any | lsu_bus_buffer_full_any; + assign lsu_load_stall_any = lsu_bus_buffer_full_any; + + // Ready to accept dma trxns + // There can't be any inpipe forwarding from non-dma packet to dma packet since they can be flushed so we can't have ld/st in dc3-dc5 when dma is in dc2 + assign ldst_nodma_dc1todc3 = (lsu_pkt_dc1.valid & ~lsu_pkt_dc1.dma) | (lsu_pkt_dc2.valid & ~lsu_pkt_dc2.dma) | (lsu_pkt_dc3.valid & ~lsu_pkt_dc3.dma); + assign dccm_ready = ~(lsu_p.valid | lsu_stbuf_full_any | lsu_freeze_dc3 | ldst_nodma_dc1todc3); + + // Generate per cycle flush signals + assign flush_dc2_up = flush_final_e3 | i0_flush_final_e3 | dec_tlu_flush_lower_wb; + assign flush_dc3 = (flush_final_e3 & i0_flush_final_e3) | dec_tlu_flush_lower_wb; + assign flush_dc4 = dec_tlu_flush_lower_wb; + assign flush_dc5 = (dec_tlu_i0_kill_writeb_wb | (dec_tlu_i1_kill_writeb_wb & ~lsu_i0_valid_dc5)); + assign flush_prior_dc5 = dec_tlu_i0_kill_writeb_wb & ~lsu_i0_valid_dc5; // Flush is due to i0 instruction and ld/st is in i1 + + // lsu idle + assign 
lsu_idle_any = ~(lsu_pkt_dc1.valid | lsu_pkt_dc2.valid | lsu_pkt_dc3.valid | lsu_pkt_dc4.valid | lsu_pkt_dc5.valid) & + lsu_bus_buffer_empty_any & lsu_stbuf_empty_any; + + // lsu halt idle. This is used for entering the halt mode + // Indicates non-idle if there is a instruction valid in dc1-dc5 or read/write buffers are non-empty since they can come with error + // Need to make sure bus trxns are done and there are no non-dma writes in store buffer + assign lsu_halt_idle_any = ~((lsu_pkt_dc1.valid & ~lsu_pkt_dc1.dma) | + (lsu_pkt_dc2.valid & ~lsu_pkt_dc2.dma) | + (lsu_pkt_dc3.valid & ~lsu_pkt_dc3.dma) | + (lsu_pkt_dc4.valid & ~lsu_pkt_dc4.dma) | + (lsu_pkt_dc5.valid & ~lsu_pkt_dc5.dma)) & + lsu_bus_buffer_empty_any & lsu_stbuf_nodma_empty_any; + + // Instantiate the store buffer + //assign ldst_stbuf_reqvld_dc3 = store_stbuf_reqvld_dc3 | load_stbuf_reqvld_dc3; + assign store_stbuf_reqvld_dc3 = lsu_pkt_dc3.valid & lsu_pkt_dc3.store & (addr_in_dccm_dc3 | addr_in_pic_dc3) & (~flush_dc3 | lsu_pkt_dc3.dma) & ~lsu_freeze_dc3; + assign load_stbuf_reqvld_dc3 = lsu_pkt_dc3.valid & lsu_pkt_dc3.load & (addr_in_dccm_dc3 | addr_in_pic_dc3) & lsu_single_ecc_error_dc3 & (~flush_dc3 | lsu_pkt_dc3.dma) & ~lsu_freeze_dc3; + + // These go to store buffer to detect full + assign isldst_dc1 = lsu_pkt_dc1.valid & (lsu_pkt_dc1.load | lsu_pkt_dc1.store); + assign dccm_ldst_dc2 = lsu_pkt_dc2.valid & (lsu_pkt_dc2.load | lsu_pkt_dc2.store) & (addr_in_dccm_dc2 | addr_in_pic_dc2); + assign dccm_ldst_dc3 = lsu_pkt_dc3.valid & (lsu_pkt_dc3.load | lsu_pkt_dc3.store) & (addr_in_dccm_dc3 | addr_in_pic_dc3); + + // Disable Forwarding for now + assign lsu_cmpen_dc2 = lsu_pkt_dc2.valid & (lsu_pkt_dc2.load | lsu_pkt_dc2.store) & (addr_in_dccm_dc2 | addr_in_pic_dc2); + + // Bus signals + assign lsu_busreq_dc2 = lsu_pkt_dc2.valid & (lsu_pkt_dc2.load | lsu_pkt_dc2.store) & addr_external_dc2 & ~flush_dc2_up & ~lsu_exc_dc2; + + // PMU signals + assign lsu_pmu_misaligned_dc3 = lsu_pkt_dc3.valid & 
((lsu_pkt_dc3.half & lsu_addr_dc3[0]) | (lsu_pkt_dc3.word & (|lsu_addr_dc3[1:0]))); + + + lsu_dccm_ctl dccm_ctl ( + .lsu_addr_dc1(lsu_addr_dc1[31:0]), + .end_addr_dc1(end_addr_dc1[DCCM_BITS-1:0]), + .lsu_addr_dc3(lsu_addr_dc3[DCCM_BITS-1:0]), + .* + ); + + lsu_stbuf stbuf( + .lsu_addr_dc1(lsu_addr_dc1[LSU_SB_BITS-1:0]), + .end_addr_dc1(end_addr_dc1[LSU_SB_BITS-1:0]), + .lsu_addr_dc2(lsu_addr_dc2[LSU_SB_BITS-1:0]), + .end_addr_dc2(end_addr_dc2[LSU_SB_BITS-1:0]), + .lsu_addr_dc3(lsu_addr_dc3[LSU_SB_BITS-1:0]), + .end_addr_dc3(end_addr_dc3[LSU_SB_BITS-1:0]), + .* + + ); + + lsu_ecc ecc ( + .lsu_addr_dc3(lsu_addr_dc3[DCCM_BITS-1:0]), + .end_addr_dc3(end_addr_dc3[DCCM_BITS-1:0]), + .* + ); + + lsu_trigger trigger ( + .store_data_dc3(store_data_dc3[31:0]), + .* + ); + + // Clk domain + lsu_clkdomain clkdomain (.*); + + // Bus interface + lsu_bus_intf bus_intf (.*); + + //Flops + //rvdffs #(1) lsu_i0_valid_dc1ff (.*, .din(dec_i0_lsu_decode_d), .dout(lsu_i0_valid_dc1), .en(~lsu_freeze_dc3)); + rvdff #(1) lsu_i0_valid_dc1ff (.*, .din(dec_i0_lsu_decode_d), .dout(lsu_i0_valid_dc1), .clk(lsu_freeze_c2_dc1_clk)); + rvdff #(1) lsu_i0_valid_dc2ff (.*, .din(lsu_i0_valid_dc1), .dout(lsu_i0_valid_dc2), .clk(lsu_freeze_c2_dc2_clk)); + rvdff #(1) lsu_i0_valid_dc3ff (.*, .din(lsu_i0_valid_dc2), .dout(lsu_i0_valid_dc3), .clk(lsu_freeze_c2_dc3_clk)); + rvdff #(1) lsu_i0_valid_dc4ff (.*, .din(lsu_i0_valid_dc3), .dout(lsu_i0_valid_dc4), .clk(lsu_freeze_c2_dc4_clk)); + rvdff #(1) lsu_i0_valid_dc5ff (.*, .din(lsu_i0_valid_dc4), .dout(lsu_i0_valid_dc5), .clk(lsu_c2_dc5_clk)); + rvdff #(1) lsu_single_ecc_err_dc4(.*, .din(lsu_single_ecc_error_dc3), .dout(lsu_single_ecc_error_dc4), .clk(lsu_c2_dc4_clk)); + rvdff #(1) lsu_single_ecc_err_dc5(.*, .din(lsu_single_ecc_error_dc4), .dout(lsu_single_ecc_error_dc5), .clk(lsu_c2_dc5_clk)); + +`ifdef ASSERT_ON + logic [8:0] store_data_bypass_sel; + assign store_data_bypass_sel[8:0] = {lsu_p.store_data_bypass_c1, + lsu_p.store_data_bypass_c2, + 
lsu_p.store_data_bypass_i0_e2_c2, + lsu_p.store_data_bypass_e4_c1[1:0], + lsu_p.store_data_bypass_e4_c2[1:0], + lsu_p.store_data_bypass_e4_c3[1:0]}; + assert_store_data_bypass_onehot: assert #0 ($onehot0(store_data_bypass_sel[8:0])); + + assert_picm_rden_and_wren: assert #0 ($onehot0({(picm_rden | picm_mken),picm_wren})); + assert_picm_rden_and_dccmen: assert #0 ($onehot0({(picm_rden | picm_mken),dccm_rden})); + assert_picm_wren_and_dccmen: assert #0 ($onehot0({picm_wren, dccm_wren})); + + //assert_no_exceptions: assert #0 (lsu_exc_pkt_dc3.exc_valid == 1'b0); + property exception_no_lsu_flush; + @(posedge clk) disable iff(~rst_l) lsu_error_pkt_dc3.exc_valid |-> ##[1:2] (flush_dc4 | flush_dc5); + endproperty + assert_exception_no_lsu_flush: assert property (exception_no_lsu_flush) else + $display("No flush within 2 cycles of exception"); +`endif + +endmodule // lsu diff --git a/design/lsu/lsu_addrcheck.sv b/design/lsu/lsu_addrcheck.sv new file mode 100644 index 0000000..6bc483b --- /dev/null +++ b/design/lsu/lsu_addrcheck.sv @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: Checks the memory map for the start/end address of the access in dc1
+// Comments: Classifies the access as DCCM, PIC or external, derives the side-effect
+//           attribute from dec_tlu_mrac_ff and raises access/misaligned faults (non-DMA only)
+//********************************************************************************
+module lsu_addrcheck
+   import swerv_types::*;
+(
+   input logic          lsu_freeze_c2_dc2_clk,       // clock for the dc2 side-effect flop
+   input logic          lsu_freeze_c2_dc3_clk,       // clock for the dc3 side-effect flop
+   input logic          rst_l,                       // reset
+
+   input logic [31:0]   start_addr_dc1,              // start address for lsu
+   input logic [31:0]   end_addr_dc1,                // end address for lsu
+   input lsu_pkt_t      lsu_pkt_dc1,                 // packet in dc1
+   input logic [31:0]   dec_tlu_mrac_ff,             // CSR read: bit {region, 1'b1} is the side-effect bit of that region
+
+
+   output logic         is_sideeffects_dc2,          // access is to side-effects space (dc2)
+   output logic         is_sideeffects_dc3,          // same attribute flopped once more (dc3)
+   output logic         addr_in_dccm_dc1,            // start and end address both in dccm
+   output logic         addr_in_pic_dc1,             // start and end address both in pic
+   output logic         addr_external_dc1,           // address is neither in dccm nor in pic
+
+   output logic         access_fault_dc1,            // access fault (valid, non-DMA packets only)
+   output logic         misaligned_fault_dc1,        // misaligned fault (external, valid, non-DMA packets only)
+
+   input logic          scan_mode
+);
+
+`include "global.h"
+
+   localparam DCCM_REGION = `RV_DCCM_REGION;
+   localparam PIC_REGION  = `RV_PIC_REGION;
+   localparam ICCM_REGION = `RV_ICCM_REGION;   // compared against start_addr_dc1[31:28]
+
+   `ifdef RV_ICCM_ENABLE
+      localparam ICCM_ENABLE = 1'b1;
+   `else
+      localparam ICCM_ENABLE = 1'b0;
+   `endif
+
+   `ifdef RV_DCCM_ENABLE
+      localparam DCCM_ENABLE = 1'b1;
+   `else
+      localparam DCCM_ENABLE = 1'b0;
+   `endif
+
+   logic is_sideeffects_dc1, is_aligned_dc1;
+   logic start_addr_in_dccm_dc1, end_addr_in_dccm_dc1;
+   logic start_addr_in_dccm_region_dc1, end_addr_in_dccm_region_dc1;
+   logic start_addr_in_pic_dc1, end_addr_in_pic_dc1;
+   logic start_addr_in_pic_region_dc1, end_addr_in_pic_region_dc1;
+   logic [4:0] csr_idx;                // bit index into dec_tlu_mrac_ff
+   logic addr_in_iccm;                 // start address is in the ICCM region (0 when ICCM is disabled)
+   logic non_dccm_access_ok;           // start/end address allowed by the RV_DATA_ACCESS_* windows
+
+   if (DCCM_ENABLE == 1) begin: Gen_dccm_enable
+      // Start address check
+      rvrangecheck #(.CCM_SADR(`RV_DCCM_SADR),
+                     .CCM_SIZE(`RV_DCCM_SIZE)) start_addr_dccm_rangecheck (
+         .addr(start_addr_dc1[31:0]),
+         .in_range(start_addr_in_dccm_dc1),
+         .in_region(start_addr_in_dccm_region_dc1)
+      );
+
+      // End address check
+      rvrangecheck #(.CCM_SADR(`RV_DCCM_SADR),
+                     .CCM_SIZE(`RV_DCCM_SIZE)) end_addr_dccm_rangecheck (
+         .addr(end_addr_dc1[31:0]),
+         .in_range(end_addr_in_dccm_dc1),
+         .in_region(end_addr_in_dccm_region_dc1)
+      );
+   end else begin: Gen_dccm_disable // end of Gen_dccm_enable; without a DCCM every DCCM hit is tied to 0
+      assign start_addr_in_dccm_dc1 = '0;
+      assign start_addr_in_dccm_region_dc1 = '0;
+      assign end_addr_in_dccm_dc1 = '0;
+      assign end_addr_in_dccm_region_dc1 = '0;
+   end
+   if (ICCM_ENABLE == 1) begin : check_iccm
+      assign addr_in_iccm = (start_addr_dc1[31:28] == ICCM_REGION);
+   end
+   else begin
+      assign addr_in_iccm = 1'b0;
+   end
+   // PIC memory check (instantiated unconditionally, unlike the DCCM check above)
+   // Start address check
+   rvrangecheck #(.CCM_SADR(`RV_PIC_BASE_ADDR),
+                  .CCM_SIZE(`RV_PIC_SIZE)) start_addr_pic_rangecheck (
+      .addr(start_addr_dc1[31:0]),
+      .in_range(start_addr_in_pic_dc1),
+      .in_region(start_addr_in_pic_region_dc1)
+   );
+
+   // End address check
+   rvrangecheck #(.CCM_SADR(`RV_PIC_BASE_ADDR),
+                  .CCM_SIZE(`RV_PIC_SIZE)) end_addr_pic_rangecheck (
+      .addr(end_addr_dc1[31:0]),
+      .in_range(end_addr_in_pic_dc1),
+      .in_region(end_addr_in_pic_region_dc1)
+   );
+
+   assign addr_in_dccm_dc1 = (start_addr_in_dccm_dc1 & end_addr_in_dccm_dc1);
+   assign addr_in_pic_dc1 = (start_addr_in_pic_dc1 & end_addr_in_pic_dc1);
+
+   assign addr_external_dc1 = ~(addr_in_dccm_dc1 | addr_in_pic_dc1); //~addr_in_dccm_region_dc1;
+   assign csr_idx[4:0] = {start_addr_dc1[31:28], 1'b1};   // odd bit of the 2-bit mrac field of the start address region
+   assign is_sideeffects_dc1 = dec_tlu_mrac_ff[csr_idx] & ~(start_addr_in_dccm_region_dc1 | start_addr_in_pic_region_dc1 | addr_in_iccm); // every region owns 2 bits of mrac (bit 1: side-effects, bit 0: cacheable). Ignored in internal regions (DCCM/PIC region, ICCM)
+   assign is_aligned_dc1 = (lsu_pkt_dc1.word & (start_addr_dc1[1:0] == 2'b0)) |   // word: address bits [1:0] must be 0
+                           (lsu_pkt_dc1.half & (start_addr_dc1[0] == 1'b0)) |     // half: address bit [0] must be 0
+                           lsu_pkt_dc1.by;                                        // byte: always aligned
+
+   assign non_dccm_access_ok = (~(|{`RV_DATA_ACCESS_ENABLE0,`RV_DATA_ACCESS_ENABLE1,`RV_DATA_ACCESS_ENABLE2,`RV_DATA_ACCESS_ENABLE3,`RV_DATA_ACCESS_ENABLE4,`RV_DATA_ACCESS_ENABLE5,`RV_DATA_ACCESS_ENABLE6,`RV_DATA_ACCESS_ENABLE7})) | // no window enabled => every access is ok
+                               (((`RV_DATA_ACCESS_ENABLE0 & ((start_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK0)) == (`RV_DATA_ACCESS_ADDR0 | `RV_DATA_ACCESS_MASK0)) | // '==' binds tighter than '&': ENABLEn & ((addr | MASKn) == (ADDRn | MASKn))
+                                 (`RV_DATA_ACCESS_ENABLE1 & ((start_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK1)) == (`RV_DATA_ACCESS_ADDR1 | `RV_DATA_ACCESS_MASK1)) |
+                                 (`RV_DATA_ACCESS_ENABLE2 & ((start_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK2)) == (`RV_DATA_ACCESS_ADDR2 | `RV_DATA_ACCESS_MASK2)) |
+                                 (`RV_DATA_ACCESS_ENABLE3 & ((start_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK3)) == (`RV_DATA_ACCESS_ADDR3 | `RV_DATA_ACCESS_MASK3)) |
+                                 (`RV_DATA_ACCESS_ENABLE4 & ((start_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK4)) == (`RV_DATA_ACCESS_ADDR4 | `RV_DATA_ACCESS_MASK4)) |
+                                 (`RV_DATA_ACCESS_ENABLE5 & ((start_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK5)) == (`RV_DATA_ACCESS_ADDR5 | `RV_DATA_ACCESS_MASK5)) |
+                                 (`RV_DATA_ACCESS_ENABLE6 & ((start_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK6)) == (`RV_DATA_ACCESS_ADDR6 | `RV_DATA_ACCESS_MASK6)) |
+                                 (`RV_DATA_ACCESS_ENABLE7 & ((start_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK7)) == (`RV_DATA_ACCESS_ADDR7 | `RV_DATA_ACCESS_MASK7))) & // start address hits an enabled window AND ...
+
+                                ((`RV_DATA_ACCESS_ENABLE0 & ((end_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK0)) == (`RV_DATA_ACCESS_ADDR0 | `RV_DATA_ACCESS_MASK0)) | // ... end address hits an enabled window (not necessarily the same one)
+                                 (`RV_DATA_ACCESS_ENABLE1 & ((end_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK1)) == (`RV_DATA_ACCESS_ADDR1 | `RV_DATA_ACCESS_MASK1)) |
+                                 (`RV_DATA_ACCESS_ENABLE2 & ((end_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK2)) == (`RV_DATA_ACCESS_ADDR2 | `RV_DATA_ACCESS_MASK2)) |
+                                 (`RV_DATA_ACCESS_ENABLE3 & ((end_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK3)) == (`RV_DATA_ACCESS_ADDR3 | `RV_DATA_ACCESS_MASK3)) |
+                                 (`RV_DATA_ACCESS_ENABLE4 & ((end_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK4)) == (`RV_DATA_ACCESS_ADDR4 | `RV_DATA_ACCESS_MASK4)) |
+                                 (`RV_DATA_ACCESS_ENABLE5 & ((end_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK5)) == (`RV_DATA_ACCESS_ADDR5 | `RV_DATA_ACCESS_MASK5)) |
+                                 (`RV_DATA_ACCESS_ENABLE6 & ((end_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK6)) == (`RV_DATA_ACCESS_ADDR6 | `RV_DATA_ACCESS_MASK6)) |
+                                 (`RV_DATA_ACCESS_ENABLE7 & ((end_addr_dc1[31:0] | `RV_DATA_ACCESS_MASK7)) == (`RV_DATA_ACCESS_ADDR7 | `RV_DATA_ACCESS_MASK7))));
+
+   // Access fault logic (only raised for valid, non-DMA packets)
+   // 1. Addr in dccm region but not in dccm offset
+   // 2. Addr in picm region but not in picm offset
+   // 3. DCCM -> PIC offset cross when DCCM/PIC in same region (PIC accesses are always word aligned so no cross possible from PIC->DCCM)
+   // 4. Ld/St accesses to picm that are not word sized or not word aligned
+   // 5. Address outside the dccm/pic region and not allowed by the data access windows
+   if (DCCM_REGION == PIC_REGION) begin
+      assign access_fault_dc1 = ((start_addr_in_dccm_region_dc1 & ~(start_addr_in_dccm_dc1 | start_addr_in_pic_dc1)) |      // 1/2: start in the shared region but in neither dccm nor pic
+                                 (end_addr_in_dccm_region_dc1 & ~(end_addr_in_dccm_dc1 | end_addr_in_pic_dc1)) |            // 1/2: same check for the end address
+                                 ((start_addr_dc1[27:18] != end_addr_dc1[27:18]) & start_addr_in_dccm_dc1) |                // 3: start in dccm and address bits [27:18] differ at the end
+                                 ((addr_in_pic_dc1 & ((start_addr_dc1[1:0] != 2'b0) | ~lsu_pkt_dc1.word))) |                // 4
+                                 (~start_addr_in_dccm_region_dc1 & ~non_dccm_access_ok)) & lsu_pkt_dc1.valid & ~lsu_pkt_dc1.dma; // 5
+   end else begin
+      assign access_fault_dc1 = ((start_addr_in_dccm_region_dc1 & ~start_addr_in_dccm_dc1) |                               // 1: start address
+                                 (end_addr_in_dccm_region_dc1 & ~end_addr_in_dccm_dc1) |                                    // 1: end address
+                                 (start_addr_in_pic_region_dc1 & ~start_addr_in_pic_dc1) |                                  // 2: start address
+                                 (end_addr_in_pic_region_dc1 & ~end_addr_in_pic_dc1) |                                      // 2: end address
+                                 ((addr_in_pic_dc1 & ((start_addr_dc1[1:0] != 2'b0) | ~lsu_pkt_dc1.word))) |                // 4
+                                 (~start_addr_in_pic_region_dc1 & ~start_addr_in_dccm_region_dc1 & ~non_dccm_access_ok)) & lsu_pkt_dc1.valid & ~lsu_pkt_dc1.dma; // 5
+   end
+
+   // Misaligned happens due to 2 reasons (external, valid, non-DMA accesses only)
+   // 1. Region cross: start and end address differ in bits [31:28]
+   // 2. side-effect accesses which are not aligned to their size
+   assign misaligned_fault_dc1 = ((start_addr_dc1[31:28] != end_addr_dc1[31:28]) |
+                                  (is_sideeffects_dc1 & ~is_aligned_dc1)) & addr_external_dc1 & lsu_pkt_dc1.valid & ~lsu_pkt_dc1.dma;
+
+   rvdff #(.WIDTH(1)) is_sideeffects_dc2ff (.din(is_sideeffects_dc1), .dout(is_sideeffects_dc2), .clk(lsu_freeze_c2_dc2_clk), .*); // dc1 -> dc2
+   rvdff #(.WIDTH(1)) is_sideeffects_dc3ff (.din(is_sideeffects_dc2), .dout(is_sideeffects_dc3), .clk(lsu_freeze_c2_dc3_clk), .*); // dc2 -> dc3
+
+endmodule // lsu_addrcheck
+
diff --git a/design/lsu/lsu_bus_buffer.sv b/design/lsu/lsu_bus_buffer.sv
new file mode 100644
index 0000000..a7fc5e8
--- /dev/null
+++ b/design/lsu/lsu_bus_buffer.sv
@@ -0,0 +1,923 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2019 Western Digital Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: lsu interface with interface queue
+// Comments:
+//
+//********************************************************************************
+
+// Function to do 8 to 3 bit encoding: index of the set bit for a one-hot Dec_value (OR of the indices if several bits are set, 0 if none)
+function automatic logic [2:0] f_Enc8to3;
+   input logic [7:0] Dec_value;
+
+   logic [2:0] Enc_value;
+   Enc_value[0] = Dec_value[1] | Dec_value[3] | Dec_value[5] | Dec_value[7];   // indices with bit 0 set (1,3,5,7)
+   Enc_value[1] = Dec_value[2] | Dec_value[3] | Dec_value[6] | Dec_value[7];   // indices with bit 1 set (2,3,6,7)
+   Enc_value[2] = Dec_value[4] | Dec_value[5] | Dec_value[6] | Dec_value[7];   // indices with bit 2 set (4..7)
+
+   return Enc_value[2:0];
+endfunction // f_Enc8to3
+
+
+module lsu_bus_buffer
+   import swerv_types::*;
+(
+   input logic                          clk,
+   input logic                          rst_l,
+   input logic                          scan_mode,
+   input logic                          dec_tlu_non_blocking_disable,      // disable non block
+   input logic                          dec_tlu_wb_coalescing_disable,     // disable write buffer coalescing
+   input logic                          dec_tlu_ld_miss_byp_wb_disable,    // disable ld miss bypass of the write buffer
+   input logic                          dec_tlu_sideeffect_posted_disable, // disable posted writes to sideeffect addr to the bus
+
+   // various clocks needed for the bus reads and writes
+   input logic                          lsu_c1_dc3_clk,
+   input logic                          lsu_c1_dc4_clk,
+   input logic                          lsu_c1_dc5_clk,
+   input logic                          lsu_c2_dc3_clk,
+   input logic                          lsu_c2_dc4_clk,
+   input logic                          lsu_c2_dc5_clk,
+   input logic                          lsu_freeze_c1_dc2_clk,
+   input logic                          lsu_freeze_c1_dc3_clk,
+   input logic                          lsu_freeze_c2_dc2_clk,
+   input logic                          lsu_freeze_c2_dc3_clk,
+   input logic                          lsu_bus_ibuf_c1_clk,
+   input logic                          lsu_bus_obuf_c1_clk,
+   input logic                          lsu_bus_buf_c1_clk,
+   input logic                          lsu_free_c2_clk,
+   input logic                          lsu_busm_clk,
+
+
+   input lsu_pkt_t                      lsu_pkt_dc1,                       // lsu packet flowing down the pipe
+   input lsu_pkt_t                      lsu_pkt_dc2,                       // lsu packet flowing down the pipe
+   input lsu_pkt_t                      lsu_pkt_dc3,                       // lsu packet flowing down the pipe
+   input lsu_pkt_t                      lsu_pkt_dc4,                       // lsu packet flowing down the pipe
+   input lsu_pkt_t
lsu_pkt_dc5, // lsu packet flowing down the pipe + + input logic [31:0] lsu_addr_dc2, // lsu address flowing down the pipe + input logic [31:0] end_addr_dc2, // lsu address flowing down the pipe + input logic [31:0] lsu_addr_dc5, // lsu address flowing down the pipe + input logic [31:0] end_addr_dc5, // lsu address flowing down the pipe + input logic [31:0] store_data_dc5, // store data flowing down the pipe + + input logic no_word_merge_dc5, // dc5 store doesn't need to wait in ibuf since it will not coalesce + input logic no_dword_merge_dc5, // dc5 store doesn't need to wait in ibuf since it will not coalesce + input logic lsu_busreq_dc2, // bus request is in dc2 + output logic lsu_busreq_dc3, // bus request is in dc2 + output logic lsu_busreq_dc4, // bus request is in dc4 + output logic lsu_busreq_dc5, // bus request is in dc5 + input logic ld_full_hit_dc2, // load can get all its byte from a write buffer entry + input logic flush_dc2_up, // flush + input logic flush_dc3, // flush + input logic flush_dc4, // flush + input logic flush_dc5, // flush + input logic lsu_freeze_dc3, + input logic dec_tlu_cancel_e4, // cancel the bus load in dc4 and reset the freeze + input logic lsu_commit_dc5, // lsu instruction in dc5 commits + input logic is_sideeffects_dc2, // lsu attribute is side_effects + input logic is_sideeffects_dc5, // lsu attribute is side_effects + input logic ldst_dual_dc1, // load/store is unaligned at 32 bit boundary + input logic ldst_dual_dc2, // load/store is unaligned at 32 bit boundary + input logic ldst_dual_dc3, // load/store is unaligned at 32 bit boundary + input logic ldst_dual_dc4, // load/store is unaligned at 32 bit boundary + input logic ldst_dual_dc5, // load/store is unaligned at 32 bit boundary + + input logic [7:0] ldst_byteen_ext_dc2, + + output logic ld_freeze_dc3, // load goes to external and asserts freeze + output logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry + output logic lsu_bus_buffer_full_any, // bus 
buffer is full + output logic lsu_bus_buffer_empty_any, // bus buffer is empty + + output logic ld_bus_error_dc3, // bus error in dc3 + output logic [31:0] ld_bus_error_addr_dc3, // address of the bus error + output logic [31:0] ld_bus_data_dc3, // the Dc3 load data from bus + + output logic [3:0] ld_byte_hit_buf_lo, ld_byte_hit_buf_hi, // Byte enables for forwarding data + output logic [31:0] ld_fwddata_buf_lo, ld_fwddata_buf_hi, // load forwarding data + + output logic lsu_imprecise_error_load_any, // imprecise load bus error + output logic lsu_imprecise_error_store_any, // imprecise store bus error + output logic [31:0] lsu_imprecise_error_addr_any, // address of the imprecise error + + // Non-blocking loads + input logic dec_nonblock_load_freeze_dc2, + output logic lsu_nonblock_load_valid_dc3, // there is an external load -> put in the cam + output logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_dc3, // the tag of the external non block load + output logic lsu_nonblock_load_inv_dc5, // invalidate signal for the cam entry for non block loads + output logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_dc5, // tag of the enrty which needs to be invalidated + output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam + output logic lsu_nonblock_load_data_error, // non block load has an error + output logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error + output logic [31:0] lsu_nonblock_load_data, // Data of the non block load + + // PMU events + output logic lsu_pmu_bus_trxn, + output logic lsu_pmu_bus_misaligned, + output logic lsu_pmu_bus_error, + output logic lsu_pmu_bus_busy, + + // AXI Write Channels + output logic lsu_axi_awvalid, + input logic lsu_axi_awready, + output logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_awid, + output logic [31:0] lsu_axi_awaddr, + output logic [3:0] lsu_axi_awregion, + output logic [7:0] lsu_axi_awlen, 
+ output logic [2:0] lsu_axi_awsize, + output logic [1:0] lsu_axi_awburst, + output logic lsu_axi_awlock, + output logic [3:0] lsu_axi_awcache, + output logic [2:0] lsu_axi_awprot, + output logic [3:0] lsu_axi_awqos, + + output logic lsu_axi_wvalid, + input logic lsu_axi_wready, + output logic [63:0] lsu_axi_wdata, + output logic [7:0] lsu_axi_wstrb, + output logic lsu_axi_wlast, + + input logic lsu_axi_bvalid, + output logic lsu_axi_bready, + input logic [1:0] lsu_axi_bresp, + input logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_bid, + + // AXI Read Channels + output logic lsu_axi_arvalid, + input logic lsu_axi_arready, + output logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_arid, + output logic [31:0] lsu_axi_araddr, + output logic [3:0] lsu_axi_arregion, + output logic [7:0] lsu_axi_arlen, + output logic [2:0] lsu_axi_arsize, + output logic [1:0] lsu_axi_arburst, + output logic lsu_axi_arlock, + output logic [3:0] lsu_axi_arcache, + output logic [2:0] lsu_axi_arprot, + output logic [3:0] lsu_axi_arqos, + + input logic lsu_axi_rvalid, + output logic lsu_axi_rready, + input logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_rid, + input logic [63:0] lsu_axi_rdata, + input logic [1:0] lsu_axi_rresp, + input logic lsu_axi_rlast, + + input logic lsu_bus_clk_en, + input logic lsu_bus_clk_en_q + +); + +`include "global.h" + + // For Ld: IDLE -> WAIT -> CMD -> RESP -> DONE -> IDLE + // For St: IDLE -> WAIT -> CMD -> RESP(?) -> IDLE + typedef enum logic [2:0] {IDLE=3'b000, WAIT=3'b001, CMD=3'b010, RESP=3'b011, DONE=3'b100} state_t; + + localparam DEPTH = `RV_LSU_NUM_NBLOAD; + localparam DEPTH_LOG2 = `RV_LSU_NUM_NBLOAD_WIDTH; + localparam TIMER = 8; // This can be only power of 2 + localparam TIMER_LOG2 = (TIMER < 2) ? 1 : $clog2(TIMER); + localparam TIMER_MAX = (TIMER == 0) ? 
TIMER_LOG2'(0) : TIMER_LOG2'(TIMER - 1); // Maximum value of timer + + logic [3:0] ldst_byteen_hi_dc2, ldst_byteen_lo_dc2; + logic [DEPTH-1:0] ld_addr_hitvec_lo, ld_addr_hitvec_hi; + logic [3:0][DEPTH-1:0] ld_byte_hitvec_lo, ld_byte_hitvec_hi; + logic [3:0][DEPTH-1:0] ld_byte_hitvecfn_lo, ld_byte_hitvecfn_hi; + + logic ld_addr_ibuf_hit_lo, ld_addr_ibuf_hit_hi; + logic [3:0] ld_byte_ibuf_hit_lo, ld_byte_ibuf_hit_hi; + + logic [3:0] ldst_byteen_dc5; + logic [7:0] ldst_byteen_ext_dc5; + logic [3:0] ldst_byteen_hi_dc5, ldst_byteen_lo_dc5; + logic [31:0] store_data_hi_dc5, store_data_lo_dc5; + logic ldst_samedw_dc5; + + logic lsu_nonblock_load_valid_dc4,lsu_nonblock_load_valid_dc5; + logic [31:0] lsu_nonblock_load_data_hi, lsu_nonblock_load_data_lo, lsu_nonblock_data_unalgn; + logic [1:0] lsu_nonblock_addr_offset; + logic [1:0] lsu_nonblock_sz; + logic lsu_nonblock_load_data_valid_hi, lsu_nonblock_load_data_valid_lo; + logic lsu_nonblock_load_data_error_hi, lsu_nonblock_load_data_error_lo; + logic lsu_nonblock_unsign, lsu_nonblock_dual; + logic dec_nonblock_load_freeze_dc3; + logic ld_precise_bus_error; + logic [DEPTH_LOG2-1:0] lsu_imprecise_error_load_tag; + logic [31:0] ld_block_bus_data; + + logic [DEPTH-1:0] CmdPtr0Dec, CmdPtr1Dec; + logic [DEPTH_LOG2-1:0] CmdPtr0, CmdPtr1; + logic [DEPTH_LOG2-1:0] WrPtr0_dc3, WrPtr0_dc4, WrPtr0_dc5; + logic [DEPTH_LOG2-1:0] WrPtr1_dc3, WrPtr1_dc4, WrPtr1_dc5; + logic found_wrptr0, found_wrptr1, found_cmdptr0, found_cmdptr1; + logic [3:0] buf_numvld_any, buf_numvld_wrcmd_any, buf_numvld_pend_any, buf_numvld_cmd_any; + logic bus_sideeffect_pend; + logic bus_coalescing_disable; + + logic ld_freeze_en, ld_freeze_rst; + logic FreezePtrEn; + logic [DEPTH_LOG2-1:0] FreezePtr; + + logic bus_addr_match_pending; + logic bus_cmd_sent, bus_cmd_ready; + logic bus_wcmd_sent, bus_wdata_sent; + logic bus_rsp_read, bus_rsp_write; + logic [LSU_BUS_TAG-1:0] bus_rsp_read_tag, bus_rsp_write_tag; + logic bus_rsp_read_error, bus_rsp_write_error; + logic 
[63:0] bus_rsp_rdata; + + // Bus buffer signals + state_t [DEPTH-1:0] buf_state; + logic [DEPTH-1:0][2:0] buf_state_out; + logic [DEPTH-1:0][1:0] buf_sz; + logic [DEPTH-1:0][31:0] buf_addr; + logic [DEPTH-1:0][3:0] buf_byteen; + logic [DEPTH-1:0] buf_sideeffect; + logic [DEPTH-1:0] buf_write; + logic [DEPTH-1:0] buf_unsign; + logic [DEPTH-1:0] buf_dual; + logic [DEPTH-1:0] buf_samedw; + logic [DEPTH-1:0] buf_nomerge; + logic [DEPTH-1:0] buf_dualhi; + logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_dualtag; + logic [DEPTH-1:0] buf_nb; + logic [DEPTH-1:0] buf_error; + logic [DEPTH-1:0][31:0] buf_data; + logic [DEPTH-1:0][DEPTH-1:0] buf_age, buf_age_younger, buf_age_temp; + + state_t [DEPTH-1:0] buf_nxtstate; + logic [DEPTH-1:0] buf_rst; + logic [DEPTH-1:0] buf_state_en; + logic [DEPTH-1:0] buf_cmd_state_bus_en; + logic [DEPTH-1:0] buf_resp_state_bus_en; + logic [DEPTH-1:0] buf_state_bus_en; + logic [DEPTH-1:0] buf_dual_in; + logic [DEPTH-1:0] buf_samedw_in; + logic [DEPTH-1:0] buf_nomerge_in; + logic [DEPTH-1:0] buf_nb_in; + logic [DEPTH-1:0] buf_sideeffect_in; + logic [DEPTH-1:0] buf_unsign_in; + logic [DEPTH-1:0][1:0] buf_sz_in; + logic [DEPTH-1:0] buf_write_in; + logic [DEPTH-1:0] buf_wr_en; + logic [DEPTH-1:0] buf_dualhi_in; + logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_dualtag_in; + logic [DEPTH-1:0][3:0] buf_byteen_in; + logic [DEPTH-1:0][31:0] buf_addr_in; + logic [DEPTH-1:0][31:0] buf_data_in; + logic [DEPTH-1:0] buf_error_en; + logic [DEPTH-1:0] buf_data_en; + logic [DEPTH-1:0][DEPTH-1:0] buf_age_in; + logic [DEPTH-1:0][DEPTH-1:0] buf_ageQ; + + // Input buffer signals + logic ibuf_valid; + logic ibuf_dual; + logic ibuf_samedw; + logic ibuf_nomerge; + logic [DEPTH_LOG2-1:0] ibuf_tag; + logic [DEPTH_LOG2-1:0] ibuf_dualtag; + logic ibuf_nb; + logic ibuf_sideeffect; + logic ibuf_unsign; + logic ibuf_write; + logic [1:0] ibuf_sz; + logic [3:0] ibuf_byteen; + logic [31:0] ibuf_addr; + logic [31:0] ibuf_data; + logic [TIMER_LOG2-1:0] ibuf_timer; + + logic ibuf_byp; + logic 
ibuf_wr_en; + logic ibuf_rst; + logic ibuf_force_drain; + logic ibuf_drain_vld; + logic [DEPTH-1:0] ibuf_drainvec_vld; + logic [DEPTH_LOG2-1:0] ibuf_tag_in; + logic [DEPTH_LOG2-1:0] ibuf_dualtag_in; + logic [1:0] ibuf_sz_in; + logic [31:0] ibuf_addr_in; + logic [3:0] ibuf_byteen_in; + logic [31:0] ibuf_data_in; + logic [TIMER_LOG2-1:0] ibuf_timer_in; + logic [3:0] ibuf_byteen_out; + logic [31:0] ibuf_data_out; + logic ibuf_merge_en, ibuf_merge_in; + + // Output buffer signals + logic obuf_valid; + logic obuf_write; + logic obuf_sideeffect; + logic [31:0] obuf_addr; + logic [63:0] obuf_data; + logic [1:0] obuf_sz; + logic [7:0] obuf_byteen; + logic obuf_merge; + logic obuf_cmd_done, obuf_data_done; + logic [LSU_BUS_TAG-1:0] obuf_tag0; + logic [LSU_BUS_TAG-1:0] obuf_tag1; + + logic ibuf_buf_byp; + logic obuf_force_wr_en; + logic obuf_wr_wait; + logic obuf_wr_en, obuf_wr_enQ; + logic obuf_rst; + logic obuf_write_in; + logic obuf_sideeffect_in; + logic [31:0] obuf_addr_in; + logic [63:0] obuf_data_in; + logic [1:0] obuf_sz_in; + logic [7:0] obuf_byteen_in; + logic obuf_merge_in; + logic obuf_cmd_done_in, obuf_data_done_in; + logic [LSU_BUS_TAG-1:0] obuf_tag0_in; + logic [LSU_BUS_TAG-1:0] obuf_tag1_in; + + logic obuf_merge_en; + logic [TIMER_LOG2-1:0] obuf_wr_timer, obuf_wr_timer_in; + logic [7:0] obuf_byteen0_in, obuf_byteen1_in; + logic [63:0] obuf_data0_in, obuf_data1_in; + + logic lsu_axi_awvalid_q, lsu_axi_awready_q; + logic lsu_axi_wvalid_q, lsu_axi_wready_q; + logic lsu_axi_arvalid_q, lsu_axi_arready_q; + logic lsu_axi_bvalid_q, lsu_axi_bready_q; + logic lsu_axi_rvalid_q, lsu_axi_rready_q; + logic [LSU_BUS_TAG-1:0] lsu_axi_bid_q, lsu_axi_rid_q; + logic [1:0] lsu_axi_bresp_q, lsu_axi_rresp_q; + logic [63:0] lsu_axi_rdata_q; + + //------------------------------------------------------------------------------ + // Load forwarding logic start + //------------------------------------------------------------------------------ + + // Buffer hit logic for bus load 
forwarding + assign ldst_byteen_hi_dc2[3:0] = ldst_byteen_ext_dc2[7:4]; + assign ldst_byteen_lo_dc2[3:0] = ldst_byteen_ext_dc2[3:0]; + for (genvar i=0; i 4'b0) & (obuf_wr_timer < TIMER_MAX)) ? (obuf_wr_timer + 1'b1) : obuf_wr_timer); + assign obuf_force_wr_en = lsu_busreq_dc2 & ~lsu_busreq_dc3 & ~lsu_busreq_dc4 & ~lsu_busreq_dc5 & ~ibuf_valid & (buf_numvld_cmd_any[3:0] == 4'b1) & (lsu_addr_dc2[31:2] != buf_addr[CmdPtr0][31:2]); // Entry in dc2 can't merge with entry going to obuf and there is no entry in between + assign ibuf_buf_byp = ibuf_byp & (buf_numvld_pend_any[3:0] == 4'b0) & ~ldst_dual_dc5 & lsu_pkt_dc5.store; + + assign obuf_wr_en = (lsu_bus_clk_en & ((ibuf_buf_byp & lsu_commit_dc5) | + ((buf_state[CmdPtr0] == CMD) & found_cmdptr0 & ~buf_cmd_state_bus_en[CmdPtr0] & + (~(buf_dual[CmdPtr0] & buf_samedw[CmdPtr0] & ~buf_write[CmdPtr0]) | found_cmdptr1 | buf_nomerge[CmdPtr0] | obuf_force_wr_en)))) & + (bus_cmd_ready | ~obuf_valid) & ~obuf_wr_wait & ~bus_sideeffect_pend & ~bus_addr_match_pending; + assign obuf_rst = bus_cmd_sent & ~obuf_wr_en; + assign obuf_write_in = ibuf_buf_byp ? lsu_pkt_dc5.store : buf_write[CmdPtr0]; + assign obuf_sideeffect_in = ibuf_buf_byp ? is_sideeffects_dc5 : buf_sideeffect[CmdPtr0]; + assign obuf_addr_in[31:0] = ibuf_buf_byp ? lsu_addr_dc5[31:0] : buf_addr[CmdPtr0]; + assign obuf_sz_in[1:0] = ibuf_buf_byp ? {lsu_pkt_dc5.word, lsu_pkt_dc5.half} : buf_sz[CmdPtr0]; + assign obuf_merge_in = obuf_merge_en; + assign obuf_tag0_in[LSU_BUS_TAG-1:0] = ibuf_buf_byp ? LSU_BUS_TAG'(WrPtr0_dc5) : LSU_BUS_TAG'(CmdPtr0); + assign obuf_tag1_in[LSU_BUS_TAG-1:0] = LSU_BUS_TAG'(CmdPtr1); + + assign obuf_cmd_done_in = ~(obuf_wr_en | obuf_rst) & (obuf_cmd_done | bus_wcmd_sent); + assign obuf_data_done_in = ~(obuf_wr_en | obuf_rst) & (obuf_data_done | bus_wdata_sent); + + assign obuf_byteen0_in[7:0] = ibuf_buf_byp ? (lsu_addr_dc5[2] ? {ldst_byteen_lo_dc5[3:0],4'b0} : {4'b0,ldst_byteen_lo_dc5[3:0]}) : + (buf_addr[CmdPtr0][2] ? 
{buf_byteen[CmdPtr0],4'b0} : {4'b0,buf_byteen[CmdPtr0]}); + assign obuf_byteen1_in[7:0] = buf_addr[CmdPtr1][2] ? {buf_byteen[CmdPtr1],4'b0} : {4'b0,buf_byteen[CmdPtr1]}; + assign obuf_data0_in[63:0] = ibuf_buf_byp ? (lsu_addr_dc5[2] ? {store_data_lo_dc5[31:0],32'b0} : {32'b0,store_data_lo_dc5[31:0]}) : + (buf_addr[CmdPtr0][2] ? {buf_data[CmdPtr0],32'b0} : {32'b0,buf_data[CmdPtr0]}); + assign obuf_data1_in[63:0] = buf_addr[CmdPtr1][2] ? {buf_data[CmdPtr1],32'b0} : {32'b0,buf_data[CmdPtr1]}; + for (genvar i=0 ;i<8; i++) begin + assign obuf_byteen_in[i] = obuf_byteen0_in[i] | (obuf_merge_en & obuf_byteen1_in[i]); + assign obuf_data_in[(8*i)+7:(8*i)] = (obuf_merge_en & obuf_byteen1_in[i]) ? obuf_data1_in[(8*i)+7:(8*i)] : obuf_data0_in[(8*i)+7:(8*i)]; + end + + // No store obuf merging for AXI since all stores are sent non-posted. Can't track the second id right now + assign obuf_merge_en = (CmdPtr0 != CmdPtr1) & found_cmdptr0 & found_cmdptr1 & (buf_state[CmdPtr0] == CMD) & (buf_state[CmdPtr1] == CMD) & ~buf_cmd_state_bus_en[CmdPtr0] & ~buf_sideeffect[CmdPtr0] & + (~buf_write[CmdPtr0] & buf_dual[CmdPtr0] & ~buf_dualhi[CmdPtr0] & buf_samedw[CmdPtr0]); // CmdPtr0/CmdPtr1 are for same load which is within a DW + + rvdff #(.WIDTH(1)) obuf_wren_ff (.din(obuf_wr_en), .dout(obuf_wr_enQ), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(1)) obuf_cmd_done_ff (.din(obuf_cmd_done_in), .dout(obuf_cmd_done), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(1)) obuf_data_done_ff (.din(obuf_data_done_in), .dout(obuf_data_done), .clk(lsu_busm_clk), .*); + rvdffsc #(.WIDTH(1)) obuf_valid_ff (.din(1'b1), .dout(obuf_valid), .en(obuf_wr_en), .clear(obuf_rst), .clk(lsu_busm_clk), .*); + rvdffs #(.WIDTH(LSU_BUS_TAG)) obuf_tag0ff (.din(obuf_tag0_in), .dout(obuf_tag0), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffs #(.WIDTH(LSU_BUS_TAG)) obuf_tag1ff (.din(obuf_tag1_in), .dout(obuf_tag1), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffs #(.WIDTH(1)) obuf_mergeff (.din(obuf_merge_in), 
.dout(obuf_merge), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffs #(.WIDTH(1)) obuf_writeff (.din(obuf_write_in), .dout(obuf_write), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffs #(.WIDTH(1)) obuf_sideeffectff (.din(obuf_sideeffect_in), .dout(obuf_sideeffect), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffs #(.WIDTH(2)) obuf_szff (.din(obuf_sz_in[1:0]), .dout(obuf_sz), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffe #(.WIDTH(32)) obuf_addrff (.din(obuf_addr_in[31:0]), .dout(obuf_addr), .en(obuf_wr_en), .*); + rvdffs #(.WIDTH(8)) obuf_byteenff (.din(obuf_byteen_in[7:0]), .dout(obuf_byteen), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffe #(.WIDTH(64)) obuf_dataff (.din(obuf_data_in[63:0]), .dout(obuf_data), .en(obuf_wr_en), .*); + rvdff #(.WIDTH(TIMER_LOG2)) obuf_timerff (.din(obuf_wr_timer_in), .dout(obuf_wr_timer), .clk(lsu_busm_clk), .*); + + //------------------------------------------------------------------------------ + // Output buffer logic ends here + //------------------------------------------------------------------------------ + + // Find the entry to allocate and entry to send + always_comb begin + WrPtr0_dc3[DEPTH_LOG2-1:0] = '0; + WrPtr1_dc3[DEPTH_LOG2-1:0] = '0; + found_wrptr0 = '0; + found_wrptr1 = '0; + + // Find first write pointer + for (int i=0; i= (DEPTH-1)); + assign lsu_bus_buffer_empty_any = ~(|buf_state[DEPTH-1:0]) & ~ibuf_valid & ~obuf_valid; + + // Freeze logic + assign FreezePtrEn = lsu_busreq_dc3 & lsu_pkt_dc3.load & ld_freeze_dc3; + assign ld_freeze_en = (is_sideeffects_dc2 | dec_nonblock_load_freeze_dc2 | dec_tlu_non_blocking_disable) & lsu_busreq_dc2 & lsu_pkt_dc2.load & ~lsu_freeze_dc3 & ~flush_dc2_up & ~ld_full_hit_dc2; + always_comb begin + ld_freeze_rst = flush_dc3 | (dec_tlu_cancel_e4 & ld_freeze_dc3); + for (int i=0; i> (8*buf_addr[FreezePtr][1:0])); + assign ld_precise_bus_error = (buf_error[FreezePtr] | (buf_dual[FreezePtr] & buf_error[buf_dualtag[FreezePtr]])) & 
~buf_write[FreezePtr] & buf_rst[FreezePtr] & lsu_freeze_dc3 & ld_freeze_rst & ~flush_dc3; // Don't give bus error for interrupts + assign ld_bus_error_addr_dc3[31:0] = buf_addr[FreezePtr][31:0]; + + // Non blocking ports + assign lsu_nonblock_load_valid_dc3 = lsu_busreq_dc3 & lsu_pkt_dc3.valid & lsu_pkt_dc3.load & ~flush_dc3 & ~dec_nonblock_load_freeze_dc3 & ~lsu_freeze_dc3 & ~dec_tlu_non_blocking_disable; + assign lsu_nonblock_load_tag_dc3[DEPTH_LOG2-1:0] = WrPtr0_dc3[DEPTH_LOG2-1:0]; + assign lsu_nonblock_load_inv_dc5 = lsu_nonblock_load_valid_dc5 & ~lsu_commit_dc5; + assign lsu_nonblock_load_inv_tag_dc5[DEPTH_LOG2-1:0] = WrPtr0_dc5[DEPTH_LOG2-1:0]; // dc5 tag needs to be accurate even if there is no invalidate + + always_comb begin + lsu_nonblock_load_data_valid_lo = '0; + lsu_nonblock_load_data_valid_hi = '0; + lsu_nonblock_load_data_error_lo = '0; + lsu_nonblock_load_data_error_hi = '0; + lsu_nonblock_load_data_tag[DEPTH_LOG2-1:0] = '0; + lsu_nonblock_load_data_lo[31:0] = '0; + lsu_nonblock_load_data_hi[31:0] = '0; + for (int i=0; i> 8*lsu_nonblock_addr_offset[1:0]); + + assign lsu_nonblock_load_data_valid = lsu_nonblock_load_data_valid_lo & (~lsu_nonblock_dual | lsu_nonblock_load_data_valid_hi); + assign lsu_nonblock_load_data_error = lsu_nonblock_load_data_error_lo | (lsu_nonblock_dual & lsu_nonblock_load_data_error_hi); + assign lsu_nonblock_load_data = ({32{ lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b00)}} & {24'b0,lsu_nonblock_data_unalgn[7:0]}) | + ({32{ lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b01)}} & {16'b0,lsu_nonblock_data_unalgn[15:0]}) | + ({32{~lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b00)}} & {{24{lsu_nonblock_data_unalgn[7]}}, lsu_nonblock_data_unalgn[7:0]}) | + ({32{~lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b01)}} & {{16{lsu_nonblock_data_unalgn[15]}},lsu_nonblock_data_unalgn[15:0]}) | + ({32{(lsu_nonblock_sz[1:0] == 2'b10)}} & lsu_nonblock_data_unalgn[31:0]); + + // Determine if there is a pending return to 
sideeffect load/store + always_comb begin + bus_sideeffect_pend = obuf_valid & obuf_sideeffect & dec_tlu_sideeffect_posted_disable; + for (int i=0; i put in the cam + output logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_dc3, // the tag of the external non block load + output logic lsu_nonblock_load_inv_dc5, // invalidate signal for the cam entry for non block loads + output logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_dc5, // tag of the enrty which needs to be invalidated + output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam + output logic lsu_nonblock_load_data_error, // non block load has an error + output logic [`RV_LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error + output logic [31:0] lsu_nonblock_load_data, // Data of the non block load + + // PMU events + output logic lsu_pmu_bus_trxn, + output logic lsu_pmu_bus_misaligned, + output logic lsu_pmu_bus_error, + output logic lsu_pmu_bus_busy, + + // AXI Write Channels + output logic lsu_axi_awvalid, + input logic lsu_axi_awready, + output logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_awid, + output logic [31:0] lsu_axi_awaddr, + output logic [3:0] lsu_axi_awregion, + output logic [7:0] lsu_axi_awlen, + output logic [2:0] lsu_axi_awsize, + output logic [1:0] lsu_axi_awburst, + output logic lsu_axi_awlock, + output logic [3:0] lsu_axi_awcache, + output logic [2:0] lsu_axi_awprot, + output logic [3:0] lsu_axi_awqos, + + output logic lsu_axi_wvalid, + input logic lsu_axi_wready, + output logic [63:0] lsu_axi_wdata, + output logic [7:0] lsu_axi_wstrb, + output logic lsu_axi_wlast, + + input logic lsu_axi_bvalid, + output logic lsu_axi_bready, + input logic [1:0] lsu_axi_bresp, + input logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_bid, + + // AXI Read Channels + output logic lsu_axi_arvalid, + input logic lsu_axi_arready, + output logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_arid, + output logic [31:0] 
lsu_axi_araddr, + output logic [3:0] lsu_axi_arregion, + output logic [7:0] lsu_axi_arlen, + output logic [2:0] lsu_axi_arsize, + output logic [1:0] lsu_axi_arburst, + output logic lsu_axi_arlock, + output logic [3:0] lsu_axi_arcache, + output logic [2:0] lsu_axi_arprot, + output logic [3:0] lsu_axi_arqos, + + input logic lsu_axi_rvalid, + output logic lsu_axi_rready, + input logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_rid, + input logic [63:0] lsu_axi_rdata, + input logic [1:0] lsu_axi_rresp, + input logic lsu_axi_rlast, + + input logic lsu_bus_clk_en + +); + +`include "global.h" + + logic ld_freeze_dc3; + + logic lsu_bus_clk_en_q; + logic ldst_dual_dc1, ldst_dual_dc2, ldst_dual_dc3, ldst_dual_dc4, ldst_dual_dc5; + logic lsu_busreq_dc3, lsu_busreq_dc4; + + logic [3:0] ldst_byteen_dc2, ldst_byteen_dc3, ldst_byteen_dc4, ldst_byteen_dc5; + logic [7:0] ldst_byteen_ext_dc2, ldst_byteen_ext_dc3, ldst_byteen_ext_dc4, ldst_byteen_ext_dc5; + logic [3:0] ldst_byteen_hi_dc2, ldst_byteen_hi_dc3, ldst_byteen_hi_dc4, ldst_byteen_hi_dc5; + logic [3:0] ldst_byteen_lo_dc2, ldst_byteen_lo_dc3, ldst_byteen_lo_dc4, ldst_byteen_lo_dc5; + logic is_sideeffects_dc4, is_sideeffects_dc5; + + + logic [63:0] store_data_ext_dc3, store_data_ext_dc4, store_data_ext_dc5; + logic [31:0] store_data_hi_dc3, store_data_hi_dc4, store_data_hi_dc5; + logic [31:0] store_data_lo_dc3, store_data_lo_dc4, store_data_lo_dc5; + + logic addr_match_dw_lo_dc5_dc4, addr_match_dw_lo_dc5_dc3, addr_match_dw_lo_dc5_dc2; + logic addr_match_word_lo_dc5_dc4, addr_match_word_lo_dc5_dc3, addr_match_word_lo_dc5_dc2; + logic no_word_merge_dc5, no_dword_merge_dc5; + + logic ld_addr_dc3hit_lo_lo, ld_addr_dc3hit_hi_lo, ld_addr_dc3hit_lo_hi, ld_addr_dc3hit_hi_hi; + logic ld_addr_dc4hit_lo_lo, ld_addr_dc4hit_hi_lo, ld_addr_dc4hit_lo_hi, ld_addr_dc4hit_hi_hi; + logic ld_addr_dc5hit_lo_lo, ld_addr_dc5hit_hi_lo, ld_addr_dc5hit_lo_hi, ld_addr_dc5hit_hi_hi; + + logic [3:0] ld_byte_dc3hit_lo_lo, ld_byte_dc3hit_hi_lo, ld_byte_dc3hit_lo_hi, 
ld_byte_dc3hit_hi_hi; + logic [3:0] ld_byte_dc4hit_lo_lo, ld_byte_dc4hit_hi_lo, ld_byte_dc4hit_lo_hi, ld_byte_dc4hit_hi_hi; + logic [3:0] ld_byte_dc5hit_lo_lo, ld_byte_dc5hit_hi_lo, ld_byte_dc5hit_lo_hi, ld_byte_dc5hit_hi_hi; + + logic [3:0] ld_byte_hit_lo, ld_byte_dc3hit_lo, ld_byte_dc4hit_lo, ld_byte_dc5hit_lo; + logic [3:0] ld_byte_hit_hi, ld_byte_dc3hit_hi, ld_byte_dc4hit_hi, ld_byte_dc5hit_hi; + + logic [31:0] ld_fwddata_dc3pipe_lo, ld_fwddata_dc4pipe_lo, ld_fwddata_dc5pipe_lo; + logic [31:0] ld_fwddata_dc3pipe_hi, ld_fwddata_dc4pipe_hi, ld_fwddata_dc5pipe_hi; + + logic [3:0] ld_byte_hit_buf_lo, ld_byte_hit_buf_hi; + logic [31:0] ld_fwddata_buf_lo, ld_fwddata_buf_hi; + + logic ld_hit_rdbuf_hi, ld_hit_rdbuf_lo; + logic [31:0] ld_fwddata_rdbuf_hi, ld_fwddata_rdbuf_lo; + + logic [63:0] ld_fwddata_lo, ld_fwddata_hi; + logic [31:0] ld_fwddata_dc2, ld_fwddata_dc3; + logic [31:0] ld_bus_data_dc3; + + logic ld_full_hit_hi_dc2, ld_full_hit_lo_dc2; + logic ld_hit_dc2, ld_full_hit_dc2, ld_full_hit_dc3; + logic is_aligned_dc5; + + logic [63:32] ld_fwddata_dc2_nc; + + logic lsu_write_buffer_empty_any; + assign lsu_write_buffer_empty_any = 1'b1; + + assign ldst_byteen_dc2[3:0] = ({4{lsu_pkt_dc2.by}} & 4'b0001) | + ({4{lsu_pkt_dc2.half}} & 4'b0011) | + ({4{lsu_pkt_dc2.word}} & 4'b1111); + assign ldst_dual_dc1 = (lsu_addr_dc1[2] != end_addr_dc1[2]); + assign lsu_freeze_dc3 = ld_freeze_dc3 & ~(flush_dc4 | flush_dc5); + + // Determine if the packet is word aligned + assign is_aligned_dc5 = (lsu_pkt_dc5.word & (lsu_addr_dc5[1:0] == 2'b0)) | + (lsu_pkt_dc5.half & (lsu_addr_dc5[0] == 1'b0)); + + // Read/Write Buffer + lsu_bus_buffer bus_buffer ( + .* + ); + + // Logic to determine if dc5 store can be coalesced or not with younger stores. 
Bypass ibuf if it cannot be coalesced + assign addr_match_dw_lo_dc5_dc4 = (lsu_addr_dc5[31:3] == lsu_addr_dc4[31:3]); + assign addr_match_dw_lo_dc5_dc3 = (lsu_addr_dc5[31:3] == lsu_addr_dc3[31:3]); + assign addr_match_dw_lo_dc5_dc2 = (lsu_addr_dc5[31:3] == lsu_addr_dc2[31:3]); + + assign addr_match_word_lo_dc5_dc4 = addr_match_dw_lo_dc5_dc4 & ~(lsu_addr_dc5[2]^lsu_addr_dc4[2]); + assign addr_match_word_lo_dc5_dc3 = addr_match_dw_lo_dc5_dc3 & ~(lsu_addr_dc5[2]^lsu_addr_dc3[2]); + assign addr_match_word_lo_dc5_dc2 = addr_match_dw_lo_dc5_dc2 & ~(lsu_addr_dc5[2]^lsu_addr_dc2[2]); + + assign no_word_merge_dc5 = lsu_busreq_dc5 & ~ldst_dual_dc5 & + ((lsu_busreq_dc4 & (lsu_pkt_dc4.load | ~addr_match_word_lo_dc5_dc4)) | + (lsu_busreq_dc3 & ~lsu_busreq_dc4 & (lsu_pkt_dc3.load | ~addr_match_word_lo_dc5_dc3)) | + (lsu_busreq_dc2 & ~lsu_busreq_dc3 & ~lsu_busreq_dc4 & (lsu_pkt_dc2.load | ~addr_match_word_lo_dc5_dc2))); + + assign no_dword_merge_dc5 = lsu_busreq_dc5 & ~ldst_dual_dc5 & + ((lsu_busreq_dc4 & (lsu_pkt_dc4.load | ~addr_match_dw_lo_dc5_dc4)) | + (lsu_busreq_dc3 & ~lsu_busreq_dc4 & (lsu_pkt_dc3.load | ~addr_match_dw_lo_dc5_dc3)) | + (lsu_busreq_dc2 & ~lsu_busreq_dc3 & ~lsu_busreq_dc4 & (lsu_pkt_dc2.load | ~addr_match_dw_lo_dc5_dc2))); + + // Create Hi/Lo signals + assign ldst_byteen_ext_dc2[7:0] = {4'b0,ldst_byteen_dc2[3:0]} << lsu_addr_dc2[1:0]; + assign ldst_byteen_ext_dc3[7:0] = {4'b0,ldst_byteen_dc3[3:0]} << lsu_addr_dc3[1:0]; + assign ldst_byteen_ext_dc4[7:0] = {4'b0,ldst_byteen_dc4[3:0]} << lsu_addr_dc4[1:0]; + assign ldst_byteen_ext_dc5[7:0] = {4'b0,ldst_byteen_dc5[3:0]} << lsu_addr_dc5[1:0]; + + assign store_data_ext_dc3[63:0] = {32'b0,store_data_dc3[31:0]} << {lsu_addr_dc3[1:0],3'b0}; + assign store_data_ext_dc4[63:0] = {32'b0,store_data_dc4[31:0]} << {lsu_addr_dc4[1:0],3'b0}; + assign store_data_ext_dc5[63:0] = {32'b0,store_data_dc5[31:0]} << {lsu_addr_dc5[1:0],3'b0}; + + assign ldst_byteen_hi_dc2[3:0] = ldst_byteen_ext_dc2[7:4]; + assign ldst_byteen_lo_dc2[3:0] = 
ldst_byteen_ext_dc2[3:0]; + assign ldst_byteen_hi_dc3[3:0] = ldst_byteen_ext_dc3[7:4]; + assign ldst_byteen_lo_dc3[3:0] = ldst_byteen_ext_dc3[3:0]; + assign ldst_byteen_hi_dc4[3:0] = ldst_byteen_ext_dc4[7:4]; + assign ldst_byteen_lo_dc4[3:0] = ldst_byteen_ext_dc4[3:0]; + assign ldst_byteen_hi_dc5[3:0] = ldst_byteen_ext_dc5[7:4]; + assign ldst_byteen_lo_dc5[3:0] = ldst_byteen_ext_dc5[3:0]; + + assign store_data_hi_dc3[31:0] = store_data_ext_dc3[63:32]; + assign store_data_lo_dc3[31:0] = store_data_ext_dc3[31:0]; + assign store_data_hi_dc4[31:0] = store_data_ext_dc4[63:32]; + assign store_data_lo_dc4[31:0] = store_data_ext_dc4[31:0]; + assign store_data_hi_dc5[31:0] = store_data_ext_dc5[63:32]; + assign store_data_lo_dc5[31:0] = store_data_ext_dc5[31:0]; + + assign ld_addr_dc3hit_lo_lo = (lsu_addr_dc2[31:2] == lsu_addr_dc3[31:2]) & lsu_pkt_dc3.valid & lsu_pkt_dc3.store & lsu_busreq_dc2; + assign ld_addr_dc3hit_lo_hi = (end_addr_dc2[31:2] == lsu_addr_dc3[31:2]) & lsu_pkt_dc3.valid & lsu_pkt_dc3.store & lsu_busreq_dc2; + assign ld_addr_dc3hit_hi_lo = (lsu_addr_dc2[31:2] == end_addr_dc3[31:2]) & lsu_pkt_dc3.valid & lsu_pkt_dc3.store & lsu_busreq_dc2; + assign ld_addr_dc3hit_hi_hi = (end_addr_dc2[31:2] == end_addr_dc3[31:2]) & lsu_pkt_dc3.valid & lsu_pkt_dc3.store & lsu_busreq_dc2; + + assign ld_addr_dc4hit_lo_lo = (lsu_addr_dc2[31:2] == lsu_addr_dc4[31:2]) & lsu_pkt_dc4.valid & lsu_pkt_dc4.store & lsu_busreq_dc2; + assign ld_addr_dc4hit_lo_hi = (end_addr_dc2[31:2] == lsu_addr_dc4[31:2]) & lsu_pkt_dc4.valid & lsu_pkt_dc4.store & lsu_busreq_dc2; + assign ld_addr_dc4hit_hi_lo = (lsu_addr_dc2[31:2] == end_addr_dc4[31:2]) & lsu_pkt_dc4.valid & lsu_pkt_dc4.store & lsu_busreq_dc2; + assign ld_addr_dc4hit_hi_hi = (end_addr_dc2[31:2] == end_addr_dc4[31:2]) & lsu_pkt_dc4.valid & lsu_pkt_dc4.store & lsu_busreq_dc2; + + assign ld_addr_dc5hit_lo_lo = (lsu_addr_dc2[31:2] == lsu_addr_dc5[31:2]) & lsu_pkt_dc5.valid & lsu_pkt_dc5.store & lsu_busreq_dc2; + assign ld_addr_dc5hit_lo_hi = 
(end_addr_dc2[31:2] == lsu_addr_dc5[31:2]) & lsu_pkt_dc5.valid & lsu_pkt_dc5.store & lsu_busreq_dc2; + assign ld_addr_dc5hit_hi_lo = (lsu_addr_dc2[31:2] == end_addr_dc5[31:2]) & lsu_pkt_dc5.valid & lsu_pkt_dc5.store & lsu_busreq_dc2; + assign ld_addr_dc5hit_hi_hi = (end_addr_dc2[31:2] == end_addr_dc5[31:2]) & lsu_pkt_dc5.valid & lsu_pkt_dc5.store & lsu_busreq_dc2; + + for (genvar i=0; i<4; i++) begin + assign ld_byte_dc3hit_lo_lo[i] = ld_addr_dc3hit_lo_lo & ldst_byteen_lo_dc3[i] & ldst_byteen_lo_dc2[i]; + assign ld_byte_dc3hit_lo_hi[i] = ld_addr_dc3hit_lo_hi & ldst_byteen_lo_dc3[i] & ldst_byteen_hi_dc2[i]; + assign ld_byte_dc3hit_hi_lo[i] = ld_addr_dc3hit_hi_lo & ldst_byteen_hi_dc3[i] & ldst_byteen_lo_dc2[i]; + assign ld_byte_dc3hit_hi_hi[i] = ld_addr_dc3hit_hi_hi & ldst_byteen_hi_dc3[i] & ldst_byteen_hi_dc2[i]; + + assign ld_byte_dc4hit_lo_lo[i] = ld_addr_dc4hit_lo_lo & ldst_byteen_lo_dc4[i] & ldst_byteen_lo_dc2[i]; + assign ld_byte_dc4hit_lo_hi[i] = ld_addr_dc4hit_lo_hi & ldst_byteen_lo_dc4[i] & ldst_byteen_hi_dc2[i]; + assign ld_byte_dc4hit_hi_lo[i] = ld_addr_dc4hit_hi_lo & ldst_byteen_hi_dc4[i] & ldst_byteen_lo_dc2[i]; + assign ld_byte_dc4hit_hi_hi[i] = ld_addr_dc4hit_hi_hi & ldst_byteen_hi_dc4[i] & ldst_byteen_hi_dc2[i]; + + assign ld_byte_dc5hit_lo_lo[i] = ld_addr_dc5hit_lo_lo & ldst_byteen_lo_dc5[i] & ldst_byteen_lo_dc2[i]; + assign ld_byte_dc5hit_lo_hi[i] = ld_addr_dc5hit_lo_hi & ldst_byteen_lo_dc5[i] & ldst_byteen_hi_dc2[i]; + assign ld_byte_dc5hit_hi_lo[i] = ld_addr_dc5hit_hi_lo & ldst_byteen_hi_dc5[i] & ldst_byteen_lo_dc2[i]; + assign ld_byte_dc5hit_hi_hi[i] = ld_addr_dc5hit_hi_hi & ldst_byteen_hi_dc5[i] & ldst_byteen_hi_dc2[i]; + + assign ld_byte_hit_lo[i] = ld_byte_dc3hit_lo_lo[i] | ld_byte_dc3hit_hi_lo[i] | + ld_byte_dc4hit_lo_lo[i] | ld_byte_dc4hit_hi_lo[i] | + ld_byte_dc5hit_lo_lo[i] | ld_byte_dc5hit_hi_lo[i] | + ld_byte_hit_buf_lo[i]; + //ld_hit_rdbuf_lo; + assign ld_byte_hit_hi[i] = ld_byte_dc3hit_lo_hi[i] | ld_byte_dc3hit_hi_hi[i] | + 
ld_byte_dc4hit_lo_hi[i] | ld_byte_dc4hit_hi_hi[i] | + ld_byte_dc5hit_lo_hi[i] | ld_byte_dc5hit_hi_hi[i] | + ld_byte_hit_buf_hi[i]; + //ld_hit_rdbuf_hi; + + assign ld_byte_dc3hit_lo[i] = ld_byte_dc3hit_lo_lo[i] | ld_byte_dc3hit_hi_lo[i]; + assign ld_byte_dc4hit_lo[i] = ld_byte_dc4hit_lo_lo[i] | ld_byte_dc4hit_hi_lo[i]; + assign ld_byte_dc5hit_lo[i] = ld_byte_dc5hit_lo_lo[i] | ld_byte_dc5hit_hi_lo[i]; + + assign ld_byte_dc3hit_hi[i] = ld_byte_dc3hit_lo_hi[i] | ld_byte_dc3hit_hi_hi[i]; + assign ld_byte_dc4hit_hi[i] = ld_byte_dc4hit_lo_hi[i] | ld_byte_dc4hit_hi_hi[i]; + assign ld_byte_dc5hit_hi[i] = ld_byte_dc5hit_lo_hi[i] | ld_byte_dc5hit_hi_hi[i]; + + assign ld_fwddata_dc3pipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_dc3hit_lo_lo[i]}} & store_data_lo_dc3[(8*i)+7:(8*i)]) | + ({8{ld_byte_dc3hit_hi_lo[i]}} & store_data_hi_dc3[(8*i)+7:(8*i)]); + assign ld_fwddata_dc4pipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_dc4hit_lo_lo[i]}} & store_data_lo_dc4[(8*i)+7:(8*i)]) | + ({8{ld_byte_dc4hit_hi_lo[i]}} & store_data_hi_dc4[(8*i)+7:(8*i)]); + assign ld_fwddata_dc5pipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_dc5hit_lo_lo[i]}} & store_data_lo_dc5[(8*i)+7:(8*i)]) | + ({8{ld_byte_dc5hit_hi_lo[i]}} & store_data_hi_dc5[(8*i)+7:(8*i)]); + + assign ld_fwddata_dc3pipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_dc3hit_lo_hi[i]}} & store_data_lo_dc3[(8*i)+7:(8*i)]) | + ({8{ld_byte_dc3hit_hi_hi[i]}} & store_data_hi_dc3[(8*i)+7:(8*i)]); + assign ld_fwddata_dc4pipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_dc4hit_lo_hi[i]}} & store_data_lo_dc4[(8*i)+7:(8*i)]) | + ({8{ld_byte_dc4hit_hi_hi[i]}} & store_data_hi_dc4[(8*i)+7:(8*i)]); + assign ld_fwddata_dc5pipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_dc5hit_lo_hi[i]}} & store_data_lo_dc5[(8*i)+7:(8*i)]) | + ({8{ld_byte_dc5hit_hi_hi[i]}} & store_data_hi_dc5[(8*i)+7:(8*i)]); + + // Final muxing between dc3/dc4/dc5 + assign ld_fwddata_lo[(8*i)+7:(8*i)] = ld_byte_dc3hit_lo[i] ? ld_fwddata_dc3pipe_lo[(8*i)+7:(8*i)] : + ld_byte_dc4hit_lo[i] ? ld_fwddata_dc4pipe_lo[(8*i)+7:(8*i)] : + ld_byte_dc5hit_lo[i] ? 
ld_fwddata_dc5pipe_lo[(8*i)+7:(8*i)] : + ld_fwddata_buf_lo[(8*i)+7:(8*i)]; + + assign ld_fwddata_hi[(8*i)+7:(8*i)] = ld_byte_dc3hit_hi[i] ? ld_fwddata_dc3pipe_hi[(8*i)+7:(8*i)] : + ld_byte_dc4hit_hi[i] ? ld_fwddata_dc4pipe_hi[(8*i)+7:(8*i)] : + ld_byte_dc5hit_hi[i] ? ld_fwddata_dc5pipe_hi[(8*i)+7:(8*i)] : + ld_fwddata_buf_hi[(8*i)+7:(8*i)]; + + end + + always_comb begin + ld_full_hit_lo_dc2 = 1'b1; + ld_full_hit_hi_dc2 = 1'b1; + for (int i=0; i<4; i++) begin + ld_full_hit_lo_dc2 &= (ld_byte_hit_lo[i] | ~ldst_byteen_lo_dc2[i]); + ld_full_hit_hi_dc2 &= (ld_byte_hit_hi[i] | ~ldst_byteen_hi_dc2[i]); + end + end + + // This will be high if at least one byte hit the stores in pipe/write buffer (dc3/dc4/dc5/wrbuf) + assign ld_hit_dc2 = (|ld_byte_hit_lo[3:0]) | (|ld_byte_hit_hi[3:0]); + + // This will be high if all the bytes of load hit the stores in pipe/write buffer (dc3/dc4/dc5/wrbuf) + assign ld_full_hit_dc2 = ld_full_hit_lo_dc2 & ld_full_hit_hi_dc2 & lsu_busreq_dc2 & lsu_pkt_dc2.load & ~is_sideeffects_dc2; + + assign {ld_fwddata_dc2_nc[63:32], ld_fwddata_dc2[31:0]} = {ld_fwddata_hi[31:0], ld_fwddata_lo[31:0]} >> (8*lsu_addr_dc2[1:0]); + assign bus_read_data_dc3[31:0] = ld_full_hit_dc3 ? 
ld_fwddata_dc3[31:0] : ld_bus_data_dc3[31:0]; + + // Fifo flops + rvdff #(.WIDTH(1)) lsu_full_hit_dc3ff (.din(ld_full_hit_dc2), .dout(ld_full_hit_dc3), .clk(lsu_freeze_c2_dc3_clk), .*); + rvdff #(.WIDTH(32)) lsu_fwddata_dc3ff (.din(ld_fwddata_dc2[31:0]), .dout(ld_fwddata_dc3[31:0]), .clk(lsu_c1_dc3_clk), .*); + + rvdff #(.WIDTH(1)) clken_ff (.din(lsu_bus_clk_en), .dout(lsu_bus_clk_en_q), .clk(free_clk), .*); + + rvdff #(.WIDTH(1)) ldst_dual_dc2ff (.din(ldst_dual_dc1), .dout(ldst_dual_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); + rvdff #(.WIDTH(1)) ldst_dual_dc3ff (.din(ldst_dual_dc2), .dout(ldst_dual_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); + rvdff #(.WIDTH(1)) ldst_dual_dc4ff (.din(ldst_dual_dc3), .dout(ldst_dual_dc4), .clk(lsu_c1_dc4_clk), .*); + rvdff #(.WIDTH(1)) ldst_dual_dc5ff (.din(ldst_dual_dc4), .dout(ldst_dual_dc5), .clk(lsu_c1_dc5_clk), .*); + rvdff #(.WIDTH(1)) is_sideeffects_dc4ff (.din(is_sideeffects_dc3), .dout(is_sideeffects_dc4), .clk(lsu_c1_dc4_clk), .*); + rvdff #(.WIDTH(1)) is_sideeffects_dc5ff (.din(is_sideeffects_dc4), .dout(is_sideeffects_dc5), .clk(lsu_c1_dc5_clk), .*); + + rvdff #(4) lsu_byten_dc3ff (.*, .din(ldst_byteen_dc2[3:0]), .dout(ldst_byteen_dc3[3:0]), .clk(lsu_freeze_c1_dc3_clk)); + rvdff #(4) lsu_byten_dc4ff (.*, .din(ldst_byteen_dc3[3:0]), .dout(ldst_byteen_dc4[3:0]), .clk(lsu_c1_dc4_clk)); + rvdff #(4) lsu_byten_dc5ff (.*, .din(ldst_byteen_dc4[3:0]), .dout(ldst_byteen_dc5[3:0]), .clk(lsu_c1_dc5_clk)); + +`ifdef ASSERT_ON + // Assertion to check ld imprecise error comes with right address + // property lsu_ld_imprecise_error_check; + // @(posedge clk) disable iff (~rst_l) lsu_imprecise_error_load_any |-> (lsu_imprecise_error_addr_any[31:0] == ld_imprecise_bus_error_addr[31:0]); + // endproperty + // assert_ld_imprecise_error_check: assert property (lsu_ld_imprecise_error_check) else + // $display("Wrong imprecise error address when lsu_imprecise_error_load_any asserted"); + + // // Assertion to check st imprecise error comes with 
right address + // property lsu_st_imprecise_error_check; + // @(posedge clk) disable iff (~rst_l) lsu_imprecise_error_store_any |-> (lsu_imprecise_error_addr_any[31:0] == store_bus_error_addr[31:0]); + // endproperty + // assert_st_imprecise_error_check: assert property (lsu_st_imprecise_error_check) else + // $display("Wrong imprecise error address when lsu_imprecise_error_store_any asserted"); + +`endif + +endmodule // lsu_bus_intf diff --git a/design/lsu/lsu_clkdomain.sv b/design/lsu/lsu_clkdomain.sv new file mode 100644 index 0000000..2ccd92c --- /dev/null +++ b/design/lsu/lsu_clkdomain.sv @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//******************************************************************************** +// $Id$ +// +// +// Owner: +// Function: Clock Generation Block +// Comments: All the clocks are generated here +// +// //******************************************************************************** + + +module lsu_clkdomain + import swerv_types::*; +( + input logic clk, // clock + input logic free_clk, // clock + input logic rst_l, // reset + + // Inputs + input logic clk_override, // chicken bit to turn off clock gating + input logic lsu_freeze_dc3, // freeze + input logic addr_in_dccm_dc2, // address in dccm + input logic addr_in_pic_dc2, // address is in pic + input logic dma_dccm_req, // dma is active + input logic dma_mem_write, // dma write is active + input logic load_stbuf_reqvld_dc3, // instruction to stbuf + input logic store_stbuf_reqvld_dc3, // instruction to stbuf + input logic stbuf_reqvld_any, // stbuf is draining + input logic stbuf_reqvld_flushed_any, // instruction going to stbuf is flushed + input logic lsu_busreq_dc5, // busreq in dc5 + input logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry + input logic lsu_bus_buffer_empty_any, // external bus buffer is empty + input logic lsu_stbuf_empty_any, // stbuf is empty + //input logic lsu_load_stall_any, // Need to turn on clocks for this case + + input logic lsu_bus_clk_en, // bus clock enable + + input lsu_pkt_t lsu_p, // lsu packet in decode + input lsu_pkt_t lsu_pkt_dc1, // lsu packet in dc1 + input lsu_pkt_t lsu_pkt_dc2, // lsu packet in dc2 + input lsu_pkt_t lsu_pkt_dc3, // lsu packet in dc3 + input lsu_pkt_t lsu_pkt_dc4, // lsu packet in dc4 + input lsu_pkt_t lsu_pkt_dc5, // lsu packet in dc5 + + // Outputs + output logic lsu_c1_dc3_clk, // dc3 pipe single pulse clock + output logic lsu_c1_dc4_clk, // dc4 pipe single pulse clock + output logic lsu_c1_dc5_clk, // dc5 pipe single pulse clock + + output logic lsu_c2_dc3_clk, // dc3 pipe double pulse clock + output logic lsu_c2_dc4_clk, // 
dc4 pipe double pulse clock + output logic lsu_c2_dc5_clk, // dc5 pipe double pulse clock + + output logic lsu_store_c1_dc1_clk, // store in dc1 + output logic lsu_store_c1_dc2_clk, // store in dc2 + output logic lsu_store_c1_dc3_clk, // store in dc3 + output logic lsu_store_c1_dc4_clk, // store in dc4 + output logic lsu_store_c1_dc5_clk, // store in dc5 + + output logic lsu_freeze_c1_dc1_clk, // freeze + output logic lsu_freeze_c1_dc2_clk, // freeze + output logic lsu_freeze_c1_dc3_clk, // freeze + + output logic lsu_freeze_c2_dc1_clk, + output logic lsu_freeze_c2_dc2_clk, + output logic lsu_freeze_c2_dc3_clk, + output logic lsu_freeze_c2_dc4_clk, + + output logic lsu_dccm_c1_dc3_clk, // dccm clock + output logic lsu_pic_c1_dc3_clk, // pic clock + + output logic lsu_stbuf_c1_clk, + output logic lsu_bus_obuf_c1_clk, // ibuf clock + output logic lsu_bus_ibuf_c1_clk, // ibuf clock + output logic lsu_bus_buf_c1_clk, // ibuf clock + output logic lsu_busm_clk, // bus clock + + output logic lsu_free_c2_clk, + + input logic scan_mode +); + + logic lsu_c1_dc1_clken, lsu_c1_dc2_clken, lsu_c1_dc3_clken, lsu_c1_dc4_clken, lsu_c1_dc5_clken; + logic lsu_c2_dc3_clken, lsu_c2_dc4_clken, lsu_c2_dc5_clken; + logic lsu_c1_dc1_clken_q, lsu_c1_dc2_clken_q, lsu_c1_dc3_clken_q, lsu_c1_dc4_clken_q, lsu_c1_dc5_clken_q; + logic lsu_store_c1_dc1_clken, lsu_store_c1_dc2_clken, lsu_store_c1_dc3_clken, lsu_store_c1_dc4_clken, lsu_store_c1_dc5_clken; + + logic lsu_freeze_c1_dc1_clken, lsu_freeze_c1_dc2_clken, lsu_freeze_c1_dc3_clken, lsu_freeze_c1_dc4_clken; + logic lsu_freeze_c2_dc1_clken, lsu_freeze_c2_dc2_clken, lsu_freeze_c2_dc3_clken, lsu_freeze_c2_dc4_clken; + logic lsu_freeze_c1_dc1_clken_q, lsu_freeze_c1_dc2_clken_q, lsu_freeze_c1_dc3_clken_q, lsu_freeze_c1_dc4_clken_q; + + logic lsu_stbuf_c1_clken; + logic lsu_bus_ibuf_c1_clken, lsu_bus_obuf_c1_clken, lsu_bus_buf_c1_clken; + + logic lsu_dccm_c1_dc3_clken, lsu_pic_c1_dc3_clken; + + logic lsu_free_c1_clken, lsu_free_c1_clken_q, 
lsu_free_c2_clken; + logic lsu_bus_valid_clken; + + //------------------------------------------------------------------------------------------- + // Clock Enable logic + //------------------------------------------------------------------------------------------- + + // Also use the flopped clock enable. We want to turn on the clocks from dc1->dc5 even if there is a freeze + assign lsu_c1_dc1_clken = lsu_p.valid | dma_dccm_req | clk_override; + assign lsu_c1_dc2_clken = lsu_pkt_dc1.valid | lsu_c1_dc1_clken_q | clk_override; + assign lsu_c1_dc3_clken = lsu_pkt_dc2.valid | lsu_c1_dc2_clken_q | clk_override; + assign lsu_c1_dc4_clken = lsu_pkt_dc3.valid | lsu_c1_dc3_clken_q | clk_override; + assign lsu_c1_dc5_clken = lsu_pkt_dc4.valid | lsu_c1_dc4_clken_q | clk_override; + + assign lsu_c2_dc3_clken = lsu_c1_dc3_clken | lsu_c1_dc3_clken_q | clk_override; + assign lsu_c2_dc4_clken = lsu_c1_dc4_clken | lsu_c1_dc4_clken_q | clk_override; + assign lsu_c2_dc5_clken = lsu_c1_dc5_clken | lsu_c1_dc5_clken_q | clk_override; + + assign lsu_store_c1_dc1_clken = ((lsu_c1_dc1_clken & (lsu_p.store | dma_mem_write)) | clk_override) & ~lsu_freeze_dc3; + assign lsu_store_c1_dc2_clken = ((lsu_c1_dc2_clken & lsu_pkt_dc1.store) | clk_override) & ~lsu_freeze_dc3; + assign lsu_store_c1_dc3_clken = ((lsu_c1_dc3_clken & lsu_pkt_dc2.store) | clk_override) & ~lsu_freeze_dc3; + assign lsu_store_c1_dc4_clken = (lsu_c1_dc4_clken & lsu_pkt_dc3.store) | clk_override; + assign lsu_store_c1_dc5_clken = (lsu_c1_dc5_clken & lsu_pkt_dc4.store) | clk_override; + + assign lsu_freeze_c1_dc1_clken = (lsu_p.valid | dma_dccm_req | clk_override) & ~lsu_freeze_dc3; + assign lsu_freeze_c1_dc2_clken = (lsu_pkt_dc1.valid | clk_override) & ~lsu_freeze_dc3; + assign lsu_freeze_c1_dc3_clken = (lsu_pkt_dc2.valid | clk_override) & ~lsu_freeze_dc3; + assign lsu_freeze_c1_dc4_clken = (lsu_pkt_dc3.valid | clk_override) & ~lsu_freeze_dc3; + + assign lsu_freeze_c2_dc1_clken = (lsu_freeze_c1_dc1_clken | 
lsu_freeze_c1_dc1_clken_q | clk_override) & ~lsu_freeze_dc3; + assign lsu_freeze_c2_dc2_clken = (lsu_freeze_c1_dc2_clken | lsu_freeze_c1_dc2_clken_q | clk_override) & ~lsu_freeze_dc3; + assign lsu_freeze_c2_dc3_clken = (lsu_freeze_c1_dc3_clken | lsu_freeze_c1_dc3_clken_q | clk_override) & ~lsu_freeze_dc3; + assign lsu_freeze_c2_dc4_clken = (lsu_freeze_c1_dc4_clken | lsu_freeze_c1_dc4_clken_q | clk_override) & ~lsu_freeze_dc3; + + + assign lsu_stbuf_c1_clken = load_stbuf_reqvld_dc3 | store_stbuf_reqvld_dc3 | stbuf_reqvld_any | stbuf_reqvld_flushed_any | clk_override; + assign lsu_bus_ibuf_c1_clken = lsu_busreq_dc5 | clk_override; + assign lsu_bus_obuf_c1_clken = ((lsu_bus_buffer_pend_any | lsu_busreq_dc5) & lsu_bus_clk_en) | clk_override; + assign lsu_bus_buf_c1_clken = ~lsu_bus_buffer_empty_any | lsu_busreq_dc5 | clk_override; + + assign lsu_dccm_c1_dc3_clken = ((lsu_c1_dc3_clken & addr_in_dccm_dc2) | clk_override) & ~lsu_freeze_dc3; + assign lsu_pic_c1_dc3_clken = ((lsu_c1_dc3_clken & addr_in_pic_dc2) | clk_override) & ~lsu_freeze_dc3; + + assign lsu_free_c1_clken = (lsu_p.valid | lsu_pkt_dc1.valid | lsu_pkt_dc2.valid | lsu_pkt_dc3.valid | lsu_pkt_dc4.valid | lsu_pkt_dc5.valid) | + ~lsu_bus_buffer_empty_any | ~lsu_stbuf_empty_any | clk_override; + assign lsu_free_c2_clken = lsu_free_c1_clken | lsu_free_c1_clken_q | clk_override; + + // Flops + rvdff #(1) lsu_free_c1_clkenff (.din(lsu_free_c1_clken), .dout(lsu_free_c1_clken_q), .clk(free_clk), .*); + + rvdff #(1) lsu_c1_dc1_clkenff (.din(lsu_c1_dc1_clken), .dout(lsu_c1_dc1_clken_q), .clk(lsu_free_c2_clk), .*); + rvdff #(1) lsu_c1_dc2_clkenff (.din(lsu_c1_dc2_clken), .dout(lsu_c1_dc2_clken_q), .clk(lsu_free_c2_clk), .*); + rvdff #(1) lsu_c1_dc3_clkenff (.din(lsu_c1_dc3_clken), .dout(lsu_c1_dc3_clken_q), .clk(lsu_free_c2_clk), .*); + rvdff #(1) lsu_c1_dc4_clkenff (.din(lsu_c1_dc4_clken), .dout(lsu_c1_dc4_clken_q), .clk(lsu_free_c2_clk), .*); + rvdff #(1) lsu_c1_dc5_clkenff (.din(lsu_c1_dc5_clken), 
.dout(lsu_c1_dc5_clken_q), .clk(lsu_free_c2_clk), .*); + + rvdff #(1) lsu_freeze_c1_dc1_clkenff (.din(lsu_freeze_c1_dc1_clken), .dout(lsu_freeze_c1_dc1_clken_q), .clk(lsu_freeze_c2_dc1_clk), .*); + rvdff #(1) lsu_freeze_c1_dc2_clkenff (.din(lsu_freeze_c1_dc2_clken), .dout(lsu_freeze_c1_dc2_clken_q), .clk(lsu_freeze_c2_dc2_clk), .*); + rvdff #(1) lsu_freeze_c1_dc3_clkenff (.din(lsu_freeze_c1_dc3_clken), .dout(lsu_freeze_c1_dc3_clken_q), .clk(lsu_freeze_c2_dc3_clk), .*); + rvdff #(1) lsu_freeze_c1_dc4_clkenff (.din(lsu_freeze_c1_dc4_clken), .dout(lsu_freeze_c1_dc4_clken_q), .clk(lsu_freeze_c2_dc4_clk), .*); + + // Clock Headers + rvclkhdr lsu_c1dc3_cgc ( .en(lsu_c1_dc3_clken), .l1clk(lsu_c1_dc3_clk), .* ); + rvclkhdr lsu_c1dc4_cgc ( .en(lsu_c1_dc4_clken), .l1clk(lsu_c1_dc4_clk), .* ); + rvclkhdr lsu_c1dc5_cgc ( .en(lsu_c1_dc5_clken), .l1clk(lsu_c1_dc5_clk), .* ); + + rvclkhdr lsu_c2dc3_cgc ( .en(lsu_c2_dc3_clken), .l1clk(lsu_c2_dc3_clk), .* ); + rvclkhdr lsu_c2dc4_cgc ( .en(lsu_c2_dc4_clken), .l1clk(lsu_c2_dc4_clk), .* ); + rvclkhdr lsu_c2dc5_cgc ( .en(lsu_c2_dc5_clken), .l1clk(lsu_c2_dc5_clk), .* ); + + rvclkhdr lsu_store_c1dc1_cgc (.en(lsu_store_c1_dc1_clken), .l1clk(lsu_store_c1_dc1_clk), .*); + rvclkhdr lsu_store_c1dc2_cgc (.en(lsu_store_c1_dc2_clken), .l1clk(lsu_store_c1_dc2_clk), .*); + rvclkhdr lsu_store_c1dc3_cgc (.en(lsu_store_c1_dc3_clken), .l1clk(lsu_store_c1_dc3_clk), .*); + rvclkhdr lsu_store_c1dc4_cgc (.en(lsu_store_c1_dc4_clken), .l1clk(lsu_store_c1_dc4_clk), .*); + rvclkhdr lsu_store_c1dc5_cgc (.en(lsu_store_c1_dc5_clken), .l1clk(lsu_store_c1_dc5_clk), .*); + + rvclkhdr lsu_freeze_c1dc1_cgc ( .en(lsu_freeze_c1_dc1_clken), .l1clk(lsu_freeze_c1_dc1_clk), .* ); + rvclkhdr lsu_freeze_c1dc2_cgc ( .en(lsu_freeze_c1_dc2_clken), .l1clk(lsu_freeze_c1_dc2_clk), .* ); + rvclkhdr lsu_freeze_c1dc3_cgc ( .en(lsu_freeze_c1_dc3_clken), .l1clk(lsu_freeze_c1_dc3_clk), .* ); + + rvclkhdr lsu_freeze_c2dc1_cgc ( .en(lsu_freeze_c2_dc1_clken), .l1clk(lsu_freeze_c2_dc1_clk), 
.* ); + rvclkhdr lsu_freeze_c2dc2_cgc ( .en(lsu_freeze_c2_dc2_clken), .l1clk(lsu_freeze_c2_dc2_clk), .* ); + rvclkhdr lsu_freeze_c2dc3_cgc ( .en(lsu_freeze_c2_dc3_clken), .l1clk(lsu_freeze_c2_dc3_clk), .* ); + rvclkhdr lsu_freeze_c2dc4_cgc ( .en(lsu_freeze_c2_dc4_clken), .l1clk(lsu_freeze_c2_dc4_clk), .* ); + + rvclkhdr lsu_stbuf_c1_cgc ( .en(lsu_stbuf_c1_clken), .l1clk(lsu_stbuf_c1_clk), .* ); + rvclkhdr lsu_bus_ibuf_c1_cgc ( .en(lsu_bus_ibuf_c1_clken), .l1clk(lsu_bus_ibuf_c1_clk), .* ); + rvclkhdr lsu_bus_obuf_c1_cgc ( .en(lsu_bus_obuf_c1_clken), .l1clk(lsu_bus_obuf_c1_clk), .* ); + rvclkhdr lsu_bus_buf_c1_cgc ( .en(lsu_bus_buf_c1_clken), .l1clk(lsu_bus_buf_c1_clk), .* ); + + rvclkhdr lsu_busm_cgc (.en(lsu_bus_clk_en), .l1clk(lsu_busm_clk), .*); + + rvclkhdr lsu_dccm_c1dc3_cgc (.en(lsu_dccm_c1_dc3_clken), .l1clk(lsu_dccm_c1_dc3_clk), .*); + rvclkhdr lsu_pic_c1dc3_cgc (.en(lsu_pic_c1_dc3_clken), .l1clk(lsu_pic_c1_dc3_clk), .*); + + rvclkhdr lsu_free_cgc (.en(lsu_free_c2_clken), .l1clk(lsu_free_c2_clk), .*); + +endmodule + diff --git a/design/lsu/lsu_dccm_ctl.sv b/design/lsu/lsu_dccm_ctl.sv new file mode 100644 index 0000000..6a62db0 --- /dev/null +++ b/design/lsu/lsu_dccm_ctl.sv @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: DCCM / PIC access control for the LSU pipe
+// Comments: Single ported memory. Drives the dccm_* and picm_* request ports from the
+//           dc1 packet and the store buffer, and builds the dc3 load data from
+//           store-buffer forwarding, PIC read data and DCCM read data.
+//
+//  DC1 -> DC2 -> DC3 -> DC4 (Commit)
+//
+// //********************************************************************************
+
+module lsu_dccm_ctl
+   import swerv_types::*;
+(
+   input logic                             lsu_freeze_c2_dc2_clk,     // clocks
+   input logic                             lsu_freeze_c2_dc3_clk,
+   input logic                             lsu_dccm_c1_dc3_clk,
+   input logic                             lsu_pic_c1_dc3_clk,
+
+   input logic                             rst_l,
+   input logic                             lsu_freeze_dc3,            // freeze
+
+   input lsu_pkt_t                         lsu_pkt_dc3,               // lsu packets
+   input lsu_pkt_t                         lsu_pkt_dc1,
+   input logic                             addr_in_dccm_dc1,          // address maps to dccm
+   input logic                             addr_in_pic_dc1,           // address maps to pic
+   input logic                             addr_in_pic_dc3,           // address maps to pic
+   input logic [31:0]                      lsu_addr_dc1,              // starting byte address for loads
+   input logic [`RV_DCCM_BITS-1:0]         end_addr_dc1,              // last address used to calculate unaligned
+   input logic [`RV_DCCM_BITS-1:0]         lsu_addr_dc3,              // starting byte address for loads
+
+   input logic                             stbuf_reqvld_any,          // write enable
+   input logic                             stbuf_addr_in_pic_any,     // stbuf is going to pic
+   input logic [`RV_LSU_SB_BITS-1:0]       stbuf_addr_any,            // stbuf address (aligned)
+
+   input logic [`RV_DCCM_DATA_WIDTH-1:0]   stbuf_data_any,            // the read out from stbuf
+   input logic [`RV_DCCM_ECC_WIDTH-1:0]    stbuf_ecc_any,             // the encoded data with ECC bits
+   input logic [`RV_DCCM_DATA_WIDTH-1:0]   stbuf_fwddata_hi_dc3,      // stbuf forwarding to load
+   input logic [`RV_DCCM_DATA_WIDTH-1:0]   stbuf_fwddata_lo_dc3,      // stbuf forwarding to load
+   input logic [`RV_DCCM_BYTE_WIDTH-1:0]   stbuf_fwdbyteen_hi_dc3,    // stbuf forwarding to load
+   input logic [`RV_DCCM_BYTE_WIDTH-1:0]   stbuf_fwdbyteen_lo_dc3,    // stbuf forwarding to load
+
+   input logic                             lsu_double_ecc_error_dc3,  // lsu has a DED
+   input logic [`RV_DCCM_DATA_WIDTH-1:0]   store_ecc_datafn_hi_dc3,   // store data
+   input logic [`RV_DCCM_DATA_WIDTH-1:0]   store_ecc_datafn_lo_dc3,   // store data
+
+   output logic [`RV_DCCM_DATA_WIDTH-1:0]  dccm_data_hi_dc3,          // data from the dccm
+   output logic [`RV_DCCM_DATA_WIDTH-1:0]  dccm_data_lo_dc3,          // data from the dccm
+   output logic [`RV_DCCM_ECC_WIDTH-1:0]   dccm_data_ecc_hi_dc3,      // data from the dccm + ecc
+   output logic [`RV_DCCM_ECC_WIDTH-1:0]   dccm_data_ecc_lo_dc3,
+   output logic [`RV_DCCM_DATA_WIDTH-1:0]  lsu_ld_data_dc3,           // right justified, ie load byte will have data at 7:0
+   output logic [`RV_DCCM_DATA_WIDTH-1:0]  lsu_ld_data_corr_dc3,      // right justified, ie load byte will have data at 7:0
+   output logic [31:0]                     picm_mask_data_dc3,        // pic data to stbuf
+   output logic                            lsu_stbuf_commit_any,      // stbuf wins the dccm port or is to pic
+   output logic                            lsu_dccm_rden_dc3,         // dccm read
+
+   output logic                            dccm_dma_rvalid,           // dccm servicing the dma load
+   output logic                            dccm_dma_ecc_error,        // DMA load had ecc error
+   output logic [63:0]                     dccm_dma_rdata,            // dccm data to dma request
+
+   // DCCM ports
+   output logic                            dccm_wren,                 // dccm interface -- write
+   output logic                            dccm_rden,                 // dccm interface -- read
+   output logic [`RV_DCCM_BITS-1:0]        dccm_wr_addr,              // dccm interface -- wr addr
+   output logic [`RV_DCCM_BITS-1:0]        dccm_rd_addr_lo,           // dccm interface -- read address for lo bank
+   output logic [`RV_DCCM_BITS-1:0]        dccm_rd_addr_hi,           // dccm interface -- read address for hi bank
+   output logic [`RV_DCCM_FDATA_WIDTH-1:0] dccm_wr_data,              // dccm write data
+
+   input logic [`RV_DCCM_FDATA_WIDTH-1:0]  dccm_rd_data_lo,           // dccm read data back from the dccm
+   input logic [`RV_DCCM_FDATA_WIDTH-1:0]  dccm_rd_data_hi,           // dccm read data back from the dccm
+
+   // PIC ports
+   output logic                            picm_wren,                 // write to pic
+   output logic                            picm_rden,                 // read from pic
+   output logic                            picm_mken,                 // write to pic need a mask
+   output logic [31:0]                     picm_addr,                 // address for pic access - shared between reads and write
+   output logic [31:0]                     picm_wr_data,              // write data
+   input logic [31:0]                      picm_rd_data,              // read data
+
+   input logic                             scan_mode                  // scan mode
+);
+
+`include "global.h"
+
+   `ifdef RV_DCCM_ENABLE
+      localparam DCCM_ENABLE = 1'b1;
+   `else
+      localparam DCCM_ENABLE = 1'b0;
+   `endif
+
+   localparam DCCM_WIDTH_BITS = $clog2(DCCM_BYTE_WIDTH);   // DCCM_* parameters are not declared in this module; presumably they come from global.h
+   localparam PIC_BITS        =`RV_PIC_BITS;
+
+   logic                        lsu_dccm_rden_dc1, lsu_dccm_rden_dc2;
+   logic [DCCM_DATA_WIDTH-1:0]  dccm_data_hi_dc2, dccm_data_lo_dc2;
+   logic [DCCM_ECC_WIDTH-1:0]   dccm_data_ecc_hi_dc2, dccm_data_ecc_lo_dc2;
+   logic [63:0]  dccm_dout_dc3, dccm_corr_dout_dc3;
+   logic [63:0]  stbuf_fwddata_dc3;
+   logic [7:0]   stbuf_fwdbyteen_dc3;
+   logic [63:0]  lsu_rdata_dc3, lsu_rdata_corr_dc3;
+   logic [63:0]  picm_rd_data_dc3;
+   logic [31:0]  picm_rd_data_lo_dc3;
+   logic [63:32] lsu_ld_data_dc3_nc, lsu_ld_data_corr_dc3_nc;   // sinks for the upper half of the shifted data; not read anywhere in this module
+
+   assign dccm_dma_rvalid      = lsu_pkt_dc3.valid & lsu_pkt_dc3.load & lsu_pkt_dc3.dma;
+   assign dccm_dma_ecc_error   = lsu_double_ecc_error_dc3;   // passed through unqualified; not gated by dccm_dma_rvalid here
+   assign dccm_dma_rdata[63:0] = lsu_rdata_corr_dc3[63:0];
+
+
+   assign {lsu_ld_data_dc3_nc[63:32], lsu_ld_data_dc3[31:0]} = lsu_rdata_dc3[63:0] >> 8*lsu_addr_dc3[1:0];   // right-justify by the byte offset; upper 32 bits are dropped
+   assign {lsu_ld_data_corr_dc3_nc[63:32], lsu_ld_data_corr_dc3[31:0]} = lsu_rdata_corr_dc3[63:0] >> 8*lsu_addr_dc3[1:0];
+
+   assign dccm_dout_dc3[63:0] = {dccm_data_hi_dc3[DCCM_DATA_WIDTH-1:0], dccm_data_lo_dc3[DCCM_DATA_WIDTH-1:0]};
+   assign dccm_corr_dout_dc3[63:0] = {store_ecc_datafn_hi_dc3[DCCM_DATA_WIDTH-1:0], store_ecc_datafn_lo_dc3[DCCM_DATA_WIDTH-1:0]};
+   assign stbuf_fwddata_dc3[63:0] = {stbuf_fwddata_hi_dc3[DCCM_DATA_WIDTH-1:0], stbuf_fwddata_lo_dc3[DCCM_DATA_WIDTH-1:0]};
+   assign stbuf_fwdbyteen_dc3[7:0] = {stbuf_fwdbyteen_hi_dc3[DCCM_BYTE_WIDTH-1:0], stbuf_fwdbyteen_lo_dc3[DCCM_BYTE_WIDTH-1:0]};
+
+   for (genvar i=0; i<8; i++) begin: GenLoop   // per-byte source priority: stbuf forward, then PIC, then DCCM
+      assign lsu_rdata_dc3[(8*i)+7:8*i]      = stbuf_fwdbyteen_dc3[i] ? stbuf_fwddata_dc3[(8*i)+7:8*i] :
+                                                                        (addr_in_pic_dc3 ? picm_rd_data_dc3[(8*i)+7:8*i] : dccm_dout_dc3[(8*i)+7:8*i]);
+      assign lsu_rdata_corr_dc3[(8*i)+7:8*i] = stbuf_fwdbyteen_dc3[i] ? stbuf_fwddata_dc3[(8*i)+7:8*i] :
+                                                                        (addr_in_pic_dc3 ? picm_rd_data_dc3[(8*i)+7:8*i] : dccm_corr_dout_dc3[(8*i)+7:8*i]);
+   end
+
+   assign lsu_stbuf_commit_any = stbuf_reqvld_any & ~lsu_freeze_dc3 & (                   // stbuf entry may commit this cycle when not frozen and one of:
+                                 (~(lsu_dccm_rden_dc1 | picm_rden | picm_mken)) |         //  - no dc1 DCCM read and no dc1 PIC read/mask request
+                                 ((picm_rden | picm_mken) & ~stbuf_addr_in_pic_any) |     //  - dc1 is using the PIC and the stbuf entry is not going to the PIC
+                                 (lsu_dccm_rden_dc1 & (stbuf_addr_in_pic_any | (~((stbuf_addr_any[DCCM_WIDTH_BITS+:DCCM_BANK_BITS] == lsu_addr_dc1[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]) |   //  - dc1 reads the DCCM and the stbuf entry goes to the PIC or to a bank
+                                                                                  (stbuf_addr_any[DCCM_WIDTH_BITS+:DCCM_BANK_BITS] == end_addr_dc1[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]))))));   //    that matches neither the start nor the end read address
+
+   // No need to read for aligned word/dword stores since ECC will come by new data completely
+   assign lsu_dccm_rden_dc1 = lsu_pkt_dc1.valid & (lsu_pkt_dc1.load | (lsu_pkt_dc1.store & (~(lsu_pkt_dc1.word | lsu_pkt_dc1.dword) | (lsu_addr_dc1[1:0] != 2'b0)))) & addr_in_dccm_dc1;
+
+   // DCCM inputs
+   assign dccm_wren = lsu_stbuf_commit_any & ~stbuf_addr_in_pic_any;
+   assign dccm_rden = lsu_dccm_rden_dc1 & addr_in_dccm_dc1;   // addr_in_dccm_dc1 is already a term of lsu_dccm_rden_dc1, so this qualifier is redundant
+   assign dccm_wr_addr[DCCM_BITS-1:0] = stbuf_addr_any[DCCM_BITS-1:0];
+   assign dccm_rd_addr_lo[DCCM_BITS-1:0] = lsu_addr_dc1[DCCM_BITS-1:0];
+   assign dccm_rd_addr_hi[DCCM_BITS-1:0] = end_addr_dc1[DCCM_BITS-1:0];
+   assign dccm_wr_data[DCCM_FDATA_WIDTH-1:0] = {stbuf_ecc_any[DCCM_ECC_WIDTH-1:0],stbuf_data_any[DCCM_DATA_WIDTH-1:0]};   // ECC bits sit above the data bits
+
+   // DCCM outputs
+   assign dccm_data_lo_dc2[DCCM_DATA_WIDTH-1:0] = dccm_rd_data_lo[DCCM_DATA_WIDTH-1:0];
+   assign dccm_data_hi_dc2[DCCM_DATA_WIDTH-1:0] = dccm_rd_data_hi[DCCM_DATA_WIDTH-1:0];
+
+   assign dccm_data_ecc_lo_dc2[DCCM_ECC_WIDTH-1:0] = dccm_rd_data_lo[DCCM_FDATA_WIDTH-1:DCCM_DATA_WIDTH];
+   assign dccm_data_ecc_hi_dc2[DCCM_ECC_WIDTH-1:0] = dccm_rd_data_hi[DCCM_FDATA_WIDTH-1:DCCM_DATA_WIDTH];
+
+   // PIC signals. PIC ignores the lower 2 bits of address since PIC memory registers are 32-bits
+   assign picm_wren = lsu_stbuf_commit_any & stbuf_addr_in_pic_any;
+   assign picm_rden = lsu_pkt_dc1.valid & lsu_pkt_dc1.load & addr_in_pic_dc1;
+   assign picm_mken = lsu_pkt_dc1.valid & lsu_pkt_dc1.store & addr_in_pic_dc1;  // Get the mask for stores
+   assign picm_addr[31:0] = (picm_rden | picm_mken) ? (`RV_PIC_BASE_ADDR | {17'b0,lsu_addr_dc1[14:0]}) : (`RV_PIC_BASE_ADDR | {{32-PIC_BITS{1'b0}},stbuf_addr_any[`RV_PIC_BITS-1:0]});   // dc1 access wins the shared address; NOTE(review): dc1 path hard-codes a 15-bit offset while the stbuf path uses PIC_BITS - confirm they agree
+   //assign picm_addr[31:0] = (picm_rden | picm_mken) ? {`RV_PIC_REGION,`RV_PIC_OFFSET,3'b0,lsu_addr_dc1[14:0]} : {`RV_PIC_REGION,`RV_PIC_OFFSET,{18-PIC_BITS{1'b0}},stbuf_addr_any[`RV_PIC_BITS-1:0]};
+   assign picm_wr_data[31:0] = stbuf_data_any[31:0];
+
+
+   // Flops
+   assign picm_mask_data_dc3[31:0] = picm_rd_data_lo_dc3[31:0];
+   assign picm_rd_data_dc3[63:0] = {picm_rd_data_lo_dc3[31:0], picm_rd_data_lo_dc3[31:0]} ;   // 32-bit PIC data replicated into both halves of the 64-bit mux input
+   rvdff #(32) picm_data_ff (.*, .din(picm_rd_data[31:0]), .dout(picm_rd_data_lo_dc3[31:0]), .clk(lsu_pic_c1_dc3_clk));
+   if (DCCM_ENABLE == 1) begin: Gen_dccm_enable   // stage the read enable and the dccm read data/ecc into dc3
+      rvdff #(1) dccm_rden_dc2ff (.*, .din(lsu_dccm_rden_dc1), .dout(lsu_dccm_rden_dc2), .clk(lsu_freeze_c2_dc2_clk));
+      rvdff #(1) dccm_rden_dc3ff (.*, .din(lsu_dccm_rden_dc2), .dout(lsu_dccm_rden_dc3), .clk(lsu_freeze_c2_dc3_clk));
+
+      rvdff #(DCCM_DATA_WIDTH) dccm_data_hi_ff (.*, .din(dccm_data_hi_dc2[DCCM_DATA_WIDTH-1:0]), .dout(dccm_data_hi_dc3[DCCM_DATA_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk));
+      rvdff #(DCCM_DATA_WIDTH) dccm_data_lo_ff (.*, .din(dccm_data_lo_dc2[DCCM_DATA_WIDTH-1:0]), .dout(dccm_data_lo_dc3[DCCM_DATA_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk));
+
+      rvdff #(DCCM_ECC_WIDTH) dccm_data_ecc_hi_ff (.*, .din(dccm_data_ecc_hi_dc2[DCCM_ECC_WIDTH-1:0]), .dout(dccm_data_ecc_hi_dc3[DCCM_ECC_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk));
+      rvdff #(DCCM_ECC_WIDTH) dccm_data_ecc_lo_ff (.*, .din(dccm_data_ecc_lo_dc2[DCCM_ECC_WIDTH-1:0]), .dout(dccm_data_ecc_lo_dc3[DCCM_ECC_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk));
+   end else begin: Gen_dccm_disable   // no DCCM configured: tie the staged read enables and dc3 read data/ecc to zero
+      assign lsu_dccm_rden_dc2 = '0;
+      assign lsu_dccm_rden_dc3 = '0;
+      assign dccm_data_hi_dc3[DCCM_DATA_WIDTH-1:0] = '0;
+      assign dccm_data_lo_dc3[DCCM_DATA_WIDTH-1:0] = '0;
+      assign dccm_data_ecc_hi_dc3[DCCM_ECC_WIDTH-1:0] = '0;
+      assign dccm_data_ecc_lo_dc3[DCCM_ECC_WIDTH-1:0] = '0;
+   end
+
+endmodule
diff --git a/design/lsu/lsu_dccm_mem.sv b/design/lsu/lsu_dccm_mem.sv
new file mode 100644
index 0000000..30a6ae4
--- /dev/null
+++ b/design/lsu/lsu_dccm_mem.sv
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2019 Western Digital Corporation or its affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +//******************************************************************************** +// $Id$ +// +// +// Owner: +// Function: DCCM for LSU pipe +// Comments: Single ported memory +// +// +// DC1 -> DC2 -> DC3 -> DC4 (Commit) +// +// //******************************************************************************** + +module lsu_dccm_mem + import swerv_types::*; +( + input logic clk, // clock + input logic rst_l, + input logic lsu_freeze_dc3, // freeze + input logic clk_override, // clock override + + input logic dccm_wren, // write enable + input logic dccm_rden, // read enable + input logic [`RV_DCCM_BITS-1:0] dccm_wr_addr, // write address + input logic [`RV_DCCM_BITS-1:0] dccm_rd_addr_lo, // read address + input logic [`RV_DCCM_BITS-1:0] dccm_rd_addr_hi, // read address for the upper bank in case of a misaligned access + input logic [`RV_DCCM_FDATA_WIDTH-1:0] dccm_wr_data, // write data + + output logic [`RV_DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // read data from the lo bank + output logic [`RV_DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // read data from the hi bank + + input logic scan_mode +); + +`include "global.h" + + localparam DCCM_WIDTH_BITS = $clog2(DCCM_BYTE_WIDTH); + localparam DCCM_INDEX_BITS = (DCCM_BITS - DCCM_BANK_BITS - DCCM_WIDTH_BITS); + + logic [DCCM_NUM_BANKS-1:0] wren_bank; + logic [DCCM_NUM_BANKS-1:0] rden_bank; + logic [DCCM_NUM_BANKS-1:0] [DCCM_BITS-1:(DCCM_BANK_BITS+2)] addr_bank; + logic [DCCM_BITS-1:(DCCM_BANK_BITS+DCCM_WIDTH_BITS)] rd_addr_even, rd_addr_odd; + logic rd_unaligned; + logic [DCCM_NUM_BANKS-1:0] [DCCM_FDATA_WIDTH-1:0] dccm_bank_dout; + logic [DCCM_FDATA_WIDTH-1:0] wrdata; + + logic [DCCM_NUM_BANKS-1:0] wren_bank_q; + logic [DCCM_NUM_BANKS-1:0] rden_bank_q; + logic [DCCM_NUM_BANKS-1:0][DCCM_BITS-1:(DCCM_BANK_BITS+2)] addr_bank_q; + logic [DCCM_FDATA_WIDTH-1:0] dccm_wr_data_q; + + logic [(DCCM_WIDTH_BITS+DCCM_BANK_BITS-1):DCCM_WIDTH_BITS] dccm_rd_addr_lo_q; + logic [(DCCM_WIDTH_BITS+DCCM_BANK_BITS-1):DCCM_WIDTH_BITS] 
dccm_rd_addr_hi_q; + + logic [DCCM_NUM_BANKS-1:0] dccm_clk; + logic [DCCM_NUM_BANKS-1:0] dccm_clken; + + assign rd_unaligned = (dccm_rd_addr_lo[DCCM_WIDTH_BITS+:DCCM_BANK_BITS] != dccm_rd_addr_hi[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]); + + // Align the read data + assign dccm_rd_data_lo[DCCM_FDATA_WIDTH-1:0] = dccm_bank_dout[dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]][DCCM_FDATA_WIDTH-1:0]; + assign dccm_rd_data_hi[DCCM_FDATA_WIDTH-1:0] = dccm_bank_dout[dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]][DCCM_FDATA_WIDTH-1:0]; + + // Generate even/odd address + // assign rd_addr_even[(DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] = dccm_rd_addr_lo[2] ? dccm_rd_addr_hi[(DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] : + // dccm_rd_addr_lo[(DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS]; + + // assign rd_addr_odd[(DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] = dccm_rd_addr_lo[2] ? dccm_rd_addr_lo[(DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] : + // dccm_rd_addr_hi[(DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS]; + + // 8 Banks, 16KB each (2048 x 72) + for (genvar i=0; i DC2 -> DC3 -> DC4 (Commit) +// +//******************************************************************************** +module lsu_ecc + import swerv_types::*; +( + + input logic lsu_c2_dc4_clk, // clocks + input logic lsu_c1_dc4_clk, + input logic lsu_c1_dc5_clk, + input logic clk, + input logic rst_l, + + input lsu_pkt_t lsu_pkt_dc3, // packet in dc3 + input logic lsu_dccm_rden_dc3, // dccm rden + input logic addr_in_dccm_dc3, // address in dccm + input logic [`RV_DCCM_BITS-1:0] lsu_addr_dc3, // start address + input logic [`RV_DCCM_BITS-1:0] end_addr_dc3, // end address + input logic [63:0] store_data_dc3, // store data + input logic [`RV_DCCM_DATA_WIDTH-1:0] stbuf_data_any, + + input logic [`RV_DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_dc3, // data forward from the store buffer + input logic [`RV_DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_dc3, // data forward from the store buffer + 
input logic [`RV_DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_dc3,// which bytes from the store buffer are on + input logic [`RV_DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_dc3,// which bytes from the store buffer are on + + input logic [`RV_DCCM_DATA_WIDTH-1:0] dccm_data_hi_dc3, // raw data from mem + input logic [`RV_DCCM_DATA_WIDTH-1:0] dccm_data_lo_dc3, // raw data from mem + input logic [`RV_DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_dc3, // ecc read out from mem + input logic [`RV_DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_dc3, // ecc read out from mem + + input logic dec_tlu_core_ecc_disable, // disables the ecc computation and error flagging + + output logic [`RV_DCCM_DATA_WIDTH-1:0] store_ecc_datafn_hi_dc3, // final store data either from stbuf or SEC DCCM readout + output logic [`RV_DCCM_DATA_WIDTH-1:0] store_ecc_datafn_lo_dc3, + + output logic [`RV_DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, + output logic single_ecc_error_hi_dc3, // sec detected + output logic single_ecc_error_lo_dc3, // sec detected on lower dccm bank + output logic lsu_single_ecc_error_dc3, // or of the 2 + output logic lsu_double_ecc_error_dc3, // double error detected + + input logic scan_mode + ); + +`include "global.h" + + `ifdef RV_DCCM_ENABLE + localparam DCCM_ENABLE = 1'b1; + `else + localparam DCCM_ENABLE = 1'b0; + `endif + + logic [DCCM_DATA_WIDTH-1:0] sec_data_hi_dc3; + logic [DCCM_DATA_WIDTH-1:0] sec_data_lo_dc3; + + + logic double_ecc_error_hi_dc3, double_ecc_error_lo_dc3; + + logic ldst_dual_dc3; + logic is_ldst_dc3; + logic is_ldst_hi_dc3, is_ldst_lo_dc3; + logic [7:0] ldst_byteen_dc3; + logic [7:0] store_byteen_dc3; + logic [7:0] store_byteen_ext_dc3; + logic [DCCM_BYTE_WIDTH-1:0] store_byteen_hi_dc3, store_byteen_lo_dc3; + + logic [163:0] store_data_ext_dc3; + logic [DCCM_DATA_WIDTH-1:0] store_data_hi_dc3, store_data_lo_dc3; + logic [6:0] ecc_out_hi_nc, ecc_out_lo_nc; + + + assign ldst_dual_dc3 = (lsu_addr_dc3[2] != end_addr_dc3[2]); + assign is_ldst_dc3 = lsu_pkt_dc3.valid & (lsu_pkt_dc3.load | 
lsu_pkt_dc3.store) & addr_in_dccm_dc3 & lsu_dccm_rden_dc3; + assign is_ldst_lo_dc3 = is_ldst_dc3 & ~dec_tlu_core_ecc_disable; + assign is_ldst_hi_dc3 = is_ldst_dc3 & ldst_dual_dc3 & ~dec_tlu_core_ecc_disable; + + assign ldst_byteen_dc3[7:0] = ({8{lsu_pkt_dc3.by}} & 8'b0000_0001) | + ({8{lsu_pkt_dc3.half}} & 8'b0000_0011) | + ({8{lsu_pkt_dc3.word}} & 8'b0000_1111) | + ({8{lsu_pkt_dc3.dword}} & 8'b1111_1111); + assign store_byteen_dc3[7:0] = ldst_byteen_dc3[7:0] & {8{lsu_pkt_dc3.store}}; + + assign store_byteen_ext_dc3[7:0] = store_byteen_dc3[7:0] << lsu_addr_dc3[1:0]; + assign store_byteen_hi_dc3[DCCM_BYTE_WIDTH-1:0] = store_byteen_ext_dc3[7:4]; + assign store_byteen_lo_dc3[DCCM_BYTE_WIDTH-1:0] = store_byteen_ext_dc3[3:0]; + + assign store_data_ext_dc3[63:0] = store_data_dc3[63:0] << {lsu_addr_dc3[1:0], 3'b000}; + assign store_data_hi_dc3[DCCM_DATA_WIDTH-1:0] = store_data_ext_dc3[63:32]; + assign store_data_lo_dc3[DCCM_DATA_WIDTH-1:0] = store_data_ext_dc3[31:0]; + + + // Merge store data and sec data + // This is used for loads as well for ecc error case. store_byteen will be 0 for loads + for (genvar i=0; i> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores + assign store_data_d[63:0] = dma_dccm_req ? dma_mem_wdata_shifted[63:0] : {32'b0,exu_lsu_rs2_d[31:0]}; + + assign store_data_dc2_in[63:32] = store_data_dc1[63:32]; + assign store_data_dc2_in[31:0] = (lsu_pkt_dc1.store_data_bypass_c1) ? lsu_result_dc3[31:0] : + (lsu_pkt_dc1.store_data_bypass_e4_c1[1]) ? i1_result_e4_eff[31:0] : + (lsu_pkt_dc1.store_data_bypass_e4_c1[0]) ? i0_result_e4_eff[31:0] : store_data_dc1[31:0]; + + assign store_data_dc2[63:32] = store_data_pre_dc2[63:32]; + assign store_data_dc2[31:0] = (lsu_pkt_dc2.store_data_bypass_i0_e2_c2) ? i0_result_e2[31:0] : + (lsu_pkt_dc2.store_data_bypass_c2) ? lsu_result_dc3[31:0] : + (lsu_pkt_dc2.store_data_bypass_e4_c2[1]) ? i1_result_e4_eff[31:0] : + (lsu_pkt_dc2.store_data_bypass_e4_c2[0]) ? 
i0_result_e4_eff[31:0] : store_data_pre_dc2[31:0]; + + assign store_data_dc3[63:32] = store_data_pre_dc3[63:32]; + assign store_data_dc3[31:0] = (picm_mask_data_dc3[31:0] | {32{~addr_in_pic_dc3}}) & + ((lsu_pkt_dc3.store_data_bypass_e4_c3[1]) ? i1_result_e4_eff[31:0] : + (lsu_pkt_dc3.store_data_bypass_e4_c3[0]) ? i0_result_e4_eff[31:0] : store_data_pre_dc3[31:0]); + + rvdff #(32) lsu_result_corr_dc4ff (.*, .din(lsu_result_corr_dc3[31:0]), .dout(lsu_result_corr_dc4[31:0]), .clk(lsu_c1_dc4_clk)); + + rvdff #(64) sddc1ff (.*, .din(store_data_d[63:0]), .dout(store_data_dc1[63:0]), .clk(lsu_store_c1_dc1_clk)); + rvdff #(64) sddc2ff (.*, .din(store_data_dc2_in[63:0]), .dout(store_data_pre_dc2[63:0]), .clk(lsu_store_c1_dc2_clk)); + rvdffs #(64) sddc3ff (.*, .din(store_data_dc2[63:0]), .dout(store_data_pre_dc3[63:0]), .en(~lsu_freeze_dc3), .clk(lsu_store_c1_dc3_clk)); + rvdff #(32) sddc4ff (.*, .din(store_data_dc3[31:0]), .dout(store_data_dc4[31:0]), .clk(lsu_store_c1_dc4_clk)); + rvdff #(32) sddc5ff (.*, .din(store_data_dc4[31:0]), .dout(store_data_dc5[31:0]), .clk(lsu_store_c1_dc5_clk)); + + rvdff #(32) sadc2ff (.*, .din(lsu_addr_dc1[31:0]), .dout(lsu_addr_dc2[31:0]), .clk(lsu_freeze_c1_dc2_clk)); + rvdff #(32) sadc3ff (.*, .din(lsu_addr_dc2[31:0]), .dout(lsu_addr_dc3[31:0]), .clk(lsu_freeze_c1_dc3_clk)); + rvdff #(32) sadc4ff (.*, .din(lsu_addr_dc3[31:0]), .dout(lsu_addr_dc4[31:0]), .clk(lsu_c1_dc4_clk)); + rvdff #(32) sadc5ff (.*, .din(lsu_addr_dc4[31:0]), .dout(lsu_addr_dc5[31:0]), .clk(lsu_c1_dc5_clk)); + + rvdff #(32) end_addr_dc2ff (.*, .din(end_addr_dc1[31:0]), .dout(end_addr_dc2[31:0]), .clk(lsu_freeze_c1_dc2_clk)); + rvdff #(32) end_addr_dc3ff (.*, .din(end_addr_dc2[31:0]), .dout(end_addr_dc3[31:0]), .clk(lsu_freeze_c1_dc3_clk)); + rvdff #(32) end_addr_dc4ff (.*, .din(end_addr_dc3[31:0]), .dout(end_addr_dc4[31:0]), .clk(lsu_c1_dc4_clk)); + rvdff #(32) end_addr_dc5ff (.*, .din(end_addr_dc4[31:0]), .dout(end_addr_dc5[31:0]), .clk(lsu_c1_dc5_clk)); + + rvdff #(1) 
addr_in_dccm_dc2ff(.din(addr_in_dccm_dc1), .dout(addr_in_dccm_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); + rvdff #(1) addr_in_dccm_dc3ff(.din(addr_in_dccm_dc2), .dout(addr_in_dccm_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); + rvdff #(1) addr_in_pic_dc2ff(.din(addr_in_pic_dc1), .dout(addr_in_pic_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); + rvdff #(1) addr_in_pic_dc3ff(.din(addr_in_pic_dc2), .dout(addr_in_pic_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); + + rvdff #(1) addr_external_dc2ff(.din(addr_external_dc1), .dout(addr_external_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); + rvdff #(1) addr_external_dc3ff(.din(addr_external_dc2), .dout(addr_external_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); + rvdff #(1) addr_external_dc4ff(.din(addr_external_dc3), .dout(addr_external_dc4), .clk(lsu_c1_dc4_clk), .*); + rvdff #(1) addr_external_dc5ff(.din(addr_external_dc4), .dout(addr_external_dc5), .clk(lsu_c1_dc5_clk), .*); + + rvdff #(1) access_fault_dc2ff(.din(access_fault_dc1), .dout(access_fault_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); + rvdff #(1) access_fault_dc3ff(.din(access_fault_dc2), .dout(access_fault_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); + rvdff #(1) misaligned_fault_dc2ff(.din(misaligned_fault_dc1), .dout(misaligned_fault_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); + rvdff #(1) misaligned_fault_dc3ff(.din(misaligned_fault_dc2), .dout(misaligned_fault_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); + +endmodule diff --git a/design/lsu/lsu_stbuf.sv b/design/lsu/lsu_stbuf.sv new file mode 100644 index 0000000..851c05e --- /dev/null +++ b/design/lsu/lsu_stbuf.sv @@ -0,0 +1,399 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// +// Owner: +// Function: Store Buffer +// Comments: Dual writes and single drain +// +// +// DC1 -> DC2 -> DC3 -> DC4 (Commit) +// +// //******************************************************************************** + + +module lsu_stbuf + import swerv_types::*; +( + input logic clk, // core clock + input logic rst_l, // reset + + input logic lsu_freeze_c2_dc2_clk, // freeze clock + input logic lsu_freeze_c2_dc3_clk, // freeze clock + input logic lsu_freeze_c1_dc2_clk, // freeze clock + input logic lsu_freeze_c1_dc3_clk, // freeze clock + input logic lsu_c1_dc4_clk, // lsu pipe clock + input logic lsu_c1_dc5_clk, // lsu pipe clock + input logic lsu_c2_dc4_clk, // lsu pipe clock + input logic lsu_c2_dc5_clk, // lsu pipe clock + input logic lsu_stbuf_c1_clk, // stbuf clock + input logic lsu_free_c2_clk, // free clk + + // Store Buffer input + input logic load_stbuf_reqvld_dc3, // core instruction goes to stbuf + input logic store_stbuf_reqvld_dc3, // core instruction goes to stbuf + //input logic ldst_stbuf_reqvld_dc3, + input logic addr_in_pic_dc2, // address is in pic + input logic addr_in_pic_dc3, // address is in pic + input logic addr_in_dccm_dc2, // address is in pic + input logic addr_in_dccm_dc3, // address is in pic + input logic [`RV_DCCM_DATA_WIDTH-1:0] store_ecc_datafn_hi_dc3, // data to write + input logic [`RV_DCCM_DATA_WIDTH-1:0] store_ecc_datafn_lo_dc3, // data to write + + input logic isldst_dc1, // instruction in dc1 is lsu + 
input logic dccm_ldst_dc2, // instruction in dc2 is lsu + input logic dccm_ldst_dc3, // instruction in dc3 is lsu + + input logic single_ecc_error_hi_dc3, // single ecc error in hi bank + input logic single_ecc_error_lo_dc3, // single ecc error in lo bank + input logic lsu_single_ecc_error_dc5, // single_ecc_error in either bank staged to the dc5 - needed for the load repairs + input logic lsu_commit_dc5, // lsu commits + input logic lsu_freeze_dc3, // lsu freeze + input logic flush_prior_dc5, // Flush is due to i0 and ld/st is in i1 + + // Store Buffer output + output logic stbuf_reqvld_any, // stbuf is draining + output logic stbuf_reqvld_flushed_any, // Top entry is flushed + output logic stbuf_addr_in_pic_any, // address maps to pic + output logic [`RV_DCCM_BYTE_WIDTH-1:0] stbuf_byteen_any, // which bytes are active + output logic [`RV_LSU_SB_BITS-1:0] stbuf_addr_any, // address + output logic [`RV_DCCM_DATA_WIDTH-1:0] stbuf_data_any, // stbuf data + + input logic lsu_stbuf_commit_any, // pop the stbuf as it commite + output logic lsu_stbuf_full_any, // stbuf is full + output logic lsu_stbuf_empty_any, // stbuf is empty + output logic lsu_stbuf_nodma_empty_any, // stbuf is empty except dma + + input logic [`RV_LSU_SB_BITS-1:0] lsu_addr_dc1, // lsu address + input logic [`RV_LSU_SB_BITS-1:0] lsu_addr_dc2, + input logic [`RV_LSU_SB_BITS-1:0] lsu_addr_dc3, + + input logic [`RV_LSU_SB_BITS-1:0] end_addr_dc1, // lsu end addrress - needed to check unaligned + input logic [`RV_LSU_SB_BITS-1:0] end_addr_dc2, + input logic [`RV_LSU_SB_BITS-1:0] end_addr_dc3, + + // Forwarding signals + input logic lsu_cmpen_dc2, // needed for forwarding stbuf - load + input lsu_pkt_t lsu_pkt_dc2, + input lsu_pkt_t lsu_pkt_dc3, + input lsu_pkt_t lsu_pkt_dc5, + + output logic [`RV_DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_dc3, // stbuf data + output logic [`RV_DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_dc3, + output logic [`RV_DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_dc3, + output logic 
[`RV_DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_dc3, + + input logic scan_mode + +); + +`include "global.h" + + localparam DEPTH = LSU_STBUF_DEPTH; + localparam DATA_WIDTH = DCCM_DATA_WIDTH; + localparam BYTE_WIDTH = DCCM_BYTE_WIDTH; + localparam DEPTH_LOG2 = $clog2(DEPTH); + + logic [DEPTH-1:0] stbuf_data_vld; + logic [DEPTH-1:0] stbuf_drain_vld; + logic [DEPTH-1:0] stbuf_flush_vld; + logic [DEPTH-1:0] stbuf_addr_in_pic; + logic [DEPTH-1:0] stbuf_dma; + logic [DEPTH-1:0][LSU_SB_BITS-1:0] stbuf_addr; + logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_byteen; + logic [DEPTH-1:0][DATA_WIDTH-1:0] stbuf_data; + + logic [DEPTH-1:0] sel_lo; + logic [DEPTH-1:0] stbuf_wr_en; + logic [DEPTH-1:0] stbuf_data_en; + logic [DEPTH-1:0] stbuf_drain_or_flush_en; + logic [DEPTH-1:0] stbuf_flush_en; + logic [DEPTH-1:0] stbuf_drain_en; + logic [DEPTH-1:0] stbuf_reset; + logic [DEPTH-1:0][LSU_SB_BITS-1:0] stbuf_addrin; + logic [DEPTH-1:0][DATA_WIDTH-1:0] stbuf_datain; + logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_byteenin; + + logic [7:0] ldst_byteen_dc3; + logic [7:0] store_byteen_ext_dc3; + logic [BYTE_WIDTH-1:0] store_byteen_hi_dc3; + logic [BYTE_WIDTH-1:0] store_byteen_lo_dc3; + + logic ldst_stbuf_reqvld_dc3; + logic dual_ecc_error_dc3; + logic dual_stbuf_write_dc3; + + logic WrPtrEn, RdPtrEn; + logic [DEPTH_LOG2-1:0] WrPtr, RdPtr; + logic [DEPTH_LOG2-1:0] NxtWrPtr, NxtRdPtr; + logic [DEPTH_LOG2-1:0] WrPtrPlus1, WrPtrPlus1_dc5, WrPtrPlus2, RdPtrPlus1; + logic [DEPTH_LOG2-1:0] WrPtr_dc3, WrPtr_dc4, WrPtr_dc5; + logic ldst_dual_dc1, ldst_dual_dc2, ldst_dual_dc3, ldst_dual_dc4, ldst_dual_dc5; + logic ldst_stbuf_reqvld_dc4, ldst_stbuf_reqvld_dc5; + logic dual_stbuf_write_dc4, dual_stbuf_write_dc5; + + logic [3:0] stbuf_numvld_any, stbuf_specvld_any; + logic [1:0] stbuf_specvld_dc1, stbuf_specvld_dc2, stbuf_specvld_dc3; + logic stbuf_oneavl_any, stbuf_twoavl_any; + + logic cmpen_hi_dc2, cmpen_lo_dc2, jit_in_same_region; + + logic [LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] cmpaddr_hi_dc2, cmpaddr_lo_dc2; + + 
logic stbuf_ldmatch_hi_hi, stbuf_ldmatch_hi_lo; + logic stbuf_ldmatch_lo_hi, stbuf_ldmatch_lo_lo; + logic [BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_hi, stbuf_fwdbyteen_hi_lo; + logic [BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_hi, stbuf_fwdbyteen_lo_lo; + logic [DATA_WIDTH-1:0] stbuf_fwddata_hi_hi, stbuf_fwddata_hi_lo; + logic [DATA_WIDTH-1:0] stbuf_fwddata_lo_hi, stbuf_fwddata_lo_lo; + + logic [DEPTH-1:0] stbuf_ldmatch_hi, stbuf_ldmatch_lo; + logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_fwdbyteenvec_hi, stbuf_fwdbyteenvec_lo; + logic [DEPTH-1:0][DATA_WIDTH-1:0] stbuf_fwddatavec_hi, stbuf_fwddatavec_lo; + logic [DATA_WIDTH-1:0] stbuf_fwddata_hi_dc2, stbuf_fwddata_lo_dc2; + logic [DATA_WIDTH-1:0] stbuf_fwddata_hi_fn_dc2, stbuf_fwddata_lo_fn_dc2; + logic [BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_dc2, stbuf_fwdbyteen_lo_dc2; + logic [BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_fn_dc2, stbuf_fwdbyteen_lo_fn_dc2; + logic stbuf_load_repair_dc5; + //---------------------------------------- + // Logic starts here + //---------------------------------------- + // Create high/low byte enables + assign ldst_byteen_dc3[7:0] = ({8{lsu_pkt_dc3.by}} & 8'b0000_0001) | + ({8{lsu_pkt_dc3.half}} & 8'b0000_0011) | + ({8{lsu_pkt_dc3.word}} & 8'b0000_1111) | + ({8{lsu_pkt_dc3.dword}} & 8'b1111_1111); + assign store_byteen_ext_dc3[7:0] = ldst_byteen_dc3[7:0] << lsu_addr_dc3[1:0]; + assign store_byteen_hi_dc3[BYTE_WIDTH-1:0] = store_byteen_ext_dc3[7:4]; + assign store_byteen_lo_dc3[BYTE_WIDTH-1:0] = store_byteen_ext_dc3[3:0]; + + assign RdPtrPlus1[DEPTH_LOG2-1:0] = RdPtr[DEPTH_LOG2-1:0] + 1'b1; + assign WrPtrPlus1[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 1'b1; + assign WrPtrPlus2[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 2'b10; + assign WrPtrPlus1_dc5[DEPTH_LOG2-1:0] = WrPtr_dc5[DEPTH_LOG2-1:0] + 1'b1; + + // ecc error on both hi/lo + assign ldst_dual_dc1 = (lsu_addr_dc1[2] != end_addr_dc1[2]); + assign dual_ecc_error_dc3 = (single_ecc_error_hi_dc3 & single_ecc_error_lo_dc3); + assign dual_stbuf_write_dc3 = ldst_dual_dc3 
& (store_stbuf_reqvld_dc3 | dual_ecc_error_dc3); + assign ldst_stbuf_reqvld_dc3 = store_stbuf_reqvld_dc3 | + (load_stbuf_reqvld_dc3 & (dual_ecc_error_dc3 ? stbuf_twoavl_any : stbuf_oneavl_any)); // Don't correct ecc if not enough entries. Load will be flushed and come back again + assign stbuf_load_repair_dc5 = lsu_single_ecc_error_dc5 & (lsu_pkt_dc5.valid & lsu_pkt_dc5.load & ~flush_prior_dc5); + + // Store Buffer instantiation + for (genvar i=0; i (DEPTH - 2)); + assign lsu_stbuf_empty_any = (stbuf_numvld_any[3:0] == 4'b0); + assign lsu_stbuf_nodma_empty_any = ~(|(stbuf_data_vld[DEPTH-1:0] & ~stbuf_dma[DEPTH-1:0])); + + assign stbuf_oneavl_any = (stbuf_numvld_any[3:0] < DEPTH); + assign stbuf_twoavl_any = (stbuf_numvld_any[3:0] < (DEPTH - 1)); + + // Load forwarding logic + assign cmpen_hi_dc2 = lsu_cmpen_dc2 & ldst_dual_dc2; + assign cmpaddr_hi_dc2[LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = end_addr_dc2[LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]; + + assign cmpen_lo_dc2 = lsu_cmpen_dc2; + assign cmpaddr_lo_dc2[LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = lsu_addr_dc2[LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]; + assign jit_in_same_region = (addr_in_pic_dc2 & addr_in_pic_dc3) | (addr_in_dccm_dc2 & addr_in_dccm_dc3); + + // JIT forwarding + assign stbuf_ldmatch_hi_hi = (end_addr_dc3[LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_hi_dc2[LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & ~(cmpen_hi_dc2 & lsu_pkt_dc2.dma & ~lsu_pkt_dc3.dma) & jit_in_same_region; + assign stbuf_ldmatch_hi_lo = (lsu_addr_dc3[LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_hi_dc2[LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & ~(cmpen_hi_dc2 & lsu_pkt_dc2.dma & ~lsu_pkt_dc3.dma) & jit_in_same_region; + assign stbuf_ldmatch_lo_hi = (end_addr_dc3[LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_lo_dc2[LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & ~(cmpen_lo_dc2 & lsu_pkt_dc2.dma & ~lsu_pkt_dc3.dma) & jit_in_same_region; + assign stbuf_ldmatch_lo_lo = (lsu_addr_dc3[LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_lo_dc2[LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & 
~(cmpen_lo_dc2 & lsu_pkt_dc2.dma & ~lsu_pkt_dc3.dma) & jit_in_same_region; + + for (genvar i=0; i meipt_inv[INTPRIORITY_BITS-1:0]) & + ( selected_int_priority[INTPRIORITY_BITS-1:0] > meicurpl_inv[INTPRIORITY_BITS-1:0]) ); +rvdff #(1) mexintpend_ff (.*, .clk(free_clk), .din (mexintpend_in), .dout(mexintpend)); + +assign maxint[INTPRIORITY_BITS-1:0] = intpriord ? 0 : 15 ; +assign mhwakeup_in = ( pl_in_q[INTPRIORITY_BITS-1:0] == maxint) ; +rvdff #(1) wake_up_ff (.*, .clk(free_clk), .din (mhwakeup_in), .dout(mhwakeup)); + + +// assign atleast_one_int_enabled_in = |intenable_reg[TOTAL_INT-1:0] ; +// rvdff #(1) one_int_en_ff (.*, .din (atleast_one_int_enabled_in), .dout(atleast_one_int_enabled)); +// +// assign mexintpend = mexintpend_unq & atleast_one_int_enabled ; +// assign mhwakeup = mhwakeup_unq & atleast_one_int_enabled ; + + + +////////////////////////////////////////////////////////////////////////// +// Reads of register. +// 1- intpending +////////////////////////////////////////////////////////////////////////// + +assign intpend_reg_read = addr_intpend_base_match & picm_rden_ff ; +assign intpriority_reg_read = addr_intpriority_base_match & picm_rden_ff; +assign intenable_reg_read = addr_intenable_base_match & picm_rden_ff; +assign gw_config_reg_read = addr_config_gw_base_match & picm_rden_ff; + +assign intpend_reg_extended[INTPEND_SIZE-1:0] = {{INTPEND_SIZE-TOTAL_INT{1'b0}},extintsrc_req_gw[TOTAL_INT-1:0]} ; + + for (i=0; i<(INT_GRPS); i++) begin + assign intpend_rd_part_out[i] = (({32{intpend_reg_read & picm_addr_ff[5:2] == i}}) & intpend_reg_extended[((32*i)+31):(32*i)]) ; + end + + always_comb begin : INTPEND_RD + intpend_rd_out = '0 ; + for (int i=0; i AHB Gasket for LSU + axi4_to_ahb #(.TAG(LSU_BUS_TAG)) lsu_axi4_to_ahb ( + .clk_override(dec_tlu_bus_clk_override), + .bus_clk_en(lsu_bus_clk_en), + + // AXI Write Channels + .axi_awvalid(lsu_axi_awvalid), + .axi_awready(lsu_axi_awready), + .axi_awid(lsu_axi_awid[LSU_BUS_TAG-1:0]), + 
.axi_awaddr(lsu_axi_awaddr[31:0]), + .axi_awsize(lsu_axi_awsize[2:0]), + .axi_awprot(lsu_axi_awprot[2:0]), + + .axi_wvalid(lsu_axi_wvalid), + .axi_wready(lsu_axi_wready), + .axi_wdata(lsu_axi_wdata[63:0]), + .axi_wstrb(lsu_axi_wstrb[7:0]), + .axi_wlast(lsu_axi_wlast), + + .axi_bvalid(lsu_axi_bvalid), + .axi_bready(lsu_axi_bready), + .axi_bresp(lsu_axi_bresp[1:0]), + .axi_bid(lsu_axi_bid[LSU_BUS_TAG-1:0]), + + // AXI Read Channels + .axi_arvalid(lsu_axi_arvalid), + .axi_arready(lsu_axi_arready), + .axi_arid(lsu_axi_arid[LSU_BUS_TAG-1:0]), + .axi_araddr(lsu_axi_araddr[31:0]), + .axi_arsize(lsu_axi_arsize[2:0]), + .axi_arprot(lsu_axi_arprot[2:0]), + + .axi_rvalid(lsu_axi_rvalid), + .axi_rready(lsu_axi_rready), + .axi_rid(lsu_axi_rid[LSU_BUS_TAG-1:0]), + .axi_rdata(lsu_axi_rdata[63:0]), + .axi_rresp(lsu_axi_rresp[1:0]), + .axi_rlast(lsu_axi_rlast), + // AHB-LITE signals + .ahb_haddr(lsu_haddr[31:0]), + .ahb_hburst(lsu_hburst), + .ahb_hmastlock(lsu_hmastlock), + .ahb_hprot(lsu_hprot[3:0]), + .ahb_hsize(lsu_hsize[2:0]), + .ahb_htrans(lsu_htrans[1:0]), + .ahb_hwrite(lsu_hwrite), + .ahb_hwdata(lsu_hwdata[63:0]), + + .ahb_hrdata(lsu_hrdata[63:0]), + .ahb_hready(lsu_hready), + .ahb_hresp(lsu_hresp), + + .* + ); + + // AXI4 -> AHB Gasket for System Bus + axi4_to_ahb #(.TAG(SB_BUS_TAG)) sb_axi4_to_ahb ( + .clk_override(dec_tlu_bus_clk_override), + .bus_clk_en(dbg_bus_clk_en), + + // AXI Write Channels + .axi_awvalid(sb_axi_awvalid), + .axi_awready(sb_axi_awready), + .axi_awid(sb_axi_awid[SB_BUS_TAG-1:0]), + .axi_awaddr(sb_axi_awaddr[31:0]), + .axi_awsize(sb_axi_awsize[2:0]), + .axi_awprot(sb_axi_awprot[2:0]), + + .axi_wvalid(sb_axi_wvalid), + .axi_wready(sb_axi_wready), + .axi_wdata(sb_axi_wdata[63:0]), + .axi_wstrb(sb_axi_wstrb[7:0]), + .axi_wlast(sb_axi_wlast), + + .axi_bvalid(sb_axi_bvalid), + .axi_bready(sb_axi_bready), + .axi_bresp(sb_axi_bresp[1:0]), + .axi_bid(sb_axi_bid[SB_BUS_TAG-1:0]), + + // AXI Read Channels + .axi_arvalid(sb_axi_arvalid), + 
.axi_arready(sb_axi_arready), + .axi_arid(sb_axi_arid[SB_BUS_TAG-1:0]), + .axi_araddr(sb_axi_araddr[31:0]), + .axi_arsize(sb_axi_arsize[2:0]), + .axi_arprot(sb_axi_arprot[2:0]), + + .axi_rvalid(sb_axi_rvalid), + .axi_rready(sb_axi_rready), + .axi_rid(sb_axi_rid[SB_BUS_TAG-1:0]), + .axi_rdata(sb_axi_rdata[63:0]), + .axi_rresp(sb_axi_rresp[1:0]), + .axi_rlast(sb_axi_rlast), + // AHB-LITE signals + .ahb_haddr(sb_haddr[31:0]), + .ahb_hburst(sb_hburst), + .ahb_hmastlock(sb_hmastlock), + .ahb_hprot(sb_hprot[3:0]), + .ahb_hsize(sb_hsize[2:0]), + .ahb_htrans(sb_htrans[1:0]), + .ahb_hwrite(sb_hwrite), + .ahb_hwdata(sb_hwdata[63:0]), + + .ahb_hrdata(sb_hrdata[63:0]), + .ahb_hready(sb_hready), + .ahb_hresp(sb_hresp), + + .* + ); + + axi4_to_ahb #(.TAG(IFU_BUS_TAG)) ifu_axi4_to_ahb ( + .clk(clk), + .clk_override(dec_tlu_bus_clk_override), + .bus_clk_en(ifu_bus_clk_en), + + // AHB-Lite signals + .ahb_haddr(haddr[31:0]), + .ahb_hburst(hburst), + .ahb_hmastlock(hmastlock), + .ahb_hprot(hprot[3:0]), + .ahb_hsize(hsize[2:0]), + .ahb_htrans(htrans[1:0]), + .ahb_hwrite(hwrite), + .ahb_hwdata(hwdata_nc[63:0]), + + .ahb_hrdata(hrdata[63:0]), + .ahb_hready(hready), + .ahb_hresp(hresp), + + // AXI Write Channels + .axi_awvalid(ifu_axi_awvalid), + .axi_awready(ifu_axi_awready), + .axi_awid(ifu_axi_awid[IFU_BUS_TAG-1:0]), + .axi_awaddr(ifu_axi_awaddr[31:0]), + .axi_awsize(ifu_axi_awsize[2:0]), + .axi_awprot(ifu_axi_awprot[2:0]), + + .axi_wvalid(ifu_axi_wvalid), + .axi_wready(ifu_axi_wready), + .axi_wdata(ifu_axi_wdata[63:0]), + .axi_wstrb(ifu_axi_wstrb[7:0]), + .axi_wlast(ifu_axi_wlast), + + .axi_bvalid(ifu_axi_bvalid), + .axi_bready(ifu_axi_bready), + .axi_bresp(ifu_axi_bresp[1:0]), + .axi_bid(ifu_axi_bid[IFU_BUS_TAG-1:0]), + + // AXI Read Channels + .axi_arvalid(ifu_axi_arvalid), + .axi_arready(ifu_axi_arready), + .axi_arid(ifu_axi_arid[IFU_BUS_TAG-1:0]), + .axi_araddr(ifu_axi_araddr[31:0]), + .axi_arsize(ifu_axi_arsize[2:0]), + .axi_arprot(ifu_axi_arprot[2:0]), + + 
.axi_rvalid(ifu_axi_rvalid), + .axi_rready(ifu_axi_rready), + .axi_rid(ifu_axi_rid[IFU_BUS_TAG-1:0]), + .axi_rdata(ifu_axi_rdata[63:0]), + .axi_rresp(ifu_axi_rresp[1:0]), + .axi_rlast(ifu_axi_rlast), + .* + ); + + //AHB -> AXI4 Gasket for DMA + ahb_to_axi4 #(.TAG(DMA_BUS_TAG)) dma_ahb_to_axi4 ( + .clk_override(dec_tlu_bus_clk_override), + .bus_clk_en(dma_bus_clk_en), + + // AXI Write Channels + .axi_awvalid(dma_axi_awvalid), + .axi_awready(dma_axi_awready), + .axi_awid(dma_axi_awid[DMA_BUS_TAG-1:0]), + .axi_awaddr(dma_axi_awaddr[31:0]), + .axi_awsize(dma_axi_awsize[2:0]), + .axi_awprot(dma_axi_awprot[2:0]), + .axi_awlen(dma_axi_awlen[7:0]), + .axi_awburst(dma_axi_awburst[1:0]), + + .axi_wvalid(dma_axi_wvalid), + .axi_wready(dma_axi_wready), + .axi_wdata(dma_axi_wdata[63:0]), + .axi_wstrb(dma_axi_wstrb[7:0]), + .axi_wlast(dma_axi_wlast), + + .axi_bvalid(dma_axi_bvalid), + .axi_bready(dma_axi_bready), + .axi_bresp(dma_axi_bresp[1:0]), + .axi_bid(dma_axi_bid[DMA_BUS_TAG-1:0]), + + // AXI Read Channels + .axi_arvalid(dma_axi_arvalid), + .axi_arready(dma_axi_arready), + .axi_arid(dma_axi_arid[DMA_BUS_TAG-1:0]), + .axi_araddr(dma_axi_araddr[31:0]), + .axi_arsize(dma_axi_arsize[2:0]), + .axi_arprot(dma_axi_arprot[2:0]), + .axi_arlen(dma_axi_arlen[7:0]), + .axi_arburst(dma_axi_arburst[1:0]), + + .axi_rvalid(dma_axi_rvalid), + .axi_rready(dma_axi_rready), + .axi_rid(dma_axi_rid[DMA_BUS_TAG-1:0]), + .axi_rdata(dma_axi_rdata[63:0]), + .axi_rresp(dma_axi_rresp[1:0]), + + // AHB signals + .ahb_haddr(dma_haddr[31:0]), + .ahb_hburst(dma_hburst), + .ahb_hmastlock(dma_hmastlock), + .ahb_hprot(dma_hprot[3:0]), + .ahb_hsize(dma_hsize[2:0]), + .ahb_htrans(dma_htrans[1:0]), + .ahb_hwrite(dma_hwrite), + .ahb_hwdata(dma_hwdata[63:0]), + + .ahb_hrdata(dma_hrdata[63:0]), + .ahb_hreadyout(dma_hreadyout), + .ahb_hresp(dma_hresp), + .ahb_hreadyin(dma_hreadyin), + .ahb_hsel(dma_hsel), + .* + ); + +`endif + + +`ifdef RV_BUILD_AHB_LITE +`ifdef ASSERT_ON + property ahb_trxn_aligned; + @(posedge 
clk) disable iff(~rst_l) (lsu_htrans[1:0] != 2'b0) |-> ((lsu_hsize[2:0] == 3'h0) | + ((lsu_hsize[2:0] == 3'h1) & (lsu_haddr[0] == 1'b0)) | + ((lsu_hsize[2:0] == 3'h2) & (lsu_haddr[1:0] == 2'b0)) | + ((lsu_hsize[2:0] == 3'h3) & (lsu_haddr[2:0] == 3'b0))); + endproperty + assert_ahb_trxn_aligned: assert property (ahb_trxn_aligned) else + $display("Assertion ahb_trxn_aligned failed: lsu_htrans=2'h%h, lsu_hsize=3'h%h, lsu_haddr=32'h%h",lsu_htrans[1:0], lsu_hsize[2:0], lsu_haddr[31:0]); + + property dma_trxn_aligned; + @(posedge clk) disable iff(~rst_l) (dma_htrans[1:0] != 2'b0) |-> ((dma_hsize[2:0] == 3'h0) | + ((dma_hsize[2:0] == 3'h1) & (dma_haddr[0] == 1'b0)) | + ((dma_hsize[2:0] == 3'h2) & (dma_haddr[1:0] == 2'b0)) | + ((dma_hsize[2:0] == 3'h3) & (dma_haddr[2:0] == 3'b0))); + endproperty + //assert_dma_trxn_aligned: assert property (dma_trxn_aligned) else + // $display("Assertion dma_trxn_aligned failed: dma_htrans=2'h%h, dma_hsize=3'h%h, dma_haddr=32'h%h",dma_htrans[1:0], dma_hsize[2:0], dma_haddr[31:0]); + +`endif +`endif + + + // unpack packet + // also need retires_p==3 + + assign trace_rv_i_insn_ip[63:0] = trace_rv_trace_pkt.trace_rv_i_insn_ip[63:0]; + + assign trace_rv_i_address_ip[63:0] = trace_rv_trace_pkt.trace_rv_i_address_ip[63:0]; + + assign trace_rv_i_valid_ip[2:0] = trace_rv_trace_pkt.trace_rv_i_valid_ip[2:0]; + + assign trace_rv_i_exception_ip[2:0] = trace_rv_trace_pkt.trace_rv_i_exception_ip[2:0]; + + assign trace_rv_i_ecause_ip[4:0] = trace_rv_trace_pkt.trace_rv_i_ecause_ip[4:0]; + + assign trace_rv_i_interrupt_ip[2:0] = trace_rv_trace_pkt.trace_rv_i_interrupt_ip[2:0]; + + assign trace_rv_i_tval_ip[31:0] = trace_rv_trace_pkt.trace_rv_i_tval_ip[31:0]; + + + // constants should be hooked up at platform level + // trace_rv_i_context_ip = '0; + // trace_rv_i_privilege_ip = {3{4'b0011}}; + // trace_rv_i_status_ip = '0; + // trace_rv_i_user_ip = '0; + + // trace_rv_halted_ip = o_cpu_halt_status; hook this up at platform level + + + + + +endmodule // 
swerv + diff --git a/design/swerv_wrapper.sv b/design/swerv_wrapper.sv new file mode 100644 index 0000000..351cf79 --- /dev/null +++ b/design/swerv_wrapper.sv @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// Function: Top wrapper file with swerv/mem instantiated inside +// Comments: Declares the DCCM/I-cache/ICCM signals as internal logic and instantiates swerv and mem with .* implicit port connections; mem is reset by core_rst_l +// +//******************************************************************************** +`include "build.h" +//`include "def.sv" +module swerv_wrapper + import swerv_types::*; +( + input logic clk, + input logic rst_l, + input logic [31:1] rst_vec, + input logic nmi_int, + input logic [31:1] nmi_vec, + input logic [31:1] jtag_id, + + + output logic [63:0] trace_rv_i_insn_ip, + output logic [63:0] trace_rv_i_address_ip, + output logic [2:0] trace_rv_i_valid_ip, + output logic [2:0] trace_rv_i_exception_ip, + output logic [4:0] trace_rv_i_ecause_ip, + output logic [2:0] trace_rv_i_interrupt_ip, + output logic [31:0] trace_rv_i_tval_ip, + + // Bus signals (AXI4 or AHB-Lite port set, selected by RV_BUILD_AXI4 / RV_BUILD_AHB_LITE) + +`ifdef RV_BUILD_AXI4 + //-------------------------- LSU AXI signals-------------------------- + // AXI Write Channels + output logic lsu_axi_awvalid, + input logic lsu_axi_awready, + output logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_awid, + output logic [31:0] lsu_axi_awaddr, + output logic [3:0]
lsu_axi_awregion, + output logic [7:0] lsu_axi_awlen, + output logic [2:0] lsu_axi_awsize, + output logic [1:0] lsu_axi_awburst, + output logic lsu_axi_awlock, + output logic [3:0] lsu_axi_awcache, + output logic [2:0] lsu_axi_awprot, + output logic [3:0] lsu_axi_awqos, + + output logic lsu_axi_wvalid, + input logic lsu_axi_wready, + output logic [63:0] lsu_axi_wdata, + output logic [7:0] lsu_axi_wstrb, + output logic lsu_axi_wlast, + + input logic lsu_axi_bvalid, + output logic lsu_axi_bready, + input logic [1:0] lsu_axi_bresp, + input logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_bid, + + // AXI Read Channels + output logic lsu_axi_arvalid, + input logic lsu_axi_arready, + output logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_arid, + output logic [31:0] lsu_axi_araddr, + output logic [3:0] lsu_axi_arregion, + output logic [7:0] lsu_axi_arlen, + output logic [2:0] lsu_axi_arsize, + output logic [1:0] lsu_axi_arburst, + output logic lsu_axi_arlock, + output logic [3:0] lsu_axi_arcache, + output logic [2:0] lsu_axi_arprot, + output logic [3:0] lsu_axi_arqos, + + input logic lsu_axi_rvalid, + output logic lsu_axi_rready, + input logic [`RV_LSU_BUS_TAG-1:0] lsu_axi_rid, + input logic [63:0] lsu_axi_rdata, + input logic [1:0] lsu_axi_rresp, + input logic lsu_axi_rlast, + + //-------------------------- IFU AXI signals-------------------------- + // AXI Write Channels + output logic ifu_axi_awvalid, + input logic ifu_axi_awready, + output logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_awid, + output logic [31:0] ifu_axi_awaddr, + output logic [3:0] ifu_axi_awregion, + output logic [7:0] ifu_axi_awlen, + output logic [2:0] ifu_axi_awsize, + output logic [1:0] ifu_axi_awburst, + output logic ifu_axi_awlock, + output logic [3:0] ifu_axi_awcache, + output logic [2:0] ifu_axi_awprot, + output logic [3:0] ifu_axi_awqos, + + output logic ifu_axi_wvalid, + input logic ifu_axi_wready, + output logic [63:0] ifu_axi_wdata, + output logic [7:0] ifu_axi_wstrb, + output logic ifu_axi_wlast, + + input logic
ifu_axi_bvalid, + output logic ifu_axi_bready, + input logic [1:0] ifu_axi_bresp, + input logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_bid, + + // AXI Read Channels + output logic ifu_axi_arvalid, + input logic ifu_axi_arready, + output logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_arid, + output logic [31:0] ifu_axi_araddr, + output logic [3:0] ifu_axi_arregion, + output logic [7:0] ifu_axi_arlen, + output logic [2:0] ifu_axi_arsize, + output logic [1:0] ifu_axi_arburst, + output logic ifu_axi_arlock, + output logic [3:0] ifu_axi_arcache, + output logic [2:0] ifu_axi_arprot, + output logic [3:0] ifu_axi_arqos, + + input logic ifu_axi_rvalid, + output logic ifu_axi_rready, + input logic [`RV_IFU_BUS_TAG-1:0] ifu_axi_rid, + input logic [63:0] ifu_axi_rdata, + input logic [1:0] ifu_axi_rresp, + input logic ifu_axi_rlast, + + //-------------------------- SB AXI signals-------------------------- + // AXI Write Channels + output logic sb_axi_awvalid, + input logic sb_axi_awready, + output logic [`RV_SB_BUS_TAG-1:0] sb_axi_awid, + output logic [31:0] sb_axi_awaddr, + output logic [3:0] sb_axi_awregion, + output logic [7:0] sb_axi_awlen, + output logic [2:0] sb_axi_awsize, + output logic [1:0] sb_axi_awburst, + output logic sb_axi_awlock, + output logic [3:0] sb_axi_awcache, + output logic [2:0] sb_axi_awprot, + output logic [3:0] sb_axi_awqos, + + output logic sb_axi_wvalid, + input logic sb_axi_wready, + output logic [63:0] sb_axi_wdata, + output logic [7:0] sb_axi_wstrb, + output logic sb_axi_wlast, + + input logic sb_axi_bvalid, + output logic sb_axi_bready, + input logic [1:0] sb_axi_bresp, + input logic [`RV_SB_BUS_TAG-1:0] sb_axi_bid, + + // AXI Read Channels + output logic sb_axi_arvalid, + input logic sb_axi_arready, + output logic [`RV_SB_BUS_TAG-1:0] sb_axi_arid, + output logic [31:0] sb_axi_araddr, + output logic [3:0] sb_axi_arregion, + output logic [7:0] sb_axi_arlen, + output logic [2:0] sb_axi_arsize, + output logic [1:0] sb_axi_arburst, + output logic sb_axi_arlock, +
output logic [3:0] sb_axi_arcache, + output logic [2:0] sb_axi_arprot, + output logic [3:0] sb_axi_arqos, + + input logic sb_axi_rvalid, + output logic sb_axi_rready, + input logic [`RV_SB_BUS_TAG-1:0] sb_axi_rid, + input logic [63:0] sb_axi_rdata, + input logic [1:0] sb_axi_rresp, + input logic sb_axi_rlast, + + //-------------------------- DMA AXI signals-------------------------- + // AXI Write Channels + input logic dma_axi_awvalid, + output logic dma_axi_awready, + input logic [`RV_DMA_BUS_TAG-1:0] dma_axi_awid, + input logic [31:0] dma_axi_awaddr, + input logic [2:0] dma_axi_awsize, + input logic [2:0] dma_axi_awprot, + input logic [7:0] dma_axi_awlen, + input logic [1:0] dma_axi_awburst, + + + input logic dma_axi_wvalid, + output logic dma_axi_wready, + input logic [63:0] dma_axi_wdata, + input logic [7:0] dma_axi_wstrb, + input logic dma_axi_wlast, + + output logic dma_axi_bvalid, + input logic dma_axi_bready, + output logic [1:0] dma_axi_bresp, + output logic [`RV_DMA_BUS_TAG-1:0] dma_axi_bid, + + // AXI Read Channels + input logic dma_axi_arvalid, + output logic dma_axi_arready, + input logic [`RV_DMA_BUS_TAG-1:0] dma_axi_arid, + input logic [31:0] dma_axi_araddr, + input logic [2:0] dma_axi_arsize, + input logic [2:0] dma_axi_arprot, + input logic [7:0] dma_axi_arlen, + input logic [1:0] dma_axi_arburst, + + output logic dma_axi_rvalid, + input logic dma_axi_rready, + output logic [`RV_DMA_BUS_TAG-1:0] dma_axi_rid, + output logic [63:0] dma_axi_rdata, + output logic [1:0] dma_axi_rresp, + output logic dma_axi_rlast, + +`endif + +`ifdef RV_BUILD_AHB_LITE + //// AHB LITE BUS (unprefixed h* ports; swerv connects these names to its IFU AXI4-to-AHB gasket) + output logic [31:0] haddr, + output logic [2:0] hburst, + output logic hmastlock, + output logic [3:0] hprot, + output logic [2:0] hsize, + output logic [1:0] htrans, + output logic hwrite, + + input logic [63:0] hrdata, + input logic hready, + input logic hresp, + + // LSU AHB Master + output logic [31:0] lsu_haddr, + output logic [2:0] lsu_hburst, + output logic lsu_hmastlock, +
output logic [3:0] lsu_hprot, + output logic [2:0] lsu_hsize, + output logic [1:0] lsu_htrans, + output logic lsu_hwrite, + output logic [63:0] lsu_hwdata, + + input logic [63:0] lsu_hrdata, + input logic lsu_hready, + input logic lsu_hresp, + // Debug System Bus AHB + output logic [31:0] sb_haddr, + output logic [2:0] sb_hburst, + output logic sb_hmastlock, + output logic [3:0] sb_hprot, + output logic [2:0] sb_hsize, + output logic [1:0] sb_htrans, + output logic sb_hwrite, + output logic [63:0] sb_hwdata, + + input logic [63:0] sb_hrdata, + input logic sb_hready, + input logic sb_hresp, + + // DMA Slave + input logic [31:0] dma_haddr, + input logic [2:0] dma_hburst, + input logic dma_hmastlock, + input logic [3:0] dma_hprot, + input logic [2:0] dma_hsize, + input logic [1:0] dma_htrans, + input logic dma_hwrite, + input logic [63:0] dma_hwdata, + input logic dma_hsel, + input logic dma_hreadyin, + + output logic [63:0] dma_hrdata, + output logic dma_hreadyout, + output logic dma_hresp, + +`endif + + + // clk ratio signals (declared outside the bus ifdefs, so present in both AXI4 and AHB-Lite builds) + input logic lsu_bus_clk_en, // Clock ratio b/w cpu core clk & LSU bus master interface + input logic ifu_bus_clk_en, // Clock ratio b/w cpu core clk & IFU bus master interface + input logic dbg_bus_clk_en, // Clock ratio b/w cpu core clk & debug system bus master interface + input logic dma_bus_clk_en, // Clock ratio b/w cpu core clk & DMA bus slave interface + + +// input logic ext_int, + input logic timer_int, + input logic [`RV_PIC_TOTAL_INT:1] extintsrc_req, + + output logic [1:0] dec_tlu_perfcnt0, // toggles when perf counter 0 has an event inc + output logic [1:0] dec_tlu_perfcnt1, + output logic [1:0] dec_tlu_perfcnt2, + output logic [1:0] dec_tlu_perfcnt3, + + // ports added by the soc team + input logic jtag_tck, // JTAG clk + input logic jtag_tms, // JTAG TMS + input logic jtag_tdi, // JTAG tdi + input logic jtag_trst_n, // JTAG Reset + output logic jtag_tdo, // JTAG TDO + // external MPC halt/run interface + input logic mpc_debug_halt_req, // Async halt
request + input logic mpc_debug_run_req, // Async run request + input logic mpc_reset_run_req, // Run/halt after reset + output logic mpc_debug_halt_ack, // Halt ack + output logic mpc_debug_run_ack, // Run ack + output logic debug_brkpt_status, // debug breakpoint + + input logic i_cpu_halt_req, // Async halt req to CPU + output logic o_cpu_halt_ack, // core response to halt + output logic o_cpu_halt_status, // 1'b1 indicates core is halted + output logic o_debug_mode_status, // Indicates to the PMU that the core is in debug mode. When core is in debug mode, the PMU should refrain from sending a halt or run request + input logic i_cpu_run_req, // Async restart req to CPU + output logic o_cpu_run_ack, // Core response to run req + input logic scan_mode, // To enable scan mode + input logic mbist_mode // to enable mbist +); + +`include "global.h" + + // DCCM ports (internal signals of the wrapper, not module ports) + logic dccm_wren; + logic dccm_rden; + logic [DCCM_BITS-1:0] dccm_wr_addr; + logic [DCCM_BITS-1:0] dccm_rd_addr_lo; + logic [DCCM_BITS-1:0] dccm_rd_addr_hi; + logic [DCCM_FDATA_WIDTH-1:0] dccm_wr_data; + + logic [DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo; + logic [DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi; + + logic lsu_freeze_dc3; + + // PIC ports (none declared here) + + // Icache & Itag ports + logic [31:3] ic_rw_addr; + logic [3:0] ic_wr_en ; // Which way to write + logic ic_rd_en ; + + + logic [3:0] ic_tag_valid; // Valid from the I$ tag valid outside (in flops). + + logic [3:0] ic_rd_hit; // ic_rd_hit[3:0] + logic ic_tag_perr; // Ic tag parity error + + logic [15:2] ic_debug_addr; // Read/Write address to the Icache. + logic ic_debug_rd_en; // Icache debug rd + logic ic_debug_wr_en; // Icache debug wr + logic ic_debug_tag_array; // Debug tag array + logic [3:0] ic_debug_way; // Debug way. Rd or Wr. + +`ifdef RV_ICACHE_ECC + logic [24:0] ictag_debug_rd_data;// Debug icache tag. + logic [83:0] ic_wr_data; // ic_wr_data[83:0] when RV_ICACHE_ECC is defined + logic [167:0] ic_rd_data; // ic_rd_data[167:0] when RV_ICACHE_ECC is defined + logic [41:0] ic_debug_wr_data; // Debug wr cache.
+`else + logic [20:0] ictag_debug_rd_data;// Debug icache tag. + logic [67:0] ic_wr_data; // ic_wr_data[67:0] when RV_ICACHE_ECC is not defined + logic [135:0] ic_rd_data; // ic_rd_data[135:0] + logic [33:0] ic_debug_wr_data; // Debug wr cache. +`endif + + logic [127:0] ic_premux_data; + logic ic_sel_premux_data; + +`ifdef RV_ICCM_ENABLE + // ICCM ports + logic [`RV_ICCM_BITS-1:2] iccm_rw_addr; + logic iccm_wren; + logic iccm_rden; + logic [2:0] iccm_wr_size; + logic [77:0] iccm_wr_data; + logic [155:0] iccm_rd_data; +`endif + + logic core_rst_l; // Core reset including rst_l and dbg_rst_l + logic jtag_tdoEn; + + logic dccm_clk_override; + logic icm_clk_override; + logic dec_tlu_core_ecc_disable; + + // Instantiate the swerv core; every port is bound by name through .* + swerv swerv ( + .* + ); + + // Instantiate the mem; its rst_l is driven by core_rst_l instead of the wrapper's rst_l input, all other ports are bound by name through .* + mem mem ( + .rst_l(core_rst_l), + .* + ); + + +endmodule + diff --git a/testbench/ahb_sif.sv b/testbench/ahb_sif.sv new file mode 100644 index 0000000..e0a3095 --- /dev/null +++ b/testbench/ahb_sif.sv @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// + +module ahb_sif ( + input logic [63:0] HWDATA, + input logic HCLK, + input logic HSEL, + input logic [3:0] HPROT, + input logic HWRITE, + input logic [1:0] HTRANS, + input logic [2:0] HSIZE, + input logic HREADY, + input logic HRESETn, + input logic [31:0] HADDR, + input logic [2:0] HBURST, + + output logic HREADYOUT, + output logic HRESP, + output logic [63:0] HRDATA + +); + +localparam MEM_SIZE_DW = 8192; +localparam MAILBOX_ADDR = 32'hD0580000; + +logic Last_HSEL; +logic NextLast_HSEL; +logic Last_HWRITE; +logic [1:0] Last_HTRANS; +logic [1:0] NextLast_HTRANS; +logic [31:0] Last_HADDR; +logic [63:0] Next_HRDATA; +logic [63:0] WriteReadData; +logic [63:0] WriteMask; + +bit [7:0] mem [0:MEM_SIZE_DW-1]; + + +// Wires. Next_WriteMask enables the byte lanes of the 64-bit bus selected by HADDR for the given HSIZE (byte: HADDR[2:0]*8, halfword: HADDR[2:1]*16, word: HADDR[2]*32, anything wider: all lanes). +wire [63:0] Next_WriteMask = HSIZE == 3'b000 ? (64'hff << {HADDR[2:0], 3'b000}) : (HSIZE == 3'b001 ? (64'hffff << {HADDR[2:1], 4'h0}) : (HSIZE == 3'b010 ? (64'hffff_ffff << {HADDR[2],5'h0}) : 64'hffff_ffff_ffff_ffff)); + +wire [63:0] MaskedWriteData = HWDATA & WriteMask; +wire [63:0] MaskedWriteReadData = WriteReadData & ~WriteMask; +wire [63:0] WriteData = (MaskedWriteData | MaskedWriteReadData ); +wire Write = &{Last_HSEL, Last_HWRITE, Last_HTRANS[1]}; +wire Read = &{ HSEL, ~HWRITE, HTRANS[1]}; + +wire mailbox_write = &{Write, Last_HADDR==MAILBOX_ADDR, HRESETn==1}; +wire Next_HWRITE = |{HTRANS} ?
HWRITE : Last_HWRITE; +wire [63:0] mem_dout = {mem[{Last_HADDR[12:3],3'b0}+7],mem[{Last_HADDR[12:3],3'b0}+6],mem[{Last_HADDR[12:3],3'b0}+5],mem[{Last_HADDR[12:3],3'b0}+4],mem[{Last_HADDR[12:3],3'b0}+3],mem[{Last_HADDR[12:3],3'b0}+2],mem[{Last_HADDR[12:3],3'b0}+1],mem[{Last_HADDR[12:3],3'b0}]}; + + +always @ (posedge HCLK or negedge HRESETn) begin + if (Write && Last_HADDR == 32'h0) begin + mem[{Last_HADDR[12:3],3'b0}+7] <= #1 { WriteData[63:56] }; + mem[{Last_HADDR[12:3],3'b0}+6] <= #1 { WriteData[55:48] }; + mem[{Last_HADDR[12:3],3'b0}+5] <= #1 { WriteData[47:40] }; + mem[{Last_HADDR[12:3],3'b0}+4] <= #1 { WriteData[39:32] }; + mem[{Last_HADDR[12:3],3'b0}+3] <= #1 { WriteData[31:24] }; + mem[{Last_HADDR[12:3],3'b0}+2] <= #1 { WriteData[23:16] }; + mem[{Last_HADDR[12:3],3'b0}+1] <= #1 { WriteData[15:08] }; + mem[{Last_HADDR[12:3],3'b0}+0] <= #1 { WriteData[07:00] }; + end +end + +always @(posedge HCLK or negedge HRESETn) begin + if(~HRESETn) begin + HREADYOUT <= #1 1'b0 ; + HRESP <= #1 1'b0; + end else begin + HREADYOUT <= #1 |HTRANS; + HRESP <= #1 1'b0; + WriteMask <= #1 Next_WriteMask; + end +end + +`ifdef VERILATOR +always @(posedge HCLK or negedge HRESETn) begin +`else +always @(negedge HCLK or negedge HRESETn) begin +`endif + if(~HRESETn) begin + Last_HADDR <= #1 32'b0; + end else begin + Last_HADDR <= #1 |{HTRANS} ? 
{HADDR[31:2], 2'b00} : Last_HADDR; + end +end + +always @(posedge HCLK or negedge HRESETn) begin + if(~HRESETn) begin + Last_HWRITE <= #1 1'b0; + end else begin + Last_HWRITE <= #1 Next_HWRITE; + end +end + +always @(posedge HCLK or negedge HRESETn) begin + if(~HRESETn) begin + Last_HTRANS <= #1 2'b0; + end else begin + Last_HTRANS <= #1 HTRANS; + end +end + +always @(posedge HCLK or negedge HRESETn) begin + if(~HRESETn) begin + Last_HSEL <= #1 1'b0; + end else begin + Last_HSEL <= #1 HSEL; + end +end + + +`ifndef VERILATOR + +always @(posedge HCLK or negedge HRESETn) begin + if(~HRESETn) begin + HRDATA <= #1 Next_HRDATA ; + end else begin + HRDATA <= #1 Next_HRDATA ; + end +end + +always @* begin + Next_HRDATA = mem_dout; +end + +`else + +always @(posedge HCLK) begin + Next_HRDATA <= mem_dout; +end + +assign HRDATA = mem_dout; + +`endif + + +always @* begin + if(Last_HSEL) begin + WriteReadData[07:00] = mem[{Last_HADDR[12:3],3'b0}]; + WriteReadData[15:08] = mem[{Last_HADDR[12:3],3'b0}+1]; + WriteReadData[23:16] = mem[{Last_HADDR[12:3],3'b0}+2]; + WriteReadData[31:24] = mem[{Last_HADDR[12:3],3'b0}+3]; + WriteReadData[39:32] = mem[{Last_HADDR[12:3],3'b0}+4]; + WriteReadData[47:40] = mem[{Last_HADDR[12:3],3'b0}+5]; + WriteReadData[55:48] = mem[{Last_HADDR[12:3],3'b0}+6]; + WriteReadData[63:56] = mem[{Last_HADDR[12:3],3'b0}+7]; + end +end + + +endmodule diff --git a/testbench/asm/hello_world.s b/testbench/asm/hello_world.s new file mode 100644 index 0000000..d5eae52 --- /dev/null +++ b/testbench/asm/hello_world.s @@ -0,0 +1,67 @@ + + +.global _start +_start: + csrrw x2, 0xb02, x3 + + + lui x5, 974848 + ori x5, x5, 0 + csrrw x2, 0x305, x5 + + + lui x6, 382293 + ori x6, x6, 1365 + csrrw x1, 0x7c0, x6 + + + + + lui x5, 0 + ori x5, x5, 0 + csrrw x2, 0x7f8, x5 + + + + + lui x5, 0 + ori x5, x5, 0 + csrrw x2, 0x7f9, x5 + + + addi x0, x0, 0 + lui x11, 853376 + ori x9, x0, 'H' + sw x9, 0 (x11) + ori x9, x0, 'E' + sw x9, 0 (x11) + ori x9, x0, 'L' + sw x9, 0 (x11) + sw x9, 0 
(x11) + ori x9, x0, 'O' + sw x9, 0 (x11) + ori x9, x0, ' ' + sw x9, 0 (x11) + addi x9, x0, 'W' + sw x9, 0 (x11) + ori x9, x0, 'O' + sw x9, 0 (x11) + ori x9, x0, 'R' + sw x9, 0 (x11) + ori x9, x0, 'L' + sw x9, 0 (x11) + ori x9, x0, 'D' + sw x9, 0 (x11) + ori x9, x0, '!' + sw x9, 0 (x11) + ori x9, x0, 255 + sw x9, 0 (x11) + addi x1,x0,0 + +finish: + addi x1,x1,1 + jal x0, finish; + addi x0,x0,0 + addi x0,x0,0 + addi x0,x0,0 + addi x0,x0,0 diff --git a/testbench/asm/hello_world2.s b/testbench/asm/hello_world2.s new file mode 100644 index 0000000..0526579 --- /dev/null +++ b/testbench/asm/hello_world2.s @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Assembly code for Hello World +// Not using only ALU ops for creating the string + + +#include "defines.h" + +#define STDOUT 0xd0580000 + + +// Code to execute +.section .text +.global _start +_start: + + // Clear minstret + csrw minstret, zero + csrw minstreth, zero + + // Set up MTVEC - not expecting to use it though + li x1, RV_ICCM_SADR + csrw mtvec, x1 + + + // Enable Caches in MRAC + li x1, 0x55555555 + csrw 0x7c0, x1 + + // Load string from hw_data + // and write to stdout address + + li x3, STDOUT + la x4, hw_data + +loop: + lb x5, 0(x4) + sb x5, 0(x3) + addi x4, x4, 1 + bnez x5, loop + +// Write 0xff to STDOUT for TB to termiate test. 
+_finish: + li x3, STDOUT + addi x5, x0, 0xff + sb x5, 0(x3) + beq x0, x0, _finish +.rept 100 + nop +.endr + +.data +hw_data: +.ascii "------------------------------------\n" +.ascii "Hello World from SweRV EH1.1 @WDC !!\n" +.ascii "------------------------------------" +.byte 0 diff --git a/testbench/flist.spyglass b/testbench/flist.spyglass new file mode 100644 index 0000000..8f8aa3d --- /dev/null +++ b/testbench/flist.spyglass @@ -0,0 +1,42 @@ +$RV_ROOT/design/swerv_wrapper.sv +$RV_ROOT/design/mem.sv +$RV_ROOT/design/pic_ctrl.sv +$RV_ROOT/design/swerv.sv +$RV_ROOT/design/dma_ctrl.sv +$RV_ROOT/design/ifu/ifu_aln_ctl.sv +$RV_ROOT/design/ifu/ifu_compress_ctl.sv +$RV_ROOT/design/ifu/ifu_ifc_ctl.sv +$RV_ROOT/design/ifu/ifu_bp_ctl.sv +$RV_ROOT/design/ifu/ifu_ic_mem.sv +$RV_ROOT/design/ifu/ifu_mem_ctl.sv +$RV_ROOT/design/ifu/ifu_iccm_mem.sv +$RV_ROOT/design/ifu/ifu.sv +$RV_ROOT/design/dec/dec_decode_ctl.sv +$RV_ROOT/design/dec/dec_gpr_ctl.sv +$RV_ROOT/design/dec/dec_ib_ctl.sv +$RV_ROOT/design/dec/dec_tlu_ctl.sv +$RV_ROOT/design/dec/dec_trigger.sv +$RV_ROOT/design/dec/dec.sv +$RV_ROOT/design/exu/exu_alu_ctl.sv +$RV_ROOT/design/exu/exu_mul_ctl.sv +$RV_ROOT/design/exu/exu_div_ctl.sv +$RV_ROOT/design/exu/exu.sv +$RV_ROOT/design/lsu/lsu.sv +$RV_ROOT/design/lsu/lsu_clkdomain.sv +$RV_ROOT/design/lsu/lsu_addrcheck.sv +$RV_ROOT/design/lsu/lsu_lsc_ctl.sv +$RV_ROOT/design/lsu/lsu_stbuf.sv +$RV_ROOT/design/lsu/lsu_bus_buffer.sv +$RV_ROOT/design/lsu/lsu_bus_intf.sv +$RV_ROOT/design/lsu/lsu_ecc.sv +$RV_ROOT/design/lsu/lsu_dccm_mem.sv +$RV_ROOT/design/lsu/lsu_dccm_ctl.sv +$RV_ROOT/design/lsu/lsu_trigger.sv +$RV_ROOT/design/dbg/dbg.sv +$RV_ROOT/design/dmi/dmi_wrapper.v +$RV_ROOT/design/dmi/dmi_jtag_to_core_sync.v +$RV_ROOT/design/dmi/rvjtag_tap.sv +$RV_ROOT/design/lib/beh_lib.sv +$RV_ROOT/design/lib/mem_lib.sv +$RV_ROOT/design/lib/ahb_to_axi4.sv +$RV_ROOT/design/lib/axi4_to_ahb.sv diff --git a/testbench/flist.vcs b/testbench/flist.vcs new file mode 100644 index 0000000..8f8aa3d --- 
/dev/null +++ b/testbench/flist.vcs @@ -0,0 +1,42 @@ +$RV_ROOT/design/swerv_wrapper.sv +$RV_ROOT/design/mem.sv +$RV_ROOT/design/pic_ctrl.sv +$RV_ROOT/design/swerv.sv +$RV_ROOT/design/dma_ctrl.sv +$RV_ROOT/design/ifu/ifu_aln_ctl.sv +$RV_ROOT/design/ifu/ifu_compress_ctl.sv +$RV_ROOT/design/ifu/ifu_ifc_ctl.sv +$RV_ROOT/design/ifu/ifu_bp_ctl.sv +$RV_ROOT/design/ifu/ifu_ic_mem.sv +$RV_ROOT/design/ifu/ifu_mem_ctl.sv +$RV_ROOT/design/ifu/ifu_iccm_mem.sv +$RV_ROOT/design/ifu/ifu.sv +$RV_ROOT/design/dec/dec_decode_ctl.sv +$RV_ROOT/design/dec/dec_gpr_ctl.sv +$RV_ROOT/design/dec/dec_ib_ctl.sv +$RV_ROOT/design/dec/dec_tlu_ctl.sv +$RV_ROOT/design/dec/dec_trigger.sv +$RV_ROOT/design/dec/dec.sv +$RV_ROOT/design/exu/exu_alu_ctl.sv +$RV_ROOT/design/exu/exu_mul_ctl.sv +$RV_ROOT/design/exu/exu_div_ctl.sv +$RV_ROOT/design/exu/exu.sv +$RV_ROOT/design/lsu/lsu.sv +$RV_ROOT/design/lsu/lsu_clkdomain.sv +$RV_ROOT/design/lsu/lsu_addrcheck.sv +$RV_ROOT/design/lsu/lsu_lsc_ctl.sv +$RV_ROOT/design/lsu/lsu_stbuf.sv +$RV_ROOT/design/lsu/lsu_bus_buffer.sv +$RV_ROOT/design/lsu/lsu_bus_intf.sv +$RV_ROOT/design/lsu/lsu_ecc.sv +$RV_ROOT/design/lsu/lsu_dccm_mem.sv +$RV_ROOT/design/lsu/lsu_dccm_ctl.sv +$RV_ROOT/design/lsu/lsu_trigger.sv +$RV_ROOT/design/dbg/dbg.sv +$RV_ROOT/design/dmi/dmi_wrapper.v +$RV_ROOT/design/dmi/dmi_jtag_to_core_sync.v +$RV_ROOT/design/dmi/rvjtag_tap.sv +$RV_ROOT/design/lib/beh_lib.sv +$RV_ROOT/design/lib/mem_lib.sv +$RV_ROOT/design/lib/ahb_to_axi4.sv +$RV_ROOT/design/lib/axi4_to_ahb.sv diff --git a/testbench/flist.verilator b/testbench/flist.verilator new file mode 100644 index 0000000..8f8aa3d --- /dev/null +++ b/testbench/flist.verilator @@ -0,0 +1,42 @@ +$RV_ROOT/design/swerv_wrapper.sv +$RV_ROOT/design/mem.sv +$RV_ROOT/design/pic_ctrl.sv +$RV_ROOT/design/swerv.sv +$RV_ROOT/design/dma_ctrl.sv +$RV_ROOT/design/ifu/ifu_aln_ctl.sv +$RV_ROOT/design/ifu/ifu_compress_ctl.sv +$RV_ROOT/design/ifu/ifu_ifc_ctl.sv +$RV_ROOT/design/ifu/ifu_bp_ctl.sv +$RV_ROOT/design/ifu/ifu_ic_mem.sv 
+$RV_ROOT/design/ifu/ifu_mem_ctl.sv +$RV_ROOT/design/ifu/ifu_iccm_mem.sv +$RV_ROOT/design/ifu/ifu.sv +$RV_ROOT/design/dec/dec_decode_ctl.sv +$RV_ROOT/design/dec/dec_gpr_ctl.sv +$RV_ROOT/design/dec/dec_ib_ctl.sv +$RV_ROOT/design/dec/dec_tlu_ctl.sv +$RV_ROOT/design/dec/dec_trigger.sv +$RV_ROOT/design/dec/dec.sv +$RV_ROOT/design/exu/exu_alu_ctl.sv +$RV_ROOT/design/exu/exu_mul_ctl.sv +$RV_ROOT/design/exu/exu_div_ctl.sv +$RV_ROOT/design/exu/exu.sv +$RV_ROOT/design/lsu/lsu.sv +$RV_ROOT/design/lsu/lsu_clkdomain.sv +$RV_ROOT/design/lsu/lsu_addrcheck.sv +$RV_ROOT/design/lsu/lsu_lsc_ctl.sv +$RV_ROOT/design/lsu/lsu_stbuf.sv +$RV_ROOT/design/lsu/lsu_bus_buffer.sv +$RV_ROOT/design/lsu/lsu_bus_intf.sv +$RV_ROOT/design/lsu/lsu_ecc.sv +$RV_ROOT/design/lsu/lsu_dccm_mem.sv +$RV_ROOT/design/lsu/lsu_dccm_ctl.sv +$RV_ROOT/design/lsu/lsu_trigger.sv +$RV_ROOT/design/dbg/dbg.sv +$RV_ROOT/design/dmi/dmi_wrapper.v +$RV_ROOT/design/dmi/dmi_jtag_to_core_sync.v +$RV_ROOT/design/dmi/rvjtag_tap.sv +$RV_ROOT/design/lib/beh_lib.sv +$RV_ROOT/design/lib/mem_lib.sv +$RV_ROOT/design/lib/ahb_to_axi4.sv +$RV_ROOT/design/lib/axi4_to_ahb.sv diff --git a/testbench/flist.vlog b/testbench/flist.vlog new file mode 100644 index 0000000..8f8aa3d --- /dev/null +++ b/testbench/flist.vlog @@ -0,0 +1,42 @@ +$RV_ROOT/design/swerv_wrapper.sv +$RV_ROOT/design/mem.sv +$RV_ROOT/design/pic_ctrl.sv +$RV_ROOT/design/swerv.sv +$RV_ROOT/design/dma_ctrl.sv +$RV_ROOT/design/ifu/ifu_aln_ctl.sv +$RV_ROOT/design/ifu/ifu_compress_ctl.sv +$RV_ROOT/design/ifu/ifu_ifc_ctl.sv +$RV_ROOT/design/ifu/ifu_bp_ctl.sv +$RV_ROOT/design/ifu/ifu_ic_mem.sv +$RV_ROOT/design/ifu/ifu_mem_ctl.sv +$RV_ROOT/design/ifu/ifu_iccm_mem.sv +$RV_ROOT/design/ifu/ifu.sv +$RV_ROOT/design/dec/dec_decode_ctl.sv +$RV_ROOT/design/dec/dec_gpr_ctl.sv +$RV_ROOT/design/dec/dec_ib_ctl.sv +$RV_ROOT/design/dec/dec_tlu_ctl.sv +$RV_ROOT/design/dec/dec_trigger.sv +$RV_ROOT/design/dec/dec.sv +$RV_ROOT/design/exu/exu_alu_ctl.sv +$RV_ROOT/design/exu/exu_mul_ctl.sv 
+$RV_ROOT/design/exu/exu_div_ctl.sv +$RV_ROOT/design/exu/exu.sv +$RV_ROOT/design/lsu/lsu.sv +$RV_ROOT/design/lsu/lsu_clkdomain.sv +$RV_ROOT/design/lsu/lsu_addrcheck.sv +$RV_ROOT/design/lsu/lsu_lsc_ctl.sv +$RV_ROOT/design/lsu/lsu_stbuf.sv +$RV_ROOT/design/lsu/lsu_bus_buffer.sv +$RV_ROOT/design/lsu/lsu_bus_intf.sv +$RV_ROOT/design/lsu/lsu_ecc.sv +$RV_ROOT/design/lsu/lsu_dccm_mem.sv +$RV_ROOT/design/lsu/lsu_dccm_ctl.sv +$RV_ROOT/design/lsu/lsu_trigger.sv +$RV_ROOT/design/dbg/dbg.sv +$RV_ROOT/design/dmi/dmi_wrapper.v +$RV_ROOT/design/dmi/dmi_jtag_to_core_sync.v +$RV_ROOT/design/dmi/rvjtag_tap.sv +$RV_ROOT/design/lib/beh_lib.sv +$RV_ROOT/design/lib/mem_lib.sv +$RV_ROOT/design/lib/ahb_to_axi4.sv +$RV_ROOT/design/lib/axi4_to_ahb.sv diff --git a/testbench/hex/data.hex b/testbench/hex/data.hex new file mode 100755 index 0000000..6def292 --- /dev/null +++ b/testbench/hex/data.hex @@ -0,0 +1,7 @@ +@00001000 +2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D +2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 0A 48 +65 6C 6C 6F 20 57 6F 72 6C 64 20 66 72 6F 6D 20 +53 77 65 52 56 20 40 57 44 43 20 21 21 0A 2D 2D +2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D +2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 00 diff --git a/testbench/hex/program.hex b/testbench/hex/program.hex new file mode 100644 index 0000000..9a70923 --- /dev/null +++ b/testbench/hex/program.hex @@ -0,0 +1,6 @@ +@00000000 +73 10 20 B0 73 10 20 B8 B7 00 00 EE 73 90 50 30 +B7 50 55 55 93 80 50 55 73 90 00 7C B7 01 58 D0 +17 12 00 00 13 02 02 FE 83 02 02 00 23 80 51 00 +05 02 E3 9B 02 FE B7 01 58 D0 93 02 F0 0F 23 80 +51 00 E3 0A 00 FE diff --git a/testbench/input.tcl b/testbench/input.tcl new file mode 100644 index 0000000..3de45c5 --- /dev/null +++ b/testbench/input.tcl @@ -0,0 +1,4 @@ +database -open waves -into waves.shm -default +probe -create tb_top -depth all -database waves +run +exit diff --git a/testbench/link.ld b/testbench/link.ld new file mode 100644 index 0000000..ef1ee3a --- /dev/null +++ b/testbench/link.ld @@ -0,0 
+1,12 @@ + +OUTPUT_ARCH( "riscv" ) +ENTRY(_start) + +SECTIONS +{ + . = 0x1000; + .data . : { *(.*data) *(.rodata*) } + . = 0x0; + .text . : { *(.text) } + _end = .; +} diff --git a/testbench/tb_top.sv b/testbench/tb_top.sv new file mode 100644 index 0000000..77d3089 --- /dev/null +++ b/testbench/tb_top.sv @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +`ifndef VERILATOR +module tb_top; +`else +module tb_top ( input logic core_clk, input logic reset_l, output finished); +`endif + +`ifndef VERILATOR + logic reset_l; + logic core_clk; +`endif + logic nmi_int; + + logic [31:0] reset_vector; + logic [31:0] nmi_vector; + logic [31:1] jtag_id; + + logic [31:0] ic_haddr ; + logic [2:0] ic_hburst ; + logic ic_hmastlock ; + logic [3:0] ic_hprot ; + logic [2:0] ic_hsize ; + logic [1:0] ic_htrans ; + logic ic_hwrite ; + logic [63:0] ic_hrdata ; + logic ic_hready ; + logic ic_hresp ; + + logic [31:0] lsu_haddr ; + logic [2:0] lsu_hburst ; + logic lsu_hmastlock ; + logic [3:0] lsu_hprot ; + logic [2:0] lsu_hsize ; + logic [1:0] lsu_htrans ; + logic lsu_hwrite ; + logic [63:0] lsu_hrdata ; + logic [63:0] lsu_hwdata ; + logic lsu_hready ; + logic lsu_hresp ; + + logic [31:0] sb_haddr ; + logic [2:0] sb_hburst ; + logic sb_hmastlock ; + logic [3:0] sb_hprot ; + logic [2:0] sb_hsize ; + logic [1:0] sb_htrans ; + logic sb_hwrite ; + + logic [63:0] 
sb_hrdata ; + logic [63:0] sb_hwdata ; + logic sb_hready ; + logic sb_hresp ; + + logic [63:0] trace_rv_i_insn_ip; + logic [63:0] trace_rv_i_address_ip; + logic [2:0] trace_rv_i_valid_ip; + logic [2:0] trace_rv_i_exception_ip; + logic [4:0] trace_rv_i_ecause_ip; + logic [2:0] trace_rv_i_interrupt_ip; + logic [31:0] trace_rv_i_tval_ip; + + logic o_debug_mode_status; + logic [1:0] dec_tlu_perfcnt0; + logic [1:0] dec_tlu_perfcnt1; + logic [1:0] dec_tlu_perfcnt2; + logic [1:0] dec_tlu_perfcnt3; + + + logic jtag_tdo; + logic o_cpu_halt_ack; + logic o_cpu_halt_status; + logic o_cpu_run_ack; + + logic mailbox_write; + logic [63:0] dma_hrdata ; + logic [63:0] dma_hwdata ; + logic dma_hready ; + logic dma_hresp ; + + logic mpc_debug_halt_req; + logic mpc_debug_run_req; + logic mpc_reset_run_req; + logic mpc_debug_halt_ack; + logic mpc_debug_run_ack; + logic debug_brkpt_status; + + logic [31:0] cycleCnt ; + logic mailbox_data_val; + logic finished; + + wire dma_hready_out; + + + //assign mailbox_write = &{i_ahb_lsu.Write, i_ahb_lsu.Last_HADDR==32'hD0580000, i_ahb_lsu.HRESETn==1}; + assign mailbox_write = i_ahb_lsu.mailbox_write; + //assign mailbox_write = i_ahb_lsu.mailbox_write & !core_clk; + assign mailbox_data_val = (i_ahb_lsu.WriteData[7:0] > 8'h5) & (i_ahb_lsu.WriteData[7:0] < 8'h7f); + + assign finished = finished | &{i_ahb_lsu.mailbox_write, (i_ahb_lsu.WriteData[7:0] == 8'hff)}; + + assign jtag_id[31:28] = 4'b1; + assign jtag_id[27:12] = '0; + assign jtag_id[11:1] = 11'h45; + +`ifndef VERILATOR + `define FORCE force +`else + `define FORCE +`endif + + + integer fd; + initial begin + fd = $fopen("console.log","w"); + end + + integer tp; + + always @(posedge core_clk or negedge reset_l) begin + if( reset_l == 0) + cycleCnt <= 0; + else + cycleCnt <= cycleCnt+1; + end + + always @(posedge core_clk) begin + //if(cycleCnt == 32'h800) + if(cycleCnt == 32'h800) begin + $display ("Hit max cycle count.. 
stopping"); + $finish; + end + end + + +`ifdef VERILATOR + always @(negedge mailbox_write) +`else + always @(posedge mailbox_write) +`endif + if( mailbox_data_val ) begin + $fwrite(fd,"%c", i_ahb_lsu.WriteData[7:0]); + $write("%c", i_ahb_lsu.WriteData[7:0]); + end + + always @(posedge finished) begin + $display("\n\nFinished : minstret = %0d, mcycle = %0d", rvtop.swerv.dec.tlu.minstretl[31:0],rvtop.swerv.dec.tlu.mcyclel[31:0]); +`ifndef VERILATOR + $finish; +`endif + end + + always @(posedge core_clk) + if (rvtop.trace_rv_i_valid_ip !== 0) begin + $fwrite(tp,"%b,%h,%h,%0h,%0h,3,%b,%h,%h,%b\n", rvtop.trace_rv_i_valid_ip, rvtop.trace_rv_i_address_ip[63:32], rvtop.trace_rv_i_address_ip[31:0], rvtop.trace_rv_i_insn_ip[63:32], rvtop.trace_rv_i_insn_ip[31:0],rvtop.trace_rv_i_exception_ip,rvtop.trace_rv_i_ecause_ip,rvtop.trace_rv_i_tval_ip,rvtop.trace_rv_i_interrupt_ip); + end + + initial begin + +`ifndef VERILATOR + core_clk = 0; + reset_l = 0; +`endif + + reset_vector = 32'h80000000; + nmi_vector = 32'hee000000; + nmi_int = 0; + +`ifndef VERILATOR + @(posedge core_clk); + reset_l = 0; +`endif + + $readmemh("data.hex", i_ahb_lsu.mem); + $readmemh("program.hex", i_ahb_ic.mem); + tp = $fopen("trace_port.csv","w"); + +`ifndef VERILATOR + repeat (5) @(posedge core_clk); + reset_l = 1; + #100000 $display("");$finish; +`endif + end + +`ifndef VERILATOR +initial begin + forever begin + core_clk = #5 ~core_clk; + end +end +`endif + + //=========================================================================- + // RTL instance + //=========================================================================- + swerv_wrapper rvtop ( + .rst_l ( reset_l ), + .clk ( core_clk ), + .rst_vec ( 31'h40000000 ), + .nmi_int ( nmi_int ), + .nmi_vec ( 31'h77000000 ), + .jtag_id (jtag_id[31:1]), + + .haddr ( ic_haddr ), + .hburst ( ic_hburst ), + .hmastlock ( ic_hmastlock ), + .hprot ( ic_hprot ), + .hsize ( ic_hsize ), + .htrans ( ic_htrans ), + .hwrite ( ic_hwrite ), + + .hrdata ( 
ic_hrdata[63:0]), + .hready ( ic_hready ), + .hresp ( ic_hresp ), + + //--------------------------------------------------------------- + // Debug AHB Master + //--------------------------------------------------------------- + .sb_haddr ( sb_haddr ), + .sb_hburst ( sb_hburst ), + .sb_hmastlock ( sb_hmastlock ), + .sb_hprot ( sb_hprot ), + .sb_hsize ( sb_hsize ), + .sb_htrans ( sb_htrans ), + .sb_hwrite ( sb_hwrite ), + .sb_hwdata ( sb_hwdata ), + + .sb_hrdata ( sb_hrdata ), + .sb_hready ( sb_hready ), + .sb_hresp ( sb_hresp ), + + + //--------------------------------------------------------------- + // LSU AHB Master + //--------------------------------------------------------------- + .lsu_haddr ( lsu_haddr ), + .lsu_hburst ( lsu_hburst ), + .lsu_hmastlock ( lsu_hmastlock ), + .lsu_hprot ( lsu_hprot ), + .lsu_hsize ( lsu_hsize ), + .lsu_htrans ( lsu_htrans ), + .lsu_hwrite ( lsu_hwrite ), + .lsu_hwdata ( lsu_hwdata ), + + .lsu_hrdata ( lsu_hrdata[63:0]), + .lsu_hready ( lsu_hready ), + .lsu_hresp ( lsu_hresp ), + + + //--------------------------------------------------------------- + // DMA Slave + //--------------------------------------------------------------- + .dma_haddr ( '0 ), + .dma_hburst ( '0 ), + .dma_hmastlock ( '0 ), + .dma_hprot ( '0 ), + .dma_hsize ( '0 ), + .dma_htrans ( '0 ), + .dma_hwrite ( '0 ), + .dma_hwdata ( '0 ), + + .dma_hrdata ( dma_hrdata ), + .dma_hresp ( dma_hresp ), + .dma_hsel ( 1'b1 ), + .dma_hreadyin ( dma_hready_out ), + .dma_hreadyout ( dma_hready_out ), + + .timer_int ( 1'b0 ), + `ifdef TB_RESTRUCT + .extintsrc_req ( '0 ), + `else + .extintsrc_req ( '0 ), + `endif + + `ifdef RV_BUILD_AHB_LITE + .lsu_bus_clk_en ( 1'b1 ),// Clock ratio b/w cpu core clk & AHB master interface + .ifu_bus_clk_en ( 1'b1 ),// Clock ratio b/w cpu core clk & AHB master interface + .dbg_bus_clk_en ( 1'b0 ),// Clock ratio b/w cpu core clk & AHB Debug master interface + .dma_bus_clk_en ( 1'b0 ),// Clock ratio b/w cpu core clk & AHB slave interface + `endif 
+ + .trace_rv_i_insn_ip(trace_rv_i_insn_ip), + .trace_rv_i_address_ip(trace_rv_i_address_ip), + .trace_rv_i_valid_ip(trace_rv_i_valid_ip), + .trace_rv_i_exception_ip(trace_rv_i_exception_ip), + .trace_rv_i_ecause_ip(trace_rv_i_ecause_ip), + .trace_rv_i_interrupt_ip(trace_rv_i_interrupt_ip), + .trace_rv_i_tval_ip(trace_rv_i_tval_ip), + + + + + + .jtag_tck ( 1'b0 ), // JTAG clk + .jtag_tms ( 1'b0 ), // JTAG TMS + .jtag_tdi ( 1'b0 ), // JTAG tdi + .jtag_trst_n ( 1'b0 ), // JTAG Reset + .jtag_tdo ( jtag_tdo ), // JTAG TDO + + .mpc_debug_halt_ack ( mpc_debug_halt_ack), + .mpc_debug_halt_req ( 1'b0), + .mpc_debug_run_ack ( mpc_debug_run_ack), + .mpc_debug_run_req ( 1'b1), + .mpc_reset_run_req ( 1'b1), // Start running after reset + .debug_brkpt_status (debug_brkpt_status), + + .i_cpu_halt_req ( 1'b0 ), // Async halt req to CPU + .o_cpu_halt_ack ( o_cpu_halt_ack ), // core response to halt + .o_cpu_halt_status ( o_cpu_halt_status ), // 1'b1 indicates core is halted + .i_cpu_run_req ( 1'b0 ), // Async restart req to CPU + .o_debug_mode_status (o_debug_mode_status), + .o_cpu_run_ack ( o_cpu_run_ack ), // Core response to run req + + .dec_tlu_perfcnt0(dec_tlu_perfcnt0), + .dec_tlu_perfcnt1(dec_tlu_perfcnt1), + .dec_tlu_perfcnt2(dec_tlu_perfcnt2), + .dec_tlu_perfcnt3(dec_tlu_perfcnt3), + + .scan_mode ( 1'b0 ), // To enable scan mode + .mbist_mode ( 1'b0 ) // to enable mbist + + ); + +initial begin + `FORCE rvtop.dccm_rd_data_hi = '0; + `FORCE rvtop.dccm_rd_data_lo = '0; +end + + + //=========================================================================- + // AHB I$ instance + //=========================================================================- + + ahb_sif i_ahb_ic ( + + // Inputs + .HWDATA(64'h0), + .HCLK(core_clk), + .HSEL(1'b1), + .HPROT(ic_hprot), + .HWRITE(ic_hwrite), + .HTRANS(ic_htrans), + .HSIZE(ic_hsize), + .HREADY(ic_hready), + .HRESETn(reset_l), + .HADDR(ic_haddr), + .HBURST(ic_hburst), + + // Outputs + .HREADYOUT(ic_hready), + .HRESP(ic_hresp), + 
.HRDATA(ic_hrdata[63:0]) + + ); + + + ahb_sif i_ahb_lsu ( + + // Inputs + .HWDATA(lsu_hwdata), + .HCLK(core_clk), + .HSEL(1'b1), + .HPROT(lsu_hprot), + .HWRITE(lsu_hwrite), + .HTRANS(lsu_htrans), + .HSIZE(lsu_hsize), + .HREADY(lsu_hready), + .HRESETn(reset_l), + .HADDR(lsu_haddr), + .HBURST(lsu_hburst), + + // Outputs + .HREADYOUT(lsu_hready), + .HRESP(lsu_hresp), + .HRDATA(lsu_hrdata[63:0]) + + ); + + ahb_sif i_ahb_sb ( + + // Inputs + .HWDATA(sb_hwdata), + .HCLK(core_clk), + .HSEL(1'b1), + .HPROT(sb_hprot), + .HWRITE(sb_hwrite), + .HTRANS(sb_htrans), + .HSIZE(sb_hsize), + .HREADY(1'b0), + .HRESETn(reset_l), + .HADDR(sb_haddr), + .HBURST(sb_hburst), + + // Outputs + .HREADYOUT(sb_hready), + .HRESP(sb_hresp), + .HRDATA(sb_hrdata[63:0]) + + ); + + +endmodule diff --git a/testbench/test_tb_top.cpp b/testbench/test_tb_top.cpp new file mode 100644 index 0000000..1ca4fcb --- /dev/null +++ b/testbench/test_tb_top.cpp @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include <stdlib.h> // exit, EXIT_SUCCESS +#include <iostream> // std::cout, std::endl +#include <utility> // NOTE(review): header name reconstructed, confirm against the original file +#include <string> // NOTE(review): header name reconstructed, confirm against the original file +#include "Vtb_top.h" +#include "verilated.h" +#include "verilated_vcd_c.h" + + +// /* +vluint64_t main_time = 0; + +double sc_time_stamp () { + return main_time; +} +// */ + +//int main(int argc, char* argv[]) { +int main(int argc, char** argv) { + + std::cout << "\nStart of sim\n" << std::endl; + Verilated::commandArgs(argc, argv); + + Vtb_top* tb = new Vtb_top; + uint32_t clkCnt = 0; + + // init trace dump + Verilated::traceEverOn(true); + VerilatedVcdC* tfp = new VerilatedVcdC; + tb->trace (tfp, 24); + tfp->open ("sim.vcd"); + + + // Simulate + for(auto i=0; i<200000; ++i){ + clkCnt++; + if(i<10) { + tb->reset_l = 0; + } else { + tb->reset_l = 1; + } + + for (auto clk=0; clk<2; clk++) { + tfp->dump (2*i+clk); + tb->core_clk = !tb->core_clk; + tb->eval(); + } + + if (tb->finished) { + tfp->close(); + break; + } + + } + + for(auto i=0; i<100; ++i){ + clkCnt++; + for (auto clk=0; clk<2; clk++) { + tfp->dump (2*i+clk); + tb->core_clk = !tb->core_clk; + tb->eval(); + } + } + + std::cout << "\nEnd of sim" << std::endl; + exit(EXIT_SUCCESS); + +} diff --git a/tools/JSON.pm b/tools/JSON.pm new file mode 100644 index 0000000..f57c555 --- /dev/null +++ b/tools/JSON.pm @@ -0,0 +1,2267 @@ +package JSON; + + +use strict; +use Carp (); +use base qw(Exporter); +@JSON::EXPORT = qw(from_json to_json jsonToObj objToJson encode_json decode_json); + +BEGIN { + $JSON::VERSION = '2.53'; + $JSON::DEBUG = 0 unless (defined $JSON::DEBUG); + $JSON::DEBUG = $ENV{ PERL_JSON_DEBUG } if exists $ENV{ PERL_JSON_DEBUG }; +} + +my $Module_XS = 'JSON::XS'; +my $Module_PP = 'JSON::PP'; +my $Module_bp = 'JSON::backportPP'; # included in JSON distribution +my $PP_Version = '2.27200'; +my $XS_Version = '2.27'; + + +# XS and PP common methods + +my @PublicMethods = qw/ + ascii latin1 utf8 pretty indent space_before space_after relaxed canonical allow_nonref + allow_blessed convert_blessed filter_json_object filter_json_single_key_object + shrink max_depth
max_size encode decode decode_prefix allow_unknown +/; + +my @Properties = qw/ + ascii latin1 utf8 indent space_before space_after relaxed canonical allow_nonref + allow_blessed convert_blessed shrink max_depth max_size allow_unknown +/; + +my @XSOnlyMethods = qw//; # Currently nothing + +my @PPOnlyMethods = qw/ + indent_length sort_by + allow_singlequote allow_bignum loose allow_barekey escape_slash as_nonblessed +/; # JSON::PP specific + + +# used in _load_xs and _load_pp ($INSTALL_ONLY is not used currently) +my $_INSTALL_DONT_DIE = 1; # When _load_xs fails to load XS, don't die. +my $_INSTALL_ONLY = 2; # Don't call _set_methods() +my $_ALLOW_UNSUPPORTED = 0; +my $_UNIV_CONV_BLESSED = 0; +my $_USSING_bpPP = 0; + + +# Check the environment variable to decide worker module. + +unless ($JSON::Backend) { + $JSON::DEBUG and Carp::carp("Check used worker module..."); + + my $backend = exists $ENV{PERL_JSON_BACKEND} ? $ENV{PERL_JSON_BACKEND} : 1; + + if ($backend eq '1' or $backend =~ /JSON::XS\s*,\s*JSON::PP/) { + _load_xs($_INSTALL_DONT_DIE) or _load_pp(); + } + elsif ($backend eq '0' or $backend eq 'JSON::PP') { + _load_pp(); + } + elsif ($backend eq '2' or $backend eq 'JSON::XS') { + _load_xs(); + } + elsif ($backend eq 'JSON::backportPP') { + $_USSING_bpPP = 1; + _load_pp(); + } + else { + Carp::croak "The value of environmental variable 'PERL_JSON_BACKEND' is invalid."; + } +} + + +sub import { + my $pkg = shift; + my @what_to_export; + my $no_export; + + for my $tag (@_) { + if ($tag eq '-support_by_pp') { + if (!$_ALLOW_UNSUPPORTED++) { + JSON::Backend::XS + ->support_by_pp(@PPOnlyMethods) if ($JSON::Backend eq $Module_XS); + } + next; + } + elsif ($tag eq '-no_export') { + $no_export++, next; + } + elsif ( $tag eq '-convert_blessed_universally' ) { + eval q| + require B; + *UNIVERSAL::TO_JSON = sub { + my $b_obj = B::svref_2object( $_[0] ); + return $b_obj->isa('B::HV') ? { %{ $_[0] } } + : $b_obj->isa('B::AV') ? 
[ @{ $_[0] } ] + : undef + ; + } + | if ( !$_UNIV_CONV_BLESSED++ ); + next; + } + push @what_to_export, $tag; + } + + return if ($no_export); + + __PACKAGE__->export_to_level(1, $pkg, @what_to_export); +} + + +# OBSOLETED + +sub jsonToObj { + my $alternative = 'from_json'; + if (defined $_[0] and UNIVERSAL::isa($_[0], 'JSON')) { + shift @_; $alternative = 'decode'; + } + Carp::carp "'jsonToObj' will be obsoleted. Please use '$alternative' instead."; + return JSON::from_json(@_); +}; + +sub objToJson { + my $alternative = 'to_json'; + if (defined $_[0] and UNIVERSAL::isa($_[0], 'JSON')) { + shift @_; $alternative = 'encode'; + } + Carp::carp "'objToJson' will be obsoleted. Please use '$alternative' instead."; + JSON::to_json(@_); +}; + + +# INTERFACES + +sub to_json ($@) { + if ( + ref($_[0]) eq 'JSON' + or (@_ > 2 and $_[0] eq 'JSON') + ) { + Carp::croak "to_json should not be called as a method."; + } + my $json = new JSON; + + if (@_ == 2 and ref $_[1] eq 'HASH') { + my $opt = $_[1]; + for my $method (keys %$opt) { + $json->$method( $opt->{$method} ); + } + } + + $json->encode($_[0]); +} + + +sub from_json ($@) { + if ( ref($_[0]) eq 'JSON' or $_[0] eq 'JSON' ) { + Carp::croak "from_json should not be called as a method."; + } + my $json = new JSON; + + if (@_ == 2 and ref $_[1] eq 'HASH') { + my $opt = $_[1]; + for my $method (keys %$opt) { + $json->$method( $opt->{$method} ); + } + } + + return $json->decode( $_[0] ); +} + + +sub true { $JSON::true } + +sub false { $JSON::false } + +sub null { undef; } + + +sub require_xs_version { $XS_Version; } + +sub backend { + my $proto = shift; + $JSON::Backend; +} + +#*module = *backend; + + +sub is_xs { + return $_[0]->backend eq $Module_XS; # the 'module' alias above is commented out, so ask backend() directly +} + + +sub is_pp { + return not $_[0]->is_xs; # no 'xs' method is defined here; is_xs is the predicate +} + + +sub pureperl_only_methods { @PPOnlyMethods; } + + +sub property { + my ($self, $name, $value) = @_; + + if (@_ == 1) { + my %props; + for $name (@Properties) { + my $method = 'get_' .
$name; + if ($name eq 'max_size') { + my $value = $self->$method(); + $props{$name} = $value == 1 ? 0 : $value; + next; + } + $props{$name} = $self->$method(); + } + return \%props; + } + elsif (@_ > 3) { + Carp::croak('property() can take only the option within 2 arguments.'); + } + elsif (@_ == 2) { + if ( my $method = $self->can('get_' . $name) ) { + if ($name eq 'max_size') { + my $value = $self->$method(); + return $value == 1 ? 0 : $value; + } + $self->$method(); + } + } + else { + $self->$name($value); + } + +} + + + +# INTERNAL + +sub _load_xs { + my $opt = shift; + + $JSON::DEBUG and Carp::carp "Load $Module_XS."; + + # if called after install module, overload is disable.... why? + JSON::Boolean::_overrride_overload($Module_XS); + JSON::Boolean::_overrride_overload($Module_PP); + + eval qq| + use $Module_XS $XS_Version (); + |; + + if ($@) { + if (defined $opt and $opt & $_INSTALL_DONT_DIE) { + $JSON::DEBUG and Carp::carp "Can't load $Module_XS...($@)"; + return 0; + } + Carp::croak $@; + } + + unless (defined $opt and $opt & $_INSTALL_ONLY) { + _set_module( $JSON::Backend = $Module_XS ); + my $data = join("", ); # this code is from Jcode 2.xx. + close(DATA); + eval $data; + JSON::Backend::XS->init; + } + + return 1; +}; + + +sub _load_pp { + my $opt = shift; + my $backend = $_USSING_bpPP ? $Module_bp : $Module_PP; + + $JSON::DEBUG and Carp::carp "Load $backend."; + + # if called after install module, overload is disable.... why? + JSON::Boolean::_overrride_overload($Module_XS); + JSON::Boolean::_overrride_overload($backend); + + if ( $_USSING_bpPP ) { + eval qq| require $backend |; + } + else { + eval qq| use $backend $PP_Version () |; + } + + if ($@) { + if ( $backend eq $Module_PP ) { + $JSON::DEBUG and Carp::carp "Can't load $Module_PP ($@), so try to load $Module_bp"; + $_USSING_bpPP++; + $backend = $Module_bp; + JSON::Boolean::_overrride_overload($backend); + local $^W; # if PP installed but invalid version, backportPP redifines methods. 
+ eval qq| require $Module_bp |; + } + Carp::croak $@ if $@; + } + + unless (defined $opt and $opt & $_INSTALL_ONLY) { + _set_module( $JSON::Backend = $Module_PP ); # even if backportPP, set $Backend with 'JSON::PP' + JSON::Backend::PP->init; + } +}; + + +sub _set_module { + return if defined $JSON::true; + + my $module = shift; + + local $^W; + no strict qw(refs); + + $JSON::true = ${"$module\::true"}; + $JSON::false = ${"$module\::false"}; + + push @JSON::ISA, $module; + push @{"$module\::Boolean::ISA"}, qw(JSON::Boolean); + + *{"JSON::is_bool"} = \&{"$module\::is_bool"}; + + for my $method ($module eq $Module_XS ? @PPOnlyMethods : @XSOnlyMethods) { + *{"JSON::$method"} = sub { + Carp::carp("$method is not supported in $module."); + $_[0]; + }; + } + + return 1; +} + + + +# +# JSON Boolean +# + +package JSON::Boolean; + +my %Installed; + +sub _overrride_overload { + return if ($Installed{ $_[0] }++); + + my $boolean = $_[0] . '::Boolean'; + + eval sprintf(q| + package %s; + use overload ( + '""' => sub { ${$_[0]} == 1 ? 'true' : 'false' }, + 'eq' => sub { + my ($obj, $op) = ref ($_[0]) ? ($_[0], $_[1]) : ($_[1], $_[0]); + if ($op eq 'true' or $op eq 'false') { + return "$obj" eq 'true' ? 'true' eq $op : 'false' eq $op; + } + else { + return $obj ? 1 == $op : 0 == $op; + } + }, + ); + |, $boolean); + + if ($@) { Carp::croak $@; } + + return 1; +} + + +# +# Helper classes for Backend Module (PP) +# + +package JSON::Backend::PP; + +sub init { + local $^W; + no strict qw(refs); # this routine may be called after JSON::Backend::XS init was called. + *{"JSON::decode_json"} = \&{"JSON::PP::decode_json"}; + *{"JSON::encode_json"} = \&{"JSON::PP::encode_json"}; + *{"JSON::PP::is_xs"} = sub { 0 }; + *{"JSON::PP::is_pp"} = sub { 1 }; + return 1; +} + +# +# To save memory, the below lines are read only when XS backend is used. 
+# + +package JSON; + +1; +__DATA__ + + +# +# Helper classes for Backend Module (XS) +# + +package JSON::Backend::XS; + +use constant INDENT_LENGTH_FLAG => 15 << 12; + +use constant UNSUPPORTED_ENCODE_FLAG => { + ESCAPE_SLASH => 0x00000010, + ALLOW_BIGNUM => 0x00000020, + AS_NONBLESSED => 0x00000040, + EXPANDED => 0x10000000, # for developer's +}; + +use constant UNSUPPORTED_DECODE_FLAG => { + LOOSE => 0x00000001, + ALLOW_BIGNUM => 0x00000002, + ALLOW_BAREKEY => 0x00000004, + ALLOW_SINGLEQUOTE => 0x00000008, + EXPANDED => 0x20000000, # for developer's +}; + + +sub init { + local $^W; + no strict qw(refs); + *{"JSON::decode_json"} = \&{"JSON::XS::decode_json"}; + *{"JSON::encode_json"} = \&{"JSON::XS::encode_json"}; + *{"JSON::XS::is_xs"} = sub { 1 }; + *{"JSON::XS::is_pp"} = sub { 0 }; + return 1; +} + + +sub support_by_pp { + my ($class, @methods) = @_; + + local $^W; + no strict qw(refs); + + my $JSON_XS_encode_orignal = \&JSON::XS::encode; + my $JSON_XS_decode_orignal = \&JSON::XS::decode; + my $JSON_XS_incr_parse_orignal = \&JSON::XS::incr_parse; + + *JSON::XS::decode = \&JSON::Backend::XS::Supportable::_decode; + *JSON::XS::encode = \&JSON::Backend::XS::Supportable::_encode; + *JSON::XS::incr_parse = \&JSON::Backend::XS::Supportable::_incr_parse; + + *{JSON::XS::_original_decode} = $JSON_XS_decode_orignal; + *{JSON::XS::_original_encode} = $JSON_XS_encode_orignal; + *{JSON::XS::_original_incr_parse} = $JSON_XS_incr_parse_orignal; + + push @JSON::Backend::XS::Supportable::ISA, 'JSON'; + + my $pkg = 'JSON::Backend::XS::Supportable'; + + *{JSON::new} = sub { + my $proto = new JSON::XS; $$proto = 0; + bless $proto, $pkg; + }; + + + for my $method (@methods) { + my $flag = uc($method); + my $type |= (UNSUPPORTED_ENCODE_FLAG->{$flag} || 0); + $type |= (UNSUPPORTED_DECODE_FLAG->{$flag} || 0); + + next unless($type); + + $pkg->_make_unsupported_method($method => $type); + } + + push @{"JSON::XS::Boolean::ISA"}, qw(JSON::PP::Boolean); + push 
@{"JSON::PP::Boolean::ISA"}, qw(JSON::Boolean); + + $JSON::DEBUG and Carp::carp("set -support_by_pp mode."); + + return 1; +} + + + + +# +# Helper classes for XS +# + +package JSON::Backend::XS::Supportable; + +$Carp::Internal{'JSON::Backend::XS::Supportable'} = 1; + +sub _make_unsupported_method { + my ($pkg, $method, $type) = @_; + + local $^W; + no strict qw(refs); + + *{"$pkg\::$method"} = sub { + local $^W; + if (defined $_[1] ? $_[1] : 1) { + ${$_[0]} |= $type; + } + else { + ${$_[0]} &= ~$type; + } + $_[0]; + }; + + *{"$pkg\::get_$method"} = sub { + ${$_[0]} & $type ? 1 : ''; + }; + +} + + +sub _set_for_pp { + JSON::_load_pp( $_INSTALL_ONLY ); + + my $type = shift; + my $pp = new JSON::PP; + my $prop = $_[0]->property; + + for my $name (keys %$prop) { + $pp->$name( $prop->{$name} ? $prop->{$name} : 0 ); + } + + my $unsupported = $type eq 'encode' ? JSON::Backend::XS::UNSUPPORTED_ENCODE_FLAG + : JSON::Backend::XS::UNSUPPORTED_DECODE_FLAG; + my $flags = ${$_[0]} || 0; + + for my $name (keys %$unsupported) { + next if ($name eq 'EXPANDED'); # for developer's + my $enable = ($flags & $unsupported->{$name}) ? 
1 : 0; + my $method = lc $name; + $pp->$method($enable); + } + + $pp->indent_length( $_[0]->get_indent_length ); + + return $pp; +} + +sub _encode { # using with PP encod + if (${$_[0]}) { + _set_for_pp('encode' => @_)->encode($_[1]); + } + else { + $_[0]->_original_encode( $_[1] ); + } +} + + +sub _decode { # if unsupported-flag is set, use PP + if (${$_[0]}) { + _set_for_pp('decode' => @_)->decode($_[1]); + } + else { + $_[0]->_original_decode( $_[1] ); + } +} + + +sub decode_prefix { # if unsupported-flag is set, use PP + _set_for_pp('decode' => @_)->decode_prefix($_[1]); +} + + +sub _incr_parse { + if (${$_[0]}) { + _set_for_pp('decode' => @_)->incr_parse($_[1]); + } + else { + $_[0]->_original_incr_parse( $_[1] ); + } +} + + +sub get_indent_length { + ${$_[0]} << 4 >> 16; +} + + +sub indent_length { + my $length = $_[1]; + + if (!defined $length or $length > 15 or $length < 0) { + Carp::carp "The acceptable range of indent_length() is 0 to 15."; + } + else { + local $^W; + $length <<= 12; + ${$_[0]} &= ~ JSON::Backend::XS::INDENT_LENGTH_FLAG; + ${$_[0]} |= $length; + *JSON::XS::encode = \&JSON::Backend::XS::Supportable::_encode; + } + + $_[0]; +} + + +1; +__END__ + +=head1 NAME + +JSON - JSON (JavaScript Object Notation) encoder/decoder + +=head1 SYNOPSIS + + use JSON; # imports encode_json, decode_json, to_json and from_json. + + # simple and fast interfaces (expect/generate UTF-8) + + $utf8_encoded_json_text = encode_json $perl_hash_or_arrayref; + $perl_hash_or_arrayref = decode_json $utf8_encoded_json_text; + + # OO-interface + + $json = JSON->new->allow_nonref; + + $json_text = $json->encode( $perl_scalar ); + $perl_scalar = $json->decode( $json_text ); + + $pretty_printed = $json->pretty->encode( $perl_scalar ); # pretty-printing + + # If you want to use PP only support features, call with '-support_by_pp' + # When XS unsupported feature is enable, using PP (de|en)code instead of XS ones. 
+ + use JSON -support_by_pp; + + # option-acceptable interfaces (expect/generate UNICODE by default) + + $json_text = to_json( $perl_scalar, { ascii => 1, pretty => 1 } ); + $perl_scalar = from_json( $json_text, { utf8 => 1 } ); + + # Between (en|de)code_json and (to|from)_json, if you want to write + # a code which communicates to an outer world (encoded in UTF-8), + # recommend to use (en|de)code_json. + +=head1 VERSION + + 2.53 + +This version is compatible with JSON::XS B<2.27> and later. + + +=head1 NOTE + +JSON::PP was included in C<JSON> distribution. +It comes to be a perl core module in Perl 5.14. +And L<JSON::PP> will be split away from it. + +C<JSON> distribution will include additional JSON::PP modules. +They are JSON::backportPP and so on. JSON.pm should work as it did at all. + +=head1 DESCRIPTION + + ************************** CAUTION ******************************** + * This is 'JSON module version 2' and there are many differences * + * to version 1.xx * + * Please check your applications using old version. * + * See to 'INCOMPATIBLE CHANGES TO OLD VERSION' * + ******************************************************************* + +JSON (JavaScript Object Notation) is a simple data format. +See to L and C(L). + +This module converts Perl data structures to JSON and vice versa using either +L<JSON::XS> or L<JSON::PP>. + +JSON::XS is the fastest and most proper JSON module on CPAN which must be +compiled and installed in your environment. +JSON::PP is a pure-Perl module which is bundled in this distribution and +has a strong compatibility to JSON::XS. + +This module tries to use JSON::XS by default and, if that fails, uses JSON::PP instead. +So its features completely depend on JSON::XS or JSON::PP. + +See to L. + +To distinguish the module name 'JSON' and the format type JSON, +the former is quoted by CEE (its results vary with your using media), +and the latter is left just as it is.
+ +Module name : C<JSON> + +Format type : JSON + +=head2 FEATURES + +=over + +=item * correct unicode handling + +This module (i.e. backend modules) knows how to handle Unicode, documents +how and when it does so, and even documents what "correct" means. + +Even though there are limitations, this feature is available since Perl version 5.6. + +JSON::XS requires Perl 5.8.2 (but works correctly in 5.8.8 or later), so in older versions +C<JSON> should call JSON::PP as the backend which can be used since Perl 5.005. + +With Perl 5.8.x JSON::PP works, but from 5.8.0 to 5.8.2, because of a Perl side problem, +JSON::PP works slower in the versions. And in 5.005, the Unicode handling is not available. +See to L for more information. + +See also to L +and L. + + +=item * round-trip integrity + +When you serialise a perl data structure using only data types supported +by JSON and Perl, the deserialised data structure is identical on the Perl +level. (e.g. the string "2.0" doesn't suddenly become "2" just because +it looks like a number). There I<are> minor exceptions to this, read the +L section below to learn about those. + + +=item * strict checking of JSON correctness + +There is no guessing, no generating of illegal JSON texts by default, +and only JSON is accepted as input by default (the latter is a security +feature). + +See to L and L. + +=item * fast + +This module returns a JSON::XS object itself if available. +Compared to other JSON modules and other serialisers such as Storable, +JSON::XS usually compares favourably in terms of speed, too. + +If not available, C<JSON> returns a JSON::PP object instead of JSON::XS and +it is very slow as pure-Perl. + +=item * simple to use + +This module has both a simple functional interface as well as an +object oriented interface.
+ +=item * reasonably versatile output formats + +You can choose between the most compact guaranteed-single-line format possible +(nice for simple line-based protocols), a pure-ASCII format (for when your transport +is not 8-bit clean, still supports the whole Unicode range), or a pretty-printed +format (for when you want to read that stuff). Or you can combine those features +in whatever way you like. + +=back + +=head1 FUNCTIONAL INTERFACE + +Some documents are copied and modified from L. +C and C are additional functions. + +=head2 encode_json + + $json_text = encode_json $perl_scalar + +Converts the given Perl data structure to a UTF-8 encoded, binary string. + +This function call is functionally identical to: + + $json_text = JSON->new->utf8->encode($perl_scalar) + +=head2 decode_json + + $perl_scalar = decode_json $json_text + +The opposite of C: expects an UTF-8 (binary) string and tries +to parse that as an UTF-8 encoded JSON text, returning the resulting +reference. + +This function call is functionally identical to: + + $perl_scalar = JSON->new->utf8->decode($json_text) + + +=head2 to_json + + $json_text = to_json($perl_scalar) + +Converts the given Perl data structure to a json string. + +This function call is functionally identical to: + + $json_text = JSON->new->encode($perl_scalar) + +Takes a hash reference as the second. + + $json_text = to_json($perl_scalar, $flag_hashref) + +So, + + $json_text = to_json($perl_scalar, {utf8 => 1, pretty => 1}) + +equivalent to: + + $json_text = JSON->new->utf8(1)->pretty(1)->encode($perl_scalar) + +If you want to write a modern perl code which communicates to outer world, +you should use C (supposed that JSON data are encoded in UTF-8). + +=head2 from_json + + $perl_scalar = from_json($json_text) + +The opposite of C: expects a json string and tries +to parse it, returning the resulting reference. 
+ +This function call is functionally identical to: + + $perl_scalar = JSON->new->decode($json_text) + +Takes a hash reference as the second argument. + + $perl_scalar = from_json($json_text, $flag_hashref) + +So, + + $perl_scalar = from_json($json_text, {utf8 => 1}) + +equivalent to: + + $perl_scalar = JSON->new->utf8(1)->decode($json_text) + +If you want to write a modern perl code which communicates to outer world, +you should use C<decode_json> (supposed that JSON data are encoded in UTF-8). + +=head2 JSON::is_bool + + $is_boolean = JSON::is_bool($scalar) + +Returns true if the passed scalar represents either JSON::true or +JSON::false, two constants that act like C<1> and C<0> respectively +and are also used to represent JSON C<true> and C<false> in Perl strings. + +=head2 JSON::true + +Returns JSON true value which is blessed object. +It C<isa> JSON::Boolean object. + +=head2 JSON::false + +Returns JSON false value which is blessed object. +It C<isa> JSON::Boolean object. + +=head2 JSON::null + +Returns C<undef>. + +See L, below, for more information on how JSON values are mapped to +Perl. + +=head1 HOW DO I DECODE A DATA FROM OUTER AND ENCODE TO OUTER + +This section supposes that your perl version is 5.8 or later. + +If you know a JSON text from an outer world - a network, a file content, and so on, +is encoded in UTF-8, you should use C<decode_json> or C<JSON> module object +with C<utf8> enable. And the decoded result will contain UNICODE characters. + + # from network + my $json = JSON->new->utf8; + my $json_text = CGI->new->param( 'json_data' ); + my $perl_scalar = $json->decode( $json_text ); + + # from file content + local $/; + open( my $fh, '<', 'json.data' ); + $json_text = <$fh>; + $perl_scalar = decode_json( $json_text ); + +If an outer data is not encoded in UTF-8, firstly you should C<decode> it. + + use Encode; + local $/; + open( my $fh, '<', 'json.data' ); + my $encoding = 'cp932'; + my $unicode_json_text = decode( $encoding, <$fh> ); # UNICODE + + # or you can write the below code.
+ # + # open( my $fh, "<:encoding($encoding)", 'json.data' ); + # $unicode_json_text = <$fh>; + +In this case, C<$unicode_json_text> is of course UNICODE string. +So you B use C nor C module object with C enable. +Instead of them, you use C module object with C disable or C. + + $perl_scalar = $json->utf8(0)->decode( $unicode_json_text ); + # or + $perl_scalar = from_json( $unicode_json_text ); + +Or C and C: + + $perl_scalar = decode_json( encode( 'utf8', $unicode_json_text ) ); + # this way is not efficient. + +And now, you want to convert your C<$perl_scalar> into JSON data and +send it to an outer world - a network or a file content, and so on. + +Your data usually contains UNICODE strings and you want the converted data to be encoded +in UTF-8, you should use C or C module object with C enable. + + print encode_json( $perl_scalar ); # to a network? file? or display? + # or + print $json->utf8->encode( $perl_scalar ); + +If C<$perl_scalar> does not contain UNICODE but C<$encoding>-encoded strings +for some reason, then its characters are regarded as B for perl +(because it does not concern with your $encoding). +You B use C nor C module object with C enable. +Instead of them, you use C module object with C disable or C. +Note that the resulted text is a UNICODE string but no problem to print it. + + # $perl_scalar contains $encoding encoded string values + $unicode_json_text = $json->utf8(0)->encode( $perl_scalar ); + # or + $unicode_json_text = to_json( $perl_scalar ); + # $unicode_json_text consists of characters less than 0x100 + print $unicode_json_text; + +Or C all string values and C: + + $perl_scalar->{ foo } = decode( $encoding, $perl_scalar->{ foo } ); + # ... do it to each string values, then encode_json + $json_text = encode_json( $perl_scalar ); + +This method is a proper way but probably not efficient. + +See to L, L. 
+ + +=head1 COMMON OBJECT-ORIENTED INTERFACE + +=head2 new + + $json = new JSON + +Returns a new C object inherited from either JSON::XS or JSON::PP +that can be used to de/encode JSON strings. + +All boolean flags described below are by default I. + +The mutators for flags all return the JSON object again and thus calls can +be chained: + + my $json = JSON->new->utf8->space_after->encode({a => [1,2]}) + => {"a": [1, 2]} + +=head2 ascii + + $json = $json->ascii([$enable]) + + $enabled = $json->get_ascii + +If $enable is true (or missing), then the encode method will not generate characters outside +the code range 0..127. Any Unicode characters outside that range will be escaped using either +a single \uXXXX or a double \uHHHH\uLLLLL escape sequence, as per RFC4627. + +If $enable is false, then the encode method will not escape Unicode characters unless +required by the JSON syntax or other flags. This results in a faster and more compact format. + +This feature depends on the used Perl version and environment. + +See to L if the backend is PP. + + JSON->new->ascii(1)->encode([chr 0x10401]) + => ["\ud801\udc01"] + +=head2 latin1 + + $json = $json->latin1([$enable]) + + $enabled = $json->get_latin1 + +If $enable is true (or missing), then the encode method will encode the resulting JSON +text as latin1 (or iso-8859-1), escaping any characters outside the code range 0..255. + +If $enable is false, then the encode method will not escape Unicode characters +unless required by the JSON syntax or other flags. + + JSON->new->latin1->encode (["\x{89}\x{abc}"] + => ["\x{89}\\u0abc"] # (perl syntax, U+abc escaped, U+89 not) + +=head2 utf8 + + $json = $json->utf8([$enable]) + + $enabled = $json->get_utf8 + +If $enable is true (or missing), then the encode method will encode the JSON result +into UTF-8, as required by many protocols, while the decode method expects to be handled +an UTF-8-encoded string. 
Please note that UTF-8-encoded strings do not contain any +characters outside the range 0..255, they are thus useful for bytewise/binary I/O. + +In future versions, enabling this option might enable autodetection of the UTF-16 and UTF-32 +encoding families, as described in RFC4627. + +If $enable is false, then the encode method will return the JSON string as a (non-encoded) +Unicode string, while decode expects thus a Unicode string. Any decoding or encoding +(e.g. to UTF-8 or UTF-16) needs to be done yourself, e.g. using the Encode module. + + +Example, output UTF-16BE-encoded JSON: + + use Encode; + $jsontext = encode "UTF-16BE", JSON::XS->new->encode ($object); + +Example, decode UTF-32LE-encoded JSON: + + use Encode; + $object = JSON::XS->new->decode (decode "UTF-32LE", $jsontext); + +See to L if the backend is PP. + + +=head2 pretty + + $json = $json->pretty([$enable]) + +This enables (or disables) all of the C<indent>, C<space_before> and +C<space_after> (and in the future possibly more) flags in one call to +generate the most readable (or most compact) form possible. + +Equivalent to: + + $json->indent->space_before->space_after + +The indent space length is three and JSON::XS cannot change the indent +space length. + +=head2 indent + + $json = $json->indent([$enable]) + + $enabled = $json->get_indent + +If C<$enable> is true (or missing), then the C<encode> method will use a multiline +format as output, putting every array member or object/hash key-value pair +into its own line, indenting them properly. + +If C<$enable> is false, no newlines or indenting will be produced, and the +resulting JSON text is guaranteed not to contain any C<newlines>. + +This setting has no effect when decoding JSON texts. + +The indent space length is three. +With JSON::PP, you can also access C<indent_length> to change indent space length.
+ + +=head2 space_before + + $json = $json->space_before([$enable]) + + $enabled = $json->get_space_before + +If C<$enable> is true (or missing), then the C method will add an extra +optional space before the C<:> separating keys from values in JSON objects. + +If C<$enable> is false, then the C method will not add any extra +space at those places. + +This setting has no effect when decoding JSON texts. + +Example, space_before enabled, space_after and indent disabled: + + {"key" :"value"} + + +=head2 space_after + + $json = $json->space_after([$enable]) + + $enabled = $json->get_space_after + +If C<$enable> is true (or missing), then the C method will add an extra +optional space after the C<:> separating keys from values in JSON objects +and extra whitespace after the C<,> separating key-value pairs and array +members. + +If C<$enable> is false, then the C method will not add any extra +space at those places. + +This setting has no effect when decoding JSON texts. + +Example, space_before and indent disabled, space_after enabled: + + {"key": "value"} + + +=head2 relaxed + + $json = $json->relaxed([$enable]) + + $enabled = $json->get_relaxed + +If C<$enable> is true (or missing), then C will accept some +extensions to normal JSON syntax (see below). C will not be +affected in anyway. I. I suggest only to use this option to +parse application-specific files written by humans (configuration files, +resource files etc.) + +If C<$enable> is false (the default), then C will only accept +valid JSON texts. + +Currently accepted extensions are: + +=over 4 + +=item * list items can have an end-comma + +JSON I array elements and key-value pairs with commas. 
This +can be annoying if you write JSON texts manually and want to be able to +quickly append elements, so this extension accepts comma at the end of +such items not just between them: + + [ + 1, + 2, <- this comma not normally allowed + ] + { + "k1": "v1", + "k2": "v2", <- this comma not normally allowed + } + +=item * shell-style '#'-comments + +Whenever JSON allows whitespace, shell-style comments are additionally +allowed. They are terminated by the first carriage-return or line-feed +character, after which more white-space and comments are allowed. + + [ + 1, # this comment not allowed in JSON + # neither this one... + ] + +=back + + +=head2 canonical + + $json = $json->canonical([$enable]) + + $enabled = $json->get_canonical + +If C<$enable> is true (or missing), then the C method will output JSON objects +by sorting their keys. This is adding a comparatively high overhead. + +If C<$enable> is false, then the C method will output key-value +pairs in the order Perl stores them (which will likely change between runs +of the same script). + +This option is useful if you want the same data structure to be encoded as +the same JSON text (given the same overall settings). If it is disabled, +the same hash might be encoded differently even if contains the same data, +as key-value pairs have no inherent ordering in Perl. + +This setting has no effect when decoding JSON texts. + +=head2 allow_nonref + + $json = $json->allow_nonref([$enable]) + + $enabled = $json->get_allow_nonref + +If C<$enable> is true (or missing), then the C method can convert a +non-reference into its corresponding string, number or null JSON value, +which is an extension to RFC4627. Likewise, C will accept those JSON +values instead of croaking. + +If C<$enable> is false, then the C method will croak if it isn't +passed an arrayref or hashref, as JSON texts must either be an object +or array. Likewise, C will croak if given something that is not a +JSON object or array. 
+ + JSON->new->allow_nonref->encode ("Hello, World!") + => "Hello, World!" + +=head2 allow_unknown + + $json = $json->allow_unknown ([$enable]) + + $enabled = $json->get_allow_unknown + +If $enable is true (or missing), then "encode" will *not* throw an +exception when it encounters values it cannot represent in JSON (for +example, filehandles) but instead will encode a JSON "null" value. +Note that blessed objects are not included here and are handled +separately by c. + +If $enable is false (the default), then "encode" will throw an +exception when it encounters anything it cannot encode as JSON. + +This option does not affect "decode" in any way, and it is +recommended to leave it off unless you know your communications +partner. + +=head2 allow_blessed + + $json = $json->allow_blessed([$enable]) + + $enabled = $json->get_allow_blessed + +If C<$enable> is true (or missing), then the C method will not +barf when it encounters a blessed reference. Instead, the value of the +B option will decide whether C (C +disabled or no C method found) or a representation of the +object (C enabled and C method found) is being +encoded. Has no effect on C. + +If C<$enable> is false (the default), then C will throw an +exception when it encounters a blessed object. + + +=head2 convert_blessed + + $json = $json->convert_blessed([$enable]) + + $enabled = $json->get_convert_blessed + +If C<$enable> is true (or missing), then C, upon encountering a +blessed object, will check for the availability of the C method +on the object's class. If found, it will be called in scalar context +and the resulting scalar will be encoded instead of the object. If no +C method is found, the value of C will decide what +to do. + +The C method may safely call die if it wants. If C +returns other blessed objects, those will be handled in the same +way. C must take care of not causing an endless recursion cycle +(== crash) in this case. 
The name of C was chosen because other +methods called by the Perl core (== not by the user of the object) are +usually in upper case letters and to avoid collisions with the C +function or method. + +This setting does not yet influence C in any way. + +If C<$enable> is false, then the C setting will decide what +to do when a blessed object is found. + +=over + +=item convert_blessed_universally mode + +If use C with C<-convert_blessed_universally>, the C +subroutine is defined as the below code: + + *UNIVERSAL::TO_JSON = sub { + my $b_obj = B::svref_2object( $_[0] ); + return $b_obj->isa('B::HV') ? { %{ $_[0] } } + : $b_obj->isa('B::AV') ? [ @{ $_[0] } ] + : undef + ; + } + +This will cause that C method converts simple blessed objects into +JSON objects as non-blessed object. + + JSON -convert_blessed_universally; + $json->allow_blessed->convert_blessed->encode( $blessed_object ) + +This feature is experimental and may be removed in the future. + +=back + +=head2 filter_json_object + + $json = $json->filter_json_object([$coderef]) + +When C<$coderef> is specified, it will be called from C each +time it decodes a JSON object. The only argument passed to the coderef +is a reference to the newly-created hash. If the code references returns +a single scalar (which need not be a reference), this value +(i.e. a copy of that scalar to avoid aliasing) is inserted into the +deserialised data structure. If it returns an empty list +(NOTE: I C, which is a valid scalar), the original deserialised +hash will be inserted. This setting can slow down decoding considerably. + +When C<$coderef> is omitted or undefined, any existing callback will +be removed and C will not change the deserialised hash in any +way. + +Example, convert all JSON objects into the integer 5: + + my $js = JSON->new->filter_json_object (sub { 5 }); + # returns [5] + $js->decode ('[{}]'); # the given subroutine takes a hash reference. 
+ # throw an exception because allow_nonref is not enabled + # so a lone 5 is not allowed. + $js->decode ('{"a":1, "b":2}'); + + +=head2 filter_json_single_key_object + + $json = $json->filter_json_single_key_object($key [=> $coderef]) + +Works remotely similar to C, but is only called for +JSON objects having a single key named C<$key>. + +This C<$coderef> is called before the one specified via +C, if any. It gets passed the single value in the JSON +object. If it returns a single value, it will be inserted into the data +structure. If it returns nothing (not even C but the empty list), +the callback from C will be called next, as if no +single-key callback were specified. + +If C<$coderef> is omitted or undefined, the corresponding callback will be +disabled. There can only ever be one callback for a given key. + +As this callback gets called less often then the C +one, decoding speed will not usually suffer as much. Therefore, single-key +objects make excellent targets to serialise Perl objects into, especially +as single-key JSON objects are as close to the type-tagged value concept +as JSON gets (it's basically an ID/VALUE tuple). Of course, JSON does not +support this in any way, so you need to make sure your data never looks +like a serialised Perl hash. + +Typical names for the single object key are C<__class_whatever__>, or +C<$__dollars_are_rarely_used__$> or C<}ugly_brace_placement>, or even +things like C<__class_md5sum(classname)__>, to reduce the risk of clashing +with real hashes. 
+ +Example, decode JSON objects of the form C<< { "__widget__" => <id> } >> +into the corresponding C<< $WIDGET{<id>} >> object: + + # return whatever is in $WIDGET{5}: + JSON + ->new + ->filter_json_single_key_object (__widget__ => sub { + $WIDGET{ $_[0] } + }) + ->decode ('{"__widget__": 5}') + + # this can be used with a TO_JSON method in some "widget" class + # for serialisation to json: + sub WidgetBase::TO_JSON { + my ($self) = @_; + + unless ($self->{id}) { + $self->{id} = ..get..some..id..; + $WIDGET{$self->{id}} = $self; + } + + { __widget__ => $self->{id} } + } + + +=head2 shrink + + $json = $json->shrink([$enable]) + + $enabled = $json->get_shrink + +With JSON::XS, this flag resizes strings generated by either +C<encode> or C<decode> to their minimum size possible. This can save +memory when your JSON texts are either very very long or you have many +short strings. It will also try to downgrade any strings to octet-form +if possible: perl stores strings internally either in an encoding called +UTF-X or in octet-form. The latter cannot store everything but uses less +space in general (and some buggy Perl or C code might even rely on that +internal representation being used). + +With JSON::PP, it is noop about resizing strings but tries +C<utf8::downgrade> to the returned string by C<encode>. See to L. + +See to L and L. + +=head2 max_depth + + $json = $json->max_depth([$maximum_nesting_depth]) + + $max_depth = $json->get_max_depth + +Sets the maximum nesting level (default C<512>) accepted while encoding +or decoding. If a higher nesting level is detected in JSON text or a Perl +data structure, then the encoder and decoder will stop and croak at that +point. + +Nesting level is defined by number of hash- or arrayrefs that the encoder +needs to traverse to reach a given point or the number of C<{> or C<[> +characters without their matching closing parenthesis crossed to reach a +given character in a string. + +If no argument is given, the highest possible setting will be used, which +is rarely useful.
+ +Note that nesting is implemented by recursion in C. The default value has +been chosen to be as large as typical operating systems allow without +crashing. (JSON::XS) + +With JSON::PP as the backend, when a large value (100 or more) was set and +it de/encodes a deep nested object/text, it may raise a warning +'Deep recursion on subroutine' at the perl runtime phase. + +See L<JSON::XS/SECURITY CONSIDERATIONS> for more info on why this is useful. + +=head2 max_size + + $json = $json->max_size([$maximum_string_size]) + + $max_size = $json->get_max_size + +Set the maximum length a JSON text may have (in bytes) where decoding is +being attempted. The default is C<0>, meaning no limit. When C<decode> +is called on a string that is longer than this many bytes, it will not +attempt to decode the string but throw an exception. This setting has no +effect on C<encode> (yet). + +If no argument is given, the limit check will be deactivated (same as when +C<0> is specified). + +See L<JSON::XS/SECURITY CONSIDERATIONS>, below, for more info on why this is useful. + +=head2 encode + + $json_text = $json->encode($perl_scalar) + +Converts the given Perl data structure (a simple scalar or a reference +to a hash or array) to its JSON representation. Simple scalars will be +converted into JSON string or number sequences, while references to arrays +become JSON arrays and references to hashes become JSON objects. Undefined +Perl values (e.g. C<undef>) become JSON C<null> values. +References to the integers C<0> and C<1> are converted into C<false> and C<true>. + +=head2 decode + + $perl_scalar = $json->decode($json_text) + +The opposite of C<encode>: expects a JSON text and tries to parse it, +returning the resulting simple scalar or reference. Croaks on error. + +JSON numbers and strings become simple Perl scalars. JSON arrays become +Perl arrayrefs and JSON objects become Perl hashrefs. C<true> becomes +C<1> (C<JSON::true>), C<false> becomes C<0> (C<JSON::false>) and +C<null> becomes C<undef>. 
+ +=head2 decode_prefix + + ($perl_scalar, $characters) = $json->decode_prefix($json_text) + +This works like the C<decode> method, but instead of raising an exception +when there is trailing garbage after the first JSON object, it will +silently stop parsing there and return the number of characters consumed +so far. + + JSON->new->decode_prefix ("[1] the tail") + => ([1], 3) + +See to L<JSON::XS/OBJECT-ORIENTED INTERFACE> + +=head2 property + + $boolean = $json->property($property_name) + +Returns the boolean value of the given property. + +The available properties are C<ascii>, C<latin1>, C<utf8>, +C<indent>, C<space_before>, C<space_after>, C<relaxed>, C<canonical>, +C<allow_nonref>, C<allow_unknown>, C<allow_blessed>, C<convert_blessed>, +C<shrink>, C<max_depth> and C<max_size>. + + $boolean = $json->property('utf8'); + => 0 + $json->utf8; + $boolean = $json->property('utf8'); + => 1 + +Sets the property with a given boolean value. + + $json = $json->property($property_name => $boolean); + +With no argument, it returns all the above properties as a hash reference. + + $flag_hashref = $json->property(); + +=head1 INCREMENTAL PARSING + +Most of this section is copied and modified from L<JSON::XS>. + +In some cases, there is the need for incremental parsing of JSON texts. +This module does allow you to parse a JSON stream incrementally. +It does so by accumulating text until it has a full JSON object, which +it then can decode. This process is similar to using C<decode_prefix> +to see if a full JSON object is available, but is much more efficient +(and can be implemented with a minimum of method calls). + +The backend module will only attempt to parse the JSON text once it is sure it +has enough text to get a decisive result, using a very simple but +truly incremental parser. This means that it sometimes won't stop as +early as the full parser, for example, it doesn't detect parenthesis +mismatches. The only thing it guarantees is that it starts decoding as +soon as a syntactically valid JSON text has been seen. This means you need +to set resource limits (e.g. C<max_size>) to ensure the parser will stop +parsing in the presence of syntax errors. 
+ +The following methods implement this incremental parser. + +=head2 incr_parse + + $json->incr_parse( [$string] ) # void context + + $obj_or_undef = $json->incr_parse( [$string] ) # scalar context + + @obj_or_empty = $json->incr_parse( [$string] ) # list context + +This is the central parsing function. It can both append new text and +extract objects from the stream accumulated so far (both of these +functions are optional). + +If C<$string> is given, then this string is appended to the already +existing JSON fragment stored in the C<$json> object. + +After that, if the function is called in void context, it will simply +return without doing anything further. This can be used to add more text +in as many chunks as you want. + +If the method is called in scalar context, then it will try to extract +exactly I JSON object. If that is successful, it will return this +object, otherwise it will return C. If there is a parse error, +this method will croak just as C would do (one can then use +C to skip the errornous part). This is the most common way of +using the method. + +And finally, in list context, it will try to extract as many objects +from the stream as it can find and return them, or the empty list +otherwise. For this to work, there must be no separators between the JSON +objects or arrays, instead they must be concatenated back-to-back. If +an error occurs, an exception will be raised as in the scalar context +case. Note that in this case, any previously-parsed JSON texts will be +lost. + +Example: Parse some JSON arrays/objects in a given string and return them. + + my @objs = JSON->new->incr_parse ("[5][7][1,2]"); + +=head2 incr_text + + $lvalue_string = $json->incr_text + +This method returns the currently stored JSON fragment as an lvalue, that +is, you can manipulate it. This I works when a preceding call to +C in I successfully returned an object. Under +all other circumstances you must not call this function (I mean it. 
+although in simple tests it might actually work, it I<will> fail under +real world conditions). As a special exception, you can also call this +method before having parsed anything. + +This function is useful in two cases: a) finding the trailing text after a +JSON object or b) parsing multiple JSON objects separated by non-JSON text +(such as commas). + + $json->incr_text =~ s/\s*,\s*//; + +In Perl 5.005, the C<lvalue> attribute is not available. +You must write code like the below: + + $string = $json->incr_text; + $string =~ s/\s*,\s*//; + $json->incr_text( $string ); + +=head2 incr_skip + + $json->incr_skip + +This will reset the state of the incremental parser and will remove the +parsed text from the input buffer. This is useful after C<incr_parse> +died, in which case the input buffer and incremental parser state is left +unchanged, to skip the text parsed so far and to reset the parse state. + +=head2 incr_reset + + $json->incr_reset + +This completely resets the incremental parser, that is, after this call, +it will be as if the parser had never parsed anything. + +This is useful if you want to repeatedly parse JSON objects and want to +ignore any trailing data, which means you have to reset the parser after +each successful decode. + +See to L<JSON::XS/INCREMENTAL PARSING> for examples. + + +=head1 JSON::PP SUPPORT METHODS + +The below methods are JSON::PP own methods, so they are available when C<JSON> works +with JSON::PP (i.e. the created object is a JSON::PP object). +See to L<JSON::PP/JSON::PP OWN METHODS> in detail. + +If you use C<JSON> with additional C<-support_by_pp>, some methods +are available even with JSON::XS. See to L</USE PP FEATURES EVEN THOUGH XS BACKEND>. + + BEGIN { $ENV{PERL_JSON_BACKEND} = 'JSON::XS' } + + use JSON -support_by_pp; + + my $json = new JSON; + $json->allow_nonref->escape_slash->encode("/"); + + # functional interfaces too. + print to_json(["/"], {escape_slash => 1}); + print from_json('["foo"]', {utf8 => 1}); + +If you want C<-support_by_pp> but do not want any functions exported, +use C<-no_export>. 
+ + use JSON -support_by_pp, -no_export; + # functional interfaces are not exported. + +=head2 allow_singlequote + + $json = $json->allow_singlequote([$enable]) + +If C<$enable> is true (or missing), then C<decode> will accept +any JSON strings quoted by single quotations that are invalid JSON +format. + + $json->allow_singlequote->decode(q|{"foo":'bar'}|); + $json->allow_singlequote->decode(q|{'foo':"bar"}|); + $json->allow_singlequote->decode(q|{'foo':'bar'}|); + +Like the C<relaxed> option, this option may be used to parse +application-specific files written by humans. + +=head2 allow_barekey + + $json = $json->allow_barekey([$enable]) + +If C<$enable> is true (or missing), then C<decode> will accept +bare keys of JSON object that are invalid JSON format. + +Like the C<relaxed> option, this option may be used to parse +application-specific files written by humans. + + $json->allow_barekey->decode('{foo:"bar"}'); + +=head2 allow_bignum + + $json = $json->allow_bignum([$enable]) + +If C<$enable> is true (or missing), then C<decode> will convert +the big integer Perl cannot handle as integer into a L<Math::BigInt> +object and convert a floating number (any) into a L<Math::BigFloat>. + +On the contrary, C<encode> converts C<Math::BigInt> objects and C<Math::BigFloat> +objects into JSON numbers with C<allow_blessed> enabled. + + $json->allow_nonref->allow_blessed->allow_bignum; + $bigfloat = $json->decode('2.000000000000000000000000001'); + print $json->encode($bigfloat); + # => 2.000000000000000000000000001 + +See to L</MAPPING> about the conversion of JSON numbers. + +=head2 loose + + $json = $json->loose([$enable]) + +The unescaped [\x00-\x1f\x22\x2f\x5c] strings are invalid in JSON strings +and the module does not allow C<decode> to accept these (except for \x2f). +If C<$enable> is true (or missing), then C<decode> will accept these +unescaped strings. + + $json->loose->decode(qq|["abc + def"]|); + +See to L<JSON::PP/JSON::PP OWN METHODS>. + +=head2 escape_slash + + $json = $json->escape_slash([$enable]) + +According to JSON Grammar, I<slash> (U+002F) is escaped. But by default +JSON backend modules encode strings without escaping slash. 
+ +If C<$enable> is true (or missing), then C will escape slashes. + +=head2 indent_length + + $json = $json->indent_length($length) + +With JSON::XS, The indent space length is 3 and cannot be changed. +With JSON::PP, it sets the indent space length with the given $length. +The default is 3. The acceptable range is 0 to 15. + +=head2 sort_by + + $json = $json->sort_by($function_name) + $json = $json->sort_by($subroutine_ref) + +If $function_name or $subroutine_ref are set, its sort routine are used. + + $js = $pc->sort_by(sub { $JSON::PP::a cmp $JSON::PP::b })->encode($obj); + # is($js, q|{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6,"g":7,"h":8,"i":9}|); + + $js = $pc->sort_by('own_sort')->encode($obj); + # is($js, q|{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6,"g":7,"h":8,"i":9}|); + + sub JSON::PP::own_sort { $JSON::PP::a cmp $JSON::PP::b } + +As the sorting routine runs in the JSON::PP scope, the given +subroutine name and the special variables C<$a>, C<$b> will begin +with 'JSON::PP::'. + +If $integer is set, then the effect is same as C on. + +See to L. + +=head1 MAPPING + +This section is copied from JSON::XS and modified to C. +JSON::XS and JSON::PP mapping mechanisms are almost equivalent. + +See to L. + +=head2 JSON -> PERL + +=over 4 + +=item object + +A JSON object becomes a reference to a hash in Perl. No ordering of object +keys is preserved (JSON does not preserver object key ordering itself). + +=item array + +A JSON array becomes a reference to an array in Perl. + +=item string + +A JSON string becomes a string scalar in Perl - Unicode codepoints in JSON +are represented by the same codepoints in the Perl string, so no manual +decoding is necessary. + +=item number + +A JSON number becomes either an integer, numeric (floating point) or +string scalar in perl, depending on its range and any fractional parts. 
On +the Perl level, there is no difference between those as Perl handles all +the conversion details, but an integer may take slightly less memory and +might represent more values exactly than floating point numbers. + +If the number consists of digits only, C<JSON> will try to represent +it as an integer value. If that fails, it will try to represent it as +a numeric (floating point) value if that is possible without loss of +precision. Otherwise it will preserve the number as a string value (in +which case you lose roundtripping ability, as the JSON number will be +re-encoded to a JSON string). + +Numbers containing a fractional or exponential part will always be +represented as numeric (floating point) values, possibly at a loss of +precision (in which case you might lose perfect roundtripping ability, but +the JSON number will still be re-encoded as a JSON number). + +Note that precision is not accuracy - binary floating point values cannot +represent most decimal fractions exactly, and when converting from and to +floating point, C<JSON> only guarantees precision up to but not including +the least significant bit. + +If the backend is JSON::PP and C<allow_bignum> is enabled, the big integers +and the numeric can be optionally converted into L<Math::BigInt> and +L<Math::BigFloat> objects. + +=item true, false + +These JSON atoms become C<JSON::true> and C<JSON::false>, +respectively. They are overloaded to act almost exactly like the numbers +C<1> and C<0>. You can check whether a scalar is a JSON boolean by using +the C<JSON::is_bool> function. + +If C<JSON::true> and C<JSON::false> are used as strings or compared as strings, +they represent as C<true> and C<false> respectively. + + print JSON::true . "\n"; + => true + print JSON::true + 1; + => 2 + + ok(JSON::true eq 'true'); + ok(JSON::true eq '1'); + ok(JSON::true == 1); + +C<JSON> will install these missing overloading features to the backend modules. + + +=item null + +A JSON null atom becomes C<undef> in Perl. + +C<JSON::null> returns C<undef>. 
+ +=back + + +=head2 PERL -> JSON + +The mapping from Perl to JSON is slightly more difficult, as Perl is a +truly typeless language, so we can only guess which JSON type is meant by +a Perl value. + +=over 4 + +=item hash references + +Perl hash references become JSON objects. As there is no inherent ordering +in hash keys (or JSON objects), they will usually be encoded in a +pseudo-random order that can change between runs of the same program but +stays generally the same within a single run of a program. C +optionally sort the hash keys (determined by the I flag), so +the same datastructure will serialise to the same JSON text (given same +settings and version of JSON::XS), but this incurs a runtime overhead +and is only rarely useful, e.g. when you want to compare some JSON text +against another for equality. + +In future, the ordered object feature will be added to JSON::PP using C mechanism. + + +=item array references + +Perl array references become JSON arrays. + +=item other references + +Other unblessed references are generally not allowed and will cause an +exception to be thrown, except for references to the integers C<0> and +C<1>, which get turned into C and C atoms in JSON. You can +also use C and C to improve readability. + + to_json [\0,JSON::true] # yields [false,true] + +=item JSON::true, JSON::false, JSON::null + +These special values become JSON true and JSON false values, +respectively. You can also use C<\1> and C<\0> directly if you want. + +JSON::null returns C. + +=item blessed objects + +Blessed objects are not directly representable in JSON. See the +C and C methods on various options on +how to deal with this: basically, you can choose between throwing an +exception, encoding the reference as if it weren't blessed, or provide +your own serialiser method. + +With C mode, C converts blessed +hash references or blessed array references (contains other blessed references) +into JSON members and arrays. 
+ + use JSON -convert_blessed_universally; + JSON->new->allow_blessed->convert_blessed->encode( $blessed_object ); + +See to L. + +=item simple scalars + +Simple Perl scalars (any scalar that is not a reference) are the most +difficult objects to encode: JSON::XS and JSON::PP will encode undefined scalars as +JSON C values, scalars that have last been used in a string context +before encoding as JSON strings, and anything else as number value: + + # dump as number + encode_json [2] # yields [2] + encode_json [-3.0e17] # yields [-3e+17] + my $value = 5; encode_json [$value] # yields [5] + + # used as string, so dump as string + print $value; + encode_json [$value] # yields ["5"] + + # undef becomes null + encode_json [undef] # yields [null] + +You can force the type to be a string by stringifying it: + + my $x = 3.1; # some variable containing a number + "$x"; # stringified + $x .= ""; # another, more awkward way to stringify + print $x; # perl does it for you, too, quite often + +You can force the type to be a number by numifying it: + + my $x = "3"; # some variable containing a string + $x += 0; # numify it, ensuring it will be dumped as a number + $x *= 1; # same thing, the choise is yours. + +You can not currently force the type in other, less obscure, ways. + +Note that numerical precision has the same meaning as under Perl (so +binary to decimal conversion follows the same rules as in Perl, which +can differ to other languages). Also, your perl interpreter might expose +extensions to the floating point numbers of your platform, such as +infinities or NaN's - these cannot be represented in JSON, and it is an +error to pass those in. + +=item Big Number + +If the backend is JSON::PP and C is enable, +C converts C objects and C +objects into JSON numbers. + + +=back + +=head1 JSON and ECMAscript + +See to L. + +=head1 JSON and YAML + +JSON is not a subset of YAML. +See to L. + + +=head1 BACKEND MODULE DECISION + +When you use C, C tries to C JSON::XS. 
If this call failed, it will +C<use> JSON::PP. The required JSON::XS version is I<2.2> or later. + +The C<new> constructor method returns an object inherited from the backend module, +and JSON::XS object is a blessed scalar reference while JSON::PP is a blessed hash +reference. + +So, your program should not depend on the backend module, especially +returned objects should not be modified. + + my $json = JSON->new; # XS or PP? + $json->{stash} = 'this is xs object'; # this code may raise an error! + +To check the backend module, there are some methods - C<backend>, C<is_xs> and C<is_pp>. + + JSON->backend; # 'JSON::XS' or 'JSON::PP' + + JSON->backend->is_pp; # 0 or 1 + + JSON->backend->is_xs; # 1 or 0 + + $json->is_xs; # 1 or 0 + + $json->is_pp; # 0 or 1 + + +If you set an environment variable C<PERL_JSON_BACKEND>, the calling action will be changed. + +=over + +=item PERL_JSON_BACKEND = 0 or PERL_JSON_BACKEND = 'JSON::PP' + +Always use JSON::PP + +=item PERL_JSON_BACKEND == 1 or PERL_JSON_BACKEND = 'JSON::XS,JSON::PP' + +(The default) Use compiled JSON::XS if it is properly compiled & installed, +otherwise use JSON::PP. + +=item PERL_JSON_BACKEND == 2 or PERL_JSON_BACKEND = 'JSON::XS' + +Always use compiled JSON::XS, die if it isn't properly compiled & installed. + +=item PERL_JSON_BACKEND = 'JSON::backportPP' + +Always use JSON::backportPP. +JSON::backportPP is a back port of the JSON::PP module. +C<JSON> includes JSON::backportPP instead of JSON::PP. + +=back + +These ideas come from the L<DBI::PurePerl> mechanism. + +example: + + BEGIN { $ENV{PERL_JSON_BACKEND} = 'JSON::PP' } + use JSON; # always uses JSON::PP + +In future, it may be able to specify another module. + +=head1 USE PP FEATURES EVEN THOUGH XS BACKEND + +Many methods are available with either JSON::XS or JSON::PP and +when the backend module is JSON::XS, if any JSON::PP specific (i.e. JSON::XS unsupported) +method is called, it will C<warn> and be noop. + +But if you C<use> C<JSON> passing the optional string C<-support_by_pp>, +it makes a part of those unsupported methods available. 
+This feature is achieved by using JSON::PP in C. + + BEGIN { $ENV{PERL_JSON_BACKEND} = 2 } # with JSON::XS + use JSON -support_by_pp; + my $json = new JSON; + $json->allow_nonref->escape_slash->encode("/"); + +At this time, the returned object is a C +object (re-blessed XS object), and by checking JSON::XS unsupported flags +in de/encoding, can support some unsupported methods - C, C, +C, C, C and C. + +When any unsupported methods are not enable, C will be +used as is. The switch is achieved by changing the symbolic tables. + +C<-support_by_pp> is effective only when the backend module is JSON::XS +and it makes the de/encoding speed down a bit. + +See to L. + +=head1 INCOMPATIBLE CHANGES TO OLD VERSION + +There are big incompatibility between new version (2.00) and old (1.xx). +If you use old C 1.xx in your code, please check it. + +See to L + +=over + +=item jsonToObj and objToJson are obsoleted. + +Non Perl-style name C and C are obsoleted +(but not yet deleted from the source). +If you use these functions in your code, please replace them +with C and C. + + +=item Global variables are no longer available. + +C class variables - C<$JSON::AUTOCONVERT>, C<$JSON::BareKey>, etc... +- are not available any longer. +Instead, various features can be used through object methods. + + +=item Package JSON::Converter and JSON::Parser are deleted. + +Now C bundles with JSON::PP which can handle JSON more properly than them. + +=item Package JSON::NotString is deleted. + +There was C class which represents JSON value C, C, C +and numbers. It was deleted and replaced by C. + +C represents C and C. + +C does not represent C. + +C returns C. + +C makes L and L is-a relation +to L. + +=item function JSON::Number is obsoleted. + +C is now needless because JSON::XS and JSON::PP have +round-trip integrity. + +=item JSONRPC modules are deleted. + +Perl implementation of JSON-RPC protocol - C, C +and C are deleted in this distribution. 
+Instead of them, there is L which supports JSON-RPC protocol version 1.1. + +=back + +=head2 Transition ways from 1.xx to 2.xx. + +You should set C mode firstly, because +it is always successful for the below codes even with JSON::XS. + + use JSON -support_by_pp; + +=over + +=item Exported jsonToObj (simple) + + from_json($json_text); + +=item Exported objToJson (simple) + + to_json($perl_scalar); + +=item Exported jsonToObj (advanced) + + $flags = {allow_barekey => 1, allow_singlequote => 1}; + from_json($json_text, $flags); + +equivalent to: + + $JSON::BareKey = 1; + $JSON::QuotApos = 1; + jsonToObj($json_text); + +=item Exported objToJson (advanced) + + $flags = {allow_blessed => 1, allow_barekey => 1}; + to_json($perl_scalar, $flags); + +equivalent to: + + $JSON::BareKey = 1; + objToJson($perl_scalar); + +=item jsonToObj as object method + + $json->decode($json_text); + +=item objToJson as object method + + $json->encode($perl_scalar); + +=item new method with parameters + +The C method in 2.x takes any parameters no longer. +You can set parameters instead; + + $json = JSON->new->pretty; + +=item $JSON::Pretty, $JSON::Indent, $JSON::Delimiter + +If C is enable, that means C<$JSON::Pretty> flag set. And +C<$JSON::Delimiter> was substituted by C and C. +In conclusion: + + $json->indent->space_before->space_after; + +Equivalent to: + + $json->pretty; + +To change indent length, use C. + +(Only with JSON::PP, if C<-support_by_pp> is not used.) + + $json->pretty->indent_length(2)->encode($perl_scalar); + +=item $JSON::BareKey + +(Only with JSON::PP, if C<-support_by_pp> is not used.) + + $json->allow_barekey->decode($json_text) + +=item $JSON::ConvBlessed + +use C<-convert_blessed_universally>. See to L. + +=item $JSON::QuotApos + +(Only with JSON::PP, if C<-support_by_pp> is not used.) + + $json->allow_singlequote->decode($json_text) + +=item $JSON::SingleQuote + +Disable. C does not make such a invalid JSON string any longer. 
+ +=item $JSON::KeySort + + $json->canonical->encode($perl_scalar) + +This is the ascii sort. + +If you want to use with your own sort routine, check the C method. + +(Only with JSON::PP, even if C<-support_by_pp> is used currently.) + + $json->sort_by($sort_routine_ref)->encode($perl_scalar) + + $json->sort_by(sub { $JSON::PP::a <=> $JSON::PP::b })->encode($perl_scalar) + +Can't access C<$a> and C<$b> but C<$JSON::PP::a> and C<$JSON::PP::b>. + +=item $JSON::SkipInvalid + + $json->allow_unknown + +=item $JSON::AUTOCONVERT + +Needless. C backend modules have the round-trip integrity. + +=item $JSON::UTF8 + +Needless because C (JSON::XS/JSON::PP) sets +the UTF8 flag on properly. + + # With UTF8-flagged strings + + $json->allow_nonref; + $str = chr(1000); # UTF8-flagged + + $json_text = $json->utf8(0)->encode($str); + utf8::is_utf8($json_text); + # true + $json_text = $json->utf8(1)->encode($str); + utf8::is_utf8($json_text); + # false + + $str = '"' . chr(1000) . '"'; # UTF8-flagged + + $perl_scalar = $json->utf8(0)->decode($str); + utf8::is_utf8($perl_scalar); + # true + $perl_scalar = $json->utf8(1)->decode($str); + # died because of 'Wide character in subroutine' + +See to L. + +=item $JSON::UnMapping + +Disable. See to L. + +=item $JSON::SelfConvert + +This option was deleted. +Instead of it, if a givien blessed object has the C method, +C will be executed with C. + + $json->convert_blessed->encode($bleesed_hashref_or_arrayref) + # if need, call allow_blessed + +Note that it was C in old version, but now not C but C. + +=back + +=head1 TODO + +=over + +=item example programs + +=back + +=head1 THREADS + +No test with JSON::PP. If with JSON::XS, See to L. + + +=head1 BUGS + +Please report bugs relevant to C to Emakamaka[at]cpan.orgE. + + +=head1 SEE ALSO + +Most of the document is copied and modified from JSON::XS doc. 
+ +L, L + +C(L) + +=head1 AUTHOR + +Makamaka Hannyaharamitu, Emakamaka[at]cpan.orgE + +JSON::XS was written by Marc Lehmann + +The relese of this new version owes to the courtesy of Marc Lehmann. + + +=head1 COPYRIGHT AND LICENSE + +Copyright 2005-2011 by Makamaka Hannyaharamitu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + diff --git a/tools/Makefile b/tools/Makefile new file mode 100755 index 0000000..74fcfe6 --- /dev/null +++ b/tools/Makefile @@ -0,0 +1,129 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright 2019 Western Digital Corporation or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Check for RV_ROOT +ifeq (,$(wildcard ${RV_ROOT}/configs/swerv.config)) +$(error env var RV_ROOT does not point to a valid dir! Exiting!) 
+endif
+
+# Allow snapshot override
+ifeq ($(strip $(snapshot)),)
+  snapshot = default
+endif
+
+# Allow tool override
+SWERV_CONFIG = ${RV_ROOT}/configs/swerv.config
+IRUN = irun
+VCS = vcs
+VERILATOR = verilator
+GCC_PREFIX = riscv64-unknown-elf
+
+# Define test name
+ifeq ($(strip $(ASM_TEST)),)
+  ASM_TEST = hello_world2
+endif
+
+# Define test directory
+ifeq ($(strip $(ASM_TEST_DIR)),)
+  ASM_TEST_DIR = ${RV_ROOT}/testbench/asm
+endif
+
+defines = ${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh ${RV_ROOT}/design/include/build.h ${RV_ROOT}/design/include/global.h ${RV_ROOT}/design/include/swerv_types.sv
+includes = -I${RV_ROOT}/design/include -I${RV_ROOT}/design/lib -I${RV_ROOT}/design/dmi -I${RV_ROOT}/configs/snapshots/$(snapshot)
+
+# CFLAGS for verilator generated Makefiles. Without -std=c++11 it complains for `auto` variables
+CFLAGS += "-std=c++11"
+# Optimization for better performance; alternative is nothing for slower runtime (faster compiles)
+# -O2 for faster runtime (slower compiles), or -O for balance.
+VERILATOR_MAKE_FLAGS = OPT_FAST=""
+
+# Targets
+all: clean verilator
+
+clean:
+	rm -rf obj_dir *.hex build ${RV_ROOT}/configs/snapshots/$(snapshot)
+
+verilator: ${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh
+	echo '`undef ASSERT_ON' >> ${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh
+	$(VERILATOR) '-UASSERT_ON' --cc -CFLAGS ${CFLAGS} $(defines) $(includes) ${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh \
+	    -f ${RV_ROOT}/testbench/flist.verilator --top-module swerv_wrapper
+	$(MAKE) -C obj_dir/ -f Vswerv_wrapper.mk $(VERILATOR_MAKE_FLAGS)
+
+vcs: ${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh
+	$(VCS) -full64 -assert svaext -sverilog +define+RV_OPENSOURCE +error+500 +incdir+${RV_ROOT}/design/lib +incdir+${RV_ROOT}/design/include \
+	    ${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh \
+	    +incdir+${RV_ROOT}/design/dmi +incdir+${RV_ROOT}/configs/snapshots/$(snapshot) +libext+.v ${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh \
+	    $(defines) -f ${RV_ROOT}/testbench/flist.vcs -l vcs.log
+
+irun: ${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh
+	$(IRUN) -64bit -elaborate -ida -access +rw -q -sv -sysv -nowarn CUVIHR -nclibdirpath ${PWD} -nclibdirname swerv.build \
+	    -incdir ${RV_ROOT}/design/lib -incdir ${RV_ROOT}/design/include -incdir ${RV_ROOT}/design/dmi -vlog_ext +.vh+.h \
+	    $(defines) -incdir ${RV_ROOT}/configs/snapshots/$(snapshot) -f ${RV_ROOT}/testbench/flist.vcs -elaborate -snapshot default
+
+${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh:
+	$(SWERV_CONFIG) -snapshot=$(snapshot)
+
+verilator-run: program.hex
+# NOTE: a shell-level "snapshot=ahb_lite" cannot change the make variable (every recipe line gets its own shell); the snapshot named by $(snapshot) is regenerated with -ahb_lite.
+	$(SWERV_CONFIG) -snapshot=$(snapshot) -ahb_lite
+	echo '`undef ASSERT_ON' >> ${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh
+	$(VERILATOR) '-UASSERT_ON' --cc -CFLAGS ${CFLAGS} $(defines) $(includes) ${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh \
+	    ${RV_ROOT}/testbench/tb_top.sv -I${RV_ROOT}/testbench \
+	    -f ${RV_ROOT}/testbench/flist.verilator --top-module tb_top -exe test_tb_top.cpp --trace --autoflush
+	cp ${RV_ROOT}/testbench/test_tb_top.cpp obj_dir/
+	$(MAKE) -C obj_dir/ -f Vtb_top.mk $(VERILATOR_MAKE_FLAGS)
+	./obj_dir/Vtb_top
+
+irun-run: program.hex
+# NOTE: a shell-level "snapshot=ahb_lite" cannot change the make variable (every recipe line gets its own shell); the snapshot named by $(snapshot) is regenerated with -ahb_lite.
+	$(SWERV_CONFIG) -snapshot=$(snapshot) -ahb_lite
+	$(IRUN) -64bit -ida -access +rw -q -sv -sysv -nowarn CUVIHR -nclibdirpath ${PWD} -nclibdirname swerv.build \
+	    -incdir ${RV_ROOT}/design/lib -incdir ${RV_ROOT}/design/include -incdir ${RV_ROOT}/design/dmi -vlog_ext +.vh+.h \
+	    $(defines) -top tb_top ${RV_ROOT}/testbench/tb_top.sv -I${RV_ROOT}/testbench ${RV_ROOT}/testbench/ahb_sif.sv \
+	    -incdir ${RV_ROOT}/configs/snapshots/$(snapshot) -f ${RV_ROOT}/testbench/flist.vcs -snapshot default
+
+vcs-run: program.hex
+# NOTE: a shell-level "snapshot=ahb_lite" cannot change the make variable (every recipe line gets its own shell); the snapshot named by $(snapshot) is regenerated with -ahb_lite.
+	$(SWERV_CONFIG) -snapshot=$(snapshot) -ahb_lite
+	cp ${RV_ROOT}/testbench/hex/*.hex .
+	$(VCS) -full64 -assert svaext -sverilog +define+RV_OPENSOURCE +error+500 +incdir+${RV_ROOT}/design/lib +incdir+${RV_ROOT}/design/include \
+	    ${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh \
+	    +incdir+${RV_ROOT}/design/dmi +incdir+${RV_ROOT}/configs/snapshots/$(snapshot) +libext+.v \
+	    $(defines) -f ${RV_ROOT}/testbench/flist.vcs ${RV_ROOT}/testbench/tb_top.sv -I${RV_ROOT}/testbench ${RV_ROOT}/testbench/ahb_sif.sv -l vcs.log
+	./simv
+
+program.hex: $(ASM_TEST_DIR)/$(ASM_TEST).s ${RV_ROOT}/configs/snapshots/$(snapshot)/common_defines.vh
+	@echo Building $(ASM_TEST)
+ifeq ($(shell which $(GCC_PREFIX)-as),)
+	@echo " !!! No $(GCC_PREFIX)-as in path, using canned hex files !!"
+	cp ${RV_ROOT}/testbench/hex/*.hex .
+else
+	cp $(ASM_TEST_DIR)/$(ASM_TEST).s .
+	$(GCC_PREFIX)-cpp -I${RV_ROOT}/configs/snapshots/$(snapshot) $(ASM_TEST).s > $(ASM_TEST).cpp.s
+	$(GCC_PREFIX)-as -march=rv32imc $(ASM_TEST).cpp.s -o $(ASM_TEST).o
+	$(GCC_PREFIX)-ld -m elf32lriscv --discard-none -T${RV_ROOT}/testbench/link.ld -o $(ASM_TEST).exe $(ASM_TEST).o
+	$(GCC_PREFIX)-objcopy -O verilog --only-section ".data*" --only-section ".rodata*" $(ASM_TEST).exe data.hex
+	$(GCC_PREFIX)-objcopy -O verilog --only-section ".text*" --set-start=0x0 $(ASM_TEST).exe program.hex
+	$(GCC_PREFIX)-objdump -dS $(ASM_TEST).exe > $(ASM_TEST).dis
+	$(GCC_PREFIX)-nm -f posix -C $(ASM_TEST).exe > $(ASM_TEST).tbl
+	@echo Completed building $(ASM_TEST)
+endif
+
+help:
+	@echo Make sure the environment variable RV_ROOT is set.
+	@echo Possible targets: verilator vcs irun help clean all verilator-run irun-run vcs-run program.hex
+
+.PHONY: all help clean verilator vcs irun verilator-run irun-run vcs-run
diff --git a/tools/addassign b/tools/addassign
new file mode 100755
index 0000000..f484e7c
--- /dev/null
+++ b/tools/addassign
@@ -0,0 +1,66 @@
+#!/usr/bin/perl
+#
+# addassign - turn an equation list into SystemVerilog declarations and assigns.
+#
+# Input comes from the file named by -in, or from standard input when -in is
+# not given.  Two passes are made over it:
+#   1. every "<lhs> = ..." line yields a "logic <lhs>;" declaration
+#      (whitespace removed from <lhs>);
+#   2. every line containing '=' is re-emitted as "assign <prefix><line>";
+#      an empty right-hand side ("x = ;" or "x = ();") is tied off to 1'b0,
+#      and lines without '=' are passed through unchanged.
+# Lines containing '#' are skipped in both passes.
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $helpusage = "usage: addassign [-in=<file>] [-prefix=<string>]\n";
+
+my $in;
+my $prefix = '';    # an omitted -prefix means "no prefix", not undef
+
+GetOptions ('in=s'     => \$in,
+            'prefix=s' => \$prefix) || die($helpusage);
+
+# Read the whole input up front; both passes below walk the same lines.
+my @in;
+if (defined($in) && $in ne '') {
+    open(my $fh, '<', $in) || die("addassign: cannot open '$in': $!\n");
+    @in = <$fh>;
+    close($fh);
+}
+else {
+    @in = <STDIN>;
+}
+
+# Pass 1: declare every left-hand-side signal.
+foreach my $line (@in) {
+
+    next if ($line =~ /\#/);
+
+    if ($line =~ /([^=]+)=/) {
+        my $sig = $1;
+        $sig =~ s/\s+//g;
+        # print, not printf: the text is data and must not be read as a format.
+        print("logic $sig;\n");
+    }
+}
+
+# Pass 2: emit the assigns.
+foreach my $line (@in) {
+
+    next if ($line =~ /\#/);
+
+    # Empty right-hand side, written either as "x = ;" or "x = ();".
+    if ($line =~ /([^=]+)=\s*;/ || $line =~ /([^=]+)=\s*\(\s*\);/) {
+        print("assign ${prefix}$1 = 1'b0;\n");
+        next;
+    }
+
+    if ($line =~ /=/) { print("assign ${prefix}$line"); }
+    else              { print($line); }
+}
+
+exit;
+
diff --git a/tools/coredecode b/tools/coredecode
new file mode 100755
index 0000000..b621e60
--- /dev/null
+++ b/tools/coredecode
@@ -0,0 +1,198 @@
+#!/usr/bin/perl
+
+use Getopt::Long;
+
+$helpusage = "placeholder";
+
+GetOptions ('legal' => \$legal,
+ 'in=s' => \$in,
+ 'out=s' =>
\$out, + 'view=s' => \$view ) || die("$helpusage"); + + +if (!defined($in)) { die("must define -in=input"); } +if (!defined($out)) { $out="${in}.out"; } + +if ($in eq "decode") { $view="rv32i"; } +elsif ($in eq "cdecode") { $view="rv32c"; } +elsif ($in eq "csrdecode") { $view="csr"; } + +if (defined($in)) { printf("in=$in\n"); } +if (defined($out)) { printf("out=$out\n"); } +if (defined($view)) { printf("view=$view\n"); } + +@in=`cat $in`; + +$gather=0; + +$TIMEOUT=50; + +foreach $line (@in) { + + #printf("$pstate: $line"); + + if ($line=~/^\s*\#/) { #printf("skip $line"); + next; } + + if ($gather==1) { + if ($line=~/(\S+)/) { + if ($line=~/}/) { $gather=0; $position=0; next; } + $label=$1; + $label=~s/,//g; + if ($pstate==2) { + if (defined($INPUT{$CVIEW}{$label})) { die("input $label already defined"); } + $INPUT{$CVIEW}{$label}=$position++; + $INPUTLEN{$CVIEW}++; + $INPUTSTR{$CVIEW}.=" $label"; + } + elsif ($pstate==3) { + if (defined($OUTPUT{$CVIEW}{$label})) { die("output $label already defined"); } + $OUTPUT{$CVIEW}{$label}=$position++; + $OUTPUTLEN{$CVIEW}++; + $OUTPUTSTR{$CVIEW}.=" $label"; + } + else { die("unknown pstate $pstate in gather"); } + } + } + + if ($line=~/^.definition/) { + $pstate=1; next; + } + if ($pstate==1) { # definition + if ($line!~/^.output/) { + if ($line=~/(\S+)\s*=\s*(\S+)/) { + $key=$1; $value=$2; + $value=~s/\./-/g; + $value=~s/\[//g; + $value=~s/\]//g; + $DEFINITION{$key}=$value; + } + } + else { $pstate=2; next; } + } + + if ($line=~/^.input/) { + $pstate=2; next; + } + + if ($pstate==2) { # input + if ($line=~/(\S+)\s*=\s*\{/) { + $CVIEW=$1; $gather=1; next; + } + } + + if ($line=~/^.output/) { + $pstate=3; next; + } + + if ($pstate==3) { # output + if ($line=~/(\S+)\s*=\s*\{/) { + $CVIEW=$1; $gather=1; next; + } + } + + if ($line=~/^.decode/) { + $pstate=4; next; + } + + if ($pstate==4) { # decode + if ($line=~/([^\[]+)\[([^\]]+)\]\s+=\s+\{([^\}]+)\}/) { + $dview=$1; $inst=$2; $body=$3; + $dview=~s/\s+//g; + $inst=~s/\s+//g; 
+ #printf("$dview $inst $body\n"); + if ($inst=~/([^\{]+)\{([^-]+)-([^\}]+)\}/) { + $base=$1; $lo=$2; $hi=$3; + $hi++; + for ($i=0; $i<$TIMEOUT && $lo ne $hi; $i++) { + #printf("decode $dview $base$lo\n"); + + $expand=$base.$lo; + if (!defined($DEFINITION{$expand})) { die("could not find instruction definition for inst $expand"); } + + $DECODE{$dview}{$expand}=$body; + $lo++; + } + if ($i == $TIMEOUT) { die("timeout in decode expansion"); } + + } + else { + if (!defined($DEFINITION{$inst})) { die("could not find instruction definition for inst $inst"); } + $DECODE{$dview}{$inst}=$body; + } + } + } + +} + + +#printf("view $view len %d\n",$OUTPUTLEN{$view}); + +#printf("$OUTPUTSTR{$view}\n"); + + +# need to switch this somehow based on 16/32 +printf(".i %d\n",$INPUTLEN{$view}); + +if (defined($legal)) { + printf(".o 1\n"); +} +else { + printf(".o %d\n",$OUTPUTLEN{$view}); +} + +printf(".ilb %s\n",$INPUTSTR{$view}); + +if (defined($legal)) { + printf(".ob legal\n"); +} +else { + printf(".ob %s\n",$OUTPUTSTR{$view}); +} + +if (defined($legal)) { + printf(".type fd\n"); +} +else { + printf(".type fr\n"); +} + +$DEFAULT_TEMPLATE='0'x$OUTPUTLEN{$view}; + +foreach $inst (sort keys %{ $DECODE{$view} }) { + + $body=$DECODE{$view}{$inst}; + @sigs=split(' ',$body); + + $template=$DEFAULT_TEMPLATE; + foreach $sig (@sigs) { + if (!defined($OUTPUT{$view}{$sig})) { die("could not find output definition for sig $sig in view $view"); } + $position=$OUTPUT{$view}{$sig}; + substr($template,$position,1,1); + } + +# if (!defined($DEFINITION{$inst})) { die("could not find instruction defintion for inst $inst"); } + + printf("# $inst\n"); + if (defined($legal)) { + printf("$DEFINITION{$inst} 1\n"); + } + else { + printf("$DEFINITION{$inst} $template\n"); + } + +} + + +exit; + +foreach $inst (sort keys %DEFINITION) { + $value=$DEFINITION{$inst}; + printf("%-10s = $value\n",$inst); +} + + +foreach $sig (sort keys %{ $OUTPUT{$view} }) { + $position=$OUTPUT{$view}{$sig}; + printf("$sig 
$position\n"); +} diff --git a/tools/picmap b/tools/picmap new file mode 100755 index 0000000..83485b4 --- /dev/null +++ b/tools/picmap @@ -0,0 +1,59 @@ +#!/usr/bin/perl + +use Getopt::Long; + +use integer; + +$helpusage = "placeholder"; + +GetOptions ('total_int=s' => \$total_int)|| die("$helpusage"); + +$LEN=15; + +#printf("logic [2:0] mask;\n"); + +printf("// mask[3:0] = { 4'b1000 - 30b mask,4'b0100 - 31b mask, 4'b0010 - 28b mask, 4'b0001 - 32b mask }\n"); +printf("always_comb begin\n"); +printf(" case \(address[14:0]\)\n"); +printf(" 15'b011000000000000 : mask[3:0] = 4'b0100;\n"); +for ($i=1; $i<=$total_int; $i++) { + $j=hex("4000"); + printf(" 15'b%s : mask[3:0] = 4'b1000;\n",d2b($j+$i*4)); +} +for ($i=1; $i<=$total_int; $i++) { + $j=hex("2000"); + printf(" 15'b%s : mask[3:0] = 4'b0100;\n",d2b($j+$i*4)); +} +for ($i=1; $i<=$total_int; $i++) { + $j=hex("0"); + printf(" 15'b%s : mask[3:0] = 4'b0010;\n",d2b($j+$i*4)); +} + printf(" %-17s : mask[3:0] = 4'b0001;\n","default"); +printf(" endcase\n"); +printf("end\n"); + + +sub b2d { + my ($v) = @_; + + $v = oct("0b" . 
$v); + + return($v); +} + +sub d2b { + my ($v) = @_; + + my $repeat; + + $v = sprintf "%b",$v; + if (length($v)<$LEN) { + $repeat=$LEN-length($v); + $v="0"x$repeat.$v; + } + elsif (length($v)>$LEN) { + $v=substr($v,length($v)-$LEN,$LEN); + } + + return($v); +} diff --git a/tools/smalldiv b/tools/smalldiv new file mode 100755 index 0000000..3484100 --- /dev/null +++ b/tools/smalldiv @@ -0,0 +1,121 @@ +#!/usr/bin/perl + +use Getopt::Long; + +use integer; + +$helpusage = "placeholder"; + +GetOptions ('len=s' => \$len, + 'num=s' => \$num, + 'den=s' => \$den, + 'skip' => \$skip) || die("$helpusage"); + +if (!defined($len)) { $len=8; } +$LEN=$len; + +$n=d2b($num); # numerator - quotient +$m=d2b($den); # denominator - divisor + + +printf(".i 8\n"); +printf(".o 4\n"); +printf(".ilb q_ff[3] q_ff[2] q_ff[1] q_ff[0] m_ff[3] m_ff[2] m_ff[1] m_ff[0]\n"); +printf(".ob smallnum[3] smallnum[2] smallnum[1] smallnum[0]\n"); +printf(".type fr\n"); +for ($q=0; $q<16; $q++) { + for ($m=0; $m<16; $m++) { + if ($m==0) { next; } + $result=int($q/$m); + printf("%s %s %s\n",d2bl($q,4),d2bl($m,4),d2bl($result,4)); + } +} + +exit; + +#$LEN=length($n); + +$a="0"x$LEN; +$q=$n; + +#printf("n=%s, m=%s\n",$n,$m); +#printf("a=%s, q=%s\n",$a,$q); + +for ($i=1; $i<=$LEN; $i++) { + + #printf("iteration $n:\n"); + + printf("$i: a=%s q=%s\n",$a,$q); + + + $signa = substr($a,0,1); + + + $a = substr($a.$q,1,$LEN); # new a with q shifted in + + if ($signa==0) { $a=b2d($a)-b2d($m); } + else { $a=b2d($a)+b2d($m); } + + $a=d2b($a); + + + $signa = substr($a,0,1); + if ($signa==0) { $q=substr($q,1,$LEN-1)."1"; } + else { $q=substr($q,1,$LEN-1)."0"; } + +} + + +#printf("a=$a\n"); +$signa = substr($a,0,1); +if ($signa==1 && !defined($skip)) { + printf("correction:\n"); + $a=b2d($a)+b2d($m); + $a=d2b($a); +} +#printf("a=$a\n"); +printf("%d / %d = %d R %d ",b2d($n),b2d($m),b2d($q),b2d($a)); +if ($a eq $n) { printf("-> remainder equal numerator\n"); } +else { printf("\n"); } + +sub b2d { + my ($v) = @_; + + $v = 
oct("0b" . $v); + + return($v); +} + +sub d2b { + my ($v) = @_; + + my $repeat; + + $v = sprintf "%b",$v; + if (length($v)<$LEN) { + $repeat=$LEN-length($v); + $v="0"x$repeat.$v; + } + elsif (length($v)>$LEN) { + $v=substr($v,length($v)-$LEN,$LEN); + } + + return($v); +} + +sub d2bl { + my ($v,$LEN) = @_; + + my $repeat; + + $v = sprintf "%b",$v; + if (length($v)<$LEN) { + $repeat=$LEN-length($v); + $v="0"x$repeat.$v; + } + elsif (length($v)>$LEN) { + $v=substr($v,length($v)-$LEN,$LEN); + } + + return($v); +} diff --git a/tools/unrollforverilator b/tools/unrollforverilator new file mode 100755 index 0000000..a562ec3 --- /dev/null +++ b/tools/unrollforverilator @@ -0,0 +1,169 @@ +#!/usr/bin/perl +#use strict; +#use warnings; + +my $RV_ROOT = $ENV{RV_ROOT}; + +my $TOTAL_INT=$ARGV[0]; +print "// argv=".$ARGV[0]."\n"; +my $NUM_LEVELS; +if($TOTAL_INT==2){$NUM_LEVELS=1;} +elsif ($TOTAL_INT==4){$NUM_LEVELS=2;} +elsif ($TOTAL_INT==8){$NUM_LEVELS=3;} +elsif ($TOTAL_INT==16){$NUM_LEVELS=4;} +elsif ($TOTAL_INT==32){$NUM_LEVELS=5;} +elsif ($TOTAL_INT==64){$NUM_LEVELS=6;} +elsif ($TOTAL_INT==128){$NUM_LEVELS=7;} +elsif ($TOTAL_INT==256){$NUM_LEVELS=8;} +elsif ($TOTAL_INT==512){$NUM_LEVELS=9;} +elsif ($TOTAL_INT==1024){$NUM_LEVELS=10;} +else {$NUM_LEVELS=int(log($TOTAL_INT)/log(2))+1;} +print ("// TOTAL_INT=".$TOTAL_INT." 
NUM_LEVELS=".$NUM_LEVELS."\n"); +$next_level = 1; +print ("`ifdef RV_PIC_2CYCLE\n"); +if($TOTAL_INT > 2){ +print ("// LEVEL0\n"); +print ("logic [TOTAL_INT+2:0] [INTPRIORITY_BITS-1:0] level_intpend_w_prior_en_".$next_level.";\n"); +print ("logic [TOTAL_INT+2:0] [ID_BITS-1:0] level_intpend_id_".$next_level.";\n"); +print (" for (m=0; m<=(TOTAL_INT)/(2**(".$next_level.")) ; m++) begin : COMPARE0\n"); +print (" if ( m == (TOTAL_INT)/(2**(".$next_level."))) begin \n"); +print (" assign level_intpend_w_prior_en_".$next_level."[m+1] = '0 ;\n"); +print (" assign level_intpend_id_".$next_level."[m+1] = '0 ;\n"); +print (" end\n"); +print (" cmp_and_mux #(\n"); +print (" .ID_BITS(ID_BITS),\n"); +print (" .INTPRIORITY_BITS(INTPRIORITY_BITS)) cmp_l".$next_level." (\n"); +print (" .a_id(level_intpend_id[0][2*m]),\n"); +print (" .a_priority(level_intpend_w_prior_en[0][2*m]),\n"); +print (" .b_id(level_intpend_id[0][2*m+1]),\n"); +print (" .b_priority(level_intpend_w_prior_en[0][2*m+1]),\n"); +print (" .out_id(level_intpend_id_".$next_level."[m]),\n"); +print (" .out_priority(level_intpend_w_prior_en_".$next_level."[m])) ;\n"); +print (" \n"); +print (" end\n\n"); +for (my $l=1; $l