Copy Cores-SweRV-EL2 to flow.

This commit is contained in:
colin 2022-05-10 13:06:26 +00:00
parent 11312aee91
commit fd64d8618a
83 changed files with 37951 additions and 0 deletions

8
Flow/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
configs/snapshots
work
obj_dir
*.vcd
*.csv
*.log
*.exe
*.swp

44
Flow/configs/README.md Normal file
View File

@ -0,0 +1,44 @@
# SweRV RISC-V EL2 core from Western Digital
## Configuration
### Contents
Name | Description
---------------------- | ------------------------------
swerv.config | Configuration script for SweRV-EL2
swerv_config_gen.py | Python wrapper to run swerv.config, used by SweRVolf
This script will generate a consistent set of `defines/#defines/parameters` needed for the design and testbench.
A perl hash (*perl_configs.pl*) and a JSON format for SweRV-iss are also generated.
This set of include files :
./snapshots/<target>
├── common_defines.vh # `defines for testbench
├── defines.h # #defines for C/assembly headers
├── el2_param.vh # Actual Design parameters
├── el2_pdef.vh # Parameter structure definition
├── pd_defines.vh # `defines for physical design
├── perl_configs.pl # Perl %configs hash for scripting
├── pic_map_auto.h # PIC memory map based on configure size
├── whisper.json # JSON file for swerv-iss
└── link.ld # Default linker file for tests
While the defines may be modified by hand, it is recommended that this script be used to generate a consistent set.
### Targets
There are 4 predefined target configurations: `default`, `default_ahb`, `typical_pd` and `high_perf` that can be selected via the `-target=name` option to swerv.config.
Target | Description
---------------------- | ------------------------------
default | Default configuration. AXI4 bus interface.
default_ahb | Default configuration, AHB-Lite bus interface
typical_pd | No ICCM, AXI4 bus interface
high_perf | Large BTB/BHT, AXI4 interface
`swerv.config` may be edited to add additional target configurations, or new configurations may be created via the command line `-set` or `-unset` options.
**Run `$RV_ROOT/configs/swerv.config -h` for options and settable parameters.**

2609
Flow/configs/swerv.config Executable file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,41 @@
#!/usr/bin/env python3
from fusesoc.capi2.generator import Generator
import os
import subprocess
import sys
class SwervConfigGenerator(Generator):
def run(self):
script_root = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '..'))
files = [
{"common_defines.vh" : {
"copyto" : "config/common_defines.vh",
"file_type" : "systemVerilogSource"}},
{"el2_pdef.vh" : {
"copyto" : "config/el2_pdef.vh",
"file_type" : "systemVerilogSource"}},
{"el2_param.vh" : {
"is_include_file" : True,
"file_type" : "systemVerilogSource"}},
{"pic_map_auto.h" : {
"is_include_file" : True,
"file_type" : "systemVerilogSource"}}]
env = os.environ.copy()
env['RV_ROOT'] = script_root
env['BUILD_PATH'] = os.getcwd()
args = ['configs/swerv.config'] + self.config.get('args', [])
rc = subprocess.call(args, cwd=script_root, env=env, stdout=subprocess.DEVNULL)
if rc:
exit(1)
filenames = []
for f in files:
for k in f:
filenames.append(k)
self.add_files(files)
g = SwervConfigGenerator()
g.run()
g.write()

3
Flow/demo/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
build
gen
obj_dir

86
Flow/demo/Makefile Normal file
View File

@ -0,0 +1,86 @@
export RV_ROOT = ${PWD}/..
GCC_PREFIX = /opt/riscv/bin/riscv32-unknown-elf
GDB_PREFIX = /opt/riscv/bin/riscv32-unknown-elf-gdb
ABI = -mabi=ilp32 -march=rv32imc
DEMODIR = ${PWD}
BUILD_DIR = ${DEMODIR}/build
RV_SOC = ${RV_ROOT}/soc
TEST = jtag
ifdef debug
DEBUG_PLUS = +dumpon
VERILATOR_DEBUG = --trace
endif
LINK = $(DEMODIR)/link.ld
LINKPRO = $(DEMODIR)/link_pro.ld
# CFLAGS for verilator generated Makefiles. Without -std=c++11 it complains for `auto` variables
CFLAGS += "-std=c++11"
# Optimization for better performance; alternative is nothing for slower runtime (faster compiles)
# -O2 for faster runtime (slower compiles), or -O for balance.
VERILATOR_MAKE_FLAGS = OPT_FAST="-Os"
# Targets
all: clean verilator
clean:
rm -rf build obj_dir
swerv_define :
BUILD_PATH=${BUILD_DIR} PERLLIB=${RV_SOC} ${RV_SOC}/swerv.config -target=default -set build_axi4
##################### Verilog Builds #####################################
verilator-build: swerv_define
echo '`undef RV_ASSERT_ON' >> ${BUILD_DIR}/common_defines.vh
verilator --cc -CFLAGS ${CFLAGS} \
$(BUILD_DIR)/common_defines.vh \
$(BUILD_DIR)/el2_pdef.vh \
-I${BUILD_DIR} \
-Wno-WIDTH \
-Wno-UNOPTFLAT \
-F ${RV_SOC}/soc_top.mk \
-F ${RV_SOC}/soc_sim.mk \
$(RV_SOC)/soc_sim.sv \
--top-module soc_sim -exe test_soc_sim.cpp --autoflush $(VERILATOR_DEBUG)
cp ${DEMODIR}/test_soc_sim.cpp obj_dir
$(MAKE) -j -e -C obj_dir/ -f Vsoc_sim.mk $(VERILATOR_MAKE_FLAGS)
##################### Simulation Runs #####################################
verilator: program.hex verilator-build
cd build && ../obj_dir/Vsoc_sim ${DEBUG_PLUS}
sim:
cd build && ../obj_dir/Vsoc_sim ${DEBUG_PLUS}
##################### Test hex Build #####################################
program.hex: $(TEST).o $(LINK)
@echo Building $(TEST)
$(GCC_PREFIX)-gcc $(ABI) -Wl,-Map=$(BUILD_DIR)/$(TEST).map -lgcc -T$(LINKPRO) -o $(BUILD_DIR)/$(TEST).bin $(BUILD_DIR)/$(TEST).o -nostartfiles $(TEST_LIBS)
$(GCC_PREFIX)-objcopy -O verilog $(BUILD_DIR)/$(TEST).bin $(BUILD_DIR)/program.hex
$(GCC_PREFIX)-gcc $(ABI) -Wl,-Map=$(BUILD_DIR)/$(TEST).map -lgcc -T$(LINK) -o $(BUILD_DIR)/$(TEST).bin $(BUILD_DIR)/$(TEST).o -nostartfiles $(TEST_LIBS)
$(GCC_PREFIX)-objdump -S $(BUILD_DIR)/$(TEST).bin > $(BUILD_DIR)/$(TEST).dis
@echo Completed building $(TEST)
%.o : %.s swerv_define
$(GCC_PREFIX)-cpp -g -I${BUILD_DIR} $< > $(BUILD_DIR)/$*.cpp.s
$(GCC_PREFIX)-as -g $(ABI) $(BUILD_DIR)/$*.cpp.s -o $(BUILD_DIR)/$@
##################### openocd #####################################
openocd:
openocd -f swerv.cfg
gdb:
$(GDB_PREFIX) -x gdbinit ./build/jtag.bin
help:
@echo Possible targets: verilator help clean all verilator-build program.hex
.PHONY: help clean verilator

18
Flow/demo/Readmd.md Normal file
View File

@ -0,0 +1,18 @@
# jtag simulation
## start openocd
`openocd -d -f swerv.cfg`
## start gdb
`/opt/riscv/bin/riscv32-unknown-elf-gdb -ex "target extended-remote :3333"`
## quick start
At demo/jtag/
1. `make all`
2. `make openocd`
3. `make gdb`

3
Flow/demo/gdbinit Normal file
View File

@ -0,0 +1,3 @@
# set debug remote 1
target extended-remote :3333
set remotetimeout 2000

84
Flow/demo/jtag.s Normal file
View File

@ -0,0 +1,84 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2019 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Assembly code for Hello World
// Not using only ALU ops for creating the string
#include "defines.h"
#define STDOUT 0xd0580000
// Code to execute
.section .text
.global _start
_start:
// Clear minstret
csrw minstret, zero
csrw minstreth, zero
// Set up MTVEC - not expecting to use it though
li x1, RV_ICCM_SADR
csrw mtvec, x1
// Enable Caches in MRAC
li x1, 0x5f555555
csrw 0x7c0, x1
// Load string from hw_data
// and write to stdout address
li x3, STDOUT
la x4, hw_data
loop:
lb x5, 0(x4)
sb x5, 0(x3)
addi x4, x4, 1
bnez x5, loop
li x3, STDOUT
la x4, hw_data
loop2:
lb x5, 0(x4)
sb x5, 0(x3)
addi x4, x4, 1
bnez x5, loop2
loop3:
beq x0, x0, loop3
// Write 0xff to STDOUT for TB to terminate test.
_finish:
li x3, STDOUT
addi x5, x0, 0xff
sb x5, 0(x3)
beq x0, x0, _finish
.rept 100
nop
.endr
.global hw_data
.data
hw_data:
.ascii "----------------------------------\n"
.ascii "Hello World Colin.liang EL2@WDC !!\n"
.ascii "----------------------------------\n"
.byte 0

16
Flow/demo/link.ld Normal file
View File

@ -0,0 +1,16 @@
OUTPUT_ARCH( "riscv" )
ENTRY(_start)
SECTIONS
{
. = 0x80000000;
.text_init : { *(.text_init*) }
.text : { *(.text*) }
_end = .;
. = 0x80004000;
.data : ALIGN(0x800) { *(.*data) *(.rodata*) STACK = ALIGN(16) + 0x2000; }
.bss : { *(.bss) }
. = 0xd0580000;
.data.io : { *(.data.io) }
}

16
Flow/demo/link_pro.ld Normal file
View File

@ -0,0 +1,16 @@
OUTPUT_ARCH( "riscv" )
ENTRY(_start)
SECTIONS
{
. = 0x0000;
.text_init : { *(.text_init*) }
.text : { *(.text*) }
_end = .;
. = 0x4000;
.data : ALIGN(0x800) { *(.*data) *(.rodata*) STACK = ALIGN(16) + 0x2000; }
.bss : { *(.bss) }
. = 0xd0580000;
.data.io : { *(.data.io) }
}

32
Flow/demo/soc.lpf Normal file
View File

@ -0,0 +1,32 @@
LOCATE COMP "clk" SITE "P3";
IOBUF PORT "clk" IO_TYPE=LVCMOS33;
FREQUENCY PORT "clk" 25 MHZ;
LOCATE COMP "dbg_rst" SITE "N2";
IOBUF PORT "dbg_rst" IO_TYPE=LVCMOS33;
FREQUENCY PORT "dbg_rst" 25 MHZ;
LOCATE COMP "rst" SITE "N3";
IOBUF PORT "rst" IO_TYPE=LVCMOS33;
FREQUENCY PORT "rst" 25 MHZ;
LOCATE COMP "jtag_tck" SITE "T2";
IOBUF PORT "jtag_tck" IO_TYPE=LVCMOS33;
FREQUENCY PORT "jtag_tck" 25 MHZ;
LOCATE COMP "jtag_tms" SITE "T3";
IOBUF PORT "jtag_tms" IO_TYPE=LVCMOS33;
FREQUENCY PORT "jtag_tms" 25 MHZ;
LOCATE COMP "jtag_tdi" SITE "N4";
IOBUF PORT "jtag_tdi" IO_TYPE=LVCMOS33;
FREQUENCY PORT "jtag_tdi" 25 MHZ;
LOCATE COMP "jtag_trst_n" SITE "M3";
IOBUF PORT "jtag_trst_n" IO_TYPE=LVCMOS33;
FREQUENCY PORT "jtag_trst_n" 25 MHZ;
LOCATE COMP "jtag_tdo" SITE "M4";
IOBUF PORT "jtag_tdo" IO_TYPE=LVCMOS33;

34
Flow/demo/swerv.cfg Normal file
View File

@ -0,0 +1,34 @@
# "JTAG adapter" for simulation, exposed to OpenOCD through a TCP socket
# speaking the remote_bitbang protocol. The adapter is implemented as
# SystemVerilog DPI module.
adapter driver remote_bitbang
remote_bitbang host localhost
remote_bitbang port 44853
# Target configuration for the riscv chip
set _CHIPNAME riscv
set _TARGETNAME $_CHIPNAME.cpu
jtag newtap $_CHIPNAME tap -irlen 5 -expected-id 0x1000008b
set _TARGETNAME $_CHIPNAME.tap
target create $_TARGETNAME riscv -chain-position $_TARGETNAME
# Configure work area in on-chip SRAM
# $_TARGETNAME configure -work-area-phys 0x1000e000 -work-area-size 1000 -work-area-backup 0
riscv expose_csrs 1988
# Be verbose about GDB errors
gdb_report_data_abort enable
gdb_report_register_access_error enable
# Increase timeouts in simulation
riscv set_command_timeout_sec 4200
# Conclude OpenOCD configuration
init
# Halt the target
halt

60
Flow/demo/synth.sh Executable file
View File

@ -0,0 +1,60 @@
#!/bin/bash
set -ex
PWD=$(pwd)
SOC=$PWD/../soc/
SOCFILE=../soc/soc_top.mk
DEFINE_DIR=$PWD/build
DEFINE="${DEFINE_DIR}/el2_pdef.vh ${DEFINE_DIR}/common_defines.vh"
mkdir -p gen
rm -rf gen/*
mkdir gen/design
YOSYS_COARSE=true
YOSYS_GLOBRST=false
YOSYS_SPLITNETS=false
TOP="soc_top"
RTL_FILES="$DEFINE $(cat $SOCFILE | sed 's/[[:space:]]//g' | sed '/^$/d' | sed -e "s!^!$SOC!" | tr '\n' ' ')"
sv2v -I${DEFINE_DIR} $RTL_FILES > gen/soc_top.v
{
echo "read_verilog gen/soc_top.v"
if test -n "$TOP"; then
echo "hierarchy -check -top $TOP"
else
echo "hierarchy -check"
fi
if $YOSYS_GLOBRST; then
# insertation of global reset (e.g. for FPGA cores)
echo "add -global_input globrst 1"
echo "proc -global_arst globrst"
fi
echo "synth -run coarse; opt -fine"
# echo "tee -o gen/brams.log memory_bram -rules scripts/brams.txt;;"
if ! $YOSYS_COARSE; then
echo "memory_map; techmap; opt; abc -dff; clean"
fi
if $YOSYS_SPLITNETS; then
# icarus verilog has a performance problems when there are
# dependencies between the bits of a long vector
echo "splitnets; clean"
fi
if $YOSYS_COARSE; then
echo "write_verilog -noexpr -noattr gen/synth.v"
else
echo "select -assert-none t:\$[!_]"
echo "write_verilog -noattr gen/synth.v"
fi
echo "synth_ecp5 -top $TOP -json gen/soc.json"
# echo "synth_xilinx -top $TOP"
} > gen/synth.ys
yosys -v2 -l gen/synth.log gen/synth.ys
nextpnr-ecp5 --25k --package CABGA381 --speed 6 --textcfg soc.cfg --lpf soc.lpf --freq 1 --json gen/soc.json

View File

@ -0,0 +1,65 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2019 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdlib.h>
#include <iostream>
#include <utility>
#include <string>
#include "Vsoc_sim.h"
#include "verilated.h"
#include "verilated_vcd_c.h"
vluint64_t main_time = 0;
double sc_time_stamp () {
return main_time;
}
int main(int argc, char** argv) {
std::cout << "\nVerilatorTB: Start of sim\n" << std::endl;
Verilated::commandArgs(argc, argv);
Vsoc_sim* soc = new Vsoc_sim;
// init trace dump
VerilatedVcdC* tfp = NULL;
#if VM_TRACE
Verilated::traceEverOn(true);
tfp = new VerilatedVcdC;
soc->trace (tfp, 24);
tfp->open ("sim.vcd");
#endif
// Simulate
while(!Verilated::gotFinish()){
#if VM_TRACE
tfp->dump (main_time);
#endif
main_time += 5;
soc->core_clk = !soc->core_clk;
soc->eval();
}
#if VM_TRACE
tfp->close();
#endif
std::cout << "\nVerilatorTB: End of sim" << std::endl;
exit(EXIT_SUCCESS);
}

758
Flow/design/dbg/el2_dbg.sv Normal file
View File

@ -0,0 +1,758 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
// Function: Top level SWERV core file to control the debug mode
// Comments: Responsible to put the rest of the core in quiesce mode,
// Send the commands/address. sends WrData and Recieve read Data.
// And then Resume the core to do the normal mode
// Author :
//********************************************************************************
module el2_dbg
import el2_pkg::*;
#(
`include "el2_param.vh"
)(
// outputs to the core for command and data interface
output logic [31:0] dbg_cmd_addr,
output logic [31:0] dbg_cmd_wrdata,
output logic dbg_cmd_valid,
output logic dbg_cmd_write, // 1: write command, 0: read_command
output logic [1:0] dbg_cmd_type, // 0:gpr 1:csr 2: memory
output logic [1:0] dbg_cmd_size, // size of the abstract mem access debug command
output logic dbg_core_rst_l, // core reset from dm
// inputs back from the core/dec
input logic [31:0] core_dbg_rddata,
input logic core_dbg_cmd_done, // This will be treated like a valid signal
input logic core_dbg_cmd_fail, // Exception during command run
// Signals to dma to get a bubble
output logic dbg_dma_bubble, // Debug needs a bubble to send a valid
input logic dma_dbg_ready, // DMA is ready to accept debug request
// interface with the rest of the core to halt/resume handshaking
output logic dbg_halt_req, // This is a pulse
output logic dbg_resume_req, // Debug sends a resume requests. Pulse
input logic dec_tlu_debug_mode, // Core is in debug mode
input logic dec_tlu_dbg_halted, // The core has finished the queiscing sequence. Core is halted now
input logic dec_tlu_mpc_halted_only, // Only halted due to MPC
input logic dec_tlu_resume_ack, // core sends back an ack for the resume (pulse)
// inputs from the JTAG
input logic dmi_reg_en, // read or write
input logic [6:0] dmi_reg_addr, // address of DM register
input logic dmi_reg_wr_en, // write instruction
input logic [31:0] dmi_reg_wdata, // write data
// output
output logic [31:0] dmi_reg_rdata, // read data
// AXI Write Channels
output logic sb_axi_awvalid,
input logic sb_axi_awready,
output logic [pt.SB_BUS_TAG-1:0] sb_axi_awid,
output logic [31:0] sb_axi_awaddr,
output logic [3:0] sb_axi_awregion,
output logic [7:0] sb_axi_awlen,
output logic [2:0] sb_axi_awsize,
output logic [1:0] sb_axi_awburst,
output logic sb_axi_awlock,
output logic [3:0] sb_axi_awcache,
output logic [2:0] sb_axi_awprot,
output logic [3:0] sb_axi_awqos,
output logic sb_axi_wvalid,
input logic sb_axi_wready,
output logic [63:0] sb_axi_wdata,
output logic [7:0] sb_axi_wstrb,
output logic sb_axi_wlast,
input logic sb_axi_bvalid,
output logic sb_axi_bready,
input logic [1:0] sb_axi_bresp,
// AXI Read Channels
output logic sb_axi_arvalid,
input logic sb_axi_arready,
output logic [pt.SB_BUS_TAG-1:0] sb_axi_arid,
output logic [31:0] sb_axi_araddr,
output logic [3:0] sb_axi_arregion,
output logic [7:0] sb_axi_arlen,
output logic [2:0] sb_axi_arsize,
output logic [1:0] sb_axi_arburst,
output logic sb_axi_arlock,
output logic [3:0] sb_axi_arcache,
output logic [2:0] sb_axi_arprot,
output logic [3:0] sb_axi_arqos,
input logic sb_axi_rvalid,
output logic sb_axi_rready,
input logic [63:0] sb_axi_rdata,
input logic [1:0] sb_axi_rresp,
input logic dbg_bus_clk_en,
// general inputs
input logic clk,
input logic free_clk,
input logic rst_l, // This includes both top rst and debug rst
input logic dbg_rst_l,
input logic clk_override,
input logic scan_mode
);
typedef enum logic [3:0] {IDLE=4'h0, HALTING=4'h1, HALTED=4'h2, CORE_CMD_START=4'h3, CORE_CMD_WAIT=4'h4, SB_CMD_START=4'h5, SB_CMD_SEND=4'h6, SB_CMD_RESP=4'h7, CMD_DONE=4'h8, RESUMING=4'h9} state_t;
typedef enum logic [3:0] {SBIDLE=4'h0, WAIT_RD=4'h1, WAIT_WR=4'h2, CMD_RD=4'h3, CMD_WR=4'h4, CMD_WR_ADDR=4'h5, CMD_WR_DATA=4'h6, RSP_RD=4'h7, RSP_WR=4'h8, DONE=4'h9} sb_state_t;
state_t dbg_state;
state_t dbg_nxtstate;
logic dbg_state_en;
// these are the registers that the debug module implements
logic [31:0] dmstatus_reg; // [26:24]-dmerr, [17:16]-resume ack, [9:8]-halted, [3:0]-version
logic [31:0] dmcontrol_reg; // dmcontrol register has only 6 bits implemented. 31: haltreq, 30: resumereq, 29: haltreset, 28: ackhavereset, 1: ndmreset, 0: dmactive.
logic [31:0] command_reg;
logic [31:0] abstractcs_reg; // bits implemted are [12] - busy and [10:8]= command error
logic [31:0] haltsum0_reg;
logic [31:0] data0_reg;
logic [31:0] data1_reg;
// data 0
logic [31:0] data0_din;
logic data0_reg_wren, data0_reg_wren0, data0_reg_wren1, data0_reg_wren2;
// data 1
logic [31:0] data1_din;
logic data1_reg_wren, data1_reg_wren0, data1_reg_wren1;
// abstractcs
logic abstractcs_busy_wren;
logic abstractcs_busy_din;
logic [2:0] abstractcs_error_din;
logic abstractcs_error_sel0, abstractcs_error_sel1, abstractcs_error_sel2, abstractcs_error_sel3, abstractcs_error_sel4, abstractcs_error_sel5, abstractcs_error_sel6;
logic dbg_sb_bus_error;
// abstractauto
logic abstractauto_reg_wren;
logic [1:0] abstractauto_reg;
// dmstatus
logic dmstatus_resumeack_wren;
logic dmstatus_resumeack_din;
logic dmstatus_haveresetn_wren;
logic dmstatus_resumeack;
logic dmstatus_unavail;
logic dmstatus_running;
logic dmstatus_halted;
logic dmstatus_havereset, dmstatus_haveresetn;
// dmcontrol
logic resumereq;
logic dmcontrol_wren, dmcontrol_wren_Q;
// command
logic execute_command_ns, execute_command;
logic command_wren, command_regno_wren;
logic command_transfer_din;
logic command_postexec_din;
logic [31:0] command_din;
logic [3:0] dbg_cmd_addr_incr;
logic [31:0] dbg_cmd_curr_addr;
logic [31:0] dbg_cmd_next_addr;
// needed to send the read data back for dmi reads
logic [31:0] dmi_reg_rdata_din;
sb_state_t sb_state;
sb_state_t sb_nxtstate;
logic sb_state_en;
//System bus section
logic sbcs_wren;
logic sbcs_sbbusy_wren;
logic sbcs_sbbusy_din;
logic sbcs_sbbusyerror_wren;
logic sbcs_sbbusyerror_din;
logic sbcs_sberror_wren;
logic [2:0] sbcs_sberror_din;
logic sbcs_unaligned;
logic sbcs_illegal_size;
logic [19:15] sbcs_reg_int;
// data
logic sbdata0_reg_wren0;
logic sbdata0_reg_wren1;
logic sbdata0_reg_wren;
logic [31:0] sbdata0_din;
logic sbdata1_reg_wren0;
logic sbdata1_reg_wren1;
logic sbdata1_reg_wren;
logic [31:0] sbdata1_din;
logic sbaddress0_reg_wren0;
logic sbaddress0_reg_wren1;
logic sbaddress0_reg_wren;
logic [31:0] sbaddress0_reg_din;
logic [3:0] sbaddress0_incr;
logic sbreadonaddr_access;
logic sbreadondata_access;
logic sbdata0wr_access;
logic sb_abmem_cmd_done_in, sb_abmem_data_done_in;
logic sb_abmem_cmd_done_en, sb_abmem_data_done_en;
logic sb_abmem_cmd_done, sb_abmem_data_done;
logic [31:0] abmem_addr;
logic abmem_addr_in_dccm_region, abmem_addr_in_iccm_region, abmem_addr_in_pic_region;
logic abmem_addr_core_local;
logic abmem_addr_external;
logic sb_cmd_pending, sb_abmem_cmd_pending;
logic sb_abmem_cmd_write;
logic [2:0] sb_abmem_cmd_size;
logic [31:0] sb_abmem_cmd_addr;
logic [31:0] sb_abmem_cmd_wdata;
logic [2:0] sb_cmd_size;
logic [31:0] sb_cmd_addr;
logic [63:0] sb_cmd_wdata;
logic sb_bus_cmd_read, sb_bus_cmd_write_addr, sb_bus_cmd_write_data;
logic sb_bus_rsp_read, sb_bus_rsp_write;
logic sb_bus_rsp_error;
logic [63:0] sb_bus_rdata;
//registers
logic [31:0] sbcs_reg;
logic [31:0] sbaddress0_reg;
logic [31:0] sbdata0_reg;
logic [31:0] sbdata1_reg;
logic sb_abmem_cmd_arvalid, sb_abmem_cmd_awvalid, sb_abmem_cmd_wvalid;
logic sb_abmem_read_pend;
logic sb_cmd_awvalid, sb_cmd_wvalid, sb_cmd_arvalid;
logic sb_read_pend;
logic [31:0] sb_axi_addr;
logic [63:0] sb_axi_wrdata;
logic [2:0] sb_axi_size;
logic dbg_dm_rst_l;
logic rst_l_sync;
//clken
logic dbg_free_clken;
logic dbg_free_clk;
logic sb_free_clken;
logic sb_free_clk;
// clocking
// used for the abstract commands.
assign dbg_free_clken = dmi_reg_en | execute_command | (dbg_state != IDLE) | dbg_state_en | dec_tlu_dbg_halted | dec_tlu_mpc_halted_only | dec_tlu_debug_mode | dbg_halt_req | clk_override;
// used for the system bus
assign sb_free_clken = dmi_reg_en | execute_command | sb_state_en | (sb_state != SBIDLE) | clk_override;
rvoclkhdr dbg_free_cgc (.en(dbg_free_clken), .l1clk(dbg_free_clk), .*);
rvoclkhdr sb_free_cgc (.en(sb_free_clken), .l1clk(sb_free_clk), .*);
// end clocking section
// Reset logic
assign dbg_dm_rst_l = dbg_rst_l & (dmcontrol_reg[0] | scan_mode);
assign dbg_core_rst_l = ~dmcontrol_reg[1] | scan_mode;
// synchronize the rst
rvsyncss #(1) rstl_syncff (.din(rst_l), .dout(rst_l_sync), .clk(free_clk), .rst_l(dbg_rst_l));
// system bus register
// sbcs[31:29], sbcs - [22]:sbbusyerror, [21]: sbbusy, [20]:sbreadonaddr, [19:17]:sbaccess, [16]:sbautoincrement, [15]:sbreadondata, [14:12]:sberror, sbsize=32, 128=0, 64/32/16/8 are legal
assign sbcs_reg[31:29] = 3'b1;
assign sbcs_reg[28:23] = '0;
assign sbcs_reg[19:15] = {sbcs_reg_int[19], ~sbcs_reg_int[18], sbcs_reg_int[17:15]};
assign sbcs_reg[11:5] = 7'h20;
assign sbcs_reg[4:0] = 5'b01111;
assign sbcs_wren = (dmi_reg_addr == 7'h38) & dmi_reg_en & dmi_reg_wr_en & (sb_state == SBIDLE);
assign sbcs_sbbusyerror_wren = (sbcs_wren & dmi_reg_wdata[22]) |
(sbcs_reg[21] & dmi_reg_en & ((dmi_reg_wr_en & (dmi_reg_addr == 7'h39)) | (dmi_reg_addr == 7'h3c) | (dmi_reg_addr == 7'h3d)));
assign sbcs_sbbusyerror_din = ~(sbcs_wren & dmi_reg_wdata[22]); // Clear when writing one
rvdffs #(1) sbcs_sbbusyerror_reg (.din(sbcs_sbbusyerror_din), .dout(sbcs_reg[22]), .en(sbcs_sbbusyerror_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk));
rvdffs #(1) sbcs_sbbusy_reg (.din(sbcs_sbbusy_din), .dout(sbcs_reg[21]), .en(sbcs_sbbusy_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk));
rvdffs #(1) sbcs_sbreadonaddr_reg (.din(dmi_reg_wdata[20]), .dout(sbcs_reg[20]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk));
rvdffs #(5) sbcs_misc_reg (.din({dmi_reg_wdata[19],~dmi_reg_wdata[18],dmi_reg_wdata[17:15]}),
.dout(sbcs_reg_int[19:15]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk));
rvdffs #(3) sbcs_error_reg (.din(sbcs_sberror_din[2:0]), .dout(sbcs_reg[14:12]), .en(sbcs_sberror_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk));
assign sbcs_unaligned = ((sbcs_reg[19:17] == 3'b001) & sbaddress0_reg[0]) |
((sbcs_reg[19:17] == 3'b010) & (|sbaddress0_reg[1:0])) |
((sbcs_reg[19:17] == 3'b011) & (|sbaddress0_reg[2:0]));
assign sbcs_illegal_size = sbcs_reg[19]; // Anything bigger than 64 bits is illegal
assign sbaddress0_incr[3:0] = ({4{(sbcs_reg[19:17] == 3'h0)}} & 4'b0001) |
({4{(sbcs_reg[19:17] == 3'h1)}} & 4'b0010) |
({4{(sbcs_reg[19:17] == 3'h2)}} & 4'b0100) |
({4{(sbcs_reg[19:17] == 3'h3)}} & 4'b1000);
// sbdata
assign sbdata0_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3c); // write data only when single read is 0
assign sbdata0_reg_wren1 = (sb_state == RSP_RD) & sb_state_en & ~sbcs_sberror_wren;
assign sbdata0_reg_wren = sbdata0_reg_wren0 | sbdata0_reg_wren1;
assign sbdata1_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3d); // write data only when single read is 0;
assign sbdata1_reg_wren1 = (sb_state == RSP_RD) & sb_state_en & ~sbcs_sberror_wren;
assign sbdata1_reg_wren = sbdata1_reg_wren0 | sbdata1_reg_wren1;
assign sbdata0_din[31:0] = ({32{sbdata0_reg_wren0}} & dmi_reg_wdata[31:0]) |
({32{sbdata0_reg_wren1}} & sb_bus_rdata[31:0]);
assign sbdata1_din[31:0] = ({32{sbdata1_reg_wren0}} & dmi_reg_wdata[31:0]) |
({32{sbdata1_reg_wren1}} & sb_bus_rdata[63:32]);
rvdffe #(32) dbg_sbdata0_reg (.*, .din(sbdata0_din[31:0]), .dout(sbdata0_reg[31:0]), .en(sbdata0_reg_wren), .rst_l(dbg_dm_rst_l));
rvdffe #(32) dbg_sbdata1_reg (.*, .din(sbdata1_din[31:0]), .dout(sbdata1_reg[31:0]), .en(sbdata1_reg_wren), .rst_l(dbg_dm_rst_l));
// sbaddress
assign sbaddress0_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h39);
assign sbaddress0_reg_wren = sbaddress0_reg_wren0 | sbaddress0_reg_wren1;
assign sbaddress0_reg_din[31:0]= ({32{sbaddress0_reg_wren0}} & dmi_reg_wdata[31:0]) |
({32{sbaddress0_reg_wren1}} & (sbaddress0_reg[31:0] + {28'b0,sbaddress0_incr[3:0]}));
rvdffe #(32) dbg_sbaddress0_reg (.*, .din(sbaddress0_reg_din[31:0]), .dout(sbaddress0_reg[31:0]), .en(sbaddress0_reg_wren), .rst_l(dbg_dm_rst_l));
assign sbreadonaddr_access = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h39) & sbcs_reg[20]; // if readonaddr is set the next command will start upon writing of addr0
assign sbreadondata_access = dmi_reg_en & ~dmi_reg_wr_en & (dmi_reg_addr == 7'h3c) & sbcs_reg[15]; // if readondata is set the next command will start upon reading of data0
assign sbdata0wr_access = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3c); // write to sbdata0 will start write command to system bus
// memory mapped registers
// dmcontrol register has only 5 bits implemented. 31: haltreq, 30: resumereq, 28: ackhavereset, 1: ndmreset, 0: dmactive.
// rest all the bits are zeroed out
// dmactive flop is reset based on core rst_l, all other flops use dm_rst_l
assign dmcontrol_wren = (dmi_reg_addr == 7'h10) & dmi_reg_en & dmi_reg_wr_en;
assign dmcontrol_reg[29] = '0;
assign dmcontrol_reg[27:2] = '0;
assign resumereq = dmcontrol_reg[30] & ~dmcontrol_reg[31] & dmcontrol_wren_Q;
rvdffs #(4) dmcontrolff (.din({dmi_reg_wdata[31:30],dmi_reg_wdata[28],dmi_reg_wdata[1]}), .dout({dmcontrol_reg[31:30], dmcontrol_reg[28], dmcontrol_reg[1]}), .en(dmcontrol_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk));
rvdffs #(1) dmcontrol_dmactive_ff (.din(dmi_reg_wdata[0]), .dout(dmcontrol_reg[0]), .en(dmcontrol_wren), .rst_l(dbg_rst_l), .clk(dbg_free_clk));
rvdff #(1) dmcontrol_wrenff(.din(dmcontrol_wren), .dout(dmcontrol_wren_Q), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk));
// dmstatus register bits that are implemented
// [19:18]-havereset,[17:16]-resume ack, [9:8]-halted, [3:0]-version
// rest all the bits are zeroed out
//assign dmstatus_wren = (dmi_reg_addr[31:0] == 32'h11) & dmi_reg_en;
assign dmstatus_reg[31:20] = '0;
assign dmstatus_reg[19:18] = {2{dmstatus_havereset}};
assign dmstatus_reg[15:14] = '0;
assign dmstatus_reg[7] = '1;
assign dmstatus_reg[6:4] = '0;
assign dmstatus_reg[17:16] = {2{dmstatus_resumeack}};
assign dmstatus_reg[13:12] = {2{dmstatus_unavail}};
assign dmstatus_reg[11:10] = {2{dmstatus_running}};
assign dmstatus_reg[9:8] = {2{dmstatus_halted}};
assign dmstatus_reg[3:0] = 4'h2;
assign dmstatus_resumeack_wren = ((dbg_state == RESUMING) & dec_tlu_resume_ack) | (dmstatus_resumeack & resumereq & dmstatus_halted);
assign dmstatus_resumeack_din = (dbg_state == RESUMING) & dec_tlu_resume_ack;
assign dmstatus_haveresetn_wren = (dmi_reg_addr == 7'h10) & dmi_reg_wdata[28] & dmi_reg_en & dmi_reg_wr_en & dmcontrol_reg[0]; // clear the havereset
assign dmstatus_havereset = ~dmstatus_haveresetn;
assign dmstatus_unavail = dmcontrol_reg[1] | ~rst_l_sync;
assign dmstatus_running = ~(dmstatus_unavail | dmstatus_halted);
rvdffs #(1) dmstatus_resumeack_reg (.din(dmstatus_resumeack_din), .dout(dmstatus_resumeack), .en(dmstatus_resumeack_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk));
rvdff #(1) dmstatus_halted_reg (.din(dec_tlu_dbg_halted & ~dec_tlu_mpc_halted_only), .dout(dmstatus_halted), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk));
rvdffs #(1) dmstatus_haveresetn_reg (.din(1'b1), .dout(dmstatus_haveresetn), .en(dmstatus_haveresetn_wren), .rst_l(rst_l), .clk(dbg_free_clk));
// haltsum0 register
assign haltsum0_reg[31:1] = '0;
assign haltsum0_reg[0] = dmstatus_halted;
// abstractcs register
// bits implemted are [12] - busy and [10:8]= command error
assign abstractcs_reg[31:13] = '0;
assign abstractcs_reg[11] = '0;
assign abstractcs_reg[7:4] = '0;
assign abstractcs_reg[3:0] = 4'h2; // One data register
assign abstractcs_error_sel0 = abstractcs_reg[12] & ~(|abstractcs_reg[10:8]) & dmi_reg_en & ((dmi_reg_wr_en & ((dmi_reg_addr == 7'h16) | (dmi_reg_addr == 7'h17)) | (dmi_reg_addr == 7'h18)) |
(dmi_reg_addr == 7'h4) | (dmi_reg_addr == 7'h5));
assign abstractcs_error_sel1 = execute_command & ~(|abstractcs_reg[10:8]) &
((~((command_reg[31:24] == 8'b0) | (command_reg[31:24] == 8'h2))) | // Illegal command
(((command_reg[22:20] == 3'b011) | (command_reg[22])) & (command_reg[31:24] == 8'h2)) | // Illegal abstract memory size (can't be DW or higher)
((command_reg[22:20] != 3'b010) & ((command_reg[31:24] == 8'h0) & command_reg[17])) | // Illegal abstract reg size
((command_reg[31:24] == 8'h0) & command_reg[18])); //postexec for abstract register access
assign abstractcs_error_sel2 = ((core_dbg_cmd_done & core_dbg_cmd_fail) | // exception from core
(execute_command & (command_reg[31:24] == 8'h0) & // unimplemented regs
(((command_reg[15:12] == 4'h1) & (command_reg[11:5] != 0)) | (command_reg[15:13] != 0)))) & ~(|abstractcs_reg[10:8]);
assign abstractcs_error_sel3 = execute_command & (dbg_state != HALTED) & ~(|abstractcs_reg[10:8]);
assign abstractcs_error_sel4 = dbg_sb_bus_error & dbg_bus_clk_en & ~(|abstractcs_reg[10:8]);// sb bus error for abstract memory command
assign abstractcs_error_sel5 = execute_command & (command_reg[31:24] == 8'h2) & ~(|abstractcs_reg[10:8]) &
(((command_reg[22:20] == 3'b001) & data1_reg[0]) | ((command_reg[22:20] == 3'b010) & (|data1_reg[1:0]))); //Unaligned address for abstract memory
assign abstractcs_error_sel6 = (dmi_reg_addr == 7'h16) & dmi_reg_en & dmi_reg_wr_en;
assign abstractcs_error_din[2:0] = abstractcs_error_sel0 ? 3'b001 : // writing command or abstractcs while a command was executing. Or accessing data0
abstractcs_error_sel1 ? 3'b010 : // writing a illegal command type to cmd field of command
abstractcs_error_sel2 ? 3'b011 : // exception while running command
abstractcs_error_sel3 ? 3'b100 : // writing a comnand when not in the halted state
abstractcs_error_sel4 ? 3'b101 : // Bus error
abstractcs_error_sel5 ? 3'b111 : // unaligned or illegal size abstract memory command
abstractcs_error_sel6 ? (~dmi_reg_wdata[10:8] & abstractcs_reg[10:8]) : //W1C
abstractcs_reg[10:8]; //hold
rvdffs #(1) dmabstractcs_busy_reg (.din(abstractcs_busy_din), .dout(abstractcs_reg[12]), .en(abstractcs_busy_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk));
rvdff #(3) dmabstractcs_error_reg (.din(abstractcs_error_din[2:0]), .dout(abstractcs_reg[10:8]), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk));
// abstract auto reg
assign abstractauto_reg_wren = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h18) & ~abstractcs_reg[12];
rvdffs #(2) dbg_abstractauto_reg (.*, .din(dmi_reg_wdata[1:0]), .dout(abstractauto_reg[1:0]), .en(abstractauto_reg_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk));
// command register - implemented all the bits in this register
// command[16] = 1: write, 0: read
assign execute_command_ns = command_wren |
(dmi_reg_en & ~abstractcs_reg[12] & (((dmi_reg_addr == 7'h4) & abstractauto_reg[0]) | ((dmi_reg_addr == 7'h5) & abstractauto_reg[1])));
assign command_wren = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en;
assign command_regno_wren = command_wren | ((command_reg[31:24] == 8'h0) & command_reg[19] & (dbg_state == CMD_DONE) & ~(|abstractcs_reg[10:8])); // aarpostincrement
assign command_postexec_din = (dmi_reg_wdata[31:24] == 8'h0) & dmi_reg_wdata[18];
assign command_transfer_din = (dmi_reg_wdata[31:24] == 8'h0) & dmi_reg_wdata[17];
assign command_din[31:16] = {dmi_reg_wdata[31:24],1'b0,dmi_reg_wdata[22:19],command_postexec_din,command_transfer_din, dmi_reg_wdata[16]};
assign command_din[15:0] = command_wren ? dmi_reg_wdata[15:0] : dbg_cmd_next_addr[15:0];
rvdff #(1) execute_commandff (.*, .din(execute_command_ns), .dout(execute_command), .clk(dbg_free_clk), .rst_l(dbg_dm_rst_l));
rvdffe #(16) dmcommand_reg (.*, .din(command_din[31:16]), .dout(command_reg[31:16]), .en(command_wren), .rst_l(dbg_dm_rst_l));
rvdffe #(16) dmcommand_regno_reg (.*, .din(command_din[15:0]), .dout(command_reg[15:0]), .en(command_regno_wren), .rst_l(dbg_dm_rst_l));
// data0 reg
assign data0_reg_wren0 = (dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h4) & (dbg_state == HALTED) & ~abstractcs_reg[12]);
assign data0_reg_wren1 = core_dbg_cmd_done & (dbg_state == CORE_CMD_WAIT) & ~command_reg[16];
assign data0_reg_wren = data0_reg_wren0 | data0_reg_wren1 | data0_reg_wren2;
assign data0_din[31:0] = ({32{data0_reg_wren0}} & dmi_reg_wdata[31:0]) |
({32{data0_reg_wren1}} & core_dbg_rddata[31:0]) |
({32{data0_reg_wren2}} & sb_bus_rdata[31:0]);
rvdffe #(32) dbg_data0_reg (.*, .din(data0_din[31:0]), .dout(data0_reg[31:0]), .en(data0_reg_wren), .rst_l(dbg_dm_rst_l));
// data 1
assign data1_reg_wren0 = (dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h5) & (dbg_state == HALTED) & ~abstractcs_reg[12]);
assign data1_reg_wren1 = (dbg_state == CMD_DONE) & (command_reg[31:24] == 8'h2) & command_reg[19] & ~(|abstractcs_reg[10:8]); // aampostincrement
assign data1_reg_wren = data1_reg_wren0 | data1_reg_wren1;
assign data1_din[31:0] = ({32{data1_reg_wren0}} & dmi_reg_wdata[31:0]) |
({32{data1_reg_wren1}} & dbg_cmd_next_addr[31:0]);
rvdffe #(32) dbg_data1_reg (.*, .din(data1_din[31:0]), .dout(data1_reg[31:0]), .en(data1_reg_wren), .rst_l(dbg_dm_rst_l));
rvdffs #(1) sb_abmem_cmd_doneff (.din(sb_abmem_cmd_done_in), .dout(sb_abmem_cmd_done), .en(sb_abmem_cmd_done_en), .clk(dbg_free_clk), .rst_l(dbg_dm_rst_l), .*);
rvdffs #(1) sb_abmem_data_doneff (.din(sb_abmem_data_done_in), .dout(sb_abmem_data_done), .en(sb_abmem_data_done_en), .clk(dbg_free_clk), .rst_l(dbg_dm_rst_l), .*);
// FSM to control the debug mode entry, command send/recieve, and Resume flow.
always_comb begin
dbg_nxtstate = IDLE;
dbg_state_en = 1'b0;
abstractcs_busy_wren = 1'b0;
abstractcs_busy_din = 1'b0;
dbg_halt_req = dmcontrol_wren_Q & dmcontrol_reg[31]; // single pulse output to the core. Need to drive every time this register is written since core might be halted due to MPC
dbg_resume_req = 1'b0; // single pulse output to the core
dbg_sb_bus_error = 1'b0;
data0_reg_wren2 = 1'b0;
sb_abmem_cmd_done_in = 1'b0;
sb_abmem_data_done_in = 1'b0;
sb_abmem_cmd_done_en = 1'b0;
sb_abmem_data_done_en = 1'b0;
case (dbg_state)
IDLE: begin
dbg_nxtstate = (dmstatus_reg[9] | dec_tlu_mpc_halted_only) ? HALTED : HALTING; // initiate the halt command to the core
dbg_state_en = dmcontrol_reg[31] | dmstatus_reg[9] | dec_tlu_mpc_halted_only; // when the jtag writes the halt bit in the DM register, OR when the status indicates H
dbg_halt_req = dmcontrol_reg[31]; // only when jtag has written the halt_req bit in the control. Removed debug mode qualification during MPC changes
end
HALTING : begin
dbg_nxtstate = HALTED; // Goto HALTED once the core sends an ACK
dbg_state_en = dmstatus_reg[9] | dec_tlu_mpc_halted_only; // core indicates halted
end
HALTED: begin
// wait for halted to go away before send to resume. Else start of new command
dbg_nxtstate = dmstatus_reg[9] ? (resumereq ? RESUMING : (((command_reg[31:24] == 8'h2) & abmem_addr_external) ? SB_CMD_START : CORE_CMD_START)) :
(dmcontrol_reg[31] ? HALTING : IDLE); // This is MPC halted case
dbg_state_en = (dmstatus_reg[9] & resumereq) | execute_command | ~(dmstatus_reg[9] | dec_tlu_mpc_halted_only);
abstractcs_busy_wren = dbg_state_en & ((dbg_nxtstate == CORE_CMD_START) | (dbg_nxtstate == SB_CMD_START)); // write busy when a new command was written by jtag
abstractcs_busy_din = 1'b1;
dbg_resume_req = dbg_state_en & (dbg_nxtstate == RESUMING); // single cycle pulse to core if resuming
end
CORE_CMD_START: begin
// Don't execute the command if cmderror or transfer=0 for abstract register access
dbg_nxtstate = ((|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17])) ? CMD_DONE : CORE_CMD_WAIT; // new command sent to the core
dbg_state_en = dbg_cmd_valid | (|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17]);
end
CORE_CMD_WAIT: begin
dbg_nxtstate = CMD_DONE;
dbg_state_en = core_dbg_cmd_done; // go to done state for one cycle after completing current command
end
SB_CMD_START: begin
dbg_nxtstate = (|abstractcs_reg[10:8]) ? CMD_DONE : SB_CMD_SEND;
dbg_state_en = (dbg_bus_clk_en & ~sb_cmd_pending) | (|abstractcs_reg[10:8]);
end
SB_CMD_SEND: begin
sb_abmem_cmd_done_in = 1'b1;
sb_abmem_data_done_in= 1'b1;
sb_abmem_cmd_done_en = (sb_bus_cmd_read | sb_bus_cmd_write_addr) & dbg_bus_clk_en;
sb_abmem_data_done_en= (sb_bus_cmd_read | sb_bus_cmd_write_data) & dbg_bus_clk_en;
dbg_nxtstate = SB_CMD_RESP;
dbg_state_en = (sb_abmem_cmd_done | sb_abmem_cmd_done_en) & (sb_abmem_data_done | sb_abmem_data_done_en) & dbg_bus_clk_en;
end
SB_CMD_RESP: begin
dbg_nxtstate = CMD_DONE;
dbg_state_en = (sb_bus_rsp_read | sb_bus_rsp_write) & dbg_bus_clk_en;
dbg_sb_bus_error = (sb_bus_rsp_read | sb_bus_rsp_write) & sb_bus_rsp_error & dbg_bus_clk_en;
data0_reg_wren2 = dbg_state_en & ~sb_abmem_cmd_write & ~dbg_sb_bus_error;
end
CMD_DONE: begin
dbg_nxtstate = HALTED;
dbg_state_en = 1'b1;
abstractcs_busy_wren = dbg_state_en; // remove the busy bit from the abstracts ( bit 12 )
abstractcs_busy_din = 1'b0;
sb_abmem_cmd_done_in = 1'b0;
sb_abmem_data_done_in= 1'b0;
sb_abmem_cmd_done_en = 1'b1;
sb_abmem_data_done_en= 1'b1;
end
RESUMING : begin
dbg_nxtstate = IDLE;
dbg_state_en = dmstatus_reg[17]; // resume ack has been updated in the dmstatus register
end
default : begin
dbg_nxtstate = IDLE;
dbg_state_en = 1'b0;
abstractcs_busy_wren = 1'b0;
abstractcs_busy_din = 1'b0;
dbg_halt_req = 1'b0; // single pulse output to the core
dbg_resume_req = 1'b0; // single pulse output to the core
dbg_sb_bus_error = 1'b0;
data0_reg_wren2 = 1'b0;
sb_abmem_cmd_done_in = 1'b0;
sb_abmem_data_done_in = 1'b0;
sb_abmem_cmd_done_en = 1'b0;
sb_abmem_data_done_en = 1'b0;
end
endcase
end // always_comb begin
assign dmi_reg_rdata_din[31:0] = ({32{dmi_reg_addr == 7'h4}} & data0_reg[31:0]) |
({32{dmi_reg_addr == 7'h5}} & data1_reg[31:0]) |
({32{dmi_reg_addr == 7'h10}} & {2'b0,dmcontrol_reg[29],1'b0,dmcontrol_reg[27:0]}) | // Read0 to Write only bits
({32{dmi_reg_addr == 7'h11}} & dmstatus_reg[31:0]) |
({32{dmi_reg_addr == 7'h16}} & abstractcs_reg[31:0]) |
({32{dmi_reg_addr == 7'h17}} & command_reg[31:0]) |
({32{dmi_reg_addr == 7'h18}} & {30'h0,abstractauto_reg[1:0]}) |
({32{dmi_reg_addr == 7'h40}} & haltsum0_reg[31:0]) |
({32{dmi_reg_addr == 7'h38}} & sbcs_reg[31:0]) |
({32{dmi_reg_addr == 7'h39}} & sbaddress0_reg[31:0]) |
({32{dmi_reg_addr == 7'h3c}} & sbdata0_reg[31:0]) |
({32{dmi_reg_addr == 7'h3d}} & sbdata1_reg[31:0]);
rvdffs #($bits(state_t)) dbg_state_reg (.din(dbg_nxtstate), .dout({dbg_state}), .en(dbg_state_en), .rst_l(dbg_dm_rst_l & rst_l), .clk(dbg_free_clk));
rvdffe #(32) dmi_rddata_reg (.din(dmi_reg_rdata_din[31:0]), .dout(dmi_reg_rdata[31:0]), .en(dmi_reg_en), .rst_l(dbg_dm_rst_l), .clk(clk), .*);
assign abmem_addr[31:0] = data1_reg[31:0];
assign abmem_addr_core_local = (abmem_addr_in_dccm_region | abmem_addr_in_iccm_region | abmem_addr_in_pic_region);
assign abmem_addr_external = ~abmem_addr_core_local;
assign abmem_addr_in_dccm_region = (abmem_addr[31:28] == pt.DCCM_REGION) & pt.DCCM_ENABLE;
assign abmem_addr_in_iccm_region = (abmem_addr[31:28] == pt.ICCM_REGION) & pt.ICCM_ENABLE;
assign abmem_addr_in_pic_region = (abmem_addr[31:28] == pt.PIC_REGION);
// interface for the core
assign dbg_cmd_addr[31:0] = (command_reg[31:24] == 8'h2) ? data1_reg[31:0] : {20'b0, command_reg[11:0]};
assign dbg_cmd_wrdata[31:0] = data0_reg[31:0];
assign dbg_cmd_valid = (dbg_state == CORE_CMD_START) & ~((|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17]) | ((command_reg[31:24] == 8'h2) & abmem_addr_external)) & dma_dbg_ready;
assign dbg_cmd_write = command_reg[16];
assign dbg_cmd_type[1:0] = (command_reg[31:24] == 8'h2) ? 2'b10 : {1'b0, (command_reg[15:12] == 4'b0)};
assign dbg_cmd_size[1:0] = command_reg[21:20];
assign dbg_cmd_addr_incr[3:0] = (command_reg[31:24] == 8'h2) ? (4'h1 << sb_abmem_cmd_size[1:0]) : 4'h1;
assign dbg_cmd_curr_addr[31:0] = (command_reg[31:24] == 8'h2) ? data1_reg[31:0] : {16'b0, command_reg[15:0]};
assign dbg_cmd_next_addr[31:0] = dbg_cmd_curr_addr[31:0] + {28'h0,dbg_cmd_addr_incr[3:0]};
// Ask DMA to stop taking bus trxns since debug request is done
assign dbg_dma_bubble = ((dbg_state == CORE_CMD_START) & ~(|abstractcs_reg[10:8])) | (dbg_state == CORE_CMD_WAIT);
assign sb_cmd_pending = (sb_state == CMD_RD) | (sb_state == CMD_WR) | (sb_state == CMD_WR_ADDR) | (sb_state == CMD_WR_DATA) | (sb_state == RSP_RD) | (sb_state == RSP_WR);
assign sb_abmem_cmd_pending = (dbg_state == SB_CMD_START) | (dbg_state == SB_CMD_SEND) | (dbg_state== SB_CMD_RESP);
// system bus FSM
always_comb begin
sb_nxtstate = SBIDLE;
sb_state_en = 1'b0;
sbcs_sbbusy_wren = 1'b0;
sbcs_sbbusy_din = 1'b0;
sbcs_sberror_wren = 1'b0;
sbcs_sberror_din[2:0] = 3'b0;
sbaddress0_reg_wren1 = 1'b0;
case (sb_state)
SBIDLE: begin
sb_nxtstate = sbdata0wr_access ? WAIT_WR : WAIT_RD;
sb_state_en = (sbdata0wr_access | sbreadondata_access | sbreadonaddr_access) & ~(|sbcs_reg[14:12]) & ~sbcs_reg[22];
sbcs_sbbusy_wren = sb_state_en; // set the single read bit if it is a singlread command
sbcs_sbbusy_din = 1'b1;
sbcs_sberror_wren = sbcs_wren & (|dmi_reg_wdata[14:12]); // write to clear the error bits
sbcs_sberror_din[2:0] = ~dmi_reg_wdata[14:12] & sbcs_reg[14:12];
end
WAIT_RD: begin
sb_nxtstate = (sbcs_unaligned | sbcs_illegal_size) ? DONE : CMD_RD;
sb_state_en = (dbg_bus_clk_en & ~sb_abmem_cmd_pending) | sbcs_unaligned | sbcs_illegal_size;
sbcs_sberror_wren = sbcs_unaligned | sbcs_illegal_size;
sbcs_sberror_din[2:0] = sbcs_unaligned ? 3'b011 : 3'b100;
end
WAIT_WR: begin
sb_nxtstate = (sbcs_unaligned | sbcs_illegal_size) ? DONE : CMD_WR;
sb_state_en = (dbg_bus_clk_en & ~sb_abmem_cmd_pending) | sbcs_unaligned | sbcs_illegal_size;
sbcs_sberror_wren = sbcs_unaligned | sbcs_illegal_size;
sbcs_sberror_din[2:0] = sbcs_unaligned ? 3'b011 : 3'b100;
end
CMD_RD : begin
sb_nxtstate = RSP_RD;
sb_state_en = sb_bus_cmd_read & dbg_bus_clk_en;
end
CMD_WR : begin
sb_nxtstate = (sb_bus_cmd_write_addr & sb_bus_cmd_write_data) ? RSP_WR : (sb_bus_cmd_write_data ? CMD_WR_ADDR : CMD_WR_DATA);
sb_state_en = (sb_bus_cmd_write_addr | sb_bus_cmd_write_data) & dbg_bus_clk_en;
end
CMD_WR_ADDR : begin
sb_nxtstate = RSP_WR;
sb_state_en = sb_bus_cmd_write_addr & dbg_bus_clk_en;
end
CMD_WR_DATA : begin
sb_nxtstate = RSP_WR;
sb_state_en = sb_bus_cmd_write_data & dbg_bus_clk_en;
end
RSP_RD: begin
sb_nxtstate = DONE;
sb_state_en = sb_bus_rsp_read & dbg_bus_clk_en;
sbcs_sberror_wren = sb_state_en & sb_bus_rsp_error;
sbcs_sberror_din[2:0] = 3'b010;
end
RSP_WR: begin
sb_nxtstate = DONE;
sb_state_en = sb_bus_rsp_write & dbg_bus_clk_en;
sbcs_sberror_wren = sb_state_en & sb_bus_rsp_error;
sbcs_sberror_din[2:0] = 3'b010;
end
DONE: begin
sb_nxtstate = SBIDLE;
sb_state_en = 1'b1;
sbcs_sbbusy_wren = 1'b1; // reset the single read
sbcs_sbbusy_din = 1'b0;
sbaddress0_reg_wren1 = sbcs_reg[16] & (sbcs_reg[14:12] == 3'b0); // auto increment was set and no error. Update to new address after completing the current command
end
default : begin
sb_nxtstate = SBIDLE;
sb_state_en = 1'b0;
sbcs_sbbusy_wren = 1'b0;
sbcs_sbbusy_din = 1'b0;
sbcs_sberror_wren = 1'b0;
sbcs_sberror_din[2:0] = 3'b0;
sbaddress0_reg_wren1 = 1'b0;
end
endcase
end // always_comb begin
rvdffs #($bits(sb_state_t)) sb_state_reg (.din(sb_nxtstate), .dout({sb_state}), .en(sb_state_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk));
assign sb_abmem_cmd_write = command_reg[16];
assign sb_abmem_cmd_size[2:0] = {1'b0, command_reg[21:20]};
assign sb_abmem_cmd_addr[31:0] = abmem_addr[31:0];
assign sb_abmem_cmd_wdata[31:0] = data0_reg[31:0];
assign sb_cmd_size[2:0] = sbcs_reg[19:17];
assign sb_cmd_wdata[63:0] = {sbdata1_reg[31:0], sbdata0_reg[31:0]};
assign sb_cmd_addr[31:0] = sbaddress0_reg[31:0];
assign sb_abmem_cmd_awvalid = (dbg_state == SB_CMD_SEND) & sb_abmem_cmd_write & ~sb_abmem_cmd_done;
assign sb_abmem_cmd_wvalid = (dbg_state == SB_CMD_SEND) & sb_abmem_cmd_write & ~sb_abmem_data_done;
assign sb_abmem_cmd_arvalid = (dbg_state == SB_CMD_SEND) & ~sb_abmem_cmd_write & ~sb_abmem_cmd_done & ~sb_abmem_data_done;
assign sb_abmem_read_pend = (dbg_state == SB_CMD_RESP) & ~sb_abmem_cmd_write;
assign sb_cmd_awvalid = ((sb_state == CMD_WR) | (sb_state == CMD_WR_ADDR));
assign sb_cmd_wvalid = ((sb_state == CMD_WR) | (sb_state == CMD_WR_DATA));
assign sb_cmd_arvalid = (sb_state == CMD_RD);
assign sb_read_pend = (sb_state == RSP_RD);
assign sb_axi_size[2:0] = (sb_abmem_cmd_awvalid | sb_abmem_cmd_wvalid | sb_abmem_cmd_arvalid | sb_abmem_read_pend) ? sb_abmem_cmd_size[2:0] : sb_cmd_size[2:0];
assign sb_axi_addr[31:0] = (sb_abmem_cmd_awvalid | sb_abmem_cmd_wvalid | sb_abmem_cmd_arvalid | sb_abmem_read_pend) ? sb_abmem_cmd_addr[31:0] : sb_cmd_addr[31:0];
assign sb_axi_wrdata[63:0] = (sb_abmem_cmd_awvalid | sb_abmem_cmd_wvalid) ? {2{sb_abmem_cmd_wdata[31:0]}} : sb_cmd_wdata[63:0];
// Generic bus response signals
assign sb_bus_cmd_read = sb_axi_arvalid & sb_axi_arready;
assign sb_bus_cmd_write_addr = sb_axi_awvalid & sb_axi_awready;
assign sb_bus_cmd_write_data = sb_axi_wvalid & sb_axi_wready;
assign sb_bus_rsp_read = sb_axi_rvalid & sb_axi_rready;
assign sb_bus_rsp_write = sb_axi_bvalid & sb_axi_bready;
assign sb_bus_rsp_error = (sb_bus_rsp_read & (|(sb_axi_rresp[1:0]))) | (sb_bus_rsp_write & (|(sb_axi_bresp[1:0])));
// AXI Request signals
assign sb_axi_awvalid = sb_abmem_cmd_awvalid | sb_cmd_awvalid;
assign sb_axi_awaddr[31:0] = sb_axi_addr[31:0];
assign sb_axi_awid[pt.SB_BUS_TAG-1:0] = '0;
assign sb_axi_awsize[2:0] = sb_axi_size[2:0];
assign sb_axi_awprot[2:0] = 3'b001;
assign sb_axi_awcache[3:0] = 4'b1111;
assign sb_axi_awregion[3:0] = sb_axi_addr[31:28];
assign sb_axi_awlen[7:0] = '0;
assign sb_axi_awburst[1:0] = 2'b01;
assign sb_axi_awqos[3:0] = '0;
assign sb_axi_awlock = '0;
assign sb_axi_wvalid = sb_abmem_cmd_wvalid | sb_cmd_wvalid;
assign sb_axi_wdata[63:0] = ({64{(sb_axi_size[2:0] == 3'h0)}} & {8{sb_axi_wrdata[7:0]}}) |
({64{(sb_axi_size[2:0] == 3'h1)}} & {4{sb_axi_wrdata[15:0]}}) |
({64{(sb_axi_size[2:0] == 3'h2)}} & {2{sb_axi_wrdata[31:0]}}) |
({64{(sb_axi_size[2:0] == 3'h3)}} & {sb_axi_wrdata[63:0]});
assign sb_axi_wstrb[7:0] = ({8{(sb_axi_size[2:0] == 3'h0)}} & (8'h1 << sb_axi_addr[2:0])) |
({8{(sb_axi_size[2:0] == 3'h1)}} & (8'h3 << {sb_axi_addr[2:1],1'b0})) |
({8{(sb_axi_size[2:0] == 3'h2)}} & (8'hf << {sb_axi_addr[2],2'b0})) |
({8{(sb_axi_size[2:0] == 3'h3)}} & 8'hff);
assign sb_axi_wlast = '1;
assign sb_axi_arvalid = sb_abmem_cmd_arvalid | sb_cmd_arvalid;
assign sb_axi_araddr[31:0] = sb_axi_addr[31:0];
assign sb_axi_arid[pt.SB_BUS_TAG-1:0] = '0;
assign sb_axi_arsize[2:0] = sb_axi_size[2:0];
assign sb_axi_arprot[2:0] = 3'b001;
assign sb_axi_arcache[3:0] = 4'b0;
assign sb_axi_arregion[3:0] = sb_axi_addr[31:28];
assign sb_axi_arlen[7:0] = '0;
assign sb_axi_arburst[1:0] = 2'b01;
assign sb_axi_arqos[3:0] = '0;
assign sb_axi_arlock = '0;
// AXI Response signals
assign sb_axi_bready = 1'b1;
assign sb_axi_rready = 1'b1;
assign sb_bus_rdata[63:0] = ({64{sb_axi_size == 3'h0}} & ((sb_axi_rdata[63:0] >> 8*sb_axi_addr[2:0]) & 64'hff)) |
({64{sb_axi_size == 3'h1}} & ((sb_axi_rdata[63:0] >> 16*sb_axi_addr[2:1]) & 64'hffff)) |
({64{sb_axi_size == 3'h2}} & ((sb_axi_rdata[63:0] >> 32*sb_axi_addr[2]) & 64'hffff_ffff)) |
({64{sb_axi_size == 3'h3}} & sb_axi_rdata[63:0]);
`ifdef RV_ASSERT_ON
// assertion.
// when the resume_ack is asserted then the dec_tlu_dbg_halted should be 0
dm_check_resume_and_halted: assert property (@(posedge clk) disable iff(~rst_l) (~dec_tlu_resume_ack | ~dec_tlu_dbg_halted));
assert_b2b_haltreq: assert property (@(posedge clk) disable iff (~(rst_l)) (##1 dbg_halt_req |=> ~dbg_halt_req)); // One cycle delay to fix weird issue around reset
assert_halt_resume_onehot: assert #0 ($onehot0({dbg_halt_req, dbg_resume_req}));
`endif
endmodule

254
Flow/design/dec/cdecode Normal file
View File

@ -0,0 +1,254 @@
.definition
# invalid rs2=0
c.add0 = [1001.....1....10]
c.add1 = [1001......1...10]
c.add2 = [1001.......1..10]
c.add3 = [1001........1.10]
c.add4 = [1001.........110]
# invalid rs2=0
c.mv0 = [1000.....1....10]
c.mv1 = [1000......1...10]
c.mv2 = [1000.......1..10]
c.mv3 = [1000........1.10]
c.mv4 = [1000.........110]
# invalid if rs1=0
c.jalr0 = [10011....0000010]
c.jalr1 = [1001.1...0000010]
c.jalr2 = [1001..1..0000010]
c.jalr3 = [1001...1.0000010]
c.jalr4 = [1001....10000010]
c.addi = [000...........01]
# invalid imm=0
c.addi16sp0 = [011100010.....01]
c.addi16sp1 = [011.000101....01]
c.addi16sp2 = [011.00010.1...01]
c.addi16sp3 = [011.00010..1..01]
c.addi16sp4 = [011.00010...1.01]
c.addi16sp5 = [011.00010....101]
# invalid uimm=0
c.addi4spn0 = [0001..........00]
c.addi4spn1 = [000.1.........00]
c.addi4spn2 = [000..1........00]
c.addi4spn3 = [000...1.......00]
c.addi4spn4 = [000....1......00]
c.addi4spn5 = [000.....1.....00]
c.addi4spn6 = [000......1....00]
c.addi4spn7 = [000.......1...00]
c.and = [100011...11...01]
c.andi = [100.10........01]
c.beqz = [110...........01]
c.bnez = [111...........01]
c.ebreak = [1001000000000010]
c.j = [101...........01]
c.jal = [001...........01]
c.jr0 = [10001....0000010]
c.jr1 = [1000.1...0000010]
c.jr2 = [1000..1..0000010]
c.jr3 = [1000...1.0000010]
c.jr4 = [1000....10000010]
c.li = [010...........01]
# invalid rd=x2 or imm=0
c.lui0 = [01111.........01]
c.lui1 = [0111.1........01]
c.lui2 = [0111..1.......01]
c.lui3 = [0111...0......01]
c.lui4 = [0111....1.....01]
c.lui5 = [011.1....1....01]
c.lui6 = [011..1...1....01]
c.lui7 = [011...1..1....01]
c.lui8 = [011....0.1....01]
c.lui9 = [011.....11....01]
c.lui10= [011.1.....1...01]
c.lui11= [011..1....1...01]
c.lui12 = [011...1...1...01]
c.lui13 = [011....0..1...01]
c.lui14 = [011.....1.1...01]
c.lui15 = [011.1......1..01]
c.lui16 = [011..1.....1..01]
c.lui17 = [011...1....1..01]
c.lui18 = [011....0...1..01]
c.lui19 = [011.....1..1..01]
c.lui20 = [011.1.......1.01]
c.lui21 = [011..1......1.01]
c.lui22 = [011...1.....1.01]
c.lui23 = [011....0....1.01]
c.lui24 = [011.....1...1.01]
c.lui25 = [011.1........101]
c.lui26 = [011..1.......101]
c.lui27 = [011...1......101]
c.lui28 = [011....0.....101]
c.lui29 = [011.....1....101]
c.lw = [010...........00]
c.lwsp = [010...........10]
c.or = [100011...10...01]
# bit 5 of the shift must be 0 to be legal
c.slli = [0000..........10]
c.srai = [100001........01]
c.srli = [100000........01]
c.sub = [100011...00...01]
c.sw = [110...........00]
c.swsp = [110...........10]
c.xor = [100011...01...01]
.input
rv32c = {
i[15]
i[14]
i[13]
i[12]
i[11]
i[10]
i[9]
i[8]
i[7]
i[6]
i[5]
i[4]
i[3]
i[2]
i[1]
i[0]
}
.output
rv32c = {
rdrd
rdrs1
rs2rs2
rdprd
rdprs1
rs2prs2
rs2prd
uimm9_2
ulwimm6_2
ulwspimm7_2
rdeq2
rdeq1
rs1eq2
sbroffset8_1
simm9_4
simm5_0
sjaloffset11_1
sluimm17_12
uimm5_0
uswimm6_2
uswspimm7_2
o[31]
o[30]
o[29]
o[28]
o[27]
o[26]
o[25]
o[24]
o[23]
o[22]
o[21]
o[20]
o[19]
o[18]
o[17]
o[16]
o[15]
o[14]
o[13]
o[12]
o[11]
o[10]
o[9]
o[8]
o[7]
o[6]
o[5]
o[4]
o[3]
o[2]
o[1]
o[0]
}
# assign rs2d[4:0] = i[6:2];
#
# assign rdd[4:0] = i[11:7];
#
# assign rdpd[4:0] = {2'b01, i[9:7]};
#
# assign rs2pd[4:0] = {2'b01, i[4:2]};
.decode
rv32c[c.add{0-4}] = { rdrd rdrs1 rs2rs2 o[5] o[4] o[1] o[0] }
rv32c[c.mv{0-4}] = { rdrd rs2rs2 o[5] o[4] o[1] o[0] }
rv32c[c.addi] = { rdrd rdrs1 simm5_0 o[4] o[1] o[0] }
rv32c[c.addi16sp{0-5}] = { rdeq2 rs1eq2 simm9_4 o[4] o[1] o[0] }
rv32c[c.addi4spn{0-7}] = { rs2prd rs1eq2 uimm9_2 o[4] o[1] o[0] }
rv32c[c.and] = { rdprd rdprs1 rs2prs2 o[14] o[13] o[12] o[5] o[4] o[1] o[0] }
rv32c[c.andi] = { rdprd rdprs1 simm5_0 o[14] o[13] o[12] o[4] o[1] o[0] }
rv32c[c.beqz] = { rdprs1 sbroffset8_1 o[6] o[5] o[1] o[0] }
rv32c[c.bnez] = { rdprs1 sbroffset8_1 o[12] o[6] o[5] o[1] o[0] }
rv32c[c.ebreak] = { o[20] o[6] o[5] o[4] o[1] o[0] }
rv32c[c.j] = { sjaloffset11_1 o[6] o[5] o[3] o[2] o[1] o[0] }
rv32c[c.jal] = { sjaloffset11_1 rdeq1 o[6] o[5] o[3] o[2] o[1] o[0] }
rv32c[c.jalr{0-4}] = { rdeq1 rdrs1 o[6] o[5] o[2] o[1] o[0] }
rv32c[c.jr{0-4}] = { rdrs1 o[6] o[5] o[2] o[1] o[0] }
rv32c[c.li] = { rdrd simm5_0 o[4] o[1] o[0] }
rv32c[c.lui{0-29}] = { rdrd sluimm17_12 o[5] o[4] o[2] o[1] o[0] }
rv32c[c.lw] = { rs2prd rdprs1 ulwimm6_2 o[13] o[1] o[0] }
rv32c[c.lwsp] = { rdrd rs1eq2 ulwspimm7_2 o[13] o[1] o[0] }
rv32c[c.or] = { rdprd rdprs1 rs2prs2 o[14] o[13] o[5] o[4] o[1] o[0] }
rv32c[c.slli] = { rdrd rdrs1 uimm5_0 o[12] o[4] o[1] o[0] }
rv32c[c.srai] = { rdprd rdprs1 uimm5_0 o[30] o[14] o[12] o[4] o[1] o[0] }
rv32c[c.srli] = { rdprd rdprs1 uimm5_0 o[14] o[12] o[4] o[1] o[0] }
rv32c[c.sub] = { rdprd rdprs1 rs2prs2 o[30] o[5] o[4] o[1] o[0] }
rv32c[c.sw] = { rdprs1 rs2prs2 uswimm6_2 o[13] o[5] o[1] o[0] }
rv32c[c.swsp] = { rs2rs2 rs1eq2 uswspimm7_2 o[13] o[5] o[1] o[0] }
rv32c[c.xor] = { rdprd rdprs1 rs2prs2 o[14] o[5] o[4] o[1] o[0] }
.end

259
Flow/design/dec/csrdecode Normal file
View File

@ -0,0 +1,259 @@
.definition
csr_misa = [001100000001]
csr_mvendorid = [111100010001]
csr_marchid = [111100010010]
csr_mimpid = [111100010011]
csr_mhartid = [111100010100]
csr_mstatus = [001100000000]
csr_mtvec = [001100000101]
csr_mip = [001101000100]
csr_mie = [001100000100]
csr_mcyclel = [101100000000]
csr_mcycleh = [101110000000]
csr_minstretl = [101100000010]
csr_minstreth = [101110000010]
csr_mscratch = [001101000000]
csr_mepc = [001101000001]
csr_mcause = [001101000010]
csr_mscause = [011111111111]
csr_mtval = [001101000011]
csr_mrac = [011111000000]
csr_dmst = [011111000100]
csr_mdeau = [101111000000]
csr_mdseac = [111111000000]
csr_meivt = [101111001000]
csr_meihap = [111111001000]
csr_meipt = [101111001001]
csr_meicpct = [101111001010]
csr_meicurpl = [101111001100]
csr_meicidpl = [101111001011]
csr_dcsr = [011110110000]
csr_dpc = [011110110001]
csr_dicawics = [011111001000]
csr_dicad0h = [011111001100]
csr_dicad0 = [011111001001]
csr_dicad1 = [011111001010]
csr_dicago = [011111001011]
csr_mtsel = [011110100000]
csr_mtdata1 = [011110100001]
csr_mtdata2 = [011110100010]
csr_mhpmc3 = [101100000011]
csr_mhpmc4 = [101100000100]
csr_mhpmc5 = [101100000101]
csr_mhpmc6 = [101100000110]
csr_mhpmc3h = [101110000011]
csr_mhpmc4h = [101110000100]
csr_mhpmc5h = [101110000101]
csr_mhpmc6h = [101110000110]
csr_mhpme3 = [001100100011]
csr_mhpme4 = [001100100100]
csr_mhpme5 = [001100100101]
csr_mhpme6 = [001100100110]
csr_micect = [011111110000]
csr_miccmect = [011111110001]
csr_mdccmect = [011111110010]
csr_mpmc = [011111000110]
csr_mcgc = [011111111000]
csr_mcpc = [011111000010]
csr_mfdc = [011111111001]
csr_mitctl0 = [011111010100]
csr_mitctl1 = [011111010111]
csr_mitb0 = [011111010011]
csr_mitb1 = [011111010110]
csr_mitcnt0 = [011111010010]
csr_mitcnt1 = [011111010101]
csr_perfva = [101100000111]
csr_perfvb = [101100001...]
csr_perfvc = [10110001....]
csr_perfvd = [101110000111]
csr_perfve = [101110001...]
csr_perfvf = [10111001....]
csr_perfvg = [001100100111]
csr_perfvh = [001100101...]
csr_perfvi = [00110011....]
csr_mcountinhibit = [001100100000]
csr_mfdht = [011111001110]
csr_mfdhs = [011111001111]
.input
csr = {
dec_csr_rdaddr_d[11]
dec_csr_rdaddr_d[10]
dec_csr_rdaddr_d[9]
dec_csr_rdaddr_d[8]
dec_csr_rdaddr_d[7]
dec_csr_rdaddr_d[6]
dec_csr_rdaddr_d[5]
dec_csr_rdaddr_d[4]
dec_csr_rdaddr_d[3]
dec_csr_rdaddr_d[2]
dec_csr_rdaddr_d[1]
dec_csr_rdaddr_d[0]
}
.output
csr = {
csr_misa
csr_mvendorid
csr_marchid
csr_mimpid
csr_mhartid
csr_mstatus
csr_mtvec
csr_mip
csr_mie
csr_mcyclel
csr_mcycleh
csr_minstretl
csr_minstreth
csr_mscratch
csr_mepc
csr_mcause
csr_mscause
csr_mtval
csr_mrac
csr_dmst
csr_mdseac
csr_meihap
csr_meivt
csr_meipt
csr_meicurpl
csr_meicidpl
csr_dcsr
csr_mcgc
csr_mfdc
csr_dpc
csr_mtsel
csr_mtdata1
csr_mtdata2
csr_mhpmc3
csr_mhpmc4
csr_mhpmc5
csr_mhpmc6
csr_mhpmc3h
csr_mhpmc4h
csr_mhpmc5h
csr_mhpmc6h
csr_mhpme3
csr_mhpme4
csr_mhpme5
csr_mhpme6
csr_mcountinhibit
csr_mitctl0
csr_mitctl1
csr_mitb0
csr_mitb1
csr_mitcnt0
csr_mitcnt1
csr_perfva
csr_perfvb
csr_perfvc
csr_perfvd
csr_perfve
csr_perfvf
csr_perfvg
csr_perfvh
csr_perfvi
csr_mpmc
csr_mcpc
csr_meicpct
csr_mdeau
csr_micect
csr_miccmect
csr_mdccmect
csr_mfdht
csr_mfdhs
csr_dicawics
csr_dicad0h
csr_dicad0
csr_dicad1
csr_dicago
valid_only
presync
postsync
}
.decode
csr[ csr_misa ] = { csr_misa }
csr[ csr_mvendorid ] = { csr_mvendorid }
csr[ csr_marchid ] = { csr_marchid }
csr[ csr_mimpid ] = { csr_mimpid }
csr[ csr_mhartid ] = { csr_mhartid }
csr[ csr_mstatus ] = { csr_mstatus postsync }
csr[ csr_mtvec ] = { csr_mtvec postsync}
csr[ csr_mip ] = { csr_mip }
csr[ csr_mie ] = { csr_mie }
csr[ csr_mcyclel ] = { csr_mcyclel }
csr[ csr_mcycleh ] = { csr_mcycleh }
csr[ csr_minstretl ] = { csr_minstretl presync }
csr[ csr_minstreth ] = { csr_minstreth presync }
csr[ csr_mscratch ] = { csr_mscratch }
csr[ csr_mepc ] = { csr_mepc postsync}
csr[ csr_mcause ] = { csr_mcause }
csr[ csr_mscause ] = { csr_mscause }
csr[ csr_mtval ] = { csr_mtval }
csr[ csr_mrac ] = { csr_mrac postsync }
csr[ csr_dmst ] = { csr_dmst postsync}
csr[ csr_mdseac ] = { csr_mdseac }
csr[ csr_meipt ] = { csr_meipt }
csr[ csr_meihap ] = { csr_meihap }
csr[ csr_meivt ] = { csr_meivt }
csr[ csr_meicurpl ] = { csr_meicurpl }
csr[ csr_mdeau ] = { csr_mdeau }
csr[ csr_meicpct ] = { csr_meicpct }
csr[ csr_mpmc ] = { csr_mpmc }
csr[ csr_mcpc ] = { csr_mcpc presync postsync }
csr[ csr_meicidpl ] = { csr_meicidpl }
csr[ csr_mcgc ] = { csr_mcgc }
csr[ csr_mcountinhibit] = { csr_mcountinhibit presync postsync }
csr[ csr_mfdc ] = { csr_mfdc presync postsync }
csr[ csr_dcsr ] = { csr_dcsr }
csr[ csr_dpc ] = { csr_dpc }
csr[ csr_mtsel ] = { csr_mtsel }
csr[ csr_mtdata1 ] = { csr_mtdata1 presync postsync }
csr[ csr_mtdata2 ] = { csr_mtdata2 postsync }
csr[ csr_mhpmc3 ] = { csr_mhpmc3 presync }
csr[ csr_mhpmc4 ] = { csr_mhpmc4 presync }
csr[ csr_mhpmc5 ] = { csr_mhpmc5 presync }
csr[ csr_mhpmc6 ] = { csr_mhpmc6 presync }
csr[ csr_mhpmc3h ] = { csr_mhpmc3h presync }
csr[ csr_mhpmc4h ] = { csr_mhpmc4h presync }
csr[ csr_mhpmc5h ] = { csr_mhpmc5h presync }
csr[ csr_mhpmc6h ] = { csr_mhpmc6h presync }
csr[ csr_mhpme3 ] = { csr_mhpme3 }
csr[ csr_mhpme4 ] = { csr_mhpme4 }
csr[ csr_mhpme5 ] = { csr_mhpme5 }
csr[ csr_mhpme6 ] = { csr_mhpme6 }
csr[ csr_micect ] = { csr_micect }
csr[ csr_miccmect ] = { csr_miccmect }
csr[ csr_mdccmect ] = { csr_mdccmect }
csr[ csr_dicawics ] = { csr_dicawics }
csr[ csr_dicad0h ] = { csr_dicad0h }
csr[ csr_dicad0 ] = { csr_dicad0 }
csr[ csr_dicad1 ] = { csr_dicad1 }
csr[ csr_dicago ] = { csr_dicago }
csr[ csr_mitctl0 ] = { csr_mitctl0 }
csr[ csr_mitctl1 ] = { csr_mitctl1 }
csr[ csr_mitb0 ] = { csr_mitb0 }
csr[ csr_mitb1 ] = { csr_mitb1 }
csr[ csr_mitcnt0 ] = { csr_mitcnt0 }
csr[ csr_mitcnt1 ] = { csr_mitcnt1 }
csr[ csr_mfdht ] = { csr_mfdht }
csr[ csr_mfdhs ] = { csr_mfdhs }
csr[ csr_mcountinhibit] = { csr_mcountinhibit presync postsync }
csr[ csr_perfva ] = { valid_only }
csr[ csr_perfvb ] = { valid_only }
csr[ csr_perfvc ] = { valid_only }
csr[ csr_perfvd ] = { valid_only }
csr[ csr_perfve ] = { valid_only }
csr[ csr_perfvf ] = { valid_only }
csr[ csr_perfvg ] = { valid_only }
csr[ csr_perfvh ] = { valid_only }
csr[ csr_perfvi ] = { valid_only }
.end

617
Flow/design/dec/decode Normal file
View File

@ -0,0 +1,617 @@
.definition
clz = [011000000000.....001.....0010011]
ctz = [011000000001.....001.....0010011]
cpop = [011000000010.....001.....0010011]
sext_b = [011000000100.....001.....0010011]
sext_h = [011000000101.....001.....0010011]
min = [0000101..........100.....0110011]
max = [0000101..........110.....0110011]
minu = [0000101..........101.....0110011]
maxu = [0000101..........111.....0110011]
andn = [0100000..........111.....0110011]
orn = [0100000..........110.....0110011]
xnor = [0100000..........100.....0110011]
#pack = [0000100..........100.....0110011]
zext_h = [000010000000.....100.....0110011]
pack1 = [000010000001.....100.....0110011]
pack2 = [000010000010.....100.....0110011]
pack3 = [000010000011.....100.....0110011]
pack4 = [000010000100.....100.....0110011]
pack5 = [000010000101.....100.....0110011]
pack6 = [000010000110.....100.....0110011]
pack7 = [000010000111.....100.....0110011]
pack8 = [000010001000.....100.....0110011]
pack9 = [000010001001.....100.....0110011]
pack10 = [000010001010.....100.....0110011]
pack11 = [000010001011.....100.....0110011]
pack12 = [000010001100.....100.....0110011]
pack13 = [000010001101.....100.....0110011]
pack14 = [000010001110.....100.....0110011]
pack15 = [000010001111.....100.....0110011]
pack16 = [000010010000.....100.....0110011]
pack17 = [000010010001.....100.....0110011]
pack18 = [000010010010.....100.....0110011]
pack19 = [000010010011.....100.....0110011]
pack20 = [000010010100.....100.....0110011]
pack21 = [000010010101.....100.....0110011]
pack22 = [000010010110.....100.....0110011]
pack23 = [000010010111.....100.....0110011]
pack24 = [000010011000.....100.....0110011]
pack25 = [000010011001.....100.....0110011]
pack26 = [000010011010.....100.....0110011]
pack27 = [000010011011.....100.....0110011]
pack28 = [000010011100.....100.....0110011]
pack29 = [000010011101.....100.....0110011]
pack30 = [000010011110.....100.....0110011]
pack31 = [000010011111.....100.....0110011]
packu = [0100100..........100.....0110011]
packh = [0000100..........111.....0110011]
rol = [0110000..........001.....0110011]
ror = [0110000..........101.....0110011]
rori = [0110000..........101.....0010011]
sh1add = [0010000..........010.....0110011]
sh2add = [0010000..........100.....0110011]
sh3add = [0010000..........110.....0110011]
bset = [0010100..........001.....0110011]
bclr = [0100100..........001.....0110011]
binv = [0110100..........001.....0110011]
bext = [0100100..........101.....0110011]
bseti = [0010100..........001.....0010011]
bclri = [0100100..........001.....0010011]
binvi = [0110100..........001.....0010011]
bexti = [0100100..........101.....0010011]
grev = [0110100..........101.....0110011]
#grevi = [01101............101.....0010011]
grevi0 = [011010000000.....101.....0010011]
grevi1 = [011010000001.....101.....0010011]
grevi2 = [011010000010.....101.....0010011]
grevi3 = [011010000011.....101.....0010011]
grevi4 = [011010000100.....101.....0010011]
grevi5 = [011010000101.....101.....0010011]
grevi6 = [011010000110.....101.....0010011]
grevi7 = [011010000111.....101.....0010011]
grevi8 = [011010001000.....101.....0010011]
grevi9 = [011010001001.....101.....0010011]
grevi10 = [011010001010.....101.....0010011]
grevi11 = [011010001011.....101.....0010011]
grevi12 = [011010001100.....101.....0010011]
grevi13 = [011010001101.....101.....0010011]
grevi14 = [011010001110.....101.....0010011]
grevi15 = [011010001111.....101.....0010011]
grevi16 = [011010010000.....101.....0010011]
grevi17 = [011010010001.....101.....0010011]
grevi18 = [011010010010.....101.....0010011]
grevi19 = [011010010011.....101.....0010011]
grevi20 = [011010010100.....101.....0010011]
grevi21 = [011010010101.....101.....0010011]
grevi22 = [011010010110.....101.....0010011]
grevi23 = [011010010111.....101.....0010011]
#grevi24 = [011010011000.....101.....0010011] # REV8
rev8 = [011010011000.....101.....0010011]
grevi25 = [011010011001.....101.....0010011]
grevi26 = [011010011010.....101.....0010011]
grevi27 = [011010011011.....101.....0010011]
grevi28 = [011010011100.....101.....0010011]
grevi29 = [011010011101.....101.....0010011]
grevi30 = [011010011110.....101.....0010011]
grevi31 = [011010011111.....101.....0010011]
gorc = [0010100..........101.....0110011]
#gorci = [00101............101.....0010011]
gorci0 = [001010000000.....101.....0010011]
gorci1 = [001010000001.....101.....0010011]
gorci2 = [001010000010.....101.....0010011]
gorci3 = [001010000011.....101.....0010011]
gorci4 = [001010000100.....101.....0010011]
gorci5 = [001010000101.....101.....0010011]
gorci6 = [001010000110.....101.....0010011]
#gorci7 = [001010000111.....101.....0010011] # ORC_B
orc_b = [001010000111.....101.....0010011]
gorci8 = [001010001000.....101.....0010011]
gorci9 = [001010001001.....101.....0010011]
gorci10 = [001010001010.....101.....0010011]
gorci11 = [001010001011.....101.....0010011]
gorci12 = [001010001100.....101.....0010011]
gorci13 = [001010001101.....101.....0010011]
gorci14 = [001010001110.....101.....0010011]
gorci15 = [001010001111.....101.....0010011]
gorci16 = [001010010000.....101.....0010011]
gorci17 = [001010010001.....101.....0010011]
gorci18 = [001010010010.....101.....0010011]
gorci19 = [001010010011.....101.....0010011]
gorci20 = [001010010100.....101.....0010011]
gorci21 = [001010010101.....101.....0010011]
gorci22 = [001010010110.....101.....0010011]
gorci23 = [001010010111.....101.....0010011]
gorci24 = [001010011000.....101.....0010011]
gorci25 = [001010011001.....101.....0010011]
gorci26 = [001010011010.....101.....0010011]
gorci27 = [001010011011.....101.....0010011]
gorci28 = [001010011100.....101.....0010011]
gorci29 = [001010011101.....101.....0010011]
gorci30 = [001010011110.....101.....0010011]
gorci31 = [001010011111.....101.....0010011]
shfl = [0000100..........001.....0110011]
shfli = [00001000.........001.....0010011]
unshfl = [0000100..........101.....0110011]
unshfli = [00001000.........101.....0010011]
bdecompress = [0100100..........110.....0110011]
bcompress = [0000100..........110.....0110011]
clmul = [0000101..........001.....0110011]
clmulr = [0000101..........010.....0110011]
clmulh = [0000101..........011.....0110011]
crc32_b = [011000010000.....001.....0010011]
crc32_h = [011000010001.....001.....0010011]
crc32_w = [011000010010.....001.....0010011]
crc32c_b = [011000011000.....001.....0010011]
crc32c_h = [011000011001.....001.....0010011]
crc32c_w = [011000011010.....001.....0010011]
bfp = [0100100..........111.....0110011]
xperm_n = [0010100..........010.....0110011]
xperm_b = [0010100..........100.....0110011]
xperm_h = [0010100..........110.....0110011]
add = [0000000..........000.....0110011]
addi = [.................000.....0010011]
sub = [0100000..........000.....0110011]
and = [0000000..........111.....0110011]
andi = [.................111.....0010011]
or = [0000000..........110.....0110011]
ori = [.................110.....0010011]
xor = [0000000..........100.....0110011]
xori = [.................100.....0010011]
sll = [0000000..........001.....0110011]
slli = [0000000..........001.....0010011]
sra = [0100000..........101.....0110011]
srai = [0100000..........101.....0010011]
srl = [0000000..........101.....0110011]
srli = [0000000..........101.....0010011]
lui = [.........................0110111]
auipc = [.........................0010111]
slt = [0000000..........010.....0110011]
sltu = [0000000..........011.....0110011]
slti = [.................010.....0010011]
sltiu = [.................011.....0010011]
beq = [.................000.....1100011]
bne = [.................001.....1100011]
bge = [.................101.....1100011]
blt = [.................100.....1100011]
bgeu = [.................111.....1100011]
bltu = [.................110.....1100011]
jal = [.........................1101111]
jalr = [.................000.....1100111]
lb = [.................000.....0000011]
lh = [.................001.....0000011]
lw = [.................010.....0000011]
sb = [.................000.....0100011]
sh = [.................001.....0100011]
sw = [.................010.....0100011]
lbu = [.................100.....0000011]
lhu = [.................101.....0000011]
fence = [0000........00000000000000001111]
fence.i = [00000000000000000001000000001111]
ebreak = [00000000000100000000000001110011]
ecall = [00000000000000000000000001110011]
mret = [00110000001000000000000001110011]
wfi = [00010000010100000000000001110011]
csrrc_ro = [............00000011.....1110011]
csrrc_rw0 = [............1....011.....1110011]
csrrc_rw1 = [.............1...011.....1110011]
csrrc_rw2 = [..............1..011.....1110011]
csrrc_rw3 = [...............1.011.....1110011]
csrrc_rw4 = [................1011.....1110011]
csrrci_ro = [............00000111.....1110011]
csrrci_rw0 = [............1....111.....1110011]
csrrci_rw1 = [.............1...111.....1110011]
csrrci_rw2 = [..............1..111.....1110011]
csrrci_rw3 = [...............1.111.....1110011]
csrrci_rw4 = [................1111.....1110011]
csrrs_ro = [............00000010.....1110011]
csrrs_rw0 = [............1....010.....1110011]
csrrs_rw1 = [.............1...010.....1110011]
csrrs_rw2 = [..............1..010.....1110011]
csrrs_rw3 = [...............1.010.....1110011]
csrrs_rw4 = [................1010.....1110011]
csrrsi_ro = [............00000110.....1110011]
csrrsi_rw0 = [............1....110.....1110011]
csrrsi_rw1 = [.............1...110.....1110011]
csrrsi_rw2 = [..............1..110.....1110011]
csrrsi_rw3 = [...............1.110.....1110011]
csrrsi_rw4 = [................1110.....1110011]
csrw = [.................001000001110011]
csrrw0 = [.................001....11110011]
csrrw1 = [.................001...1.1110011]
csrrw2 = [.................001..1..1110011]
csrrw3 = [.................001.1...1110011]
csrrw4 = [.................0011....1110011]
csrwi = [.................101000001110011]
csrrwi0 = [.................101....11110011]
csrrwi1 = [.................101...1.1110011]
csrrwi2 = [.................101..1..1110011]
csrrwi3 = [.................101.1...1110011]
csrrwi4 = [.................1011....1110011]
mul = [0000001..........000.....0110011]
mulh = [0000001..........001.....0110011]
mulhsu = [0000001..........010.....0110011]
mulhu = [0000001..........011.....0110011]
div = [0000001..........100.....0110011]
divu = [0000001..........101.....0110011]
rem = [0000001..........110.....0110011]
remu = [0000001..........111.....0110011]
.input
rv32i = {
i[31]
i[30]
i[29]
i[28]
i[27]
i[26]
i[25]
i[24]
i[23]
i[22]
i[21]
i[20]
i[19]
i[18]
i[17]
i[16]
i[15]
i[14]
i[13]
i[12]
i[11]
i[10]
i[9]
i[8]
i[7]
i[6]
i[5]
i[4]
i[3]
i[2]
i[1]
i[0]
}
.output
rv32i = {
alu
rs1
rs2
imm12
rd
shimm5
imm20
pc
load
store
lsu
add
sub
land
lor
lxor
sll
sra
srl
slt
unsign
condbr
beq
bne
bge
blt
jal
by
half
word
csr_read
csr_clr
csr_set
csr_write
csr_imm
presync
postsync
ebreak
ecall
mret
mul
rs1_sign
rs2_sign
low
div
rem
fence
fence_i
clz
ctz
cpop
sext_b
sext_h
min
max
pack
packu
packh
rol
ror
zbb
bset
bclr
binv
bext
zbs
bcompress
bdecompress
zbe
clmul
clmulh
clmulr
zbc
grev
gorc
shfl
unshfl
xperm_n
xperm_b
xperm_h
zbp
crc32_b
crc32_h
crc32_w
crc32c_b
crc32c_h
crc32c_w
zbr
bfp
zbf
sh1add
sh2add
sh3add
zba
pm_alu
}
.decode
rv32i[clz] = { alu zbb rs1 rd clz }
rv32i[ctz] = { alu zbb rs1 rd ctz }
rv32i[cpop] = { alu zbb rs1 rd cpop }
rv32i[sext_b] = { alu zbb rs1 rd sext_b}
rv32i[sext_h] = { alu zbb rs1 rd sext_h}
rv32i[min] = { alu zbb rs1 rs2 rd sub min }
rv32i[max] = { alu zbb rs1 rs2 rd sub max }
rv32i[minu] = { alu zbb rs1 rs2 rd unsign sub min }
rv32i[maxu] = { alu zbb rs1 rs2 rd unsign sub max }
rv32i[andn] = { alu zbb zbp rs1 rs2 rd land }
rv32i[orn] = { alu zbb zbp rs1 rs2 rd lor }
rv32i[xnor] = { alu zbb zbp rs1 rs2 rd lxor }
rv32i[packu] = { alu zbp rs1 rs2 rd packu }
rv32i[packh] = { alu zbp rs1 rs2 rd packh zbe zbf}
rv32i[rol] = { alu zbb zbp rs1 rs2 rd rol }
rv32i[ror] = { alu zbb zbp rs1 rs2 rd ror }
rv32i[rori] = { alu zbb zbp rs1 rd shimm5 ror }
rv32i[bset] = { alu zbs rs1 rs2 rd bset }
rv32i[bclr] = { alu zbs rs1 rs2 rd bclr }
rv32i[binv] = { alu zbs rs1 rs2 rd binv }
rv32i[bext] = { alu zbs rs1 rs2 rd bext }
rv32i[bseti] = { alu zbs rs1 rd shimm5 bset }
rv32i[bclri] = { alu zbs rs1 rd shimm5 bclr }
rv32i[binvi] = { alu zbs rs1 rd shimm5 binv }
rv32i[bexti] = { alu zbs rs1 rd shimm5 bext }
rv32i[sh1add] = { alu zba rs1 rs2 rd sh1add}
rv32i[sh2add] = { alu zba rs1 rs2 rd sh2add}
rv32i[sh3add] = { alu zba rs1 rs2 rd sh3add}
#v32i[pack] = { alu zbp rs1 rs2 rd pack zbe zbf}
rv32i[zext_h] = { alu zbb zbp rs1 rs2 rd pack zbe zbf} # pack with rs2=x0
rv32i[pack{1-31}]= { alu zbp rs1 rs2 rd pack zbe zbf}
rv32i[mul] = { mul rs1 rs2 rd low }
rv32i[mulh] = { mul rs1 rs2 rd rs1_sign rs2_sign }
rv32i[mulhu] = { mul rs1 rs2 rd }
rv32i[mulhsu] = { mul rs1 rs2 rd rs1_sign }
rv32i[bcompress] = { mul zbe rs1 rs2 rd bcompress }
rv32i[bdecompress] = { mul zbe rs1 rs2 rd bdecompress }
rv32i[clmul] = { mul zbc rs1 rs2 rd clmul }
rv32i[clmulh] = { mul zbc rs1 rs2 rd clmulh}
rv32i[clmulr] = { mul zbc rs1 rs2 rd clmulr}
rv32i[crc32_b] = { mul zbr rs1 rd crc32_b}
rv32i[crc32_h] = { mul zbr rs1 rd crc32_h}
rv32i[crc32_w] = { mul zbr rs1 rd crc32_w}
rv32i[crc32c_b] = { mul zbr rs1 rd crc32c_b}
rv32i[crc32c_h] = { mul zbr rs1 rd crc32c_h}
rv32i[crc32c_w] = { mul zbr rs1 rd crc32c_w}
rv32i[bfp] = { mul zbf rs1 rs2 rd bfp }
rv32i[grev] = { mul zbp rs1 rs2 rd grev }
rv32i[grevi{0-23}] = { mul zbp rs1 rd shimm5 grev }
rv32i[grevi{25-31}] = { mul zbp rs1 rd shimm5 grev }
rv32i[rev8] = { alu zbb zbp rs1 rd shimm5 grev } # grevi24
rv32i[gorc] = { mul zbp rs1 rs2 rd gorc }
rv32i[gorci{0-6}] = { mul zbp rs1 rd shimm5 gorc }
rv32i[gorci{8-31}] = { mul zbp rs1 rd shimm5 gorc }
rv32i[orc_b] = { alu zbb zbp rs1 rd shimm5 gorc } # gorci7
rv32i[shfl] = { mul zbp rs1 rs2 rd shfl }
rv32i[shfli] = { mul zbp rs1 rd shimm5 shfl }
rv32i[unshfl] = { mul zbp rs1 rs2 rd unshfl}
rv32i[unshfli] = { mul zbp rs1 rd shimm5 unshfl}
rv32i[xperm_n] = { mul zbp rs1 rs2 rd xperm_n}
rv32i[xperm_b] = { mul zbp rs1 rs2 rd xperm_b}
rv32i[xperm_h] = { mul zbp rs1 rs2 rd xperm_h}
rv32i[div] = { div rs1 rs2 rd }
rv32i[divu] = { div rs1 rs2 rd unsign }
rv32i[rem] = { div rs1 rs2 rd rem}
rv32i[remu] = { div rs1 rs2 rd unsign rem}
rv32i[add] = { alu rs1 rs2 rd add pm_alu }
rv32i[addi] = { alu rs1 imm12 rd add pm_alu }
rv32i[sub] = { alu rs1 rs2 rd sub pm_alu }
rv32i[and] = { alu rs1 rs2 rd land pm_alu }
rv32i[andi] = { alu rs1 imm12 rd land pm_alu }
rv32i[or] = { alu rs1 rs2 rd lor pm_alu }
rv32i[ori] = { alu rs1 imm12 rd lor pm_alu }
rv32i[xor] = { alu rs1 rs2 rd lxor pm_alu }
rv32i[xori] = { alu rs1 imm12 rd lxor pm_alu }
rv32i[sll] = { alu rs1 rs2 rd sll pm_alu }
rv32i[slli] = { alu rs1 shimm5 rd sll pm_alu }
rv32i[sra] = { alu rs1 rs2 rd sra pm_alu }
rv32i[srai] = { alu rs1 shimm5 rd sra pm_alu }
rv32i[srl] = { alu rs1 rs2 rd srl pm_alu }
rv32i[srli] = { alu rs1 shimm5 rd srl pm_alu }
rv32i[lui] = { alu imm20 rd lor pm_alu }
rv32i[auipc] = { alu imm20 pc rd add pm_alu }
rv32i[slt] = { alu rs1 rs2 rd sub slt pm_alu }
rv32i[sltu] = { alu rs1 rs2 rd sub slt unsign pm_alu }
rv32i[slti] = { alu rs1 imm12 rd sub slt pm_alu }
rv32i[sltiu] = { alu rs1 imm12 rd sub slt unsign pm_alu }
rv32i[beq] = { alu rs1 rs2 sub condbr beq }
rv32i[bne] = { alu rs1 rs2 sub condbr bne }
rv32i[bge] = { alu rs1 rs2 sub condbr bge }
rv32i[blt] = { alu rs1 rs2 sub condbr blt }
rv32i[bgeu] = { alu rs1 rs2 sub condbr bge unsign }
rv32i[bltu] = { alu rs1 rs2 sub condbr blt unsign }
rv32i[jal] = { alu imm20 rd pc jal }
rv32i[jalr] = { alu rs1 rd imm12 jal }
rv32i[lb] = { lsu load rs1 rd by }
rv32i[lh] = { lsu load rs1 rd half }
rv32i[lw] = { lsu load rs1 rd word }
rv32i[lbu] = { lsu load rs1 rd by unsign }
rv32i[lhu] = { lsu load rs1 rd half unsign }
rv32i[sb] = { lsu store rs1 rs2 by }
rv32i[sh] = { lsu store rs1 rs2 half }
rv32i[sw] = { lsu store rs1 rs2 word }
rv32i[fence] = { alu lor fence presync}
# fence.i has fence effect in addtion to flush I$ and redirect
rv32i[fence.i] = { alu lor fence fence_i presync postsync}
# nops for now
rv32i[ebreak] = { alu rs1 imm12 rd lor ebreak postsync}
rv32i[ecall] = { alu rs1 imm12 rd lor ecall postsync}
rv32i[mret] = { alu rs1 imm12 rd lor mret postsync}
rv32i[wfi] = { alu rs1 imm12 rd lor pm_alu }
# csr means read
# csr_read - put csr on rs2 and rs1 0's
rv32i[csrrc_ro] = { alu rd csr_read }
# put csr on rs2 and make rs1 0's into alu. Save rs1 for csr_clr later
rv32i[csrrc_rw{0-4}] = { alu rd csr_read rs1 csr_clr presync postsync }
rv32i[csrrci_ro] = { alu rd csr_read }
rv32i[csrrci_rw{0-4}] = { alu rd csr_read rs1 csr_clr csr_imm presync postsync }
rv32i[csrrs_ro] = { alu rd csr_read }
rv32i[csrrs_rw{0-4}] = { alu rd csr_read rs1 csr_set presync postsync }
rv32i[csrrsi_ro] = { alu rd csr_read }
rv32i[csrrsi_rw{0-4}] = { alu rd csr_read rs1 csr_set csr_imm presync postsync }
rv32i[csrrw{0-4}] = { alu rd csr_read rs1 csr_write presync postsync }
rv32i[csrrwi{0-4}] = { alu rd csr_read rs1 csr_write csr_imm presync postsync }
# optimize csr write only - pipelined
rv32i[csrw] = { alu rd rs1 csr_write }
rv32i[csrwi] = { alu rd csr_write csr_imm }
.end

446
Flow/design/dec/el2_dec.sv Normal file
View File

@ -0,0 +1,446 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// dec: decode unit - decode, bypassing, ARF, interrupts
//
//********************************************************************************
// $Id$
//
//
// Function: Decode
// Comments: Decode, dependency scoreboard, ARF
//
//
// A -> D -> EX1 ... WB
//
//********************************************************************************
module el2_dec
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic free_clk, // Clock always. Through two clock headers. For flops without second clock header built in.
input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in.
input logic lsu_fastint_stall_any, // needed by lsu for 2nd pass of dma with ecc correction, stall next cycle
output logic dec_extint_stall, // Stall on external interrupt
output logic dec_i0_decode_d, // Valid instruction at D-stage and not blocked
output logic dec_pause_state_cg, // to top for active state clock gating
output logic dec_tlu_core_empty,
input logic rst_l, // reset, active low
input logic [31:1] rst_vec, // reset vector, from core pins
input logic nmi_int, // NMI pin
input logic [31:1] nmi_vec, // NMI vector, from pins
input logic i_cpu_halt_req, // Asynchronous Halt request to CPU
input logic i_cpu_run_req, // Asynchronous Restart request to CPU
output logic o_cpu_halt_status, // Halt status of core (pmu/fw)
output logic o_cpu_halt_ack, // Halt request ack
output logic o_cpu_run_ack, // Run request ack
output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sendng a halt or run request
input logic [31:4] core_id, // CORE ID
// external MPC halt/run interface
input logic mpc_debug_halt_req, // Async halt request
input logic mpc_debug_run_req, // Async run request
input logic mpc_reset_run_req, // Run/halt after reset
output logic mpc_debug_halt_ack, // Halt ack
output logic mpc_debug_run_ack, // Run ack
output logic debug_brkpt_status, // debug breakpoint
input logic exu_pmu_i0_br_misp, // slot 0 branch misp
input logic exu_pmu_i0_br_ataken, // slot 0 branch actual taken
input logic exu_pmu_i0_pc4, // slot 0 4 byte branch
input logic lsu_nonblock_load_valid_m, // valid nonblock load at m
input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // -> corresponding tag
input logic lsu_nonblock_load_inv_r, // invalidate request for nonblock load r
input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // -> corresponding tag
input logic lsu_nonblock_load_data_valid, // valid nonblock load data back
input logic lsu_nonblock_load_data_error, // nonblock load bus error
input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // -> corresponding tag
input logic [31:0] lsu_nonblock_load_data, // nonblock load data
input logic lsu_pmu_bus_trxn, // D side bus transaction
input logic lsu_pmu_bus_misaligned, // D side bus misaligned
input logic lsu_pmu_bus_error, // D side bus error
input logic lsu_pmu_bus_busy, // D side bus busy
input logic lsu_pmu_misaligned_m, // D side load or store misaligned
input logic lsu_pmu_load_external_m, // D side bus load
input logic lsu_pmu_store_external_m, // D side bus store
input logic dma_pmu_dccm_read, // DMA DCCM read
input logic dma_pmu_dccm_write, // DMA DCCM write
input logic dma_pmu_any_read, // DMA read
input logic dma_pmu_any_write, // DMA write
input logic [31:1] lsu_fir_addr, // Fast int address
input logic [1:0] lsu_fir_error, // Fast int lookup error
input logic ifu_pmu_instr_aligned, // aligned instructions
input logic ifu_pmu_fetch_stall, // fetch unit stalled
input logic ifu_pmu_ic_miss, // icache miss
input logic ifu_pmu_ic_hit, // icache hit
input logic ifu_pmu_bus_error, // Instruction side bus error
input logic ifu_pmu_bus_busy, // Instruction side bus busy
input logic ifu_pmu_bus_trxn, // Instruction side bus transaction
input logic ifu_ic_error_start, // IC single bit error
input logic ifu_iccm_rd_ecc_single_err, // ICCM single bit error
input logic [3:0] lsu_trigger_match_m,
input logic dbg_cmd_valid, // debugger abstract command valid
input logic dbg_cmd_write, // command is a write
input logic [1:0] dbg_cmd_type, // command type
input logic [31:0] dbg_cmd_addr, // command address
input logic [1:0] dbg_cmd_wrdata, // command write data, for fence/fence_i
input logic ifu_i0_icaf, // icache access fault
input logic [1:0] ifu_i0_icaf_type, // icache access fault type
input logic ifu_i0_icaf_second, // i0 has access fault on second 2B of 4B inst
input logic ifu_i0_dbecc, // icache/iccm double-bit error
input logic lsu_idle_any, // lsu idle for halting
input el2_br_pkt_t i0_brp, // branch packet
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
input logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
input logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
input logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index
input el2_lsu_error_pkt_t lsu_error_pkt_r, // LSU exception/error packet
input logic lsu_single_ecc_error_incr, // LSU inc SB error counter
input logic lsu_imprecise_error_load_any, // LSU imprecise load bus error
input logic lsu_imprecise_error_store_any, // LSU imprecise store bus error
input logic [31:0] lsu_imprecise_error_addr_any, // LSU imprecise bus error address
input logic [31:0] exu_div_result, // final div result
input logic exu_div_wren, // Divide write enable to GPR
input logic [31:0] exu_csr_rs1_x, // rs1 for csr instruction
input logic [31:0] lsu_result_m, // load result
input logic [31:0] lsu_result_corr_r, // load result - corrected load data
input logic lsu_load_stall_any, // This is for blocking loads
input logic lsu_store_stall_any, // This is for blocking stores
input logic dma_dccm_stall_any, // stall any load/store at decode, pmu event
input logic dma_iccm_stall_any, // iccm stalled, pmu event
input logic iccm_dma_sb_error, // ICCM DMA single bit error
input logic exu_flush_final, // slot0 flush
input logic [31:1] exu_npc_r, // next PC
input logic [31:0] exu_i0_result_x, // alu result x
input logic ifu_i0_valid, // fetch valids to instruction buffer
input logic [31:0] ifu_i0_instr, // fetch inst's to instruction buffer
input logic [31:1] ifu_i0_pc, // pc's for instruction buffer
input logic ifu_i0_pc4, // indication of 4B or 2B for corresponding inst
input logic [31:1] exu_i0_pc_x, // pc's for e1 from the alu's
input logic mexintpend, // External interrupt pending
input logic timer_int, // Timer interrupt pending (from pin)
input logic soft_int, // Software interrupt pending (from pin)
input logic [7:0] pic_claimid, // PIC claimid
input logic [3:0] pic_pl, // PIC priv level
input logic mhwakeup, // High priority wakeup
output logic [3:0] dec_tlu_meicurpl, // to PIC, Current priv level
output logic [3:0] dec_tlu_meipt, // to PIC
input logic [70:0] ifu_ic_debug_rd_data, // diagnostic icache read data
input logic ifu_ic_debug_rd_data_valid, // diagnostic icache read data valid
output el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics
// Debug start
input logic dbg_halt_req, // DM requests a halt
input logic dbg_resume_req, // DM requests a resume
input logic ifu_miss_state_idle, // I-side miss buffer empty
output logic dec_tlu_dbg_halted, // Core is halted and ready for debug command
output logic dec_tlu_debug_mode, // Core is in debug mode
output logic dec_tlu_resume_ack, // Resume acknowledge
output logic dec_tlu_flush_noredir_r, // Tell fetch to idle on this flush
output logic dec_tlu_mpc_halted_only, // Core is halted only due to MPC
output logic dec_tlu_flush_leak_one_r, // single step
output logic dec_tlu_flush_err_r, // iside perr/ecc rfpc
output logic [31:2] dec_tlu_meihap, // Fast ext int base
output logic dec_debug_wdata_rs1_d, // insert debug write data into rs1 at decode
output logic [31:0] dec_dbg_rddata, // debug command read data
output logic dec_dbg_cmd_done, // abstract command is done
output logic dec_dbg_cmd_fail, // abstract command failed (illegal reg address)
output el2_trigger_pkt_t [3:0] trigger_pkt_any, // info needed by debug trigger blocks
output logic dec_tlu_force_halt, // halt has been forced
// Debug end
// branch info from pipe0 for errors or counter updates
input logic [1:0] exu_i0_br_hist_r, // history
input logic exu_i0_br_error_r, // error
input logic exu_i0_br_start_error_r, // start error
input logic exu_i0_br_valid_r, // valid
input logic exu_i0_br_mp_r, // mispredict
input logic exu_i0_br_middle_r, // middle of bank
// branch info from pipe1 for errors or counter updates
input logic exu_i0_br_way_r, // way hit or repl
output logic dec_i0_rs1_en_d, // Qualify GPR RS1 data
output logic dec_i0_rs2_en_d, // Qualify GPR RS2 data
output logic [31:0] gpr_i0_rs1_d, // gpr rs1 data
output logic [31:0] gpr_i0_rs2_d, // gpr rs2 data
output logic [31:0] dec_i0_immed_d, // immediate data
output logic [12:1] dec_i0_br_immed_d, // br immediate data
output el2_alu_pkt_t i0_ap, // alu packet
output logic dec_i0_alu_decode_d, // schedule on D-stage alu
output logic dec_i0_branch_d, // Branch in D-stage
output logic dec_i0_select_pc_d, // select pc onto rs1 for jal's
output logic [31:1] dec_i0_pc_d, // pc's at decode
output logic [3:0] dec_i0_rs1_bypass_en_d, // rs1 bypass enable
output logic [3:0] dec_i0_rs2_bypass_en_d, // rs2 bypass enable
output logic [31:0] dec_i0_result_r, // Result R-stage
output el2_lsu_pkt_t lsu_p, // lsu packet
output logic dec_qual_lsu_d, // LSU instruction at D. Use to quiet LSU operands
output el2_mul_pkt_t mul_p, // mul packet
output el2_div_pkt_t div_p, // div packet
output logic dec_div_cancel, // cancel divide operation
output logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses
output logic dec_csr_ren_d, // CSR read enable
output logic [31:0] dec_csr_rddata_d, // CSR read data
output logic dec_tlu_flush_lower_r, // tlu flush due to late mp, exception, rfpc, or int
output logic dec_tlu_flush_lower_wb,
output logic [31:1] dec_tlu_flush_path_r, // tlu flush target
output logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state
output logic dec_tlu_fence_i_r, // flush is a fence_i rfnpc, flush icache
output logic [31:1] pred_correct_npc_x, // npc if prediction is correct at e2 stage
output el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot 0 branch predictor update packet
output logic dec_tlu_perfcnt0, // toggles when slot0 perf counter 0 has an event inc
output logic dec_tlu_perfcnt1, // toggles when slot0 perf counter 1 has an event inc
output logic dec_tlu_perfcnt2, // toggles when slot0 perf counter 2 has an event inc
output logic dec_tlu_perfcnt3, // toggles when slot0 perf counter 3 has an event inc
output el2_predict_pkt_t dec_i0_predict_p_d, // prediction packet to alus
output logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // DEC predict fghr
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // DEC predict index
output logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // DEC predict branch tag
output logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associt btb error index
output logic dec_lsu_valid_raw_d,
output logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control
output logic [1:0] dec_data_en, // clock-gate control logic
output logic [1:0] dec_ctl_en,
input logic [15:0] ifu_i0_cinst, // 16b compressed instruction
output el2_trace_pkt_t trace_rv_trace_pkt, // trace packet
// feature disable from mfdc
output logic dec_tlu_external_ldfwd_disable, // disable external load forwarding
output logic dec_tlu_sideeffect_posted_disable, // disable posted stores to side-effect address
output logic dec_tlu_core_ecc_disable, // disable core ECC
output logic dec_tlu_bpred_disable, // disable branch prediction
output logic dec_tlu_wb_coalescing_disable, // disable writebuffer coalescing
output logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:16]
// clock gating overrides from mcgc
output logic dec_tlu_misc_clk_override, // override misc clock domain gating
output logic dec_tlu_ifu_clk_override, // override fetch clock domain gating
output logic dec_tlu_lsu_clk_override, // override load/store clock domain gating
output logic dec_tlu_bus_clk_override, // override bus clock domain gating
output logic dec_tlu_pic_clk_override, // override PIC clock domain gating
output logic dec_tlu_picio_clk_override, // override PICIO clock domain gating
output logic dec_tlu_dccm_clk_override, // override DCCM clock domain gating
output logic dec_tlu_icm_clk_override, // override ICCM clock domain gating
output logic dec_tlu_i0_commit_cmt, // committed i0 instruction
input logic scan_mode // Flop scan mode control
);
logic dec_tlu_dec_clk_override; // to and from dec blocks
logic clk_override;
logic dec_ib0_valid_d;
logic dec_pmu_instr_decoded;
logic dec_pmu_decode_stall;
logic dec_pmu_presync_stall;
logic dec_pmu_postsync_stall;
logic dec_tlu_wr_pause_r; // CSR write to pause reg is at R.
logic [4:0] dec_i0_rs1_d;
logic [4:0] dec_i0_rs2_d;
logic [31:0] dec_i0_instr_d;
logic dec_tlu_trace_disable;
logic dec_tlu_pipelining_disable;
logic [4:0] dec_i0_waddr_r;
logic dec_i0_wen_r;
logic [31:0] dec_i0_wdata_r;
logic dec_csr_wen_r; // csr write enable at wb
logic [11:0] dec_csr_wraddr_r; // write address for csryes
logic [31:0] dec_csr_wrdata_r; // csr write data at wb
logic [11:0] dec_csr_rdaddr_d; // read address for csr
logic dec_csr_legal_d; // csr indicates legal operation
logic dec_csr_wen_unq_d; // valid csr with write - for csr legal
logic dec_csr_any_unq_d; // valid csr - for csr legal
logic dec_csr_stall_int_ff; // csr is mie/mstatus
el2_trap_pkt_t dec_tlu_packet_r;
logic dec_i0_pc4_d;
logic dec_tlu_presync_d;
logic dec_tlu_postsync_d;
logic dec_tlu_debug_stall;
logic [31:0] dec_illegal_inst;
logic dec_i0_icaf_d;
logic dec_i0_dbecc_d;
logic dec_i0_icaf_second_d;
logic [3:0] dec_i0_trigger_match_d;
logic dec_debug_fence_d;
logic dec_nonblock_load_wen;
logic [4:0] dec_nonblock_load_waddr;
logic dec_tlu_flush_pause_r;
el2_br_pkt_t dec_i0_brp;
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index;
logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr;
logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag;
logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index; // Fully associt btb index
logic [31:1] dec_tlu_i0_pc_r;
logic dec_tlu_i0_kill_writeb_wb;
logic dec_tlu_i0_valid_r;
logic dec_pause_state;
logic [1:0] dec_i0_icaf_type_d; // i0 instruction access fault type
logic dec_tlu_flush_extint; // Fast ext int started
logic [31:0] dec_i0_inst_wb;
logic [31:1] dec_i0_pc_wb;
logic dec_tlu_i0_valid_wb1, dec_tlu_int_valid_wb1;
logic [4:0] dec_tlu_exc_cause_wb1;
logic [31:0] dec_tlu_mtval_wb1;
logic dec_tlu_i0_exc_valid_wb1;
logic [4:0] div_waddr_wb;
logic dec_div_active;
logic dec_debug_valid_d;
assign clk_override = dec_tlu_dec_clk_override;
assign dec_dbg_rddata[31:0] = dec_i0_wdata_r[31:0];
el2_dec_ib_ctl #(.pt(pt)) instbuff (.*);
el2_dec_decode_ctl #(.pt(pt)) decode (.*);
el2_dec_tlu_ctl #(.pt(pt)) tlu (.*);
el2_dec_gpr_ctl #(.pt(pt)) arf (.*,
// inputs
.raddr0(dec_i0_rs1_d[4:0]),
.raddr1(dec_i0_rs2_d[4:0]),
.wen0(dec_i0_wen_r), .waddr0(dec_i0_waddr_r[4:0]), .wd0(dec_i0_wdata_r[31:0]),
.wen1(dec_nonblock_load_wen), .waddr1(dec_nonblock_load_waddr[4:0]), .wd1(lsu_nonblock_load_data[31:0]),
.wen2(exu_div_wren), .waddr2(div_waddr_wb), .wd2(exu_div_result[31:0]),
// outputs
.rd0(gpr_i0_rs1_d[31:0]), .rd1(gpr_i0_rs2_d[31:0])
);
// Trigger
el2_dec_trigger #(.pt(pt)) dec_trigger (.*);
// trace
assign trace_rv_trace_pkt.trace_rv_i_insn_ip = dec_i0_inst_wb[31:0];
assign trace_rv_trace_pkt.trace_rv_i_address_ip = { dec_i0_pc_wb[31:1], 1'b0};
assign trace_rv_trace_pkt.trace_rv_i_valid_ip = dec_tlu_int_valid_wb1 | dec_tlu_i0_valid_wb1 | dec_tlu_i0_exc_valid_wb1;
assign trace_rv_trace_pkt.trace_rv_i_exception_ip = dec_tlu_int_valid_wb1 | dec_tlu_i0_exc_valid_wb1;
assign trace_rv_trace_pkt.trace_rv_i_ecause_ip = dec_tlu_exc_cause_wb1[4:0]; // replicate across ports
assign trace_rv_trace_pkt.trace_rv_i_interrupt_ip = dec_tlu_int_valid_wb1;
assign trace_rv_trace_pkt.trace_rv_i_tval_ip = dec_tlu_mtval_wb1[31:0]; // replicate across ports
// end trace
endmodule // el2_dec

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,95 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
module el2_dec_gpr_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
) (
input logic [4:0] raddr0, // logical read addresses
input logic [4:0] raddr1,
input logic wen0, // write enable
input logic [4:0] waddr0, // write address
input logic [31:0] wd0, // write data
input logic wen1, // write enable
input logic [4:0] waddr1, // write address
input logic [31:0] wd1, // write data
input logic wen2, // write enable
input logic [4:0] waddr2, // write address
input logic [31:0] wd2, // write data
input logic clk,
input logic rst_l,
output logic [31:0] rd0, // read data
output logic [31:0] rd1,
input logic scan_mode
);
logic [31:1] [31:0] gpr_out; // 31 x 32 bit GPRs
logic [31:1] [31:0] gpr_in;
logic [31:1] w0v,w1v,w2v;
logic [31:1] gpr_wr_en;
// GPR Write Enables
assign gpr_wr_en[31:1] = (w0v[31:1] | w1v[31:1] | w2v[31:1]);
for ( genvar j=1; j<32; j++ ) begin : gpr
rvdffe #(32) gprff (.*, .en(gpr_wr_en[j]), .din(gpr_in[j][31:0]), .dout(gpr_out[j][31:0]));
end : gpr
// the read out
always_comb begin
rd0[31:0] = 32'b0;
rd1[31:0] = 32'b0;
w0v[31:1] = 31'b0;
w1v[31:1] = 31'b0;
w2v[31:1] = 31'b0;
gpr_in[31:1] = '0;
// GPR Read logic
for (int j=1; j<32; j++ ) begin
rd0[31:0] |= ({32{(raddr0[4:0]== 5'(j))}} & gpr_out[j][31:0]);
rd1[31:0] |= ({32{(raddr1[4:0]== 5'(j))}} & gpr_out[j][31:0]);
end
// GPR Write logic
for (int j=1; j<32; j++ ) begin
w0v[j] = wen0 & (waddr0[4:0]== 5'(j) );
w1v[j] = wen1 & (waddr1[4:0]== 5'(j) );
w2v[j] = wen2 & (waddr2[4:0]== 5'(j) );
gpr_in[j] = ({32{w0v[j]}} & wd0[31:0]) |
({32{w1v[j]}} & wd1[31:0]) |
({32{w2v[j]}} & wd2[31:0]);
end
end // always_comb begin
`ifdef RV_ASSERT_ON
logic write_collision_unused;
assign write_collision_unused = ( (w0v[31:1] == w1v[31:1]) & wen0 & wen1 ) |
( (w0v[31:1] == w2v[31:1]) & wen0 & wen2 ) |
( (w1v[31:1] == w2v[31:1]) & wen1 & wen2 );
// asserting that no 2 ports will write to the same gpr simultaneously
assert_multiple_wen_to_same_gpr: assert #0 (~( write_collision_unused ) );
`endif
endmodule

View File

@ -0,0 +1,164 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
module el2_dec_ib_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic dbg_cmd_valid, // valid dbg cmd
input logic dbg_cmd_write, // dbg cmd is write
input logic [1:0] dbg_cmd_type, // dbg type
input logic [31:0] dbg_cmd_addr, // expand to 31:0
input el2_br_pkt_t i0_brp, // i0 branch packet from aligner
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
input logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
input logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
input logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index
input logic ifu_i0_pc4, // i0 is 4B inst else 2B
input logic ifu_i0_valid, // i0 valid from ifu
input logic ifu_i0_icaf, // i0 instruction access fault
input logic [1:0] ifu_i0_icaf_type, // i0 instruction access fault type
input logic ifu_i0_icaf_second, // i0 has access fault on second 2B of 4B inst
input logic ifu_i0_dbecc, // i0 double-bit error
input logic [31:0] ifu_i0_instr, // i0 instruction from the aligner
input logic [31:1] ifu_i0_pc, // i0 pc from the aligner
output logic dec_ib0_valid_d, // ib0 valid
output logic dec_debug_valid_d, // Debug read or write at D-stage
output logic [31:0] dec_i0_instr_d, // i0 inst at decode
output logic [31:1] dec_i0_pc_d, // i0 pc at decode
output logic dec_i0_pc4_d, // i0 is 4B inst else 2B
output el2_br_pkt_t dec_i0_brp, // i0 branch packet at decode
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index, // i0 branch index
output logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr, // BP FGHR
output logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag, // BP tag
output logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index, // Fully associt btb index
output logic dec_i0_icaf_d, // i0 instruction access fault at decode
output logic dec_i0_icaf_second_d, // i0 instruction access fault on second 2B of 4B inst
output logic [1:0] dec_i0_icaf_type_d, // i0 instruction access fault type
output logic dec_i0_dbecc_d, // i0 double-bit error at decode
output logic dec_debug_wdata_rs1_d, // put debug write data onto rs1 source: machine is halted
output logic dec_debug_fence_d // debug fence inst
);
logic debug_valid;
logic [4:0] dreg;
logic [11:0] dcsr;
logic [31:0] ib0, ib0_debug_in;
logic debug_read;
logic debug_write;
logic debug_read_gpr;
logic debug_write_gpr;
logic debug_read_csr;
logic debug_write_csr;
logic [34:0] ifu_i0_pcdata, pc0;
assign ifu_i0_pcdata[34:0] = { ifu_i0_icaf_second, ifu_i0_dbecc, ifu_i0_icaf,
ifu_i0_pc[31:1], ifu_i0_pc4 };
assign pc0[34:0] = ifu_i0_pcdata[34:0];
assign dec_i0_icaf_second_d = pc0[34]; // icaf's can only decode as i0
assign dec_i0_dbecc_d = pc0[33];
assign dec_i0_icaf_d = pc0[32];
assign dec_i0_pc_d[31:1] = pc0[31:1];
assign dec_i0_pc4_d = pc0[0];
assign dec_i0_icaf_type_d[1:0] = ifu_i0_icaf_type[1:0];
// GPR accesses
// put reg to read on rs1
// read -> or %x0, %reg,%x0 {000000000000,reg[4:0],110000000110011}
// put write date on rs1
// write -> or %reg, %x0, %x0 {00000000000000000110,reg[4:0],0110011}
// CSR accesses
// csr is of form rd, csr, rs1
// read -> csrrs %x0, %csr, %x0 {csr[11:0],00000010000001110011}
// put write data on rs1
// write -> csrrw %x0, %csr, %x0 {csr[11:0],00000001000001110011}
// abstract memory command not done here
assign debug_valid = dbg_cmd_valid & (dbg_cmd_type[1:0] != 2'h2);
assign debug_read = debug_valid & ~dbg_cmd_write;
assign debug_write = debug_valid & dbg_cmd_write;
assign debug_read_gpr = debug_read & (dbg_cmd_type[1:0]==2'h0);
assign debug_write_gpr = debug_write & (dbg_cmd_type[1:0]==2'h0);
assign debug_read_csr = debug_read & (dbg_cmd_type[1:0]==2'h1);
assign debug_write_csr = debug_write & (dbg_cmd_type[1:0]==2'h1);
assign dreg[4:0] = dbg_cmd_addr[4:0];
assign dcsr[11:0] = dbg_cmd_addr[11:0];
assign ib0_debug_in[31:0] = ({32{debug_read_gpr}} & {12'b000000000000,dreg[4:0],15'b110000000110011}) |
({32{debug_write_gpr}} & {20'b00000000000000000110,dreg[4:0],7'b0110011}) |
({32{debug_read_csr}} & {dcsr[11:0],20'b00000010000001110011}) |
({32{debug_write_csr}} & {dcsr[11:0],20'b00000001000001110011});
// machine is in halted state, pipe empty, write will always happen next cycle
assign dec_debug_wdata_rs1_d = debug_write_gpr | debug_write_csr;
// special fence csr for use only in debug mode
assign dec_debug_fence_d = debug_write_csr & (dcsr[11:0] == 12'h7c4);
assign ib0[31:0] = (debug_valid) ? ib0_debug_in[31:0] : ifu_i0_instr[31:0];
assign dec_ib0_valid_d = ifu_i0_valid | debug_valid;
assign dec_debug_valid_d = debug_valid;
assign dec_i0_instr_d[31:0] = ib0[31:0];
assign dec_i0_brp = i0_brp;
assign dec_i0_bp_index = ifu_i0_bp_index;
assign dec_i0_bp_fghr = ifu_i0_bp_fghr;
assign dec_i0_bp_btag = ifu_i0_bp_btag;
assign dec_i0_bp_fa_index = ifu_i0_fa_index;
endmodule

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,49 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: DEC Trigger Logic
// Comments:
//
//********************************************************************************
module el2_dec_trigger
import el2_pkg::*;
#(
`include "el2_param.vh"
)(
input el2_trigger_pkt_t [3:0] trigger_pkt_any, // Packet from tlu. 'select':0-pc,1-Opcode 'Execute' needs to be set for dec triggers to fire. 'match'-1 do mask, 0: full match
input logic [31:1] dec_i0_pc_d, // i0 pc
output logic [3:0] dec_i0_trigger_match_d // Trigger match
);
logic [3:0][31:0] dec_i0_match_data;
logic [3:0] dec_i0_trigger_data_match;
for (genvar i=0; i<4; i++) begin
assign dec_i0_match_data[i][31:0] = ({32{~trigger_pkt_any[i].select & trigger_pkt_any[i].execute}} & {dec_i0_pc_d[31:1], trigger_pkt_any[i].tdata2[0]}); // select=0; do a PC match
rvmaskandmatch trigger_i0_match (.mask(trigger_pkt_any[i].tdata2[31:0]), .data(dec_i0_match_data[i][31:0]), .masken(trigger_pkt_any[i].match), .match(dec_i0_trigger_data_match[i]));
assign dec_i0_trigger_match_d[i] = trigger_pkt_any[i].execute & trigger_pkt_any[i].m & dec_i0_trigger_data_match[i];
end
endmodule // el2_dec_trigger

View File

@ -0,0 +1,64 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2018 Western Digital Corporation or it's affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//------------------------------------------------------------------------------------
//
// Copyright Western Digital, 2019
// Owner : Alex Grobman
// Description:
// This module Synchronizes the signals between JTAG (TCK) and
// processor (Core_clk)
//
//-------------------------------------------------------------------------------------
module dmi_jtag_to_core_sync (
// JTAG signals
input rd_en, // 1 bit Read Enable from JTAG
input wr_en, // 1 bit Write enable from JTAG
// Processor Signals
input rst_n, // Core reset
input clk, // Core clock
output reg_en, // 1 bit Write interface bit to Processor
output reg_wr_en // 1 bit Write enable to Processor
);
wire c_rd_en;
wire c_wr_en;
reg [2:0] rden, wren;
// Outputs
assign reg_en = c_wr_en | c_rd_en;
assign reg_wr_en = c_wr_en;
// synchronizers
always @ ( posedge clk or negedge rst_n) begin
if(!rst_n) begin
rden <= '0;
wren <= '0;
end
else begin
rden <= {rden[1:0], rd_en};
wren <= {wren[1:0], wr_en};
end
end
assign c_rd_en = rden[1] & ~rden[2];
assign c_wr_en = wren[1] & ~wren[2];
endmodule

View File

@ -0,0 +1,90 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2018 Western Digital Corporation or it's affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//------------------------------------------------------------------------------------
//
// Copyright Western Digital, 2018
// Owner : Anusha Narayanamoorthy
// Description:
// Wrapper module for JTAG_TAP and DMI synchronizer
//
//-------------------------------------------------------------------------------------
module dmi_wrapper(
// JTAG signals
input trst_n, // JTAG reset
input tck, // JTAG clock
input tms, // Test mode select
input tdi, // Test Data Input
output tdo, // Test Data Output
output tdoEnable, // Test Data Output enable
// Processor Signals
input core_rst_n, // Core reset
input core_clk, // Core clock
input [31:1] jtag_id, // JTAG ID
input [31:0] rd_data, // 32 bit Read data from Processor
output [31:0] reg_wr_data, // 32 bit Write data to Processor
output [6:0] reg_wr_addr, // 7 bit reg address to Processor
output reg_en, // 1 bit Read enable to Processor
output reg_wr_en, // 1 bit Write enable to Processor
output dmi_hard_reset
);
//Wire Declaration
wire rd_en;
wire wr_en;
wire dmireset;
//jtag_tap instantiation
rvjtag_tap i_jtag_tap(
.trst(trst_n), // dedicated JTAG TRST (active low) pad signal or asynchronous active low power on reset
.tck(tck), // dedicated JTAG TCK pad signal
.tms(tms), // dedicated JTAG TMS pad signal
.tdi(tdi), // dedicated JTAG TDI pad signal
.tdo(tdo), // dedicated JTAG TDO pad signal
.tdoEnable(tdoEnable), // enable for TDO pad
.wr_data(reg_wr_data), // 32 bit Write data
.wr_addr(reg_wr_addr), // 7 bit Write address
.rd_en(rd_en), // 1 bit read enable
.wr_en(wr_en), // 1 bit Write enable
.rd_data(rd_data), // 32 bit Read data
.rd_status(2'b0),
.idle(3'h0), // no need to wait to sample data
.dmi_stat(2'b0), // no need to wait or error possible
.version(4'h1), // debug spec 0.13 compliant
.jtag_id(jtag_id),
.dmi_hard_reset(dmi_hard_reset),
.dmi_reset(dmireset)
);
// dmi_jtag_to_core_sync instantiation
dmi_jtag_to_core_sync i_dmi_jtag_to_core_sync(
.wr_en(wr_en), // 1 bit Write enable
.rd_en(rd_en), // 1 bit Read enable
.rst_n(core_rst_n),
.clk(core_clk),
.reg_en(reg_en), // 1 bit Write interface bit
.reg_wr_en(reg_wr_en) // 1 bit Write enable
);
endmodule

View File

@ -0,0 +1,224 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2019 Western Digital Corporation or it's affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
module rvjtag_tap #(
parameter AWIDTH = 7
)
(
input trst,
input tck,
input tms,
input tdi,
output reg tdo,
output tdoEnable,
output [31:0] wr_data,
output [AWIDTH-1:0] wr_addr,
output wr_en,
output rd_en,
input [31:0] rd_data,
input [1:0] rd_status,
output reg dmi_reset,
output reg dmi_hard_reset,
input [2:0] idle,
input [1:0] dmi_stat,
/*
-- revisionCode : 4'h0;
-- manufacturersIdCode : 11'h45;
-- deviceIdCode : 16'h0001;
-- order MSB .. LSB -> [4 bit version or revision] [16 bit part number] [11 bit manufacturer id] [value of 1'b1 in LSB]
*/
input [31:1] jtag_id,
input [3:0] version
);
localparam USER_DR_LENGTH = AWIDTH + 34;
reg [USER_DR_LENGTH-1:0] sr, nsr, dr;
///////////////////////////////////////////////////////
// Tap controller
///////////////////////////////////////////////////////
logic[3:0] state, nstate;
logic [4:0] ir;
wire jtag_reset;
wire shift_dr;
wire pause_dr;
wire update_dr;
wire capture_dr;
wire shift_ir;
wire pause_ir ;
wire update_ir ;
wire capture_ir;
wire[1:0] dr_en;
wire devid_sel;
wire [5:0] abits;
assign abits = AWIDTH[5:0];
localparam TEST_LOGIC_RESET_STATE = 0;
localparam RUN_TEST_IDLE_STATE = 1;
localparam SELECT_DR_SCAN_STATE = 2;
localparam CAPTURE_DR_STATE = 3;
localparam SHIFT_DR_STATE = 4;
localparam EXIT1_DR_STATE = 5;
localparam PAUSE_DR_STATE = 6;
localparam EXIT2_DR_STATE = 7;
localparam UPDATE_DR_STATE = 8;
localparam SELECT_IR_SCAN_STATE = 9;
localparam CAPTURE_IR_STATE = 10;
localparam SHIFT_IR_STATE = 11;
localparam EXIT1_IR_STATE = 12;
localparam PAUSE_IR_STATE = 13;
localparam EXIT2_IR_STATE = 14;
localparam UPDATE_IR_STATE = 15;
always_comb begin
nstate = state;
case(state)
TEST_LOGIC_RESET_STATE: nstate = tms ? TEST_LOGIC_RESET_STATE : RUN_TEST_IDLE_STATE;
RUN_TEST_IDLE_STATE: nstate = tms ? SELECT_DR_SCAN_STATE : RUN_TEST_IDLE_STATE;
SELECT_DR_SCAN_STATE: nstate = tms ? SELECT_IR_SCAN_STATE : CAPTURE_DR_STATE;
CAPTURE_DR_STATE: nstate = tms ? EXIT1_DR_STATE : SHIFT_DR_STATE;
SHIFT_DR_STATE: nstate = tms ? EXIT1_DR_STATE : SHIFT_DR_STATE;
EXIT1_DR_STATE: nstate = tms ? UPDATE_DR_STATE : PAUSE_DR_STATE;
PAUSE_DR_STATE: nstate = tms ? EXIT2_DR_STATE : PAUSE_DR_STATE;
EXIT2_DR_STATE: nstate = tms ? UPDATE_DR_STATE : SHIFT_DR_STATE;
UPDATE_DR_STATE: nstate = tms ? SELECT_DR_SCAN_STATE : RUN_TEST_IDLE_STATE;
SELECT_IR_SCAN_STATE: nstate = tms ? TEST_LOGIC_RESET_STATE : CAPTURE_IR_STATE;
CAPTURE_IR_STATE: nstate = tms ? EXIT1_IR_STATE : SHIFT_IR_STATE;
SHIFT_IR_STATE: nstate = tms ? EXIT1_IR_STATE : SHIFT_IR_STATE;
EXIT1_IR_STATE: nstate = tms ? UPDATE_IR_STATE : PAUSE_IR_STATE;
PAUSE_IR_STATE: nstate = tms ? EXIT2_IR_STATE : PAUSE_IR_STATE;
EXIT2_IR_STATE: nstate = tms ? UPDATE_IR_STATE : SHIFT_IR_STATE;
UPDATE_IR_STATE: nstate = tms ? SELECT_DR_SCAN_STATE : RUN_TEST_IDLE_STATE;
default: nstate = TEST_LOGIC_RESET_STATE;
endcase
end
always @ (posedge tck or negedge trst) begin
if(!trst) state <= TEST_LOGIC_RESET_STATE;
else state <= nstate;
end
assign jtag_reset = state == TEST_LOGIC_RESET_STATE;
assign shift_dr = state == SHIFT_DR_STATE;
assign pause_dr = state == PAUSE_DR_STATE;
assign update_dr = state == UPDATE_DR_STATE;
assign capture_dr = state == CAPTURE_DR_STATE;
assign shift_ir = state == SHIFT_IR_STATE;
assign pause_ir = state == PAUSE_IR_STATE;
assign update_ir = state == UPDATE_IR_STATE;
assign capture_ir = state == CAPTURE_IR_STATE;
assign tdoEnable = shift_dr | shift_ir;
///////////////////////////////////////////////////////
// IR register
///////////////////////////////////////////////////////
always @ (negedge tck or negedge trst) begin
if (!trst) ir <= 5'b1;
else begin
if (jtag_reset) ir <= 5'b1;
else if (update_ir) ir <= (sr[4:0] == '0) ? 5'h1f :sr[4:0];
end
end
assign devid_sel = ir == 5'b00001;
assign dr_en[0] = ir == 5'b10000;
assign dr_en[1] = ir == 5'b10001;
///////////////////////////////////////////////////////
// Shift register
///////////////////////////////////////////////////////
always @ (posedge tck or negedge trst) begin
if(!trst)begin
sr <= '0;
end
else begin
sr <= nsr;
end
end
// SR next value
always_comb begin
nsr = sr;
case(1)
shift_dr: begin
case(1)
dr_en[1]: nsr = {tdi, sr[USER_DR_LENGTH-1:1]};
dr_en[0],
devid_sel: nsr = {{USER_DR_LENGTH-32{1'b0}},tdi, sr[31:1]};
default: nsr = {{USER_DR_LENGTH-1{1'b0}},tdi}; // bypass
endcase
end
capture_dr: begin
nsr[0] = 1'b0;
case(1)
dr_en[0]: nsr = {{USER_DR_LENGTH-15{1'b0}}, idle, dmi_stat, abits, version};
dr_en[1]: nsr = {{AWIDTH{1'b0}}, rd_data, rd_status};
devid_sel: nsr = {{USER_DR_LENGTH-32{1'b0}}, jtag_id, 1'b1};
endcase
end
shift_ir: nsr = {{USER_DR_LENGTH-5{1'b0}},tdi, sr[4:1]};
capture_ir: nsr = {{USER_DR_LENGTH-1{1'b0}},1'b1};
endcase
end
// TDO retiming
always @ (negedge tck ) tdo <= sr[0];
// DMI CS register
always @ (posedge tck or negedge trst) begin
if(!trst) begin
dmi_hard_reset <= 1'b0;
dmi_reset <= 1'b0;
end
else if (update_dr & dr_en[0]) begin
dmi_hard_reset <= sr[17];
dmi_reset <= sr[16];
end
else begin
dmi_hard_reset <= 1'b0;
dmi_reset <= 1'b0;
end
end
// DR register
always @ (posedge tck or negedge trst) begin
if(!trst)
dr <= '0;
else begin
if (update_dr & dr_en[1])
dr <= sr;
else
dr <= {dr[USER_DR_LENGTH-1:2],2'b0};
end
end
assign {wr_addr, wr_data, wr_en, rd_en} = dr;
endmodule

637
Flow/design/el2_dma_ctrl.sv Normal file
View File

@ -0,0 +1,637 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
// Function: Top level SWERV core file
// Comments:
//
//********************************************************************************
module el2_dma_ctrl #(
`include "el2_param.vh"
)(
input logic clk,
input logic free_clk,
input logic rst_l,
input logic dma_bus_clk_en, // slave bus clock enable
input logic clk_override,
input logic scan_mode,
// Debug signals
input logic [31:0] dbg_cmd_addr,
input logic [31:0] dbg_cmd_wrdata,
input logic dbg_cmd_valid,
input logic dbg_cmd_write, // 1: write command, 0: read_command
input logic [1:0] dbg_cmd_type, // 0:gpr 1:csr 2: memory
input logic [1:0] dbg_cmd_size, // size of the abstract mem access debug command
input logic dbg_dma_bubble, // Debug needs a bubble to send a valid
output logic dma_dbg_ready, // DMA is ready to accept debug request
output logic dma_dbg_cmd_done,
output logic dma_dbg_cmd_fail,
output logic [31:0] dma_dbg_rddata,
// Core side signals
output logic dma_dccm_req, // DMA dccm request (only one of dccm/iccm will be set)
output logic dma_iccm_req, // DMA iccm request
output logic [2:0] dma_mem_tag, // DMA Buffer entry number
output logic [31:0] dma_mem_addr, // DMA request address
output logic [2:0] dma_mem_sz, // DMA request size
output logic dma_mem_write, // DMA write to dccm/iccm
output logic [63:0] dma_mem_wdata, // DMA write data
input logic dccm_dma_rvalid, // dccm data valid for DMA read
input logic dccm_dma_ecc_error, // ECC error on DMA read
input logic [2:0] dccm_dma_rtag, // Tag of the DMA req
input logic [63:0] dccm_dma_rdata, // dccm data for DMA read
input logic iccm_dma_rvalid, // iccm data valid for DMA read
input logic iccm_dma_ecc_error, // ECC error on DMA read
input logic [2:0] iccm_dma_rtag, // Tag of the DMA req
input logic [63:0] iccm_dma_rdata, // iccm data for DMA read
output logic dma_active, // DMA is busy
output logic dma_dccm_stall_any, // stall dccm pipe (bubble) so that DMA can proceed
output logic dma_iccm_stall_any, // stall iccm pipe (bubble) so that DMA can proceed
input logic dccm_ready, // dccm ready to accept DMA request
input logic iccm_ready, // iccm ready to accept DMA request
input logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:15]
// PMU signals
output logic dma_pmu_dccm_read,
output logic dma_pmu_dccm_write,
output logic dma_pmu_any_read,
output logic dma_pmu_any_write,
// AXI Write Channels
input logic dma_axi_awvalid,
output logic dma_axi_awready,
input logic [pt.DMA_BUS_TAG-1:0] dma_axi_awid,
input logic [31:0] dma_axi_awaddr,
input logic [2:0] dma_axi_awsize,
input logic dma_axi_wvalid,
output logic dma_axi_wready,
input logic [63:0] dma_axi_wdata,
input logic [7:0] dma_axi_wstrb,
output logic dma_axi_bvalid,
input logic dma_axi_bready,
output logic [1:0] dma_axi_bresp,
output logic [pt.DMA_BUS_TAG-1:0] dma_axi_bid,
// AXI Read Channels
input logic dma_axi_arvalid,
output logic dma_axi_arready,
input logic [pt.DMA_BUS_TAG-1:0] dma_axi_arid,
input logic [31:0] dma_axi_araddr,
input logic [2:0] dma_axi_arsize,
output logic dma_axi_rvalid,
input logic dma_axi_rready,
output logic [pt.DMA_BUS_TAG-1:0] dma_axi_rid,
output logic [63:0] dma_axi_rdata,
output logic [1:0] dma_axi_rresp,
output logic dma_axi_rlast
);
localparam DEPTH = pt.DMA_BUF_DEPTH;
localparam DEPTH_PTR = $clog2(DEPTH);
localparam NACK_COUNT = 7;
logic [DEPTH-1:0] fifo_valid;
logic [DEPTH-1:0][1:0] fifo_error;
logic [DEPTH-1:0] fifo_error_bus;
logic [DEPTH-1:0] fifo_rpend;
logic [DEPTH-1:0] fifo_done; // DMA trxn is done in core
logic [DEPTH-1:0] fifo_done_bus; // DMA trxn is done in core but synced to bus clock
logic [DEPTH-1:0][31:0] fifo_addr;
logic [DEPTH-1:0][2:0] fifo_sz;
logic [DEPTH-1:0][7:0] fifo_byteen;
logic [DEPTH-1:0] fifo_write;
logic [DEPTH-1:0] fifo_posted_write;
logic [DEPTH-1:0] fifo_dbg;
logic [DEPTH-1:0][63:0] fifo_data;
logic [DEPTH-1:0][pt.DMA_BUS_TAG-1:0] fifo_tag;
logic [DEPTH-1:0][pt.DMA_BUS_ID-1:0] fifo_mid;
logic [DEPTH-1:0][pt.DMA_BUS_PRTY-1:0] fifo_prty;
logic [DEPTH-1:0] fifo_cmd_en;
logic [DEPTH-1:0] fifo_data_en;
logic [DEPTH-1:0] fifo_pend_en;
logic [DEPTH-1:0] fifo_done_en;
logic [DEPTH-1:0] fifo_done_bus_en;
logic [DEPTH-1:0] fifo_error_en;
logic [DEPTH-1:0] fifo_error_bus_en;
logic [DEPTH-1:0] fifo_reset;
logic [DEPTH-1:0][1:0] fifo_error_in;
logic [DEPTH-1:0][63:0] fifo_data_in;
logic fifo_write_in;
logic fifo_posted_write_in;
logic fifo_dbg_in;
logic [31:0] fifo_addr_in;
logic [2:0] fifo_sz_in;
logic [7:0] fifo_byteen_in;
logic [DEPTH_PTR-1:0] RspPtr, NxtRspPtr;
logic [DEPTH_PTR-1:0] WrPtr, NxtWrPtr;
logic [DEPTH_PTR-1:0] RdPtr, NxtRdPtr;
logic WrPtrEn, RdPtrEn, RspPtrEn;
logic [1:0] dma_dbg_sz;
logic [1:0] dma_dbg_addr;
logic [31:0] dma_dbg_mem_rddata;
logic [31:0] dma_dbg_mem_wrdata;
logic dma_dbg_cmd_error;
logic dma_dbg_cmd_done_q;
logic fifo_full, fifo_full_spec, fifo_empty;
logic dma_address_error, dma_alignment_error;
logic [3:0] num_fifo_vld;
logic dma_mem_req;
logic [31:0] dma_mem_addr_int;
logic [2:0] dma_mem_sz_int;
logic [7:0] dma_mem_byteen;
logic dma_mem_addr_in_dccm;
logic dma_mem_addr_in_iccm;
logic dma_mem_addr_in_pic;
logic dma_mem_addr_in_pic_region_nc;
logic dma_mem_addr_in_dccm_region_nc;
logic dma_mem_addr_in_iccm_region_nc;
logic [2:0] dma_nack_count, dma_nack_count_d, dma_nack_count_csr;
logic dma_buffer_c1_clken;
logic dma_free_clken;
logic dma_buffer_c1_clk;
logic dma_free_clk;
logic dma_bus_clk;
logic bus_rsp_valid, bus_rsp_sent;
logic bus_cmd_valid, bus_cmd_sent;
logic bus_cmd_write, bus_cmd_posted_write;
logic [7:0] bus_cmd_byteen;
logic [2:0] bus_cmd_sz;
logic [31:0] bus_cmd_addr;
logic [63:0] bus_cmd_wdata;
logic [pt.DMA_BUS_TAG-1:0] bus_cmd_tag;
logic [pt.DMA_BUS_ID-1:0] bus_cmd_mid;
logic [pt.DMA_BUS_PRTY-1:0] bus_cmd_prty;
logic bus_posted_write_done;
logic fifo_full_spec_bus;
logic dbg_dma_bubble_bus;
logic stall_dma_in;
logic dma_fifo_ready;
logic wrbuf_en, wrbuf_data_en;
logic wrbuf_cmd_sent, wrbuf_rst, wrbuf_data_rst;
logic wrbuf_vld, wrbuf_data_vld;
logic [pt.DMA_BUS_TAG-1:0] wrbuf_tag;
logic [2:0] wrbuf_sz;
logic [31:0] wrbuf_addr;
logic [63:0] wrbuf_data;
logic [7:0] wrbuf_byteen;
logic rdbuf_en;
logic rdbuf_cmd_sent, rdbuf_rst;
logic rdbuf_vld;
logic [pt.DMA_BUS_TAG-1:0] rdbuf_tag;
logic [2:0] rdbuf_sz;
logic [31:0] rdbuf_addr;
logic axi_mstr_prty_in, axi_mstr_prty_en;
logic axi_mstr_priority;
logic axi_mstr_sel;
logic axi_rsp_valid, axi_rsp_sent;
logic axi_rsp_write;
logic [pt.DMA_BUS_TAG-1:0] axi_rsp_tag;
logic [1:0] axi_rsp_error;
logic [63:0] axi_rsp_rdata;
//------------------------LOGIC STARTS HERE---------------------------------
// FIFO inputs
assign fifo_addr_in[31:0] = dbg_cmd_valid ? dbg_cmd_addr[31:0] : bus_cmd_addr[31:0];
assign fifo_byteen_in[7:0] = {8{~dbg_cmd_valid}} & bus_cmd_byteen[7:0]; // Byte enable is used only for bus requests
assign fifo_sz_in[2:0] = dbg_cmd_valid ? {1'b0,dbg_cmd_size[1:0]} : bus_cmd_sz[2:0];
assign fifo_write_in = dbg_cmd_valid ? dbg_cmd_write : bus_cmd_write;
assign fifo_posted_write_in = ~dbg_cmd_valid & bus_cmd_posted_write;
assign fifo_dbg_in = dbg_cmd_valid;
for (genvar i=0 ;i<DEPTH; i++) begin: GenFifo
assign fifo_cmd_en[i] = ((bus_cmd_sent & dma_bus_clk_en) | (dbg_cmd_valid & dbg_cmd_type[1])) & (i == WrPtr[DEPTH_PTR-1:0]);
assign fifo_data_en[i] = (((bus_cmd_sent & fifo_write_in & dma_bus_clk_en) | (dbg_cmd_valid & dbg_cmd_type[1] & dbg_cmd_write)) & (i == WrPtr[DEPTH_PTR-1:0])) |
((dma_address_error | dma_alignment_error) & (i == RdPtr[DEPTH_PTR-1:0])) |
(dccm_dma_rvalid & (i == DEPTH_PTR'(dccm_dma_rtag[2:0]))) |
(iccm_dma_rvalid & (i == DEPTH_PTR'(iccm_dma_rtag[2:0])));
assign fifo_pend_en[i] = (dma_dccm_req | dma_iccm_req) & ~dma_mem_write & (i == RdPtr[DEPTH_PTR-1:0]);
assign fifo_error_en[i] = ((dma_address_error | dma_alignment_error | dma_dbg_cmd_error) & (i == RdPtr[DEPTH_PTR-1:0])) |
((dccm_dma_rvalid & dccm_dma_ecc_error) & (i == DEPTH_PTR'(dccm_dma_rtag[2:0]))) |
((iccm_dma_rvalid & iccm_dma_ecc_error) & (i == DEPTH_PTR'(iccm_dma_rtag[2:0])));
assign fifo_error_bus_en[i] = (((|fifo_error_in[i][1:0]) & fifo_error_en[i]) | (|fifo_error[i])) & dma_bus_clk_en;
assign fifo_done_en[i] = ((|fifo_error[i] | fifo_error_en[i] | ((dma_dccm_req | dma_iccm_req) & dma_mem_write)) & (i == RdPtr[DEPTH_PTR-1:0])) |
(dccm_dma_rvalid & (i == DEPTH_PTR'(dccm_dma_rtag[2:0]))) |
(iccm_dma_rvalid & (i == DEPTH_PTR'(iccm_dma_rtag[2:0])));
assign fifo_done_bus_en[i] = (fifo_done_en[i] | fifo_done[i]) & dma_bus_clk_en;
assign fifo_reset[i] = (((bus_rsp_sent | bus_posted_write_done) & dma_bus_clk_en) | dma_dbg_cmd_done) & (i == RspPtr[DEPTH_PTR-1:0]);
assign fifo_error_in[i] = (dccm_dma_rvalid & (i == DEPTH_PTR'(dccm_dma_rtag[2:0]))) ? {1'b0,dccm_dma_ecc_error} : (iccm_dma_rvalid & (i == DEPTH_PTR'(iccm_dma_rtag[2:0]))) ? {1'b0,iccm_dma_ecc_error} :
{(dma_address_error | dma_alignment_error | dma_dbg_cmd_error), dma_alignment_error};
assign fifo_data_in[i] = (fifo_error_en[i] & (|fifo_error_in[i])) ? {32'b0,fifo_addr[i]} :
((dccm_dma_rvalid & (i == DEPTH_PTR'(dccm_dma_rtag[2:0]))) ? dccm_dma_rdata[63:0] : (iccm_dma_rvalid & (i == DEPTH_PTR'(iccm_dma_rtag[2:0]))) ? iccm_dma_rdata[63:0] :
(dbg_cmd_valid ? {2{dma_dbg_mem_wrdata[31:0]}} : bus_cmd_wdata[63:0]));
rvdffsc #(1) fifo_valid_dff (.din(1'b1), .dout(fifo_valid[i]), .en(fifo_cmd_en[i]), .clear(fifo_reset[i]), .clk(dma_free_clk), .*);
rvdffsc #(2) fifo_error_dff (.din(fifo_error_in[i]), .dout(fifo_error[i]), .en(fifo_error_en[i]), .clear(fifo_reset[i]), .clk(dma_free_clk), .*);
rvdffsc #(1) fifo_error_bus_dff (.din(1'b1), .dout(fifo_error_bus[i]), .en(fifo_error_bus_en[i]), .clear(fifo_reset[i]), .clk(dma_free_clk), .*);
rvdffsc #(1) fifo_rpend_dff (.din(1'b1), .dout(fifo_rpend[i]), .en(fifo_pend_en[i]), .clear(fifo_reset[i]), .clk(dma_free_clk), .*);
rvdffsc #(1) fifo_done_dff (.din(1'b1), .dout(fifo_done[i]), .en(fifo_done_en[i]), .clear(fifo_reset[i]), .clk(dma_free_clk), .*);
rvdffsc #(1) fifo_done_bus_dff (.din(1'b1), .dout(fifo_done_bus[i]), .en(fifo_done_bus_en[i]), .clear(fifo_reset[i]), .clk(dma_free_clk), .*);
rvdffe #(32) fifo_addr_dff (.din(fifo_addr_in[31:0]), .dout(fifo_addr[i]), .en(fifo_cmd_en[i]), .*);
rvdffs #(3) fifo_sz_dff (.din(fifo_sz_in[2:0]), .dout(fifo_sz[i]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*);
rvdffs #(8) fifo_byteen_dff (.din(fifo_byteen_in[7:0]), .dout(fifo_byteen[i]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*);
rvdffs #(1) fifo_write_dff (.din(fifo_write_in), .dout(fifo_write[i]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*);
rvdffs #(1) fifo_posted_write_dff (.din(fifo_posted_write_in), .dout(fifo_posted_write[i]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*);
rvdffs #(1) fifo_dbg_dff (.din(fifo_dbg_in), .dout(fifo_dbg[i]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*);
rvdffe #(64) fifo_data_dff (.din(fifo_data_in[i]), .dout(fifo_data[i]), .en(fifo_data_en[i]), .*);
rvdffs #(pt.DMA_BUS_TAG) fifo_tag_dff(.din(bus_cmd_tag[pt.DMA_BUS_TAG-1:0]), .dout(fifo_tag[i][pt.DMA_BUS_TAG-1:0]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*);
rvdffs #(pt.DMA_BUS_ID) fifo_mid_dff(.din(bus_cmd_mid[pt.DMA_BUS_ID-1:0]), .dout(fifo_mid[i][pt.DMA_BUS_ID-1:0]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*);
rvdffs #(pt.DMA_BUS_PRTY) fifo_prty_dff(.din(bus_cmd_prty[pt.DMA_BUS_PRTY-1:0]), .dout(fifo_prty[i][pt.DMA_BUS_PRTY-1:0]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*);
end
// Pointer logic
assign NxtWrPtr[DEPTH_PTR-1:0] = (WrPtr[DEPTH_PTR-1:0] == (DEPTH-1)) ? '0 : WrPtr[DEPTH_PTR-1:0] + 1'b1;
assign NxtRdPtr[DEPTH_PTR-1:0] = (RdPtr[DEPTH_PTR-1:0] == (DEPTH-1)) ? '0 : RdPtr[DEPTH_PTR-1:0] + 1'b1;
assign NxtRspPtr[DEPTH_PTR-1:0] = (RspPtr[DEPTH_PTR-1:0] == (DEPTH-1)) ? '0 : RspPtr[DEPTH_PTR-1:0] + 1'b1;
assign WrPtrEn = |fifo_cmd_en[DEPTH-1:0];
assign RdPtrEn = dma_dccm_req | dma_iccm_req | (dma_address_error | dma_alignment_error | dma_dbg_cmd_error);
assign RspPtrEn = (dma_dbg_cmd_done | (bus_rsp_sent | bus_posted_write_done) & dma_bus_clk_en);
rvdffs #(DEPTH_PTR) WrPtr_dff(.din(NxtWrPtr[DEPTH_PTR-1:0]), .dout(WrPtr[DEPTH_PTR-1:0]), .en(WrPtrEn), .clk(dma_free_clk), .*);
rvdffs #(DEPTH_PTR) RdPtr_dff(.din(NxtRdPtr[DEPTH_PTR-1:0]), .dout(RdPtr[DEPTH_PTR-1:0]), .en(RdPtrEn), .clk(dma_free_clk), .*);
rvdffs #(DEPTH_PTR) RspPtr_dff(.din(NxtRspPtr[DEPTH_PTR-1:0]), .dout(RspPtr[DEPTH_PTR-1:0]), .en(RspPtrEn), .clk(dma_free_clk), .*);
// Miscellaneous signals
assign fifo_full = fifo_full_spec_bus;
always_comb begin
num_fifo_vld[3:0] = {3'b0,bus_cmd_sent} - {3'b0,bus_rsp_sent};
for (int i=0; i<DEPTH; i++) begin
num_fifo_vld[3:0] += {3'b0,fifo_valid[i]};
end
end
assign fifo_full_spec = (num_fifo_vld[3:0] >= DEPTH);
assign dma_fifo_ready = ~(fifo_full | dbg_dma_bubble_bus);
// Error logic
assign dma_address_error = fifo_valid[RdPtr] & ~fifo_done[RdPtr] & ~fifo_dbg[RdPtr] & (~(dma_mem_addr_in_dccm | dma_mem_addr_in_iccm)); // request not for ICCM or DCCM
assign dma_alignment_error = fifo_valid[RdPtr] & ~fifo_done[RdPtr] & ~fifo_dbg[RdPtr] & ~dma_address_error &
(((dma_mem_sz_int[2:0] == 3'h1) & dma_mem_addr_int[0]) | // HW size but unaligned
((dma_mem_sz_int[2:0] == 3'h2) & (|dma_mem_addr_int[1:0])) | // W size but unaligned
((dma_mem_sz_int[2:0] == 3'h3) & (|dma_mem_addr_int[2:0])) | // DW size but unaligned
(dma_mem_addr_in_iccm & ~((dma_mem_sz_int[1:0] == 2'b10) | (dma_mem_sz_int[1:0] == 2'b11))) | // ICCM access not word size
(dma_mem_addr_in_dccm & dma_mem_write & ~((dma_mem_sz_int[1:0] == 2'b10) | (dma_mem_sz_int[1:0] == 2'b11))) | // DCCM write not word size
(dma_mem_write & (dma_mem_sz_int[2:0] == 3'h2) & (dma_mem_byteen[dma_mem_addr_int[2:0]+:4] != 4'hf)) | // Write byte enables not aligned for word store
(dma_mem_write & (dma_mem_sz_int[2:0] == 3'h3) & ~((dma_mem_byteen[7:0] == 8'h0f) | (dma_mem_byteen[7:0] == 8'hf0) | (dma_mem_byteen[7:0] == 8'hff)))); // Write byte enables not aligned for dword store
//Dbg outputs
assign dma_dbg_ready = fifo_empty & dbg_dma_bubble;
assign dma_dbg_cmd_done = (fifo_valid[RspPtr] & fifo_dbg[RspPtr] & fifo_done[RspPtr]);
assign dma_dbg_cmd_fail = (|fifo_error[RspPtr] & dma_dbg_cmd_done) ;
assign dma_dbg_sz[1:0] = fifo_sz[RspPtr][1:0];
assign dma_dbg_addr[1:0] = fifo_addr[RspPtr][1:0];
assign dma_dbg_mem_rddata[31:0] = fifo_addr[RspPtr][2] ? fifo_data[RspPtr][63:32] : fifo_data[RspPtr][31:0];
assign dma_dbg_rddata[31:0] = ({32{(dma_dbg_sz[1:0] == 2'h0)}} & ((dma_dbg_mem_rddata[31:0] >> 8*dma_dbg_addr[1:0]) & 32'hff)) |
({32{(dma_dbg_sz[1:0] == 2'h1)}} & ((dma_dbg_mem_rddata[31:0] >> 16*dma_dbg_addr[1]) & 32'hffff)) |
({32{(dma_dbg_sz[1:0] == 2'h2)}} & dma_dbg_mem_rddata[31:0]);
assign dma_dbg_cmd_error = fifo_valid[RdPtr] & ~fifo_done[RdPtr] & fifo_dbg[RdPtr] &
((~(dma_mem_addr_in_dccm | dma_mem_addr_in_iccm | dma_mem_addr_in_pic)) | // Address outside of ICCM/DCCM/PIC
((dma_mem_addr_in_iccm | dma_mem_addr_in_pic) & (dma_mem_sz_int[1:0] != 2'b10))); // Only word accesses allowed for ICCM/PIC
assign dma_dbg_mem_wrdata[31:0] = ({32{dbg_cmd_size[1:0] == 2'h0}} & {4{dbg_cmd_wrdata[7:0]}}) |
({32{dbg_cmd_size[1:0] == 2'h1}} & {2{dbg_cmd_wrdata[15:0]}}) |
({32{dbg_cmd_size[1:0] == 2'h2}} & dbg_cmd_wrdata[31:0]);
// Block the decode if fifo full
assign dma_dccm_stall_any = dma_mem_req & (dma_mem_addr_in_dccm | dma_mem_addr_in_pic) & (dma_nack_count >= dma_nack_count_csr);
assign dma_iccm_stall_any = dma_mem_req & dma_mem_addr_in_iccm & (dma_nack_count >= dma_nack_count_csr);
// Used to indicate ready to debug
assign fifo_empty = ~((|(fifo_valid[DEPTH-1:0])) | bus_cmd_sent);
// Nack counter, stall the lsu pipe if 7 nacks
assign dma_nack_count_csr[2:0] = dec_tlu_dma_qos_prty[2:0];
assign dma_nack_count_d[2:0] = (dma_nack_count[2:0] >= dma_nack_count_csr[2:0]) ? ({3{~(dma_dccm_req | dma_iccm_req)}} & dma_nack_count[2:0]) :
(dma_mem_req & ~(dma_dccm_req | dma_iccm_req)) ? (dma_nack_count[2:0] + 1'b1) : 3'b0;
rvdffs #(3) nack_count_dff(.din(dma_nack_count_d[2:0]), .dout(dma_nack_count[2:0]), .en(dma_mem_req), .clk(dma_free_clk), .*);
// Core outputs
assign dma_mem_req = fifo_valid[RdPtr] & ~fifo_rpend[RdPtr] & ~fifo_done[RdPtr] & ~(dma_address_error | dma_alignment_error | dma_dbg_cmd_error);
assign dma_dccm_req = dma_mem_req & (dma_mem_addr_in_dccm | dma_mem_addr_in_pic) & dccm_ready;
assign dma_iccm_req = dma_mem_req & dma_mem_addr_in_iccm & iccm_ready;
assign dma_mem_tag[2:0] = 3'(RdPtr);
assign dma_mem_addr_int[31:0] = fifo_addr[RdPtr];
assign dma_mem_sz_int[2:0] = fifo_sz[RdPtr];
assign dma_mem_addr[31:0] = (dma_mem_write & ~fifo_dbg[RdPtr] & (dma_mem_byteen[7:0] == 8'hf0)) ? {dma_mem_addr_int[31:3],1'b1,dma_mem_addr_int[1:0]} : dma_mem_addr_int[31:0];
assign dma_mem_sz[2:0] = (dma_mem_write & ~fifo_dbg[RdPtr] & ((dma_mem_byteen[7:0] == 8'h0f) | (dma_mem_byteen[7:0] == 8'hf0))) ? 3'h2 : dma_mem_sz_int[2:0];
assign dma_mem_byteen[7:0] = fifo_byteen[RdPtr];
assign dma_mem_write = fifo_write[RdPtr];
assign dma_mem_wdata[63:0] = fifo_data[RdPtr];
// PMU outputs
assign dma_pmu_dccm_read = dma_dccm_req & ~dma_mem_write;
assign dma_pmu_dccm_write = dma_dccm_req & dma_mem_write;
assign dma_pmu_any_read = (dma_dccm_req | dma_iccm_req) & ~dma_mem_write;
assign dma_pmu_any_write = (dma_dccm_req | dma_iccm_req) & dma_mem_write;
// Address check dccm
if (pt.DCCM_ENABLE) begin: Gen_dccm_enable
rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
.CCM_SIZE(pt.DCCM_SIZE)) addr_dccm_rangecheck (
.addr(dma_mem_addr_int[31:0]),
.in_range(dma_mem_addr_in_dccm),
.in_region(dma_mem_addr_in_dccm_region_nc)
);
end else begin: Gen_dccm_disable
assign dma_mem_addr_in_dccm = '0;
assign dma_mem_addr_in_dccm_region_nc = '0;
end // else: !if(pt.ICCM_ENABLE)
// Address check iccm
if (pt.ICCM_ENABLE) begin: Gen_iccm_enable
rvrangecheck #(.CCM_SADR(pt.ICCM_SADR),
.CCM_SIZE(pt.ICCM_SIZE)) addr_iccm_rangecheck (
.addr(dma_mem_addr_int[31:0]),
.in_range(dma_mem_addr_in_iccm),
.in_region(dma_mem_addr_in_iccm_region_nc)
);
end else begin: Gen_iccm_disable
assign dma_mem_addr_in_iccm = '0;
assign dma_mem_addr_in_iccm_region_nc = '0;
end // else: !if(pt.ICCM_ENABLE)
// PIC memory address check
rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
.CCM_SIZE(pt.PIC_SIZE)) addr_pic_rangecheck (
.addr(dma_mem_addr_int[31:0]),
.in_range(dma_mem_addr_in_pic),
.in_region(dma_mem_addr_in_pic_region_nc)
);
// Inputs
rvdff_fpga #(1) fifo_full_bus_ff (.din(fifo_full_spec), .dout(fifo_full_spec_bus), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
rvdff_fpga #(1) dbg_dma_bubble_ff (.din(dbg_dma_bubble), .dout(dbg_dma_bubble_bus), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
rvdff #(1) dma_dbg_cmd_doneff (.din(dma_dbg_cmd_done), .dout(dma_dbg_cmd_done_q), .clk(free_clk), .*);
// Clock Gating logic
assign dma_buffer_c1_clken = (bus_cmd_valid & dma_bus_clk_en) | dbg_cmd_valid | clk_override;
assign dma_free_clken = (bus_cmd_valid | bus_rsp_valid | dbg_cmd_valid | dma_dbg_cmd_done | dma_dbg_cmd_done_q | (|fifo_valid[DEPTH-1:0]) | clk_override);
rvoclkhdr dma_buffer_c1cgc ( .en(dma_buffer_c1_clken), .l1clk(dma_buffer_c1_clk), .* );
rvoclkhdr dma_free_cgc (.en(dma_free_clken), .l1clk(dma_free_clk), .*);
`ifdef RV_FPGA_OPTIMIZE
assign dma_bus_clk = 1'b0;
`else
rvclkhdr dma_bus_cgc (.en(dma_bus_clk_en), .l1clk(dma_bus_clk), .*);
`endif
// Write channel buffer
assign wrbuf_en = dma_axi_awvalid & dma_axi_awready;
assign wrbuf_data_en = dma_axi_wvalid & dma_axi_wready;
assign wrbuf_cmd_sent = bus_cmd_sent & bus_cmd_write;
assign wrbuf_rst = wrbuf_cmd_sent & ~wrbuf_en;
assign wrbuf_data_rst = wrbuf_cmd_sent & ~wrbuf_data_en;
rvdffsc_fpga #(.WIDTH(1)) wrbuf_vldff (.din(1'b1), .dout(wrbuf_vld), .en(wrbuf_en), .clear(wrbuf_rst), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
rvdffsc_fpga #(.WIDTH(1)) wrbuf_data_vldff (.din(1'b1), .dout(wrbuf_data_vld), .en(wrbuf_data_en), .clear(wrbuf_data_rst), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(pt.DMA_BUS_TAG)) wrbuf_tagff (.din(dma_axi_awid[pt.DMA_BUS_TAG-1:0]), .dout(wrbuf_tag[pt.DMA_BUS_TAG-1:0]), .en(wrbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(3)) wrbuf_szff (.din(dma_axi_awsize[2:0]), .dout(wrbuf_sz[2:0]), .en(wrbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
rvdffe #(.WIDTH(32)) wrbuf_addrff (.din(dma_axi_awaddr[31:0]), .dout(wrbuf_addr[31:0]), .en(wrbuf_en & dma_bus_clk_en), .*);
rvdffe #(.WIDTH(64)) wrbuf_dataff (.din(dma_axi_wdata[63:0]), .dout(wrbuf_data[63:0]), .en(wrbuf_data_en & dma_bus_clk_en), .*);
rvdffs_fpga #(.WIDTH(8)) wrbuf_byteenff (.din(dma_axi_wstrb[7:0]), .dout(wrbuf_byteen[7:0]), .en(wrbuf_data_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
// Read channel buffer
assign rdbuf_en = dma_axi_arvalid & dma_axi_arready;
assign rdbuf_cmd_sent = bus_cmd_sent & ~bus_cmd_write;
assign rdbuf_rst = rdbuf_cmd_sent & ~rdbuf_en;
rvdffsc_fpga #(.WIDTH(1)) rdbuf_vldff (.din(1'b1), .dout(rdbuf_vld), .en(rdbuf_en), .clear(rdbuf_rst), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(pt.DMA_BUS_TAG)) rdbuf_tagff (.din(dma_axi_arid[pt.DMA_BUS_TAG-1:0]), .dout(rdbuf_tag[pt.DMA_BUS_TAG-1:0]), .en(rdbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(3)) rdbuf_szff (.din(dma_axi_arsize[2:0]), .dout(rdbuf_sz[2:0]), .en(rdbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
rvdffe #(.WIDTH(32)) rdbuf_addrff (.din(dma_axi_araddr[31:0]), .dout(rdbuf_addr[31:0]), .en(rdbuf_en & dma_bus_clk_en), .*);
assign dma_axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent);
assign dma_axi_wready = ~(wrbuf_data_vld & ~wrbuf_cmd_sent);
assign dma_axi_arready = ~(rdbuf_vld & ~rdbuf_cmd_sent);
//Generate a single request from read/write channel
assign bus_cmd_valid = (wrbuf_vld & wrbuf_data_vld) | rdbuf_vld;
assign bus_cmd_sent = bus_cmd_valid & dma_fifo_ready;
assign bus_cmd_write = axi_mstr_sel;
assign bus_cmd_posted_write = '0;
assign bus_cmd_addr[31:0] = axi_mstr_sel ? wrbuf_addr[31:0] : rdbuf_addr[31:0];
assign bus_cmd_sz[2:0] = axi_mstr_sel ? wrbuf_sz[2:0] : rdbuf_sz[2:0];
assign bus_cmd_wdata[63:0] = wrbuf_data[63:0];
assign bus_cmd_byteen[7:0] = wrbuf_byteen[7:0];
assign bus_cmd_tag[pt.DMA_BUS_TAG-1:0] = axi_mstr_sel ? wrbuf_tag[pt.DMA_BUS_TAG-1:0] : rdbuf_tag[pt.DMA_BUS_TAG-1:0];
assign bus_cmd_mid[pt.DMA_BUS_ID-1:0] = '0;
assign bus_cmd_prty[pt.DMA_BUS_PRTY-1:0] = '0;
// Sel=1 -> write has higher priority
assign axi_mstr_sel = (wrbuf_vld & wrbuf_data_vld & rdbuf_vld) ? axi_mstr_priority : (wrbuf_vld & wrbuf_data_vld);
assign axi_mstr_prty_in = ~axi_mstr_priority;
assign axi_mstr_prty_en = bus_cmd_sent;
rvdffs_fpga #(.WIDTH(1)) mstr_prtyff(.din(axi_mstr_prty_in), .dout(axi_mstr_priority), .en(axi_mstr_prty_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
assign axi_rsp_valid = fifo_valid[RspPtr] & ~fifo_dbg[RspPtr] & fifo_done_bus[RspPtr];
assign axi_rsp_rdata[63:0] = fifo_data[RspPtr];
assign axi_rsp_write = fifo_write[RspPtr];
assign axi_rsp_error[1:0] = fifo_error[RspPtr][0] ? 2'b10 : (fifo_error[RspPtr][1] ? 2'b11 : 2'b0);
assign axi_rsp_tag[pt.DMA_BUS_TAG-1:0] = fifo_tag[RspPtr];
// AXI response channel signals
assign dma_axi_bvalid = axi_rsp_valid & axi_rsp_write;
assign dma_axi_bresp[1:0] = axi_rsp_error[1:0];
assign dma_axi_bid[pt.DMA_BUS_TAG-1:0] = axi_rsp_tag[pt.DMA_BUS_TAG-1:0];
assign dma_axi_rvalid = axi_rsp_valid & ~axi_rsp_write;
assign dma_axi_rresp[1:0] = axi_rsp_error;
assign dma_axi_rdata[63:0] = axi_rsp_rdata[63:0];
assign dma_axi_rlast = 1'b1;
assign dma_axi_rid[pt.DMA_BUS_TAG-1:0] = axi_rsp_tag[pt.DMA_BUS_TAG-1:0];
assign bus_posted_write_done = 1'b0;
assign bus_rsp_valid = (dma_axi_bvalid | dma_axi_rvalid);
assign bus_rsp_sent = (dma_axi_bvalid & dma_axi_bready) | (dma_axi_rvalid & dma_axi_rready);
assign dma_active = wrbuf_vld | rdbuf_vld | (|fifo_valid[DEPTH-1:0]);
`ifdef RV_ASSERT_ON
for (genvar i=0; i<DEPTH; i++) begin
assert_fifo_done_and_novalid: assert #0 (~fifo_done[i] | fifo_valid[i]);
end
// Assertion to check awvalid stays stable during entire bus clock
property dma_axi_awvalid_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_awvalid != $past(dma_axi_awvalid)) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_awvalid_stable: assert property (dma_axi_awvalid_stable) else
$display("DMA AXI awvalid changed in middle of bus clock");
// Assertion to check awid stays stable during entire bus clock
property dma_axi_awid_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_awvalid & (dma_axi_awid[pt.DMA_BUS_TAG-1:0] != $past(dma_axi_awid[pt.DMA_BUS_TAG-1:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_awid_stable: assert property (dma_axi_awid_stable) else
$display("DMA AXI awid changed in middle of bus clock");
// Assertion to check awaddr stays stable during entire bus clock
property dma_axi_awaddr_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_awvalid & (dma_axi_awaddr[31:0] != $past(dma_axi_awaddr[31:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_awaddr_stable: assert property (dma_axi_awaddr_stable) else
$display("DMA AXI awaddr changed in middle of bus clock");
// Assertion to check awsize stays stable during entire bus clock
property dma_axi_awsize_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_awvalid & (dma_axi_awsize[2:0] != $past(dma_axi_awsize[2:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_awsize_stable: assert property (dma_axi_awsize_stable) else
$display("DMA AXI awsize changed in middle of bus clock");
// Assertion to check wstrb stays stable during entire bus clock
property dma_axi_wstrb_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_wvalid & (dma_axi_wstrb[7:0] != $past(dma_axi_wstrb[7:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_wstrb_stable: assert property (dma_axi_wstrb_stable) else
$display("DMA AXI wstrb changed in middle of bus clock");
// Assertion to check wdata stays stable during entire bus clock
property dma_axi_wdata_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_wvalid & (dma_axi_wdata[63:0] != $past(dma_axi_wdata[63:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_wdata_stable: assert property (dma_axi_wdata_stable) else
$display("DMA AXI wdata changed in middle of bus clock");
// Assertion to check awvalid stays stable during entire bus clock
property dma_axi_arvalid_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_arvalid != $past(dma_axi_arvalid)) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_arvalid_stable: assert property (dma_axi_arvalid_stable) else
$display("DMA AXI awvalid changed in middle of bus clock");
// Assertion to check awid stays stable during entire bus clock
property dma_axi_arid_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_arvalid & (dma_axi_arid[pt.DMA_BUS_TAG-1:0] != $past(dma_axi_arid[pt.DMA_BUS_TAG-1:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_arid_stable: assert property (dma_axi_arid_stable) else
$display("DMA AXI awid changed in middle of bus clock");
// Assertion to check awaddr stays stable during entire bus clock
property dma_axi_araddr_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_arvalid & (dma_axi_araddr[31:0] != $past(dma_axi_araddr[31:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_araddr_stable: assert property (dma_axi_araddr_stable) else
$display("DMA AXI awaddr changed in middle of bus clock");
// Assertion to check awsize stays stable during entire bus clock
property dma_axi_arsize_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_awvalid & (dma_axi_arsize[2:0] != $past(dma_axi_arsize[2:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_arsize_stable: assert property (dma_axi_arsize_stable) else
$display("DMA AXI awsize changed in middle of bus clock");
// Assertion to check bvalid stays stable during entire bus clock
property dma_axi_bvalid_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_bvalid != $past(dma_axi_bvalid)) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_bvalid_stable: assert property (dma_axi_bvalid_stable) else
$display("DMA AXI bvalid changed in middle of bus clock");
// Assertion to check bvalid stays stable if bready is low
property dma_axi_bvalid_stable_till_bready;
@(posedge clk) disable iff(~rst_l) (~dma_axi_bvalid && $past(dma_axi_bvalid)) |-> $past(dma_axi_bready);
endproperty
assert_dma_axi_bvalid_stable_till_bready: assert property (dma_axi_bvalid_stable_till_bready) else
$display("DMA AXI bvalid deasserted without bready");
// Assertion to check bresp stays stable during entire bus clock
property dma_axi_bresp_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_bvalid & (dma_axi_bresp[1:0] != $past(dma_axi_bresp[1:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_bresp_stable: assert property (dma_axi_bresp_stable) else
$display("DMA AXI bresp changed in middle of bus clock");
// Assertion to check bid stays stable during entire bus clock
property dma_axi_bid_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_bvalid & (dma_axi_bid[pt.DMA_BUS_TAG-1:0] != $past(dma_axi_bid[pt.DMA_BUS_TAG-1:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_bid_stable: assert property (dma_axi_bid_stable) else
$display("DMA AXI bid changed in middle of bus clock");
// Assertion to check rvalid stays stable during entire bus clock
property dma_axi_rvalid_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_rvalid != $past(dma_axi_rvalid)) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_rvalid_stable: assert property (dma_axi_rvalid_stable) else
$display("DMA AXI bvalid changed in middle of bus clock");
// Assertion to check rvalid stays stable if bready is low
property dma_axi_rvalid_stable_till_ready;
@(posedge clk) disable iff(~rst_l) (~dma_axi_rvalid && $past(dma_axi_rvalid)) |-> $past(dma_axi_rready);
endproperty
assert_dma_axi_rvalid_stable_till_ready: assert property (dma_axi_rvalid_stable_till_ready) else
$display("DMA AXI bvalid changed in middle of bus clock");
// Assertion to check rresp stays stable during entire bus clock
property dma_axi_rresp_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_rvalid & (dma_axi_rresp[1:0] != $past(dma_axi_rresp[1:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_rresp_stable: assert property (dma_axi_rresp_stable) else
$display("DMA AXI bresp changed in middle of bus clock");
// Assertion to check rid stays stable during entire bus clock
property dma_axi_rid_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_rvalid & (dma_axi_rid[pt.DMA_BUS_TAG-1:0] != $past(dma_axi_rid[pt.DMA_BUS_TAG-1:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_rid_stable: assert property (dma_axi_rid_stable) else
$display("DMA AXI bid changed in middle of bus clock");
// Assertion to check rdata stays stable during entire bus clock
property dma_axi_rdata_stable;
@(posedge clk) disable iff(~rst_l) (dma_axi_rvalid & (dma_axi_rdata[63:0] != $past(dma_axi_rdata[63:0]))) |-> $past(dma_bus_clk_en);
endproperty
assert_dma_axi_rdata_stable: assert property (dma_axi_rdata_stable) else
$display("DMA AXI bid changed in middle of bus clock");
`endif
endmodule // el2_dma_ctrl

139
Flow/design/el2_mem.sv Normal file
View File

@ -0,0 +1,139 @@
//********************************************************************************
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
module el2_mem
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk,
input logic rst_l,
input logic dccm_clk_override,
input logic icm_clk_override,
input logic dec_tlu_core_ecc_disable,
//DCCM ports
input logic dccm_wren,
input logic dccm_rden,
input logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo,
input logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi,
input logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo,
input logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi,
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo,
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi,
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo,
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi,
//`ifdef pt.DCCM_ENABLE
input el2_dccm_ext_in_pkt_t [pt.DCCM_NUM_BANKS-1:0] dccm_ext_in_pkt,
//`endif
//ICCM ports
input el2_ccm_ext_in_pkt_t [pt.ICCM_NUM_BANKS-1:0] iccm_ext_in_pkt,
input logic [pt.ICCM_BITS-1:1] iccm_rw_addr,
input logic iccm_buf_correct_ecc, // ICCM is doing a single bit error correct cycle
input logic iccm_correction_state, // ICCM is doing a single bit error correct cycle
input logic iccm_wren,
input logic iccm_rden,
input logic [2:0] iccm_wr_size,
input logic [77:0] iccm_wr_data,
output logic [63:0] iccm_rd_data,
output logic [77:0] iccm_rd_data_ecc,
// Icache and Itag Ports
input logic [31:1] ic_rw_addr,
input logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid,
input logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en,
input logic ic_rd_en,
input logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache.
input logic ic_sel_premux_data, // Premux data sel
input el2_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt,
input el2_ic_tag_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0] ic_tag_ext_in_pkt,
input logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC
input logic [70:0] ic_debug_wr_data, // Debug wr cache.
output logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
input logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache.
input logic ic_debug_rd_en, // Icache debug rd
input logic ic_debug_wr_en, // Icache debug wr
input logic ic_debug_tag_array, // Debug tag array
input logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr.
output logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
output logic [25:0] ictag_debug_rd_data,// Debug icache tag.
output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // ecc error per bank
output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, // parity error per bank
output logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit,
output logic ic_tag_perr, // Icache Tag parity error
input logic scan_mode
);
logic active_clk;
rvoclkhdr active_cg ( .en(1'b1), .l1clk(active_clk), .* );
// DCCM Instantiation
if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
el2_lsu_dccm_mem #(.pt(pt)) dccm (
.clk_override(dccm_clk_override),
.*
);
end else begin: Gen_dccm_disable
assign dccm_rd_data_lo = '0;
assign dccm_rd_data_hi = '0;
end
if ( pt.ICACHE_ENABLE ) begin: icache
el2_ifu_ic_mem #(.pt(pt)) icm (
.clk_override(icm_clk_override),
.*
);
end
else begin
assign ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0] = '0;
assign ic_tag_perr = '0 ;
assign ic_rd_data = '0 ;
assign ictag_debug_rd_data = '0 ;
end // else: !if( pt.ICACHE_ENABLE )
if (pt.ICCM_ENABLE) begin : iccm
el2_ifu_iccm_mem #(.pt(pt)) iccm (.*,
.clk_override(icm_clk_override),
.iccm_rw_addr(iccm_rw_addr[pt.ICCM_BITS-1:1]),
.iccm_rd_data(iccm_rd_data[63:0])
);
end
else begin
assign iccm_rd_data = '0 ;
assign iccm_rd_data_ecc = '0 ;
end
endmodule

595
Flow/design/el2_pic_ctrl.sv Normal file
View File

@ -0,0 +1,595 @@
//********************************************************************************
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
//********************************************************************************
// Function: Programmable Interrupt Controller
// Comments:
//********************************************************************************
module el2_pic_ctrl #(
`include "el2_param.vh"
)
(
input logic clk, // Core clock
input logic free_clk, // free clock
input logic rst_l, // Reset for all flops
input logic clk_override, // Clock over-ride for gating
input logic io_clk_override, // PIC IO Clock over-ride for gating
input logic [pt.PIC_TOTAL_INT_PLUS1-1:0] extintsrc_req, // Interrupt requests
input logic [31:0] picm_rdaddr, // Address of the register
input logic [31:0] picm_wraddr, // Address of the register
input logic [31:0] picm_wr_data, // Data to be written to the register
input logic picm_wren, // Write enable to the register
input logic picm_rden, // Read enable for the register
input logic picm_mken, // Read the Mask for the register
input logic [3:0] meicurpl, // Current Priority Level
input logic [3:0] meipt, // Current Priority Threshold
output logic mexintpend, // External Inerrupt request to the core
output logic [7:0] claimid, // Claim Id of the requested interrupt
output logic [3:0] pl, // Priority level of the requested interrupt
output logic [31:0] picm_rd_data, // Read data of the register
output logic mhwakeup, // Wake-up interrupt request
input logic scan_mode // scan mode
);
localparam NUM_LEVELS = $clog2(pt.PIC_TOTAL_INT_PLUS1);
localparam INTPRIORITY_BASE_ADDR = pt.PIC_BASE_ADDR ;
localparam INTPEND_BASE_ADDR = pt.PIC_BASE_ADDR + 32'h00001000 ;
localparam INTENABLE_BASE_ADDR = pt.PIC_BASE_ADDR + 32'h00002000 ;
localparam EXT_INTR_PIC_CONFIG = pt.PIC_BASE_ADDR + 32'h00003000 ;
localparam EXT_INTR_GW_CONFIG = pt.PIC_BASE_ADDR + 32'h00004000 ;
localparam EXT_INTR_GW_CLEAR = pt.PIC_BASE_ADDR + 32'h00005000 ;
localparam INTPEND_SIZE = (pt.PIC_TOTAL_INT_PLUS1 < 32) ? 32 :
(pt.PIC_TOTAL_INT_PLUS1 < 64) ? 64 :
(pt.PIC_TOTAL_INT_PLUS1 < 128) ? 128 :
(pt.PIC_TOTAL_INT_PLUS1 < 256) ? 256 :
(pt.PIC_TOTAL_INT_PLUS1 < 512) ? 512 : 1024 ;
localparam INT_GRPS = INTPEND_SIZE / 32 ;
localparam INTPRIORITY_BITS = 4 ;
localparam ID_BITS = 8 ;
localparam int GW_CONFIG[pt.PIC_TOTAL_INT_PLUS1-1:0] = '{default:0} ;
localparam INT_ENABLE_GRPS = (pt.PIC_TOTAL_INT_PLUS1 - 1) / 4 ;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_clk_enable ;
logic [INT_ENABLE_GRPS:0] intenable_clk_enable_grp ;
logic [INT_ENABLE_GRPS:0] gw_clk ;
logic addr_intpend_base_match;
logic raddr_config_pic_match ;
logic raddr_intenable_base_match;
logic raddr_intpriority_base_match;
logic raddr_config_gw_base_match ;
logic waddr_config_pic_match ;
logic waddr_intpriority_base_match;
logic waddr_intenable_base_match;
logic waddr_config_gw_base_match ;
logic addr_clear_gw_base_match ;
logic mexintpend_in;
logic mhwakeup_in ;
logic intpend_reg_read ;
logic [31:0] picm_rd_data_in, intpend_rd_out;
logic intenable_rd_out ;
logic [INTPRIORITY_BITS-1:0] intpriority_rd_out;
logic [1:0] gw_config_rd_out;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [INTPRIORITY_BITS-1:0] intpriority_reg;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [INTPRIORITY_BITS-1:0] intpriority_reg_inv;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intpriority_reg_we;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intpriority_reg_re;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [1:0] gw_config_reg;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_reg;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_reg_we;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_reg_re;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] gw_config_reg_we;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] gw_config_reg_re;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] gw_clear_reg_we;
logic [INTPEND_SIZE-1:0] intpend_reg_extended;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [INTPRIORITY_BITS-1:0] intpend_w_prior_en;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [ID_BITS-1:0] intpend_id;
logic [INTPRIORITY_BITS-1:0] maxint;
logic [INTPRIORITY_BITS-1:0] selected_int_priority;
logic [INT_GRPS-1:0] [31:0] intpend_rd_part_out ;
logic config_reg;
logic intpriord;
logic config_reg_we ;
logic config_reg_re ;
logic config_reg_in ;
logic prithresh_reg_write , prithresh_reg_read;
logic intpriority_reg_read ;
logic intenable_reg_read ;
logic gw_config_reg_read ;
logic picm_wren_ff , picm_rden_ff ;
logic [31:0] picm_raddr_ff;
logic [31:0] picm_waddr_ff;
logic [31:0] picm_wr_data_ff;
logic [3:0] mask;
logic picm_mken_ff;
logic [ID_BITS-1:0] claimid_in ;
logic [INTPRIORITY_BITS-1:0] pl_in ;
logic [INTPRIORITY_BITS-1:0] pl_in_q ;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] extintsrc_req_sync;
logic [pt.PIC_TOTAL_INT_PLUS1-1:0] extintsrc_req_gw;
logic picm_bypass_ff;
// clkens
logic pic_raddr_c1_clken;
logic pic_waddr_c1_clken;
logic pic_data_c1_clken;
logic pic_pri_c1_clken;
logic pic_int_c1_clken;
logic gw_config_c1_clken;
// clocks
logic pic_raddr_c1_clk;
logic pic_data_c1_clk;
logic pic_pri_c1_clk;
logic pic_int_c1_clk;
logic gw_config_c1_clk;
// ---- Clock gating section ------
// c1 clock enables
assign pic_raddr_c1_clken = picm_mken | picm_rden | clk_override;
assign pic_data_c1_clken = picm_wren | clk_override;
assign pic_pri_c1_clken = (waddr_intpriority_base_match & picm_wren_ff) | (raddr_intpriority_base_match & picm_rden_ff) | clk_override;
assign pic_int_c1_clken = (waddr_intenable_base_match & picm_wren_ff) | (raddr_intenable_base_match & picm_rden_ff) | clk_override;
assign gw_config_c1_clken = (waddr_config_gw_base_match & picm_wren_ff) | (raddr_config_gw_base_match & picm_rden_ff) | clk_override;
// C1 - 1 clock pulse for data
rvoclkhdr pic_addr_c1_cgc ( .en(pic_raddr_c1_clken), .l1clk(pic_raddr_c1_clk), .* );
rvoclkhdr pic_data_c1_cgc ( .en(pic_data_c1_clken), .l1clk(pic_data_c1_clk), .* );
rvoclkhdr pic_pri_c1_cgc ( .en(pic_pri_c1_clken), .l1clk(pic_pri_c1_clk), .* );
rvoclkhdr pic_int_c1_cgc ( .en(pic_int_c1_clken), .l1clk(pic_int_c1_clk), .* );
rvoclkhdr gw_config_c1_cgc ( .en(gw_config_c1_clken), .l1clk(gw_config_c1_clk), .* );
// ------ end clock gating section ------------------------
assign raddr_intenable_base_match = (picm_raddr_ff[31:NUM_LEVELS+2] == INTENABLE_BASE_ADDR[31:NUM_LEVELS+2]) ;
assign raddr_intpriority_base_match = (picm_raddr_ff[31:NUM_LEVELS+2] == INTPRIORITY_BASE_ADDR[31:NUM_LEVELS+2]) ;
assign raddr_config_gw_base_match = (picm_raddr_ff[31:NUM_LEVELS+2] == EXT_INTR_GW_CONFIG[31:NUM_LEVELS+2]) ;
assign raddr_config_pic_match = (picm_raddr_ff[31:0] == EXT_INTR_PIC_CONFIG[31:0]) ;
assign addr_intpend_base_match = (picm_raddr_ff[31:6] == INTPEND_BASE_ADDR[31:6]) ;
assign waddr_config_pic_match = (picm_waddr_ff[31:0] == EXT_INTR_PIC_CONFIG[31:0]) ;
assign addr_clear_gw_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == EXT_INTR_GW_CLEAR[31:NUM_LEVELS+2]) ;
assign waddr_intpriority_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == INTPRIORITY_BASE_ADDR[31:NUM_LEVELS+2]) ;
assign waddr_intenable_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == INTENABLE_BASE_ADDR[31:NUM_LEVELS+2]) ;
assign waddr_config_gw_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == EXT_INTR_GW_CONFIG[31:NUM_LEVELS+2]) ;
assign picm_bypass_ff = picm_rden_ff & picm_wren_ff & ( picm_raddr_ff[31:0] == picm_waddr_ff[31:0] ); // pic writes and reads to same address together
rvdff #(32) picm_radd_flop (.*, .din (picm_rdaddr), .dout(picm_raddr_ff), .clk(pic_raddr_c1_clk));
rvdff #(32) picm_wadd_flop (.*, .din (picm_wraddr), .dout(picm_waddr_ff), .clk(pic_data_c1_clk));
rvdff #(1) picm_wre_flop (.*, .din (picm_wren), .dout(picm_wren_ff), .clk(free_clk));
rvdff #(1) picm_rde_flop (.*, .din (picm_rden), .dout(picm_rden_ff), .clk(free_clk));
rvdff #(1) picm_mke_flop (.*, .din (picm_mken), .dout(picm_mken_ff), .clk(free_clk));
rvdff #(32) picm_dat_flop (.*, .din (picm_wr_data[31:0]), .dout(picm_wr_data_ff[31:0]), .clk(pic_data_c1_clk));
//rvsyncss #(pt.PIC_TOTAL_INT_PLUS1-1) sync_inst
//(
// .clk (free_clk),
// .dout(extintsrc_req_sync[pt.PIC_TOTAL_INT_PLUS1-1:1]),
// .din (extintsrc_req[pt.PIC_TOTAL_INT_PLUS1-1:1]),
// .*) ;
//
//assign extintsrc_req_sync[0] = extintsrc_req[0];
/*
genvar p ;
for (p=0; p<=INT_ENABLE_GRPS ; p++) begin : IO_CLK_GRP
if (p==INT_ENABLE_GRPS) begin : LAST_GRP
assign intenable_clk_enable_grp[p] = |intenable_clk_enable[pt.PIC_TOTAL_INT_PLUS1-1 : p*4] | io_clk_override;
rvoclkhdr intenable_c1_cgc ( .en(intenable_clk_enable_grp[p]), .l1clk(gw_clk[p]), .* );
end else begin : CLK_GRPS
assign intenable_clk_enable_grp[p] = |intenable_clk_enable[p*4+3 : p*4] | io_clk_override;
rvoclkhdr intenable_c1_cgc ( .en(intenable_clk_enable_grp[p]), .l1clk(gw_clk[p]), .* );
end
end
*/
genvar i ;
genvar p ;
for (p=0; p<=INT_ENABLE_GRPS ; p++) begin : IO_CLK_GRP
wire grp_clk, grp_clken;
assign grp_clken = |intenable_clk_enable[(p==INT_ENABLE_GRPS?pt.PIC_TOTAL_INT_PLUS1-1:p*4+3) : p*4] | io_clk_override;
`ifndef RV_FPGA_OPTIMIZE
rvclkhdr intenable_c1_cgc( .en(grp_clken), .l1clk(grp_clk), .* );
`else
assign gw_clk[p] = 1'b0 ;
`endif
for(genvar i= (p==0 ? 1: 0); i< (p==INT_ENABLE_GRPS ? pt.PIC_TOTAL_INT_PLUS1-p*4 :4); i++) begin : GW
el2_configurable_gw gw_inst(
.*,
.gw_clk(grp_clk),
.rawclk(clk),
.clken (grp_clken),
.extintsrc_req(extintsrc_req[i+p*4]) ,
.meigwctrl_polarity(gw_config_reg[i+p*4][0]) ,
.meigwctrl_type(gw_config_reg[i+p*4][1]) ,
.meigwclr(gw_clear_reg_we[i+p*4]) ,
.extintsrc_req_config(extintsrc_req_gw[i+p*4])
);
end
end
for (i=0; i<pt.PIC_TOTAL_INT_PLUS1 ; i++) begin : SETREG
if (i > 0 ) begin : NON_ZERO_INT
assign intpriority_reg_we[i] = waddr_intpriority_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff;
assign intpriority_reg_re[i] = raddr_intpriority_base_match & (picm_raddr_ff[NUM_LEVELS+1:2] == i) & picm_rden_ff;
assign intenable_reg_we[i] = waddr_intenable_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff;
assign intenable_reg_re[i] = raddr_intenable_base_match & (picm_raddr_ff[NUM_LEVELS+1:2] == i) & picm_rden_ff;
assign gw_config_reg_we[i] = waddr_config_gw_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff;
assign gw_config_reg_re[i] = raddr_config_gw_base_match & (picm_raddr_ff[NUM_LEVELS+1:2] == i) & picm_rden_ff;
assign gw_clear_reg_we[i] = addr_clear_gw_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff ;
rvdffs #(INTPRIORITY_BITS) intpriority_ff (.*, .en( intpriority_reg_we[i]), .din (picm_wr_data_ff[INTPRIORITY_BITS-1:0]), .dout(intpriority_reg[i]), .clk(pic_pri_c1_clk));
rvdffs #(1) intenable_ff (.*, .en( intenable_reg_we[i]), .din (picm_wr_data_ff[0]), .dout(intenable_reg[i]), .clk(pic_int_c1_clk));
rvdffs #(2) gw_config_ff (.*, .en( gw_config_reg_we[i]), .din (picm_wr_data_ff[1:0]), .dout(gw_config_reg[i]), .clk(gw_config_c1_clk));
assign intenable_clk_enable[i] = gw_config_reg[i][1] | intenable_reg_we[i] | intenable_reg[i] | gw_clear_reg_we[i] ;
/*
rvsyncss_fpga #(1) sync_inst
(
.gw_clk (gw_clk[i/4]),
.rawclk (clk),
.clken (intenable_clk_enable_grp[i/4]),
.dout (extintsrc_req_sync[i]),
.din (extintsrc_req[i]),
.*) ;
el2_configurable_gw config_gw_inst(.*,
.gw_clk(gw_clk[i/4]),
.rawclk(clk),
.clken (intenable_clk_enable_grp[i/4]),
.extintsrc_req_sync(extintsrc_req_sync[i]) ,
.meigwctrl_polarity(gw_config_reg[i][0]) ,
.meigwctrl_type(gw_config_reg[i][1]) ,
.meigwclr(gw_clear_reg_we[i]) ,
.extintsrc_req_config(extintsrc_req_gw[i])
);
*/
end else begin : INT_ZERO
assign intpriority_reg_we[i] = 1'b0 ;
assign intpriority_reg_re[i] = 1'b0 ;
assign intenable_reg_we[i] = 1'b0 ;
assign intenable_reg_re[i] = 1'b0 ;
assign gw_config_reg_we[i] = 1'b0 ;
assign gw_config_reg_re[i] = 1'b0 ;
assign gw_clear_reg_we[i] = 1'b0 ;
assign gw_config_reg[i] = '0 ;
assign intpriority_reg[i] = {INTPRIORITY_BITS{1'b0}} ;
assign intenable_reg[i] = 1'b0 ;
assign extintsrc_req_gw[i] = 1'b0 ;
assign extintsrc_req_sync[i] = 1'b0 ;
assign intenable_clk_enable[i] = 1'b0;
end
assign intpriority_reg_inv[i] = intpriord ? ~intpriority_reg[i] : intpriority_reg[i] ;
assign intpend_w_prior_en[i] = {INTPRIORITY_BITS{(extintsrc_req_gw[i] & intenable_reg[i])}} & intpriority_reg_inv[i] ;
assign intpend_id[i] = i ;
end
assign pl_in[INTPRIORITY_BITS-1:0] = selected_int_priority[INTPRIORITY_BITS-1:0] ;
//if (pt.PIC_2CYCLE == 1) begin : genblock
//end
//else begin : genblock
//end
genvar l, m , j, k;
if (pt.PIC_2CYCLE == 1) begin : genblock
logic [NUM_LEVELS/2:0] [pt.PIC_TOTAL_INT_PLUS1+2:0] [INTPRIORITY_BITS-1:0] level_intpend_w_prior_en;
logic [NUM_LEVELS/2:0] [pt.PIC_TOTAL_INT_PLUS1+2:0] [ID_BITS-1:0] level_intpend_id;
logic [NUM_LEVELS:NUM_LEVELS/2] [(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2))+1:0] [INTPRIORITY_BITS-1:0] levelx_intpend_w_prior_en;
logic [NUM_LEVELS:NUM_LEVELS/2] [(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2))+1:0] [ID_BITS-1:0] levelx_intpend_id;
assign level_intpend_w_prior_en[0][pt.PIC_TOTAL_INT_PLUS1+2:0] = {4'b0,4'b0,4'b0,intpend_w_prior_en[pt.PIC_TOTAL_INT_PLUS1-1:0]} ;
assign level_intpend_id[0][pt.PIC_TOTAL_INT_PLUS1+2:0] = {8'b0,8'b0,8'b0,intpend_id[pt.PIC_TOTAL_INT_PLUS1-1:0]} ;
logic [(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2)):0] [INTPRIORITY_BITS-1:0] l2_intpend_w_prior_en_ff;
logic [(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2)):0] [ID_BITS-1:0] l2_intpend_id_ff;
assign levelx_intpend_w_prior_en[NUM_LEVELS/2][(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2))+1:0] = {{1*INTPRIORITY_BITS{1'b0}},l2_intpend_w_prior_en_ff[(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2)):0]} ;
assign levelx_intpend_id[NUM_LEVELS/2][(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2))+1:0] = {{1*ID_BITS{1'b1}},l2_intpend_id_ff[(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2)):0]} ;
/// Do the prioritization of the interrupts here ////////////
for (l=0; l<NUM_LEVELS/2 ; l++) begin : TOP_LEVEL
for (m=0; m<=(pt.PIC_TOTAL_INT_PLUS1)/(2**(l+1)) ; m++) begin : COMPARE
if ( m == (pt.PIC_TOTAL_INT_PLUS1)/(2**(l+1))) begin
assign level_intpend_w_prior_en[l+1][m+1] = '0 ;
assign level_intpend_id[l+1][m+1] = '0 ;
end
el2_cmp_and_mux #(.ID_BITS(ID_BITS),
.INTPRIORITY_BITS(INTPRIORITY_BITS)) cmp_l1 (
.a_id(level_intpend_id[l][2*m]),
.a_priority(level_intpend_w_prior_en[l][2*m]),
.b_id(level_intpend_id[l][2*m+1]),
.b_priority(level_intpend_w_prior_en[l][2*m+1]),
.out_id(level_intpend_id[l+1][m]),
.out_priority(level_intpend_w_prior_en[l+1][m])) ;
end
end
for (i=0; i<=pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2) ; i++) begin : MIDDLE_FLOPS
rvdff #(INTPRIORITY_BITS) level2_intpend_prior_reg (.*, .din (level_intpend_w_prior_en[NUM_LEVELS/2][i]), .dout(l2_intpend_w_prior_en_ff[i]), .clk(free_clk));
rvdff #(ID_BITS) level2_intpend_id_reg (.*, .din (level_intpend_id[NUM_LEVELS/2][i]), .dout(l2_intpend_id_ff[i]), .clk(free_clk));
end
for (j=NUM_LEVELS/2; j<NUM_LEVELS ; j++) begin : BOT_LEVELS
for (k=0; k<=(pt.PIC_TOTAL_INT_PLUS1)/(2**(j+1)) ; k++) begin : COMPARE
if ( k == (pt.PIC_TOTAL_INT_PLUS1)/(2**(j+1))) begin
assign levelx_intpend_w_prior_en[j+1][k+1] = '0 ;
assign levelx_intpend_id[j+1][k+1] = '0 ;
end
el2_cmp_and_mux #(.ID_BITS(ID_BITS),
.INTPRIORITY_BITS(INTPRIORITY_BITS))
cmp_l1 (
.a_id(levelx_intpend_id[j][2*k]),
.a_priority(levelx_intpend_w_prior_en[j][2*k]),
.b_id(levelx_intpend_id[j][2*k+1]),
.b_priority(levelx_intpend_w_prior_en[j][2*k+1]),
.out_id(levelx_intpend_id[j+1][k]),
.out_priority(levelx_intpend_w_prior_en[j+1][k])) ;
end
end
assign claimid_in[ID_BITS-1:0] = levelx_intpend_id[NUM_LEVELS][0] ; // This is the last level output
assign selected_int_priority[INTPRIORITY_BITS-1:0] = levelx_intpend_w_prior_en[NUM_LEVELS][0] ;
end
else begin : genblock
logic [NUM_LEVELS:0] [pt.PIC_TOTAL_INT_PLUS1+1:0] [INTPRIORITY_BITS-1:0] level_intpend_w_prior_en;
logic [NUM_LEVELS:0] [pt.PIC_TOTAL_INT_PLUS1+1:0] [ID_BITS-1:0] level_intpend_id;
assign level_intpend_w_prior_en[0][pt.PIC_TOTAL_INT_PLUS1+1:0] = {{2*INTPRIORITY_BITS{1'b0}},intpend_w_prior_en[pt.PIC_TOTAL_INT_PLUS1-1:0]} ;
assign level_intpend_id[0][pt.PIC_TOTAL_INT_PLUS1+1:0] = {{2*ID_BITS{1'b1}},intpend_id[pt.PIC_TOTAL_INT_PLUS1-1:0]} ;
/// Do the prioritization of the interrupts here ////////////
// genvar l, m , j, k; already declared outside ifdef
for (l=0; l<NUM_LEVELS ; l++) begin : LEVEL
for (m=0; m<=(pt.PIC_TOTAL_INT_PLUS1)/(2**(l+1)) ; m++) begin : COMPARE
if ( m == (pt.PIC_TOTAL_INT_PLUS1)/(2**(l+1))) begin
assign level_intpend_w_prior_en[l+1][m+1] = '0 ;
assign level_intpend_id[l+1][m+1] = '0 ;
end
el2_cmp_and_mux #(.ID_BITS(ID_BITS),
.INTPRIORITY_BITS(INTPRIORITY_BITS)) cmp_l1 (
.a_id(level_intpend_id[l][2*m]),
.a_priority(level_intpend_w_prior_en[l][2*m]),
.b_id(level_intpend_id[l][2*m+1]),
.b_priority(level_intpend_w_prior_en[l][2*m+1]),
.out_id(level_intpend_id[l+1][m]),
.out_priority(level_intpend_w_prior_en[l+1][m])) ;
end
end
assign claimid_in[ID_BITS-1:0] = level_intpend_id[NUM_LEVELS][0] ; // This is the last level output
assign selected_int_priority[INTPRIORITY_BITS-1:0] = level_intpend_w_prior_en[NUM_LEVELS][0] ;
end
///////////////////////////////////////////////////////////////////////
// Config Reg`
///////////////////////////////////////////////////////////////////////
assign config_reg_we = waddr_config_pic_match & picm_wren_ff;
assign config_reg_re = raddr_config_pic_match & picm_rden_ff;
assign config_reg_in = picm_wr_data_ff[0] ; //
rvdffs #(1) config_reg_ff (.*, .clk(free_clk), .en(config_reg_we), .din (config_reg_in), .dout(config_reg));
assign intpriord = config_reg ;
//////////////////////////////////////////////////////////////////////////
// Send the interrupt to the core if it is above the thresh-hold
//////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////
/// ClaimId Reg and Corresponding PL
///////////////////////////////////////////////////////////
//
assign pl_in_q[INTPRIORITY_BITS-1:0] = intpriord ? ~pl_in : pl_in ;
rvdff #(ID_BITS) claimid_ff (.*, .din (claimid_in[ID_BITS-1:00]), .dout(claimid[ID_BITS-1:00]), .clk(free_clk));
rvdff #(INTPRIORITY_BITS) pl_ff (.*, .din (pl_in_q[INTPRIORITY_BITS-1:0]), .dout(pl[INTPRIORITY_BITS-1:0]), .clk(free_clk));
logic [INTPRIORITY_BITS-1:0] meipt_inv , meicurpl_inv ;
assign meipt_inv[INTPRIORITY_BITS-1:0] = intpriord ? ~meipt[INTPRIORITY_BITS-1:0] : meipt[INTPRIORITY_BITS-1:0] ;
assign meicurpl_inv[INTPRIORITY_BITS-1:0] = intpriord ? ~meicurpl[INTPRIORITY_BITS-1:0] : meicurpl[INTPRIORITY_BITS-1:0] ;
assign mexintpend_in = (( selected_int_priority[INTPRIORITY_BITS-1:0] > meipt_inv[INTPRIORITY_BITS-1:0]) &
( selected_int_priority[INTPRIORITY_BITS-1:0] > meicurpl_inv[INTPRIORITY_BITS-1:0]) );
rvdff #(1) mexintpend_ff (.*, .clk(free_clk), .din (mexintpend_in), .dout(mexintpend));
assign maxint[INTPRIORITY_BITS-1:0] = intpriord ? 0 : 15 ;
assign mhwakeup_in = ( pl_in_q[INTPRIORITY_BITS-1:0] == maxint) ;
rvdff #(1) wake_up_ff (.*, .clk(free_clk), .din (mhwakeup_in), .dout(mhwakeup));
//////////////////////////////////////////////////////////////////////////
// Reads of register.
// 1- intpending
//////////////////////////////////////////////////////////////////////////
assign intpend_reg_read = addr_intpend_base_match & picm_rden_ff ;
assign intpriority_reg_read = raddr_intpriority_base_match & picm_rden_ff;
assign intenable_reg_read = raddr_intenable_base_match & picm_rden_ff;
assign gw_config_reg_read = raddr_config_gw_base_match & picm_rden_ff;
assign intpend_reg_extended[INTPEND_SIZE-1:0] = {{INTPEND_SIZE-pt.PIC_TOTAL_INT_PLUS1{1'b0}},extintsrc_req_gw[pt.PIC_TOTAL_INT_PLUS1-1:0]} ;
for (i=0; i<(INT_GRPS); i++) begin
assign intpend_rd_part_out[i] = (({32{intpend_reg_read & picm_raddr_ff[5:2] == i}}) & intpend_reg_extended[((32*i)+31):(32*i)]) ;
end
always_comb begin : INTPEND_RD
intpend_rd_out = '0 ;
for (int i=0; i<INT_GRPS; i++) begin
intpend_rd_out |= intpend_rd_part_out[i] ;
end
end
always_comb begin : INTEN_RD
intenable_rd_out = '0 ;
intpriority_rd_out = '0 ;
gw_config_rd_out = '0 ;
for (int i=0; i<pt.PIC_TOTAL_INT_PLUS1; i++) begin
if (intenable_reg_re[i]) begin
intenable_rd_out = intenable_reg[i] ;
end
if (intpriority_reg_re[i]) begin
intpriority_rd_out = intpriority_reg[i] ;
end
if (gw_config_reg_re[i]) begin
gw_config_rd_out = gw_config_reg[i] ;
end
end
end
assign picm_rd_data_in[31:0] = ({32{intpend_reg_read }} & intpend_rd_out ) |
({32{intpriority_reg_read }} & {{32-INTPRIORITY_BITS{1'b0}}, intpriority_rd_out } ) |
({32{intenable_reg_read }} & {31'b0 , intenable_rd_out } ) |
({32{gw_config_reg_read }} & {30'b0 , gw_config_rd_out } ) |
({32{config_reg_re }} & {31'b0 , config_reg } ) |
({32{picm_mken_ff & mask[3]}} & {30'b0 , 2'b11 } ) |
({32{picm_mken_ff & mask[2]}} & {31'b0 , 1'b1 } ) |
({32{picm_mken_ff & mask[1]}} & {28'b0 , 4'b1111 } ) |
({32{picm_mken_ff & mask[0]}} & 32'b0 ) ;
assign picm_rd_data[31:0] = picm_bypass_ff ? picm_wr_data_ff[31:0] : picm_rd_data_in[31:0] ;
logic [14:0] address;
assign address[14:0] = picm_raddr_ff[14:0];
`include "pic_map_auto.h"
endmodule
module el2_cmp_and_mux #(parameter ID_BITS=8,
INTPRIORITY_BITS = 4)
(
input logic [ID_BITS-1:0] a_id,
input logic [INTPRIORITY_BITS-1:0] a_priority,
input logic [ID_BITS-1:0] b_id,
input logic [INTPRIORITY_BITS-1:0] b_priority,
output logic [ID_BITS-1:0] out_id,
output logic [INTPRIORITY_BITS-1:0] out_priority
);
logic a_is_lt_b ;
assign a_is_lt_b = ( a_priority[INTPRIORITY_BITS-1:0] < b_priority[INTPRIORITY_BITS-1:0] ) ;
assign out_id[ID_BITS-1:0] = a_is_lt_b ? b_id[ID_BITS-1:0] :
a_id[ID_BITS-1:0] ;
assign out_priority[INTPRIORITY_BITS-1:0] = a_is_lt_b ? b_priority[INTPRIORITY_BITS-1:0] :
a_priority[INTPRIORITY_BITS-1:0] ;
endmodule // cmp_and_mux
module el2_configurable_gw (
input logic gw_clk,
input logic rawclk,
input logic clken,
input logic rst_l,
input logic extintsrc_req ,
input logic meigwctrl_polarity ,
input logic meigwctrl_type ,
input logic meigwclr ,
output logic extintsrc_req_config
);
logic gw_int_pending_in, gw_int_pending, extintsrc_req_sync;
rvsyncss_fpga #(1) sync_inst (
.dout (extintsrc_req_sync),
.din (extintsrc_req),
.*) ;
assign gw_int_pending_in = (extintsrc_req_sync ^ meigwctrl_polarity) | (gw_int_pending & ~meigwclr) ;
rvdff_fpga #(1) int_pend_ff (.*, .clk(gw_clk), .rawclk(rawclk), .clken(clken), .din (gw_int_pending_in), .dout(gw_int_pending));
assign extintsrc_req_config = meigwctrl_type ? ((extintsrc_req_sync ^ meigwctrl_polarity) | gw_int_pending) : (extintsrc_req_sync ^ meigwctrl_polarity) ;
endmodule // configurable_gw

1309
Flow/design/el2_swerv.sv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,730 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
// Function: Top wrapper file with el2_swerv/mem instantiated inside
// Comments:
//
//********************************************************************************
module el2_swerv_wrapper
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk,
input logic rst_l,
input logic dbg_rst_l,
input logic [31:1] rst_vec,
input logic nmi_int,
input logic [31:1] nmi_vec,
input logic [31:1] jtag_id,
output logic [31:0] trace_rv_i_insn_ip,
output logic [31:0] trace_rv_i_address_ip,
output logic trace_rv_i_valid_ip,
output logic trace_rv_i_exception_ip,
output logic [4:0] trace_rv_i_ecause_ip,
output logic trace_rv_i_interrupt_ip,
output logic [31:0] trace_rv_i_tval_ip,
// Bus signals
`ifdef RV_BUILD_AXI4
//-------------------------- LSU AXI signals--------------------------
// AXI Write Channels
output logic lsu_axi_awvalid,
input logic lsu_axi_awready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid,
output logic [31:0] lsu_axi_awaddr,
output logic [3:0] lsu_axi_awregion,
output logic [7:0] lsu_axi_awlen,
output logic [2:0] lsu_axi_awsize,
output logic [1:0] lsu_axi_awburst,
output logic lsu_axi_awlock,
output logic [3:0] lsu_axi_awcache,
output logic [2:0] lsu_axi_awprot,
output logic [3:0] lsu_axi_awqos,
output logic lsu_axi_wvalid,
input logic lsu_axi_wready,
output logic [63:0] lsu_axi_wdata,
output logic [7:0] lsu_axi_wstrb,
output logic lsu_axi_wlast,
input logic lsu_axi_bvalid,
output logic lsu_axi_bready,
input logic [1:0] lsu_axi_bresp,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid,
// AXI Read Channels
output logic lsu_axi_arvalid,
input logic lsu_axi_arready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid,
output logic [31:0] lsu_axi_araddr,
output logic [3:0] lsu_axi_arregion,
output logic [7:0] lsu_axi_arlen,
output logic [2:0] lsu_axi_arsize,
output logic [1:0] lsu_axi_arburst,
output logic lsu_axi_arlock,
output logic [3:0] lsu_axi_arcache,
output logic [2:0] lsu_axi_arprot,
output logic [3:0] lsu_axi_arqos,
input logic lsu_axi_rvalid,
output logic lsu_axi_rready,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid,
input logic [63:0] lsu_axi_rdata,
input logic [1:0] lsu_axi_rresp,
input logic lsu_axi_rlast,
//-------------------------- IFU AXI signals--------------------------
// AXI Write Channels
output logic ifu_axi_awvalid,
input logic ifu_axi_awready,
output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid,
output logic [31:0] ifu_axi_awaddr,
output logic [3:0] ifu_axi_awregion,
output logic [7:0] ifu_axi_awlen,
output logic [2:0] ifu_axi_awsize,
output logic [1:0] ifu_axi_awburst,
output logic ifu_axi_awlock,
output logic [3:0] ifu_axi_awcache,
output logic [2:0] ifu_axi_awprot,
output logic [3:0] ifu_axi_awqos,
output logic ifu_axi_wvalid,
input logic ifu_axi_wready,
output logic [63:0] ifu_axi_wdata,
output logic [7:0] ifu_axi_wstrb,
output logic ifu_axi_wlast,
input logic ifu_axi_bvalid,
output logic ifu_axi_bready,
input logic [1:0] ifu_axi_bresp,
input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_bid,
// AXI Read Channels
output logic ifu_axi_arvalid,
input logic ifu_axi_arready,
output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid,
output logic [31:0] ifu_axi_araddr,
output logic [3:0] ifu_axi_arregion,
output logic [7:0] ifu_axi_arlen,
output logic [2:0] ifu_axi_arsize,
output logic [1:0] ifu_axi_arburst,
output logic ifu_axi_arlock,
output logic [3:0] ifu_axi_arcache,
output logic [2:0] ifu_axi_arprot,
output logic [3:0] ifu_axi_arqos,
input logic ifu_axi_rvalid,
output logic ifu_axi_rready,
input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid,
input logic [63:0] ifu_axi_rdata,
input logic [1:0] ifu_axi_rresp,
input logic ifu_axi_rlast,
//-------------------------- SB AXI signals--------------------------
// AXI Write Channels
output logic sb_axi_awvalid,
input logic sb_axi_awready,
output logic [pt.SB_BUS_TAG-1:0] sb_axi_awid,
output logic [31:0] sb_axi_awaddr,
output logic [3:0] sb_axi_awregion,
output logic [7:0] sb_axi_awlen,
output logic [2:0] sb_axi_awsize,
output logic [1:0] sb_axi_awburst,
output logic sb_axi_awlock,
output logic [3:0] sb_axi_awcache,
output logic [2:0] sb_axi_awprot,
output logic [3:0] sb_axi_awqos,
output logic sb_axi_wvalid,
input logic sb_axi_wready,
output logic [63:0] sb_axi_wdata,
output logic [7:0] sb_axi_wstrb,
output logic sb_axi_wlast,
input logic sb_axi_bvalid,
output logic sb_axi_bready,
input logic [1:0] sb_axi_bresp,
input logic [pt.SB_BUS_TAG-1:0] sb_axi_bid,
// AXI Read Channels
output logic sb_axi_arvalid,
input logic sb_axi_arready,
output logic [pt.SB_BUS_TAG-1:0] sb_axi_arid,
output logic [31:0] sb_axi_araddr,
output logic [3:0] sb_axi_arregion,
output logic [7:0] sb_axi_arlen,
output logic [2:0] sb_axi_arsize,
output logic [1:0] sb_axi_arburst,
output logic sb_axi_arlock,
output logic [3:0] sb_axi_arcache,
output logic [2:0] sb_axi_arprot,
output logic [3:0] sb_axi_arqos,
input logic sb_axi_rvalid,
output logic sb_axi_rready,
input logic [pt.SB_BUS_TAG-1:0] sb_axi_rid,
input logic [63:0] sb_axi_rdata,
input logic [1:0] sb_axi_rresp,
input logic sb_axi_rlast,
//-------------------------- DMA AXI signals--------------------------
// AXI Write Channels
input logic dma_axi_awvalid,
output logic dma_axi_awready,
input logic [pt.DMA_BUS_TAG-1:0] dma_axi_awid,
input logic [31:0] dma_axi_awaddr,
input logic [2:0] dma_axi_awsize,
input logic [2:0] dma_axi_awprot,
input logic [7:0] dma_axi_awlen,
input logic [1:0] dma_axi_awburst,
input logic dma_axi_wvalid,
output logic dma_axi_wready,
input logic [63:0] dma_axi_wdata,
input logic [7:0] dma_axi_wstrb,
input logic dma_axi_wlast,
output logic dma_axi_bvalid,
input logic dma_axi_bready,
output logic [1:0] dma_axi_bresp,
output logic [pt.DMA_BUS_TAG-1:0] dma_axi_bid,
// AXI Read Channels
input logic dma_axi_arvalid,
output logic dma_axi_arready,
input logic [pt.DMA_BUS_TAG-1:0] dma_axi_arid,
input logic [31:0] dma_axi_araddr,
input logic [2:0] dma_axi_arsize,
input logic [2:0] dma_axi_arprot,
input logic [7:0] dma_axi_arlen,
input logic [1:0] dma_axi_arburst,
output logic dma_axi_rvalid,
input logic dma_axi_rready,
output logic [pt.DMA_BUS_TAG-1:0] dma_axi_rid,
output logic [63:0] dma_axi_rdata,
output logic [1:0] dma_axi_rresp,
output logic dma_axi_rlast,
`endif
`ifdef RV_BUILD_AHB_LITE
//// AHB LITE BUS
output logic [31:0] haddr,
output logic [2:0] hburst,
output logic hmastlock,
output logic [3:0] hprot,
output logic [2:0] hsize,
output logic [1:0] htrans,
output logic hwrite,
input logic [63:0] hrdata,
input logic hready,
input logic hresp,
// LSU AHB Master
output logic [31:0] lsu_haddr,
output logic [2:0] lsu_hburst,
output logic lsu_hmastlock,
output logic [3:0] lsu_hprot,
output logic [2:0] lsu_hsize,
output logic [1:0] lsu_htrans,
output logic lsu_hwrite,
output logic [63:0] lsu_hwdata,
input logic [63:0] lsu_hrdata,
input logic lsu_hready,
input logic lsu_hresp,
// Debug Syster Bus AHB
output logic [31:0] sb_haddr,
output logic [2:0] sb_hburst,
output logic sb_hmastlock,
output logic [3:0] sb_hprot,
output logic [2:0] sb_hsize,
output logic [1:0] sb_htrans,
output logic sb_hwrite,
output logic [63:0] sb_hwdata,
input logic [63:0] sb_hrdata,
input logic sb_hready,
input logic sb_hresp,
// DMA Slave
input logic dma_hsel,
input logic [31:0] dma_haddr,
input logic [2:0] dma_hburst,
input logic dma_hmastlock,
input logic [3:0] dma_hprot,
input logic [2:0] dma_hsize,
input logic [1:0] dma_htrans,
input logic dma_hwrite,
input logic [63:0] dma_hwdata,
input logic dma_hreadyin,
output logic [63:0] dma_hrdata,
output logic dma_hreadyout,
output logic dma_hresp,
`endif
// clk ratio signals
input logic lsu_bus_clk_en, // Clock ratio b/w cpu core clk & AHB master interface
input logic ifu_bus_clk_en, // Clock ratio b/w cpu core clk & AHB master interface
input logic dbg_bus_clk_en, // Clock ratio b/w cpu core clk & AHB master interface
input logic dma_bus_clk_en, // Clock ratio b/w cpu core clk & AHB slave interface
// all of these test inputs are brought to top-level; must be tied off based on usage by physical design (ie. icache or not, iccm or not, dccm or not)
input el2_dccm_ext_in_pkt_t [pt.DCCM_NUM_BANKS-1:0] dccm_ext_in_pkt,
input el2_ccm_ext_in_pkt_t [pt.ICCM_NUM_BANKS-1:0] iccm_ext_in_pkt,
input el2_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt,
input el2_ic_tag_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0] ic_tag_ext_in_pkt,
input logic timer_int,
input logic soft_int,
input logic [pt.PIC_TOTAL_INT:1] extintsrc_req,
output logic dec_tlu_perfcnt0, // toggles when slot0 perf counter 0 has an event inc
output logic dec_tlu_perfcnt1,
output logic dec_tlu_perfcnt2,
output logic dec_tlu_perfcnt3,
// ports added by the soc team
input logic jtag_tck, // JTAG clk
input logic jtag_tms, // JTAG TMS
input logic jtag_tdi, // JTAG tdi
input logic jtag_trst_n, // JTAG Reset
output logic jtag_tdo, // JTAG TDO
input logic [31:4] core_id,
// external MPC halt/run interface
input logic mpc_debug_halt_req, // Async halt request
input logic mpc_debug_run_req, // Async run request
input logic mpc_reset_run_req, // Run/halt after reset
output logic mpc_debug_halt_ack, // Halt ack
output logic mpc_debug_run_ack, // Run ack
output logic debug_brkpt_status, // debug breakpoint
input logic i_cpu_halt_req, // Async halt req to CPU
output logic o_cpu_halt_ack, // core response to halt
output logic o_cpu_halt_status, // 1'b1 indicates core is halted
output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sendng a halt or run request
input logic i_cpu_run_req, // Async restart req to CPU
output logic o_cpu_run_ack, // Core response to run req
input logic scan_mode, // To enable scan mode
input logic mbist_mode // to enable mbist
);
logic active_l2clk;
logic free_l2clk;
// DCCM ports
logic dccm_wren;
logic dccm_rden;
logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo;
logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi;
logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo;
logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi;
logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo;
logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi;
logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo;
logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi;
// PIC ports
// Icache & Itag ports
logic [31:1] ic_rw_addr;
logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en ; // Which way to write
logic ic_rd_en ;
logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid; // Valid from the I$ tag valid outside (in flops).
logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit; // ic_rd_hit[3:0]
logic ic_tag_perr; // Ic tag parity error
logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr; // Read/Write addresss to the Icache.
logic ic_debug_rd_en; // Icache debug rd
logic ic_debug_wr_en; // Icache debug wr
logic ic_debug_tag_array; // Debug tag array
logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way; // Debug way. Rd or Wr.
logic [25:0] ictag_debug_rd_data; // Debug icache tag.
logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data;
logic [63:0] ic_rd_data;
logic [70:0] ic_debug_rd_data; // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
logic [70:0] ic_debug_wr_data; // Debug wr cache.
logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr; // ecc error per bank
logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr; // parity error per bank
logic [63:0] ic_premux_data;
logic ic_sel_premux_data;
// ICCM ports
logic [pt.ICCM_BITS-1:1] iccm_rw_addr;
logic iccm_wren;
logic iccm_rden;
logic [2:0] iccm_wr_size;
logic [77:0] iccm_wr_data;
logic iccm_buf_correct_ecc;
logic iccm_correction_state;
logic [63:0] iccm_rd_data;
logic [77:0] iccm_rd_data_ecc;
logic core_rst_l; // Core reset including rst_l and dbg_rst_l
logic jtag_tdoEn;
logic dccm_clk_override;
logic icm_clk_override;
logic dec_tlu_core_ecc_disable;
// zero out the signals not presented at the wrapper instantiation level
`ifdef RV_BUILD_AXI4
//// AHB LITE BUS
logic [31:0] haddr;
logic [2:0] hburst;
logic hmastlock;
logic [3:0] hprot;
logic [2:0] hsize;
logic [1:0] htrans;
logic hwrite;
logic [63:0] hrdata;
logic hready;
logic hresp;
// LSU AHB Master
logic [31:0] lsu_haddr;
logic [2:0] lsu_hburst;
logic lsu_hmastlock;
logic [3:0] lsu_hprot;
logic [2:0] lsu_hsize;
logic [1:0] lsu_htrans;
logic lsu_hwrite;
logic [63:0] lsu_hwdata;
logic [63:0] lsu_hrdata;
logic lsu_hready;
logic lsu_hresp;
// Debug Syster Bus AHB
logic [31:0] sb_haddr;
logic [2:0] sb_hburst;
logic sb_hmastlock;
logic [3:0] sb_hprot;
logic [2:0] sb_hsize;
logic [1:0] sb_htrans;
logic sb_hwrite;
logic [63:0] sb_hwdata;
logic [63:0] sb_hrdata;
logic sb_hready;
logic sb_hresp;
// DMA Slave
logic dma_hsel;
logic [31:0] dma_haddr;
logic [2:0] dma_hburst;
logic dma_hmastlock;
logic [3:0] dma_hprot;
logic [2:0] dma_hsize;
logic [1:0] dma_htrans;
logic dma_hwrite;
logic [63:0] dma_hwdata;
logic dma_hreadyin;
logic [63:0] dma_hrdata;
logic dma_hreadyout;
logic dma_hresp;
// AHB
assign hrdata[63:0] = '0;
assign hready = '0;
assign hresp = '0;
// LSU
assign lsu_hrdata[63:0] = '0;
assign lsu_hready = '0;
assign lsu_hresp = '0;
// Debu
assign sb_hrdata[63:0] = '0;
assign sb_hready = '0;
assign sb_hresp = '0;
// DMA
assign dma_hsel = '0;
assign dma_haddr[31:0] = '0;
assign dma_hburst[2:0] = '0;
assign dma_hmastlock = '0;
assign dma_hprot[3:0] = '0;
assign dma_hsize[2:0] = '0;
assign dma_htrans[1:0] = '0;
assign dma_hwrite = '0;
assign dma_hwdata[63:0] = '0;
assign dma_hreadyin = '0;
`endif // `ifdef RV_BUILD_AXI4
`ifdef RV_BUILD_AHB_LITE
wire lsu_axi_awvalid;
wire lsu_axi_awready;
wire [pt.LSU_BUS_TAG-1:0] lsu_axi_awid;
wire [31:0] lsu_axi_awaddr;
wire [3:0] lsu_axi_awregion;
wire [7:0] lsu_axi_awlen;
wire [2:0] lsu_axi_awsize;
wire [1:0] lsu_axi_awburst;
wire lsu_axi_awlock;
wire [3:0] lsu_axi_awcache;
wire [2:0] lsu_axi_awprot;
wire [3:0] lsu_axi_awqos;
wire lsu_axi_wvalid;
wire lsu_axi_wready;
wire [63:0] lsu_axi_wdata;
wire [7:0] lsu_axi_wstrb;
wire lsu_axi_wlast;
wire lsu_axi_bvalid;
wire lsu_axi_bready;
wire [1:0] lsu_axi_bresp;
wire [pt.LSU_BUS_TAG-1:0] lsu_axi_bid;
// AXI Read Channels
wire lsu_axi_arvalid;
wire lsu_axi_arready;
wire [pt.LSU_BUS_TAG-1:0] lsu_axi_arid;
wire [31:0] lsu_axi_araddr;
wire [3:0] lsu_axi_arregion;
wire [7:0] lsu_axi_arlen;
wire [2:0] lsu_axi_arsize;
wire [1:0] lsu_axi_arburst;
wire lsu_axi_arlock;
wire [3:0] lsu_axi_arcache;
wire [2:0] lsu_axi_arprot;
wire [3:0] lsu_axi_arqos;
wire lsu_axi_rvalid;
wire lsu_axi_rready;
wire [pt.LSU_BUS_TAG-1:0] lsu_axi_rid;
wire [63:0] lsu_axi_rdata;
wire [1:0] lsu_axi_rresp;
wire lsu_axi_rlast;
//-------------------------- IFU AXI signals--------------------------
// AXI Write Channels
wire ifu_axi_awvalid;
wire ifu_axi_awready;
wire [pt.IFU_BUS_TAG-1:0] ifu_axi_awid;
wire [31:0] ifu_axi_awaddr;
wire [3:0] ifu_axi_awregion;
wire [7:0] ifu_axi_awlen;
wire [2:0] ifu_axi_awsize;
wire [1:0] ifu_axi_awburst;
wire ifu_axi_awlock;
wire [3:0] ifu_axi_awcache;
wire [2:0] ifu_axi_awprot;
wire [3:0] ifu_axi_awqos;
wire ifu_axi_wvalid;
wire ifu_axi_wready;
wire [63:0] ifu_axi_wdata;
wire [7:0] ifu_axi_wstrb;
wire ifu_axi_wlast;
wire ifu_axi_bvalid;
wire ifu_axi_bready;
wire [1:0] ifu_axi_bresp;
wire [pt.IFU_BUS_TAG-1:0] ifu_axi_bid;
// AXI Read Channels
wire ifu_axi_arvalid;
wire ifu_axi_arready;
wire [pt.IFU_BUS_TAG-1:0] ifu_axi_arid;
wire [31:0] ifu_axi_araddr;
wire [3:0] ifu_axi_arregion;
wire [7:0] ifu_axi_arlen;
wire [2:0] ifu_axi_arsize;
wire [1:0] ifu_axi_arburst;
wire ifu_axi_arlock;
wire [3:0] ifu_axi_arcache;
wire [2:0] ifu_axi_arprot;
wire [3:0] ifu_axi_arqos;
wire ifu_axi_rvalid;
wire ifu_axi_rready;
wire [pt.IFU_BUS_TAG-1:0] ifu_axi_rid;
wire [63:0] ifu_axi_rdata;
wire [1:0] ifu_axi_rresp;
wire ifu_axi_rlast;
//-------------------------- SB AXI signals--------------------------
// AXI Write Channels
wire sb_axi_awvalid;
wire sb_axi_awready;
wire [pt.SB_BUS_TAG-1:0] sb_axi_awid;
wire [31:0] sb_axi_awaddr;
wire [3:0] sb_axi_awregion;
wire [7:0] sb_axi_awlen;
wire [2:0] sb_axi_awsize;
wire [1:0] sb_axi_awburst;
wire sb_axi_awlock;
wire [3:0] sb_axi_awcache;
wire [2:0] sb_axi_awprot;
wire [3:0] sb_axi_awqos;
wire sb_axi_wvalid;
wire sb_axi_wready;
wire [63:0] sb_axi_wdata;
wire [7:0] sb_axi_wstrb;
wire sb_axi_wlast;
wire sb_axi_bvalid;
wire sb_axi_bready;
wire [1:0] sb_axi_bresp;
wire [pt.SB_BUS_TAG-1:0] sb_axi_bid;
// AXI Read Channels
wire sb_axi_arvalid;
wire sb_axi_arready;
wire [pt.SB_BUS_TAG-1:0] sb_axi_arid;
wire [31:0] sb_axi_araddr;
wire [3:0] sb_axi_arregion;
wire [7:0] sb_axi_arlen;
wire [2:0] sb_axi_arsize;
wire [1:0] sb_axi_arburst;
wire sb_axi_arlock;
wire [3:0] sb_axi_arcache;
wire [2:0] sb_axi_arprot;
wire [3:0] sb_axi_arqos;
wire sb_axi_rvalid;
wire sb_axi_rready;
wire [pt.SB_BUS_TAG-1:0] sb_axi_rid;
wire [63:0] sb_axi_rdata;
wire [1:0] sb_axi_rresp;
wire sb_axi_rlast;
//-------------------------- DMA AXI signals--------------------------
// AXI Write Channels
wire dma_axi_awvalid;
wire dma_axi_awready;
wire [pt.DMA_BUS_TAG-1:0] dma_axi_awid;
wire [31:0] dma_axi_awaddr;
wire [2:0] dma_axi_awsize;
wire [2:0] dma_axi_awprot;
wire [7:0] dma_axi_awlen;
wire [1:0] dma_axi_awburst;
wire dma_axi_wvalid;
wire dma_axi_wready;
wire [63:0] dma_axi_wdata;
wire [7:0] dma_axi_wstrb;
wire dma_axi_wlast;
wire dma_axi_bvalid;
wire dma_axi_bready;
wire [1:0] dma_axi_bresp;
wire [pt.DMA_BUS_TAG-1:0] dma_axi_bid;
// AXI Read Channels
wire dma_axi_arvalid;
wire dma_axi_arready;
wire [pt.DMA_BUS_TAG-1:0] dma_axi_arid;
wire [31:0] dma_axi_araddr;
wire [2:0] dma_axi_arsize;
wire [2:0] dma_axi_arprot;
wire [7:0] dma_axi_arlen;
wire [1:0] dma_axi_arburst;
wire dma_axi_rvalid;
wire dma_axi_rready;
wire [pt.DMA_BUS_TAG-1:0] dma_axi_rid;
wire [63:0] dma_axi_rdata;
wire [1:0] dma_axi_rresp;
wire dma_axi_rlast;
// AXI
assign ifu_axi_awready = 1'b1;
assign ifu_axi_wready = 1'b1;
assign ifu_axi_bvalid = '0;
assign ifu_axi_bresp[1:0] = '0;
assign ifu_axi_bid[pt.IFU_BUS_TAG-1:0] = '0;
`endif // `ifdef RV_BUILD_AHB_LITE
logic dmi_reg_en;
logic [6:0] dmi_reg_addr;
logic dmi_reg_wr_en;
logic [31:0] dmi_reg_wdata;
logic [31:0] dmi_reg_rdata;
// Instantiate the el2_swerv core
el2_swerv #(.pt(pt)) swerv (
.clk(clk),
.*
);
// Instantiate the mem
el2_mem #(.pt(pt)) mem (
.clk(active_l2clk),
.rst_l(core_rst_l),
.*
);
// JTAG/DMI instance
dmi_wrapper dmi_wrapper (
// JTAG signals
.trst_n (jtag_trst_n), // JTAG reset
.tck (jtag_tck), // JTAG clock
.tms (jtag_tms), // Test mode select
.tdi (jtag_tdi), // Test Data Input
.tdo (jtag_tdo), // Test Data Output
.tdoEnable (),
// Processor Signals
.core_rst_n (dbg_rst_l), // Debug reset, active low
.core_clk (clk), // Core clock
.jtag_id (jtag_id), // JTAG ID
.rd_data (dmi_reg_rdata), // Read data from Processor
.reg_wr_data (dmi_reg_wdata), // Write data to Processor
.reg_wr_addr (dmi_reg_addr), // Write address to Processor
.reg_en (dmi_reg_en), // Write interface bit to Processor
.reg_wr_en (dmi_reg_wr_en), // Write enable to Processor
.dmi_hard_reset ()
);
`ifdef RV_ASSERT_ON
// to avoid internal assertions failure at time 0
initial begin
$assertoff(0, swerv);
@ (negedge clk) $asserton(0, swerv);
end
`endif
endmodule

369
Flow/design/exu/el2_exu.sv Normal file
View File

@ -0,0 +1,369 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
module el2_exu
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk, // Top level clock
input logic rst_l, // Reset
input logic scan_mode, // Scan control
input logic [1:0] dec_data_en, // Clock enable {x,r}, one cycle pulse
input logic [1:0] dec_ctl_en, // Clock enable {x,r}, two cycle pulse
input logic [31:0] dbg_cmd_wrdata, // Debug data to primary I0 RS1
input el2_alu_pkt_t i0_ap, // DEC alu {valid,predecodes}
input logic dec_debug_wdata_rs1_d, // Debug select to primary I0 RS1
input el2_predict_pkt_t dec_i0_predict_p_d, // DEC branch predict packet
input logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // DEC predict fghr
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // DEC predict index
input logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // DEC predict branch tag
input logic [31:0] lsu_result_m, // Load result M-stage
input logic [31:0] lsu_nonblock_load_data, // nonblock load data
input logic dec_i0_rs1_en_d, // Qualify GPR RS1 data
input logic dec_i0_rs2_en_d, // Qualify GPR RS2 data
input logic [31:0] gpr_i0_rs1_d, // DEC data gpr
input logic [31:0] gpr_i0_rs2_d, // DEC data gpr
input logic [31:0] dec_i0_immed_d, // DEC data immediate
input logic [31:0] dec_i0_result_r, // DEC result in R-stage
input logic [12:1] dec_i0_br_immed_d, // Branch immediate
input logic dec_i0_alu_decode_d, // Valid to X-stage ALU
input logic dec_i0_branch_d, // Branch in D-stage
input logic dec_i0_select_pc_d, // PC select to RS1
input logic [31:1] dec_i0_pc_d, // Instruction PC
input logic [3:0] dec_i0_rs1_bypass_en_d, // DEC bypass select 1 - X-stage, 0 - dec bypass data
input logic [3:0] dec_i0_rs2_bypass_en_d, // DEC bypass select 1 - X-stage, 0 - dec bypass data
input logic dec_csr_ren_d, // CSR read select
input logic [31:0] dec_csr_rddata_d, // CSR read data
input logic dec_qual_lsu_d, // LSU instruction at D. Use to quiet LSU operands
input el2_mul_pkt_t mul_p, // DEC {valid, operand signs, low, operand bypass}
input el2_div_pkt_t div_p, // DEC {valid, unsigned, rem}
input logic dec_div_cancel, // Cancel the divide operation
input logic [31:1] pred_correct_npc_x, // DEC NPC for correctly predicted branch
input logic dec_tlu_flush_lower_r, // Flush divide and secondary ALUs
input logic [31:1] dec_tlu_flush_path_r, // Redirect target
input logic dec_extint_stall, // External stall mux select
input logic [31:2] dec_tlu_meihap, // External stall mux data
output logic [31:0] exu_lsu_rs1_d, // LSU operand
output logic [31:0] exu_lsu_rs2_d, // LSU operand
output logic exu_flush_final, // Pipe is being flushed this cycle
output logic [31:1] exu_flush_path_final, // Target for the oldest flush source
output logic [31:0] exu_i0_result_x, // Primary ALU result to DEC
output logic [31:1] exu_i0_pc_x, // Primary PC result to DEC
output logic [31:0] exu_csr_rs1_x, // RS1 source for a CSR instruction
output logic [31:1] exu_npc_r, // Divide NPC
output logic [1:0] exu_i0_br_hist_r, // to DEC I0 branch history
output logic exu_i0_br_error_r, // to DEC I0 branch error
output logic exu_i0_br_start_error_r, // to DEC I0 branch start error
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // to DEC I0 branch index
output logic exu_i0_br_valid_r, // to DEC I0 branch valid
output logic exu_i0_br_mp_r, // to DEC I0 branch mispredict
output logic exu_i0_br_middle_r, // to DEC I0 branch middle
output logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // to DEC I0 branch fghr
output logic exu_i0_br_way_r, // to DEC I0 branch way
output el2_predict_pkt_t exu_mp_pkt, // Mispredict branch packet
output logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // Mispredict global history
output logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index
output logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag
output logic exu_pmu_i0_br_misp, // to PMU - I0 E4 branch mispredict
output logic exu_pmu_i0_br_ataken, // to PMU - I0 E4 taken
output logic exu_pmu_i0_pc4, // to PMU - I0 E4 PC
output logic [31:0] exu_div_result, // Divide result
output logic exu_div_wren // Divide write enable to GPR
);
logic [31:0] i0_rs1_bypass_data_d;
logic [31:0] i0_rs2_bypass_data_d;
logic i0_rs1_bypass_en_d;
logic i0_rs2_bypass_en_d;
logic [31:0] i0_rs1_d, i0_rs2_d;
logic [31:0] muldiv_rs1_d;
logic [31:1] pred_correct_npc_r;
logic i0_pred_correct_upper_r;
logic [31:1] i0_flush_path_upper_r;
logic x_data_en, x_data_en_q1, x_data_en_q2, r_data_en, r_data_en_q2;
logic x_ctl_en, r_ctl_en;
logic [pt.BHT_GHR_SIZE-1:0] ghr_d_ns, ghr_d;
logic [pt.BHT_GHR_SIZE-1:0] ghr_x_ns, ghr_x;
logic i0_taken_d;
logic i0_taken_x;
logic i0_valid_d;
logic i0_valid_x;
logic [pt.BHT_GHR_SIZE-1:0] after_flush_eghr;
el2_predict_pkt_t final_predict_mp;
el2_predict_pkt_t i0_predict_newp_d;
logic flush_in_d;
logic [31:0] alu_result_x;
logic mul_valid_x;
logic [31:0] mul_result_x;
el2_predict_pkt_t i0_pp_r;
logic i0_flush_upper_d;
logic [31:1] i0_flush_path_d;
el2_predict_pkt_t i0_predict_p_d;
logic i0_pred_correct_upper_d;
logic i0_flush_upper_x;
logic [31:1] i0_flush_path_x;
el2_predict_pkt_t i0_predict_p_x;
logic i0_pred_correct_upper_x;
logic i0_branch_x;
localparam PREDPIPESIZE = pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1+pt.BHT_GHR_SIZE+pt.BTB_BTAG_SIZE;
logic [PREDPIPESIZE-1:0] predpipe_d, predpipe_x, predpipe_r, final_predpipe_mp;
rvdffpcie #(31) i_flush_path_x_ff (.*, .clk(clk), .en ( x_data_en ), .din ( i0_flush_path_d[31:1] ), .dout( i0_flush_path_x[31:1] ) );
rvdffe #(32) i_csr_rs1_x_ff (.*, .clk(clk), .en ( x_data_en_q1 ), .din ( i0_rs1_d[31:0] ), .dout( exu_csr_rs1_x[31:0] ) );
rvdffppe #($bits(el2_predict_pkt_t)) i_predictpacket_x_ff (.*, .clk(clk), .en ( x_data_en ), .din ( i0_predict_p_d ), .dout( i0_predict_p_x ) );
rvdffe #(PREDPIPESIZE) i_predpipe_x_ff (.*, .clk(clk), .en ( x_data_en_q2 ), .din ( predpipe_d ), .dout( predpipe_x ) );
rvdffe #(PREDPIPESIZE) i_predpipe_r_ff (.*, .clk(clk), .en ( r_data_en_q2 ), .din ( predpipe_x ), .dout( predpipe_r ) );
rvdffe #(4+pt.BHT_GHR_SIZE) i_x_ff (.*, .clk(clk), .en ( x_ctl_en ), .din ({i0_valid_d,i0_taken_d,i0_flush_upper_d,i0_pred_correct_upper_d,ghr_x_ns[pt.BHT_GHR_SIZE-1:0]} ),
.dout({i0_valid_x,i0_taken_x,i0_flush_upper_x,i0_pred_correct_upper_x,ghr_x[pt.BHT_GHR_SIZE-1:0]} ) );
rvdffppe #($bits(el2_predict_pkt_t)+1) i_r_ff0 (.*, .clk(clk), .en ( r_ctl_en ), .din ({i0_pred_correct_upper_x, i0_predict_p_x}),
.dout({i0_pred_correct_upper_r, i0_pp_r }) );
rvdffpcie #(31) i_flush_r_ff (.*, .clk(clk), .en ( r_data_en ), .din ( i0_flush_path_x[31:1] ), .dout( i0_flush_path_upper_r[31:1]) );
rvdffpcie #(31) i_npc_r_ff (.*, .clk(clk), .en ( r_data_en ), .din ( pred_correct_npc_x[31:1] ), .dout( pred_correct_npc_r[31:1] ) );
rvdffie #(pt.BHT_GHR_SIZE+2,1) i_misc_ff (.*, .clk(clk), .din ({ghr_d_ns[pt.BHT_GHR_SIZE-1:0], mul_p.valid, dec_i0_branch_d}),
.dout({ghr_d[pt.BHT_GHR_SIZE-1:0] , mul_valid_x, i0_branch_x}) );
assign predpipe_d[PREDPIPESIZE-1:0]
= {i0_predict_fghr_d, i0_predict_index_d, i0_predict_btag_d};
assign i0_rs1_bypass_en_d = dec_i0_rs1_bypass_en_d[0] | dec_i0_rs1_bypass_en_d[1] | dec_i0_rs1_bypass_en_d[2] | dec_i0_rs1_bypass_en_d[3];
assign i0_rs2_bypass_en_d = dec_i0_rs2_bypass_en_d[0] | dec_i0_rs2_bypass_en_d[1] | dec_i0_rs2_bypass_en_d[2] | dec_i0_rs2_bypass_en_d[3];
assign i0_rs1_bypass_data_d[31:0]=({32{dec_i0_rs1_bypass_en_d[0]}} & dec_i0_result_r[31:0] ) |
({32{dec_i0_rs1_bypass_en_d[1]}} & lsu_result_m[31:0] ) |
({32{dec_i0_rs1_bypass_en_d[2]}} & exu_i0_result_x[31:0] ) |
({32{dec_i0_rs1_bypass_en_d[3]}} & lsu_nonblock_load_data[31:0]);
assign i0_rs2_bypass_data_d[31:0]=({32{dec_i0_rs2_bypass_en_d[0]}} & dec_i0_result_r[31:0] ) |
({32{dec_i0_rs2_bypass_en_d[1]}} & lsu_result_m[31:0] ) |
({32{dec_i0_rs2_bypass_en_d[2]}} & exu_i0_result_x[31:0] ) |
({32{dec_i0_rs2_bypass_en_d[3]}} & lsu_nonblock_load_data[31:0]);
assign i0_rs1_d[31:0] = ({32{ i0_rs1_bypass_en_d }} & i0_rs1_bypass_data_d[31:0]) |
({32{~i0_rs1_bypass_en_d & dec_i0_select_pc_d }} & {dec_i0_pc_d[31:1],1'b0} ) | // for jal's
({32{~i0_rs1_bypass_en_d & dec_debug_wdata_rs1_d }} & dbg_cmd_wrdata[31:0] ) |
({32{~i0_rs1_bypass_en_d & ~dec_debug_wdata_rs1_d & dec_i0_rs1_en_d}} & gpr_i0_rs1_d[31:0] );
assign i0_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & dec_i0_rs2_en_d}} & gpr_i0_rs2_d[31:0] ) |
({32{~i0_rs2_bypass_en_d }} & dec_i0_immed_d[31:0] ) |
({32{ i0_rs2_bypass_en_d }} & i0_rs2_bypass_data_d[31:0]);
assign exu_lsu_rs1_d[31:0] = ({32{~i0_rs1_bypass_en_d & ~dec_extint_stall & dec_i0_rs1_en_d & dec_qual_lsu_d}} & gpr_i0_rs1_d[31:0] ) |
({32{ i0_rs1_bypass_en_d & ~dec_extint_stall & dec_qual_lsu_d}} & i0_rs1_bypass_data_d[31:0]) |
({32{ dec_extint_stall & dec_qual_lsu_d}} & {dec_tlu_meihap[31:2],2'b0});
assign exu_lsu_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & ~dec_extint_stall & dec_i0_rs2_en_d & dec_qual_lsu_d}} & gpr_i0_rs2_d[31:0] ) |
({32{ i0_rs2_bypass_en_d & ~dec_extint_stall & dec_qual_lsu_d}} & i0_rs2_bypass_data_d[31:0]);
assign muldiv_rs1_d[31:0] = ({32{~i0_rs1_bypass_en_d & dec_i0_rs1_en_d}} & gpr_i0_rs1_d[31:0] ) |
({32{ i0_rs1_bypass_en_d }} & i0_rs1_bypass_data_d[31:0]);
assign x_data_en = dec_data_en[1];
assign x_data_en_q1 = dec_data_en[1] & dec_csr_ren_d;
assign x_data_en_q2 = dec_data_en[1] & dec_i0_branch_d;
assign r_data_en = dec_data_en[0];
assign r_data_en_q2 = dec_data_en[0] & i0_branch_x;
assign x_ctl_en = dec_ctl_en[1];
assign r_ctl_en = dec_ctl_en[0];
el2_exu_alu_ctl #(.pt(pt)) i_alu (.*,
.enable ( x_data_en ), // I
.pp_in ( i0_predict_newp_d ), // I
.valid_in ( dec_i0_alu_decode_d ), // I
.flush_upper_x ( i0_flush_upper_x ), // I
.flush_lower_r ( dec_tlu_flush_lower_r ), // I
.a_in ( i0_rs1_d[31:0] ), // I
.b_in ( i0_rs2_d[31:0] ), // I
.pc_in ( dec_i0_pc_d[31:1] ), // I
.brimm_in ( dec_i0_br_immed_d[12:1] ), // I
.ap ( i0_ap ), // I
.csr_ren_in ( dec_csr_ren_d ), // I
.csr_rddata_in ( dec_csr_rddata_d[31:0] ), // I
.result_ff ( alu_result_x[31:0] ), // O
.flush_upper_out ( i0_flush_upper_d ), // O
.flush_final_out ( exu_flush_final ), // O
.flush_path_out ( i0_flush_path_d[31:1] ), // O
.predict_p_out ( i0_predict_p_d ), // O
.pred_correct_out ( i0_pred_correct_upper_d ), // O
.pc_ff ( exu_i0_pc_x[31:1] )); // O
el2_exu_mul_ctl #(.pt(pt)) i_mul (.*,
.mul_p ( mul_p & {$bits(el2_mul_pkt_t){mul_p.valid}} ), // I
.rs1_in ( muldiv_rs1_d[31:0] & {32{mul_p.valid}} ), // I
.rs2_in ( i0_rs2_d[31:0] & {32{mul_p.valid}} ), // I
.result_x ( mul_result_x[31:0] )); // O
el2_exu_div_ctl #(.pt(pt)) i_div (.*,
.cancel ( dec_div_cancel ), // I
.dp ( div_p ), // I
.dividend ( muldiv_rs1_d[31:0] ), // I
.divisor ( i0_rs2_d[31:0] ), // I
.finish_dly ( exu_div_wren ), // O
.out ( exu_div_result[31:0] )); // O
assign exu_i0_result_x[31:0] = (mul_valid_x) ? mul_result_x[31:0] : alu_result_x[31:0];
always_comb begin
i0_predict_newp_d = dec_i0_predict_p_d;
i0_predict_newp_d.boffset = dec_i0_pc_d[1]; // from the start of inst
end
assign exu_pmu_i0_br_misp = i0_pp_r.misp;
assign exu_pmu_i0_br_ataken = i0_pp_r.ataken;
assign exu_pmu_i0_pc4 = i0_pp_r.pc4;
assign i0_valid_d = i0_predict_p_d.valid & dec_i0_alu_decode_d & ~dec_tlu_flush_lower_r;
assign i0_taken_d = (i0_predict_p_d.ataken & dec_i0_alu_decode_d);
if(pt.BTB_ENABLE==1) begin
// maintain GHR at D
assign ghr_d_ns[pt.BHT_GHR_SIZE-1:0]
= ({pt.BHT_GHR_SIZE{~dec_tlu_flush_lower_r & i0_valid_d}} & {ghr_d[pt.BHT_GHR_SIZE-2:0], i0_taken_d}) |
({pt.BHT_GHR_SIZE{~dec_tlu_flush_lower_r & ~i0_valid_d}} & ghr_d[pt.BHT_GHR_SIZE-1:0] ) |
({pt.BHT_GHR_SIZE{ dec_tlu_flush_lower_r }} & ghr_x[pt.BHT_GHR_SIZE-1:0] );
// maintain GHR at X
assign ghr_x_ns[pt.BHT_GHR_SIZE-1:0]
= ({pt.BHT_GHR_SIZE{ i0_valid_x}} & {ghr_x[pt.BHT_GHR_SIZE-2:0], i0_taken_x}) |
({pt.BHT_GHR_SIZE{~i0_valid_x}} & ghr_x[pt.BHT_GHR_SIZE-1:0] ) ;
assign exu_i0_br_valid_r = i0_pp_r.valid;
assign exu_i0_br_mp_r = i0_pp_r.misp;
assign exu_i0_br_way_r = i0_pp_r.way;
assign exu_i0_br_hist_r[1:0] = {2{i0_pp_r.valid}} & i0_pp_r.hist[1:0];
assign exu_i0_br_error_r = i0_pp_r.br_error;
assign exu_i0_br_middle_r = i0_pp_r.pc4 ^ i0_pp_r.boffset;
assign exu_i0_br_start_error_r = i0_pp_r.br_start_error;
assign {exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0],
exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]}= predpipe_r[PREDPIPESIZE-1:pt.BTB_BTAG_SIZE];
assign final_predict_mp = (i0_flush_upper_x) ? i0_predict_p_x : '0;
assign final_predpipe_mp[PREDPIPESIZE-1:0] = (i0_flush_upper_x) ? predpipe_x : '0;
assign after_flush_eghr[pt.BHT_GHR_SIZE-1:0] = (i0_flush_upper_x & ~dec_tlu_flush_lower_r) ? ghr_d[pt.BHT_GHR_SIZE-1:0] : ghr_x[pt.BHT_GHR_SIZE-1:0];
assign exu_mp_pkt.valid = final_predict_mp.valid;
assign exu_mp_pkt.way = final_predict_mp.way;
assign exu_mp_pkt.misp = final_predict_mp.misp;
assign exu_mp_pkt.pcall = final_predict_mp.pcall;
assign exu_mp_pkt.pja = final_predict_mp.pja;
assign exu_mp_pkt.pret = final_predict_mp.pret;
assign exu_mp_pkt.ataken = final_predict_mp.ataken;
assign exu_mp_pkt.boffset = final_predict_mp.boffset;
assign exu_mp_pkt.pc4 = final_predict_mp.pc4;
assign exu_mp_pkt.hist[1:0] = final_predict_mp.hist[1:0];
assign exu_mp_pkt.toffset[11:0] = final_predict_mp.toffset[11:0];
assign exu_mp_fghr[pt.BHT_GHR_SIZE-1:0] = after_flush_eghr[pt.BHT_GHR_SIZE-1:0];
assign {exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],
exu_mp_btag[pt.BTB_BTAG_SIZE-1:0]} = final_predpipe_mp[PREDPIPESIZE-pt.BHT_GHR_SIZE-1:0];
assign exu_mp_eghr[pt.BHT_GHR_SIZE-1:0] = final_predpipe_mp[PREDPIPESIZE-1:pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+pt.BTB_BTAG_SIZE+1]; // mp ghr for bht write
end // if (pt.BTB_ENABLE==1)
else begin
assign ghr_d_ns = '0;
assign ghr_x_ns = '0;
assign exu_mp_pkt = '0;
assign exu_mp_eghr = '0;
assign exu_mp_fghr = '0;
assign exu_mp_index = '0;
assign exu_mp_btag = '0;
assign exu_i0_br_hist_r = '0;
assign exu_i0_br_error_r = '0;
assign exu_i0_br_start_error_r = '0;
assign exu_i0_br_index_r = '0;
assign exu_i0_br_valid_r = '0;
assign exu_i0_br_mp_r = '0;
assign exu_i0_br_middle_r = '0;
assign exu_i0_br_fghr_r = '0;
assign exu_i0_br_way_r = '0;
end // else: !if(pt.BTB_ENABLE==1)
assign exu_flush_path_final[31:1] = ( {31{ dec_tlu_flush_lower_r }} & dec_tlu_flush_path_r[31:1] ) |
( {31{~dec_tlu_flush_lower_r & i0_flush_upper_d}} & i0_flush_path_d[31:1] );
assign exu_npc_r[31:1] = (i0_pred_correct_upper_r) ? pred_correct_npc_r[31:1] : i0_flush_path_upper_r[31:1];
endmodule // el2_exu

View File

@ -0,0 +1,576 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
module el2_exu_alu_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk, // Top level clock
input logic rst_l, // Reset
input logic scan_mode, // Scan control
input logic flush_upper_x, // Branch flush from previous cycle
input logic flush_lower_r, // Master flush of entire pipeline
input logic enable, // Clock enable
input logic valid_in, // Valid
input el2_alu_pkt_t ap, // predecodes
input logic csr_ren_in, // CSR select
input logic [31:0] csr_rddata_in, // CSR data
input logic signed [31:0] a_in, // A operand
input logic [31:0] b_in, // B operand
input logic [31:1] pc_in, // for pc=pc+2,4 calculations
input el2_predict_pkt_t pp_in, // Predicted branch structure
input logic [12:1] brimm_in, // Branch offset
output logic [31:0] result_ff, // final result
output logic flush_upper_out, // Branch flush
output logic flush_final_out, // Branch flush or flush entire pipeline
output logic [31:1] flush_path_out, // Branch flush PC
output logic [31:1] pc_ff, // flopped PC
output logic pred_correct_out, // NPC control
output el2_predict_pkt_t predict_p_out // Predicted branch structure
);
logic [31:0] zba_a_in;
logic [31:0] aout;
logic cout,ov,neg;
logic [31:0] lout;
logic [31:0] sout;
logic sel_shift;
logic sel_adder;
logic slt_one;
logic actual_taken;
logic [31:1] pcout;
logic cond_mispredict;
logic target_mispredict;
logic eq, ne, lt, ge;
logic any_jal;
logic [1:0] newhist;
logic sel_pc;
logic [31:0] csr_write_data;
logic [31:0] result;
// *** Start - BitManip ***
// Zbb
logic ap_clz;
logic ap_ctz;
logic ap_cpop;
logic ap_sext_b;
logic ap_sext_h;
logic ap_min;
logic ap_max;
logic ap_rol;
logic ap_ror;
logic ap_rev8;
logic ap_orc_b;
logic ap_zbb;
// Zbs
logic ap_bset;
logic ap_bclr;
logic ap_binv;
logic ap_bext;
// Zbp
logic ap_pack;
logic ap_packu;
logic ap_packh;
// Zba
logic ap_sh1add;
logic ap_sh2add;
logic ap_sh3add;
logic ap_zba;
if (pt.BITMANIP_ZBB == 1)
begin
assign ap_clz = ap.clz;
assign ap_ctz = ap.ctz;
assign ap_cpop = ap.cpop;
assign ap_sext_b = ap.sext_b;
assign ap_sext_h = ap.sext_h;
assign ap_min = ap.min;
assign ap_max = ap.max;
end
else
begin
assign ap_clz = 1'b0;
assign ap_ctz = 1'b0;
assign ap_cpop = 1'b0;
assign ap_sext_b = 1'b0;
assign ap_sext_h = 1'b0;
assign ap_min = 1'b0;
assign ap_max = 1'b0;
end
if ( (pt.BITMANIP_ZBB == 1) | (pt.BITMANIP_ZBP == 1) )
begin
assign ap_rol = ap.rol;
assign ap_ror = ap.ror;
assign ap_rev8 = ap.grev & (b_in[4:0] == 5'b11000);
assign ap_orc_b = ap.gorc & (b_in[4:0] == 5'b00111);
assign ap_zbb = ap.zbb;
end
else
begin
assign ap_rol = 1'b0;
assign ap_ror = 1'b0;
assign ap_rev8 = 1'b0;
assign ap_orc_b = 1'b0;
assign ap_zbb = 1'b0;
end
if (pt.BITMANIP_ZBS == 1)
begin
assign ap_bset = ap.bset;
assign ap_bclr = ap.bclr;
assign ap_binv = ap.binv;
assign ap_bext = ap.bext;
end
else
begin
assign ap_bset = 1'b0;
assign ap_bclr = 1'b0;
assign ap_binv = 1'b0;
assign ap_bext = 1'b0;
end
if (pt.BITMANIP_ZBP == 1)
begin
assign ap_packu = ap.packu;
end
else
begin
assign ap_packu = 1'b0;
end
if ( (pt.BITMANIP_ZBB == 1) | (pt.BITMANIP_ZBP == 1) | (pt.BITMANIP_ZBE == 1) | (pt.BITMANIP_ZBF == 1) )
begin
assign ap_pack = ap.pack;
assign ap_packh = ap.packh;
end
else
begin
assign ap_pack = 1'b0;
assign ap_packh = 1'b0;
end
if (pt.BITMANIP_ZBA == 1)
begin
assign ap_sh1add = ap.sh1add;
assign ap_sh2add = ap.sh2add;
assign ap_sh3add = ap.sh3add;
assign ap_zba = ap.zba;
end
else
begin
assign ap_sh1add = 1'b0;
assign ap_sh2add = 1'b0;
assign ap_sh3add = 1'b0;
assign ap_zba = 1'b0;
end
// *** End - BitManip ***
rvdffpcie #(31) i_pc_ff (.*, .clk(clk), .en(enable), .din(pc_in[31:1]), .dout(pc_ff[31:1])); // any PC is run through here - doesn't have to be alu
rvdffe #(32) i_result_ff (.*, .clk(clk), .en(enable & valid_in), .din(result[31:0]), .dout(result_ff[31:0]));
// immediates are just muxed into rs2
// add => add=1;
// sub => add=1; sub=1;
// and => lctl=3
// or => lctl=2
// xor => lctl=1
// sll => sctl=3
// srl => sctl=2
// sra => sctl=1
// slt => slt
// lui => lctl=2; or x0, imm20 previously << 12
// auipc => add; add pc, imm20 previously << 12
// beq => bctl=4; add; add x0, pc, sext(offset[12:1])
// bne => bctl=3; add; add x0, pc, sext(offset[12:1])
// blt => bctl=2; add; add x0, pc, sext(offset[12:1])
// bge => bctl=1; add; add x0, pc, sext(offset[12:1])
// jal => rs1=pc {pc[31:1],1'b0}, rs2=sext(offset20:1]); rd=pc+[2,4]
// jalr => rs1=rs1, rs2=sext(offset20:1]); rd=pc+[2,4]
assign zba_a_in[31:0] = ( {32{ ap_sh1add}} & {a_in[30:0],1'b0} ) |
( {32{ ap_sh2add}} & {a_in[29:0],2'b0} ) |
( {32{ ap_sh3add}} & {a_in[28:0],3'b0} ) |
( {32{~ap_zba }} & a_in[31:0] );
logic [31:0] bm;
assign bm[31:0] = ( ap.sub ) ? ~b_in[31:0] : b_in[31:0];
assign {cout, aout[31:0]} = {1'b0, zba_a_in[31:0]} + {1'b0, bm[31:0]} + {32'b0, ap.sub};
assign ov = (~a_in[31] & ~bm[31] & aout[31]) |
( a_in[31] & bm[31] & ~aout[31] );
assign lt = (~ap.unsign & (neg ^ ov)) |
( ap.unsign & ~cout);
assign eq = (a_in[31:0] == b_in[31:0]);
assign ne = ~eq;
assign neg = aout[31];
assign ge = ~lt;
assign lout[31:0] = ( {32{csr_ren_in }} & csr_rddata_in[31:0] ) |
( {32{ap.land & ~ap_zbb}} & a_in[31:0] & b_in[31:0] ) |
( {32{ap.lor & ~ap_zbb}} & (a_in[31:0] | b_in[31:0]) ) |
( {32{ap.lxor & ~ap_zbb}} & (a_in[31:0] ^ b_in[31:0]) ) |
( {32{ap.land & ap_zbb}} & a_in[31:0] & ~b_in[31:0] ) |
( {32{ap.lor & ap_zbb}} & (a_in[31:0] | ~b_in[31:0]) ) |
( {32{ap.lxor & ap_zbb}} & (a_in[31:0] ^ ~b_in[31:0]) );
// * * * * * * * * * * * * * * * * * * BitManip : ROL,ROR * * * * * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * BitManip : ZBEXT * * * * * * * * * * * * * * * * * *
logic [5:0] shift_amount;
logic [31:0] shift_mask;
logic [62:0] shift_extend;
logic [62:0] shift_long;
assign shift_amount[5:0] = ( { 6{ap.sll}} & (6'd32 - {1'b0,b_in[4:0]}) ) | // [5] unused
( { 6{ap.srl}} & {1'b0,b_in[4:0]} ) |
( { 6{ap.sra}} & {1'b0,b_in[4:0]} ) |
( { 6{ap_rol}} & (6'd32 - {1'b0,b_in[4:0]}) ) |
( { 6{ap_ror}} & {1'b0,b_in[4:0]} ) |
( { 6{ap_bext}} & {1'b0,b_in[4:0]} );
assign shift_mask[31:0] = ( 32'hffffffff << ({5{ap.sll}} & b_in[4:0]) );
assign shift_extend[31:0] = a_in[31:0];
assign shift_extend[62:32] = ( {31{ap.sra}} & {31{a_in[31]}} ) |
( {31{ap.sll}} & a_in[30:0] ) |
( {31{ap_rol}} & a_in[30:0] ) |
( {31{ap_ror}} & a_in[30:0] );
assign shift_long[62:0] = ( shift_extend[62:0] >> shift_amount[4:0] ); // 62-32 unused
assign sout[31:0] = shift_long[31:0] & shift_mask[31:0];
// * * * * * * * * * * * * * * * * * * BitManip : CLZ,CTZ * * * * * * * * * * * * * * * * * *
logic bitmanip_clz_ctz_sel;
logic [31:0] bitmanip_a_reverse_ff;
logic [31:0] bitmanip_lzd_in;
logic [5:0] bitmanip_dw_lzd_enc;
logic [5:0] bitmanip_clz_ctz_result;
assign bitmanip_clz_ctz_sel = ap_clz | ap_ctz;
assign bitmanip_a_reverse_ff[31:0] = {a_in[0], a_in[1], a_in[2], a_in[3], a_in[4], a_in[5], a_in[6], a_in[7],
a_in[8], a_in[9], a_in[10], a_in[11], a_in[12], a_in[13], a_in[14], a_in[15],
a_in[16], a_in[17], a_in[18], a_in[19], a_in[20], a_in[21], a_in[22], a_in[23],
a_in[24], a_in[25], a_in[26], a_in[27], a_in[28], a_in[29], a_in[30], a_in[31]};
assign bitmanip_lzd_in[31:0] = ( {32{ap_clz}} & a_in[31:0] ) |
( {32{ap_ctz}} & bitmanip_a_reverse_ff[31:0]);
logic [31:0] bitmanip_lzd_os;
integer i;
logic found;
always_comb
begin
bitmanip_lzd_os[31:0] = bitmanip_lzd_in[31:0];
bitmanip_dw_lzd_enc[5:0]= 6'b0;
found = 1'b0;
for (int i=0; i<32 && found==0; i++) begin
if (bitmanip_lzd_os[31] == 1'b0) begin
bitmanip_dw_lzd_enc[5:0]= bitmanip_dw_lzd_enc[5:0] + 6'b00_0001;
bitmanip_lzd_os[31:0] = bitmanip_lzd_os[31:0] << 1;
end
else
found=1'b1;
end
end
assign bitmanip_clz_ctz_result[5:0] = {6{bitmanip_clz_ctz_sel}} & {bitmanip_dw_lzd_enc[5],( {5{~bitmanip_dw_lzd_enc[5]}} & bitmanip_dw_lzd_enc[4:0] )};
// * * * * * * * * * * * * * * * * * * BitManip : CPOP * * * * * * * * * * * * * * * * * *
logic [5:0] bitmanip_cpop;
logic [5:0] bitmanip_cpop_result;
integer bitmanip_cpop_i;
always_comb
begin
bitmanip_cpop[5:0] = 6'b0;
for (bitmanip_cpop_i=0; bitmanip_cpop_i<32; bitmanip_cpop_i++)
begin
bitmanip_cpop[5:0] = bitmanip_cpop[5:0] + {5'b0,a_in[bitmanip_cpop_i]};
end // FOR bitmanip_cpop_i
end // ALWAYS_COMB
assign bitmanip_cpop_result[5:0] = {6{ap_cpop}} & bitmanip_cpop[5:0];
// * * * * * * * * * * * * * * * * * * BitManip : SEXT_B,SEXT_H * * * * * * * * * * * * * * * * *
logic [31:0] bitmanip_sext_result;
assign bitmanip_sext_result[31:0] = ( {32{ap_sext_b}} & { {24{a_in[7]}} ,a_in[7:0] } ) |
( {32{ap_sext_h}} & { {16{a_in[15]}},a_in[15:0] } );
// * * * * * * * * * * * * * * * * * * BitManip : MIN,MAX,MINU,MAXU * * * * * * * * * * * * * * *
logic bitmanip_minmax_sel;
logic [31:0] bitmanip_minmax_result;
assign bitmanip_minmax_sel = ap_min | ap_max;
logic bitmanip_minmax_sel_a;
assign bitmanip_minmax_sel_a = ge ^ ap_min;
assign bitmanip_minmax_result[31:0] = ({32{bitmanip_minmax_sel & bitmanip_minmax_sel_a}} & a_in[31:0]) |
({32{bitmanip_minmax_sel & ~bitmanip_minmax_sel_a}} & b_in[31:0]);
// * * * * * * * * * * * * * * * * * * BitManip : PACK, PACKU, PACKH * * * * * * * * * * * * * * *
logic [31:0] bitmanip_pack_result;
logic [31:0] bitmanip_packu_result;
logic [31:0] bitmanip_packh_result;
assign bitmanip_pack_result[31:0] = {32{ap_pack}} & {b_in[15:0], a_in[15:0]};
assign bitmanip_packu_result[31:0] = {32{ap_packu}} & {b_in[31:16],a_in[31:16]};
assign bitmanip_packh_result[31:0] = {32{ap_packh}} & {16'b0,b_in[7:0],a_in[7:0]};
// * * * * * * * * * * * * * * * * * * BitManip : REV, ORC_B * * * * * * * * * * * * * * * * * *
logic [31:0] bitmanip_rev8_result;
logic [31:0] bitmanip_orc_b_result;
assign bitmanip_rev8_result[31:0] = {32{ap_rev8}} & {a_in[7:0],a_in[15:8],a_in[23:16],a_in[31:24]};
// uint32_t gorc32(uint32_t rs1, uint32_t rs2)
// {
// uint32_t x = rs1;
// int shamt = rs2 & 31; ORC.B ORC16
// if (shamt & 1) x |= ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); 1 0
// if (shamt & 2) x |= ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); 1 0
// if (shamt & 4) x |= ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); 1 0
// if (shamt & 8) x |= ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); 0 0
// if (shamt & 16) x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); 0 1
// return x;
// }
// BEFORE 31 , 30 , 29 , 28 , 27 , 26, 25, 24
// shamt[0] b = a31|a30,a31|a30,a29|a28,a29|a28, a27|a26,a27|a26,a25|a24,a25|a24
// shamt[1] c = b31|b29,b30|b28,b31|b29,b30|b28, b27|b25,b26|b24,b27|b25,b26|b24
// shamt[2] d = c31|c27,c30|c26,c29|c25,c28|c24, c31|c27,c30|c26,c29|c25,c28|c24
//
// Expand d31 = c31 | c27;
// = b31 | b29 | b27 | b25;
// = a31|a30 | a29|a28 | a27|a26 | a25|a24
assign bitmanip_orc_b_result[31:0] = {32{ap_orc_b}} & { {8{| a_in[31:24]}}, {8{| a_in[23:16]}}, {8{| a_in[15:8]}}, {8{| a_in[7:0]}} };
// * * * * * * * * * * * * * * * * * * BitManip : ZBSET, ZBCLR, ZBINV * * * * * * * * * * * * * *
logic [31:0] bitmanip_sb_1hot;
logic [31:0] bitmanip_sb_data;
assign bitmanip_sb_1hot[31:0] = ( 32'h00000001 << b_in[4:0] );
assign bitmanip_sb_data[31:0] = ( {32{ap_bset}} & ( a_in[31:0] | bitmanip_sb_1hot[31:0]) ) |
( {32{ap_bclr}} & ( a_in[31:0] & ~bitmanip_sb_1hot[31:0]) ) |
( {32{ap_binv}} & ( a_in[31:0] ^ bitmanip_sb_1hot[31:0]) );
assign sel_shift = ap.sll | ap.srl | ap.sra | ap_rol | ap_ror;
assign sel_adder = (ap.add | ap.sub | ap_zba) & ~ap.slt & ~ap_min & ~ap_max;
assign sel_pc = ap.jal | pp_in.pcall | pp_in.pja | pp_in.pret;
assign csr_write_data[31:0]= (ap.csr_imm) ? b_in[31:0] : a_in[31:0];
assign slt_one = ap.slt & lt;
assign result[31:0] = lout[31:0] |
({32{sel_shift}} & sout[31:0] ) |
({32{sel_adder}} & aout[31:0] ) |
({32{sel_pc}} & {pcout[31:1],1'b0} ) |
({32{ap.csr_write}} & csr_write_data[31:0] ) |
{31'b0, slt_one} |
({32{ap_bext}} & {31'b0, sout[0]} ) |
{26'b0, bitmanip_clz_ctz_result[5:0]} |
{26'b0, bitmanip_cpop_result[5:0]} |
bitmanip_sext_result[31:0] |
bitmanip_minmax_result[31:0] |
bitmanip_pack_result[31:0] |
bitmanip_packu_result[31:0] |
bitmanip_packh_result[31:0] |
bitmanip_rev8_result[31:0] |
bitmanip_orc_b_result[31:0] |
bitmanip_sb_data[31:0];
// *** branch handling ***
assign any_jal = ap.jal |
pp_in.pcall |
pp_in.pja |
pp_in.pret;
assign actual_taken = (ap.beq & eq) |
(ap.bne & ne) |
(ap.blt & lt) |
(ap.bge & ge) |
any_jal;
// for a conditional br pcout[] will be the opposite of the branch prediction
// for jal or pcall, it will be the link address pc+2 or pc+4
rvbradder ibradder (
.pc ( pc_in[31:1] ),
.offset ( brimm_in[12:1] ),
.dout ( pcout[31:1] ));
// pred_correct is for the npc logic
// pred_correct indicates not to use the flush_path
// for any_jal pred_correct==0
assign pred_correct_out = (valid_in & ap.predict_nt & ~actual_taken & ~any_jal) |
(valid_in & ap.predict_t & actual_taken & ~any_jal);
// for any_jal adder output is the flush path
assign flush_path_out[31:1]= (any_jal) ? aout[31:1] : pcout[31:1];
// pcall and pret are included here
assign cond_mispredict = (ap.predict_t & ~actual_taken) |
(ap.predict_nt & actual_taken);
// target mispredicts on ret's
assign target_mispredict = pp_in.pret & (pp_in.prett[31:1] != aout[31:1]);
assign flush_upper_out = (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x & ~flush_lower_r;
assign flush_final_out = ( (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x ) | flush_lower_r;
// .i 3
// .o 2
// .ilb hist[1] hist[0] taken
// .ob newhist[1] newhist[0]
// .type fd
//
// 00 0 01
// 01 0 01
// 10 0 00
// 11 0 10
// 00 1 10
// 01 1 00
// 10 1 11
// 11 1 11
assign newhist[1] = ( pp_in.hist[1] & pp_in.hist[0]) | (~pp_in.hist[0] & actual_taken);
assign newhist[0] = (~pp_in.hist[1] & ~actual_taken) | ( pp_in.hist[1] & actual_taken);
always_comb begin
predict_p_out = pp_in;
predict_p_out.misp = ~flush_upper_x & ~flush_lower_r & (cond_mispredict | target_mispredict);
predict_p_out.ataken = actual_taken;
predict_p_out.hist[1] = newhist[1];
predict_p_out.hist[0] = newhist[0];
end
endmodule // el2_exu_alu_ctl

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,737 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
module el2_exu_mul_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk, // Top level clock
input logic rst_l, // Reset
input logic scan_mode, // Scan mode
input el2_mul_pkt_t mul_p, // {Valid, RS1 signed operand, RS2 signed operand, Select low 32-bits of result}
input logic [31:0] rs1_in, // A operand
input logic [31:0] rs2_in, // B operand
output logic [31:0] result_x // Result
);
logic mul_x_enable;
logic bit_x_enable;
logic signed [32:0] rs1_ext_in;
logic signed [32:0] rs2_ext_in;
logic [65:0] prod_x;
logic low_x;
// *** Start - BitManip ***
logic bitmanip_sel_d;
logic bitmanip_sel_x;
logic [31:0] bitmanip_d;
logic [31:0] bitmanip_x;
// ZBE
logic ap_bcompress;
logic ap_bdecompress;
// ZBC
logic ap_clmul;
logic ap_clmulh;
logic ap_clmulr;
// ZBP
logic ap_grev;
logic ap_gorc;
logic ap_shfl;
logic ap_unshfl;
logic ap_xperm_n;
logic ap_xperm_b;
logic ap_xperm_h;
// ZBR
logic ap_crc32_b;
logic ap_crc32_h;
logic ap_crc32_w;
logic ap_crc32c_b;
logic ap_crc32c_h;
logic ap_crc32c_w;
// ZBF
logic ap_bfp;
if (pt.BITMANIP_ZBE == 1)
begin
assign ap_bcompress = mul_p.bcompress;
assign ap_bdecompress = mul_p.bdecompress;
end
else
begin
assign ap_bcompress = 1'b0;
assign ap_bdecompress = 1'b0;
end
if (pt.BITMANIP_ZBC == 1)
begin
assign ap_clmul = mul_p.clmul;
assign ap_clmulh = mul_p.clmulh;
assign ap_clmulr = mul_p.clmulr;
end
else
begin
assign ap_clmul = 1'b0;
assign ap_clmulh = 1'b0;
assign ap_clmulr = 1'b0;
end
if (pt.BITMANIP_ZBP == 1)
begin
assign ap_grev = mul_p.grev;
assign ap_gorc = mul_p.gorc;
assign ap_shfl = mul_p.shfl;
assign ap_unshfl = mul_p.unshfl;
assign ap_xperm_n = mul_p.xperm_n;
assign ap_xperm_b = mul_p.xperm_b;
assign ap_xperm_h = mul_p.xperm_h;
end
else
begin
assign ap_grev = 1'b0;
assign ap_gorc = 1'b0;
assign ap_shfl = 1'b0;
assign ap_unshfl = 1'b0;
assign ap_xperm_n = 1'b0;
assign ap_xperm_b = 1'b0;
assign ap_xperm_h = 1'b0;
end
if (pt.BITMANIP_ZBR == 1)
begin
assign ap_crc32_b = mul_p.crc32_b;
assign ap_crc32_h = mul_p.crc32_h;
assign ap_crc32_w = mul_p.crc32_w;
assign ap_crc32c_b = mul_p.crc32c_b;
assign ap_crc32c_h = mul_p.crc32c_h;
assign ap_crc32c_w = mul_p.crc32c_w;
end
else
begin
assign ap_crc32_b = 1'b0;
assign ap_crc32_h = 1'b0;
assign ap_crc32_w = 1'b0;
assign ap_crc32c_b = 1'b0;
assign ap_crc32c_h = 1'b0;
assign ap_crc32c_w = 1'b0;
end
if (pt.BITMANIP_ZBF == 1)
begin
assign ap_bfp = mul_p.bfp;
end
else
begin
assign ap_bfp = 1'b0;
end
// *** End - BitManip ***
assign mul_x_enable = mul_p.valid;
assign bit_x_enable = mul_p.valid;
assign rs1_ext_in[32] = mul_p.rs1_sign & rs1_in[31];
assign rs2_ext_in[32] = mul_p.rs2_sign & rs2_in[31];
assign rs1_ext_in[31:0] = rs1_in[31:0];
assign rs2_ext_in[31:0] = rs2_in[31:0];
// --------------------------- Multiply ----------------------------------
logic signed [32:0] rs1_x;
logic signed [32:0] rs2_x;
rvdffe #(34) i_a_x_ff (.*, .clk(clk), .din({mul_p.low,rs1_ext_in[32:0]}), .dout({low_x,rs1_x[32:0]}), .en(mul_x_enable));
rvdffe #(33) i_b_x_ff (.*, .clk(clk), .din( rs2_ext_in[32:0] ), .dout( rs2_x[32:0] ), .en(mul_x_enable));
assign prod_x[65:0] = rs1_x * rs2_x;
// * * * * * * * * * * * * * * * * * * BitManip : BCOMPRESS, BDECOMPRESS * * * * * * * * * * * * *
// *** BCOMPRESS == "gather" ***
logic [31:0] bcompress_d;
logic bcompress_test_bit_d;
integer bcompress_i, bcompress_j;
always_comb
begin
bcompress_j = 0;
bcompress_test_bit_d = 1'b0;
bcompress_d[31:0] = 32'b0;
for (bcompress_i=0; bcompress_i<32; bcompress_i++)
begin
bcompress_test_bit_d = rs2_in[bcompress_i];
if (bcompress_test_bit_d)
begin
bcompress_d[bcompress_j] = rs1_in[bcompress_i];
bcompress_j = bcompress_j + 1;
end // IF bcompress_test_bit
end // FOR bcompress_i
end // ALWAYS_COMB
// *** BDECOMPRESS == "scatter" ***
logic [31:0] bdecompress_d;
logic bdecompress_test_bit_d;
integer bdecompress_i, bdecompress_j;
always_comb
begin
bdecompress_j = 0;
bdecompress_test_bit_d = 1'b0;
bdecompress_d[31:0] = 32'b0;
for (bdecompress_i=0; bdecompress_i<32; bdecompress_i++)
begin
bdecompress_test_bit_d = rs2_in[bdecompress_i];
if (bdecompress_test_bit_d)
begin
bdecompress_d[bdecompress_i] = rs1_in[bdecompress_j];
bdecompress_j = bdecompress_j + 1;
end // IF bdecompress_test_bit
end // FOR bdecompress_i
end // ALWAYS_COMB
// * * * * * * * * * * * * * * * * * * BitManip : CLMUL, CLMULH, CLMULR * * * * * * * * * * * * *
logic [62:0] clmul_raw_d;
assign clmul_raw_d[62:0] = ( {63{rs2_in[00]}} & {31'b0,rs1_in[31:0] } ) ^
( {63{rs2_in[01]}} & {30'b0,rs1_in[31:0], 1'b0} ) ^
( {63{rs2_in[02]}} & {29'b0,rs1_in[31:0], 2'b0} ) ^
( {63{rs2_in[03]}} & {28'b0,rs1_in[31:0], 3'b0} ) ^
( {63{rs2_in[04]}} & {27'b0,rs1_in[31:0], 4'b0} ) ^
( {63{rs2_in[05]}} & {26'b0,rs1_in[31:0], 5'b0} ) ^
( {63{rs2_in[06]}} & {25'b0,rs1_in[31:0], 6'b0} ) ^
( {63{rs2_in[07]}} & {24'b0,rs1_in[31:0], 7'b0} ) ^
( {63{rs2_in[08]}} & {23'b0,rs1_in[31:0], 8'b0} ) ^
( {63{rs2_in[09]}} & {22'b0,rs1_in[31:0], 9'b0} ) ^
( {63{rs2_in[10]}} & {21'b0,rs1_in[31:0],10'b0} ) ^
( {63{rs2_in[11]}} & {20'b0,rs1_in[31:0],11'b0} ) ^
( {63{rs2_in[12]}} & {19'b0,rs1_in[31:0],12'b0} ) ^
( {63{rs2_in[13]}} & {18'b0,rs1_in[31:0],13'b0} ) ^
( {63{rs2_in[14]}} & {17'b0,rs1_in[31:0],14'b0} ) ^
( {63{rs2_in[15]}} & {16'b0,rs1_in[31:0],15'b0} ) ^
( {63{rs2_in[16]}} & {15'b0,rs1_in[31:0],16'b0} ) ^
( {63{rs2_in[17]}} & {14'b0,rs1_in[31:0],17'b0} ) ^
( {63{rs2_in[18]}} & {13'b0,rs1_in[31:0],18'b0} ) ^
( {63{rs2_in[19]}} & {12'b0,rs1_in[31:0],19'b0} ) ^
( {63{rs2_in[20]}} & {11'b0,rs1_in[31:0],20'b0} ) ^
( {63{rs2_in[21]}} & {10'b0,rs1_in[31:0],21'b0} ) ^
( {63{rs2_in[22]}} & { 9'b0,rs1_in[31:0],22'b0} ) ^
( {63{rs2_in[23]}} & { 8'b0,rs1_in[31:0],23'b0} ) ^
( {63{rs2_in[24]}} & { 7'b0,rs1_in[31:0],24'b0} ) ^
( {63{rs2_in[25]}} & { 6'b0,rs1_in[31:0],25'b0} ) ^
( {63{rs2_in[26]}} & { 5'b0,rs1_in[31:0],26'b0} ) ^
( {63{rs2_in[27]}} & { 4'b0,rs1_in[31:0],27'b0} ) ^
( {63{rs2_in[28]}} & { 3'b0,rs1_in[31:0],28'b0} ) ^
( {63{rs2_in[29]}} & { 2'b0,rs1_in[31:0],29'b0} ) ^
( {63{rs2_in[30]}} & { 1'b0,rs1_in[31:0],30'b0} ) ^
( {63{rs2_in[31]}} & { rs1_in[31:0],31'b0} );
// * * * * * * * * * * * * * * * * * * BitManip : GREV * * * * * * * * * * * * * * * * * *
// uint32_t grev32(uint32_t rs1, uint32_t rs2)
// {
// uint32_t x = rs1;
// int shamt = rs2 & 31;
//
// if (shamt & 1) x = ( (x & 0x55555555) << 1) | ( (x & 0xAAAAAAAA) >> 1);
// if (shamt & 2) x = ( (x & 0x33333333) << 2) | ( (x & 0xCCCCCCCC) >> 2);
// if (shamt & 4) x = ( (x & 0x0F0F0F0F) << 4) | ( (x & 0xF0F0F0F0) >> 4);
// if (shamt & 8) x = ( (x & 0x00FF00FF) << 8) | ( (x & 0xFF00FF00) >> 8);
// if (shamt & 16) x = ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
//
// return x;
// }
logic [31:0] grev1_d;
logic [31:0] grev2_d;
logic [31:0] grev4_d;
logic [31:0] grev8_d;
logic [31:0] grev_d;
assign grev1_d[31:0] = (rs2_in[0]) ? {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25],
rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} : rs1_in[31:0];
assign grev2_d[31:0] = (rs2_in[1]) ? {grev1_d[29:28],grev1_d[31:30],grev1_d[25:24],grev1_d[27:26],
grev1_d[21:20],grev1_d[23:22],grev1_d[17:16],grev1_d[19:18],
grev1_d[13:12],grev1_d[15:14],grev1_d[09:08],grev1_d[11:10],
grev1_d[05:04],grev1_d[07:06],grev1_d[01:00],grev1_d[03:02]} : grev1_d[31:0];
assign grev4_d[31:0] = (rs2_in[2]) ? {grev2_d[27:24],grev2_d[31:28],grev2_d[19:16],grev2_d[23:20],
grev2_d[11:08],grev2_d[15:12],grev2_d[03:00],grev2_d[07:04]} : grev2_d[31:0];
assign grev8_d[31:0] = (rs2_in[3]) ? {grev4_d[23:16],grev4_d[31:24],grev4_d[07:00],grev4_d[15:08]} : grev4_d[31:0];
assign grev_d[31:0] = (rs2_in[4]) ? {grev8_d[15:00],grev8_d[31:16]} : grev8_d[31:0];
// * * * * * * * * * * * * * * * * * * BitManip : GORC * * * * * * * * * * * * * * * * * *
// uint32_t gorc32(uint32_t rs1, uint32_t rs2)
// {
// uint32_t x = rs1;
// int shamt = rs2 & 31;
//
// if (shamt & 1) x |= ( (x & 0x55555555) << 1) | ( (x & 0xAAAAAAAA) >> 1);
// if (shamt & 2) x |= ( (x & 0x33333333) << 2) | ( (x & 0xCCCCCCCC) >> 2);
// if (shamt & 4) x |= ( (x & 0x0F0F0F0F) << 4) | ( (x & 0xF0F0F0F0) >> 4);
// if (shamt & 8) x |= ( (x & 0x00FF00FF) << 8) | ( (x & 0xFF00FF00) >> 8);
// if (shamt & 16) x |= ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
//
// return x;
// }
logic [31:0] gorc1_d;
logic [31:0] gorc2_d;
logic [31:0] gorc4_d;
logic [31:0] gorc8_d;
logic [31:0] gorc_d;
assign gorc1_d[31:0] = ( {32{rs2_in[0]}} & {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25],
rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} ) | rs1_in[31:0];
assign gorc2_d[31:0] = ( {32{rs2_in[1]}} & {gorc1_d[29:28],gorc1_d[31:30],gorc1_d[25:24],gorc1_d[27:26],
gorc1_d[21:20],gorc1_d[23:22],gorc1_d[17:16],gorc1_d[19:18],
gorc1_d[13:12],gorc1_d[15:14],gorc1_d[09:08],gorc1_d[11:10],
gorc1_d[05:04],gorc1_d[07:06],gorc1_d[01:00],gorc1_d[03:02]} ) | gorc1_d[31:0];
assign gorc4_d[31:0] = ( {32{rs2_in[2]}} & {gorc2_d[27:24],gorc2_d[31:28],gorc2_d[19:16],gorc2_d[23:20],
gorc2_d[11:08],gorc2_d[15:12],gorc2_d[03:00],gorc2_d[07:04]} ) | gorc2_d[31:0];
assign gorc8_d[31:0] = ( {32{rs2_in[3]}} & {gorc4_d[23:16],gorc4_d[31:24],gorc4_d[07:00],gorc4_d[15:08]} ) | gorc4_d[31:0];
assign gorc_d[31:0] = ( {32{rs2_in[4]}} & {gorc8_d[15:00],gorc8_d[31:16]} ) | gorc8_d[31:0];
// * * * * * * * * * * * * * * * * * * BitManip : SHFL, UNSHLF * * * * * * * * * * * * * * * * * *
// uint32_t shuffle32_stage (uint32_t src, uint32_t maskL, uint32_t maskR, int N)
// {
// uint32_t x = src & ~(maskL | maskR);
// x |= ((src << N) & maskL) | ((src >> N) & maskR);
// return x;
// }
//
//
//
// uint32_t shfl32(uint32_t rs1, uint32_t rs2)
// {
// uint32_t x = rs1;
// int shamt = rs2 & 15
//
// if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
// if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
// if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
// if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
//
// return x;
// }
logic [31:0] shfl8_d;
logic [31:0] shfl4_d;
logic [31:0] shfl2_d;
logic [31:0] shfl_d;
assign shfl8_d[31:0] = (rs2_in[3]) ? {rs1_in[31:24],rs1_in[15:08],rs1_in[23:16],rs1_in[07:00]} : rs1_in[31:0];
assign shfl4_d[31:0] = (rs2_in[2]) ? {shfl8_d[31:28],shfl8_d[23:20],shfl8_d[27:24],shfl8_d[19:16],
shfl8_d[15:12],shfl8_d[07:04],shfl8_d[11:08],shfl8_d[03:00]} : shfl8_d[31:0];
assign shfl2_d[31:0] = (rs2_in[1]) ? {shfl4_d[31:30],shfl4_d[27:26],shfl4_d[29:28],shfl4_d[25:24],
shfl4_d[23:22],shfl4_d[19:18],shfl4_d[21:20],shfl4_d[17:16],
shfl4_d[15:14],shfl4_d[11:10],shfl4_d[13:12],shfl4_d[09:08],
shfl4_d[07:06],shfl4_d[03:02],shfl4_d[05:04],shfl4_d[01:00]} : shfl4_d[31:0];
assign shfl_d[31:0] = (rs2_in[0]) ? {shfl2_d[31],shfl2_d[29],shfl2_d[30],shfl2_d[28],shfl2_d[27],shfl2_d[25],shfl2_d[26],shfl2_d[24],
shfl2_d[23],shfl2_d[21],shfl2_d[22],shfl2_d[20],shfl2_d[19],shfl2_d[17],shfl2_d[18],shfl2_d[16],
shfl2_d[15],shfl2_d[13],shfl2_d[14],shfl2_d[12],shfl2_d[11],shfl2_d[09],shfl2_d[10],shfl2_d[08],
shfl2_d[07],shfl2_d[05],shfl2_d[06],shfl2_d[04],shfl2_d[03],shfl2_d[01],shfl2_d[02],shfl2_d[00]} : shfl2_d[31:0];
// uint32_t unshfl32(uint32_t rs1, uint32_t rs2)
// {
// uint32_t x = rs1;
// int shamt = rs2 & 15
//
// if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
// if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
// if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
// if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
//
// return x;
// }
logic [31:0] unshfl1_d;
logic [31:0] unshfl2_d;
logic [31:0] unshfl4_d;
logic [31:0] unshfl_d;
assign unshfl1_d[31:0] = (rs2_in[0]) ? {rs1_in[31],rs1_in[29],rs1_in[30],rs1_in[28],rs1_in[27],rs1_in[25],rs1_in[26],rs1_in[24],
rs1_in[23],rs1_in[21],rs1_in[22],rs1_in[20],rs1_in[19],rs1_in[17],rs1_in[18],rs1_in[16],
rs1_in[15],rs1_in[13],rs1_in[14],rs1_in[12],rs1_in[11],rs1_in[09],rs1_in[10],rs1_in[08],
rs1_in[07],rs1_in[05],rs1_in[06],rs1_in[04],rs1_in[03],rs1_in[01],rs1_in[02],rs1_in[00]} : rs1_in[31:0];
assign unshfl2_d[31:0] = (rs2_in[1]) ? {unshfl1_d[31:30],unshfl1_d[27:26],unshfl1_d[29:28],unshfl1_d[25:24],
unshfl1_d[23:22],unshfl1_d[19:18],unshfl1_d[21:20],unshfl1_d[17:16],
unshfl1_d[15:14],unshfl1_d[11:10],unshfl1_d[13:12],unshfl1_d[09:08],
unshfl1_d[07:06],unshfl1_d[03:02],unshfl1_d[05:04],unshfl1_d[01:00]} : unshfl1_d[31:0];
assign unshfl4_d[31:0] = (rs2_in[2]) ? {unshfl2_d[31:28],unshfl2_d[23:20],unshfl2_d[27:24],unshfl2_d[19:16],
unshfl2_d[15:12],unshfl2_d[07:04],unshfl2_d[11:08],unshfl2_d[03:00]} : unshfl2_d[31:0];
assign unshfl_d[31:0] = (rs2_in[3]) ? {unshfl4_d[31:24],unshfl4_d[15:08],unshfl4_d[23:16],unshfl4_d[07:00]} : unshfl4_d[31:0];
// * * * * * * * * * * * * * * * * * * BitManip : XPERM * * * * * * * * * * * * * * * * *
//
// These instructions operate on nibbles/bytes/half-words/words.
// rs1 is a vector of data words and rs2 is a vector of indices into rs1.
// The result of the instruction is the vector rs2 with each element replaced by the corresponding data word from rs1,
// or zero then the index in rs2 is out of bounds.
//
// uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2)
// {
// uint_xlen_t r = 0;
// uint_xlen_t sz = 1LL << sz_log2;
// uint_xlen_t mask = (1LL << sz) - 1;
// for (int i = 0; i < XLEN; i += sz)
// { uint_xlen_t pos = ((rs2 >> i) & mask) << sz_log2;
// if (pos < XLEN)
// r |= ((rs1 >> pos) & mask) << i;
// }
// return r;
// }
//
// uint_xlen_t xperm_n (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 2); }
// uint_xlen_t xperm_b (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 3); }
// uint_xlen_t xperm_h (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 4); }
// uint_xlen_t xperm_w (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 5); } Not part of RV32
//
// The xperm.[nbhw] instructions can be implemented with an XLEN/4-lane nibble-wide crossbarswitch.
// *** XPERM_B ***
// XLEN = 32
// SZ_LOG2 = 3
// SZ = 4'd8;
// MASK = ( 1 << 8 ) - 1
// = 8'hFF
// integer xperm_b_i;
// logic [31:0] xperm_b_r;
// logic [3:0] xperm_b_sz;
// logic [7:0] xperm_b_mask;
// logic [31:0] xperm_b_pos;
//
//
// assign xperm_b_sz[3:0] = 4'd8;
// assign xperm_b_mask[7:0] = 8'hff;
//
// always_comb
// begin
// xperm_b_r[31:0] = 32'b0;
//
// for (xperm_b_i=0; xperm_b_i<32; xperm_b_i = xperm_b_i + xperm_b_sz) // This code did not work...
// begin
// xperm_b_pos[31:0] = ( (rs2_in[31:0] >> xperm_b_i) & {24'h0,xperm_b_mask[7:0]} ) << 3;
// if (xperm_b_pos[31:0] < 32'd32)
// xperm_b_r[31:0] = xperm_b_r[31:0] | ( ((rs1_in[31:0] >> xperm_b_pos[4:0]) & {24'h0,xperm_b_mask[7:0]}) << xperm_b_i );
// end
// end
logic [31:0] xperm_n;
logic [31:0] xperm_b;
logic [31:0] xperm_h;
assign xperm_n[03:00] = { 4{ ~rs2_in[03] }} & ( (rs1_in[31:0] >> {rs2_in[02:00],2'b0}) & 4'hf ); // This is a 8:1 mux with qualified selects
assign xperm_n[07:04] = { 4{ ~rs2_in[07] }} & ( (rs1_in[31:0] >> {rs2_in[06:04],2'b0}) & 4'hf );
assign xperm_n[11:08] = { 4{ ~rs2_in[11] }} & ( (rs1_in[31:0] >> {rs2_in[10:08],2'b0}) & 4'hf );
assign xperm_n[15:12] = { 4{ ~rs2_in[15] }} & ( (rs1_in[31:0] >> {rs2_in[14:12],2'b0}) & 4'hf );
assign xperm_n[19:16] = { 4{ ~rs2_in[19] }} & ( (rs1_in[31:0] >> {rs2_in[18:16],2'b0}) & 4'hf );
assign xperm_n[23:20] = { 4{ ~rs2_in[23] }} & ( (rs1_in[31:0] >> {rs2_in[22:20],2'b0}) & 4'hf );
assign xperm_n[27:24] = { 4{ ~rs2_in[27] }} & ( (rs1_in[31:0] >> {rs2_in[26:24],2'b0}) & 4'hf );
assign xperm_n[31:28] = { 4{ ~rs2_in[31] }} & ( (rs1_in[31:0] >> {rs2_in[30:28],2'b0}) & 4'hf );
assign xperm_b[07:00] = { 8{ ~(| rs2_in[07:02]) }} & ( (rs1_in[31:0] >> {rs2_in[01:00],3'b0}) & 8'hff ); // This is a 4:1 mux with qualified selects
assign xperm_b[15:08] = { 8{ ~(| rs2_in[15:10]) }} & ( (rs1_in[31:0] >> {rs2_in[09:08],3'b0}) & 8'hff );
assign xperm_b[23:16] = { 8{ ~(| rs2_in[23:18]) }} & ( (rs1_in[31:0] >> {rs2_in[17:16],3'b0}) & 8'hff );
assign xperm_b[31:24] = { 8{ ~(| rs2_in[31:26]) }} & ( (rs1_in[31:0] >> {rs2_in[25:24],3'b0}) & 8'hff );
assign xperm_h[15:00] = {16{ ~(| rs2_in[15:01]) }} & ( (rs1_in[31:0] >> {rs2_in[00] ,4'b0}) & 16'hffff ); // This is a 2:1 mux with qualified selects
assign xperm_h[31:16] = {16{ ~(| rs2_in[31:17]) }} & ( (rs1_in[31:0] >> {rs2_in[16] ,4'b0}) & 16'hffff );
// * * * * * * * * * * * * * * * * * * BitManip : CRC32, CRC32c * * * * * * * * * * * * * * * * *
// *** computed from https: //crccalc.com ***
//
// "a" is 8'h61 = 8'b0110_0001 (8'h61 ^ 8'hff = 8'h9e)
//
// Input must first be XORed with 32'hffff_ffff
//
//
// CRC32
//
// Input Output Input Output
// ----- -------- -------- --------
// "a" e8b7be43 ffffff9e 174841bc
// "aa" 078a19d7 ffff9e9e f875e628
// "aaaa" ad98e545 9e9e9e9e 5267a1ba
//
//
//
// CRC32c
//
// Input Output Input Output
// ----- -------- -------- --------
// "a" c1d04330 ffffff9e 3e2fbccf
// "aa" f1f2dac2 ffff9e9e 0e0d253d
// "aaaa" 6a52eeb0 9e9e9e9e 95ad114f
logic crc32_all;
logic [31:0] crc32_poly_rev;
logic [31:0] crc32c_poly_rev;
integer crc32_bi, crc32_hi, crc32_wi, crc32c_bi, crc32c_hi, crc32c_wi;
logic [31:0] crc32_bd, crc32_hd, crc32_wd, crc32c_bd, crc32c_hd, crc32c_wd;
assign crc32_all = ap_crc32_b | ap_crc32_h | ap_crc32_w | ap_crc32c_b | ap_crc32c_h | ap_crc32c_w;
assign crc32_poly_rev[31:0] = 32'hEDB88320; // bit reverse of 32'h04C11DB7
assign crc32c_poly_rev[31:0] = 32'h82F63B78; // bit reverse of 32'h1EDC6F41
always_comb
begin
crc32_bd[31:0] = rs1_in[31:0];
for (crc32_bi=0; crc32_bi<8; crc32_bi++)
begin
crc32_bd[31:0] = (crc32_bd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_bd[0]}});
end // FOR crc32_bi
end // ALWAYS_COMB
always_comb
begin
crc32_hd[31:0] = rs1_in[31:0];
for (crc32_hi=0; crc32_hi<16; crc32_hi++)
begin
crc32_hd[31:0] = (crc32_hd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_hd[0]}});
end // FOR crc32_hi
end // ALWAYS_COMB
always_comb
begin
crc32_wd[31:0] = rs1_in[31:0];
for (crc32_wi=0; crc32_wi<32; crc32_wi++)
begin
crc32_wd[31:0] = (crc32_wd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_wd[0]}});
end // FOR crc32_wi
end // ALWAYS_COMB
always_comb
begin
crc32c_bd[31:0] = rs1_in[31:0];
for (crc32c_bi=0; crc32c_bi<8; crc32c_bi++)
begin
crc32c_bd[31:0] = (crc32c_bd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_bd[0]}});
end // FOR crc32c_bi
end // ALWAYS_COMB
always_comb
begin
crc32c_hd[31:0] = rs1_in[31:0];
for (crc32c_hi=0; crc32c_hi<16; crc32c_hi++)
begin
crc32c_hd[31:0] = (crc32c_hd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_hd[0]}});
end // FOR crc32c_hi
end // ALWAYS_COMB
always_comb
begin
crc32c_wd[31:0] = rs1_in[31:0];
for (crc32c_wi=0; crc32c_wi<32; crc32c_wi++)
begin
crc32c_wd[31:0] = (crc32c_wd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_wd[0]}});
end // FOR crc32c_wi
end // ALWAYS_COMB
// * * * * * * * * * * * * * * * * * * BitManip : BFP * * * * * * * * * * * * * * * * * *
// uint_xlen_t bfp(uint_xlen_t rs1, uint_xlen_t rs2)
// {
// uint_xlen_t cfg = rs2 >> (XLEN/2);
// if ((cfg >> 30) == 2) cfg = cfg >> 16;
// int len = (cfg >> 8) & (XLEN/2-1);
// int off = cfg & (XLEN-1);
// len = len ? len : XLEN/2;
// uint_xlen_t mask = slo(0, len) << off;
// uint_xlen_t data = rs2 << off;
// return (data & mask) | (rs1 & ~mask);
logic [4:0] bfp_len;
logic [4:0] bfp_off;
logic [31:0] bfp_len_mask_;
logic [31:0] bfp_off_mask_;
logic [15:0] bfp_preshift_data;
logic [31:0] bfp_shift_data;
logic [31:0] bfp_shift_mask;
logic [31:0] bfp_result_d;
assign bfp_len[3:0] = rs2_in[27:24];
assign bfp_len[4] = (bfp_len[3:0] == 4'b0); // If LEN field is zero, then LEN=16
assign bfp_off[4:0] = rs2_in[20:16];
assign bfp_len_mask_[31:0] = 32'hffff_ffff << bfp_len[4:0];
assign bfp_off_mask_[31:0] = 32'hffff_ffff << bfp_off[4:0];
assign bfp_preshift_data[15:0]= rs2_in[15:0] & ~bfp_len_mask_[15:0];
assign bfp_shift_data[31:0] = {16'b0,bfp_preshift_data[15:0]} << bfp_off[4:0];
assign bfp_shift_mask[31:0] = (bfp_len_mask_[31:0] << bfp_off[4:0]) | ~bfp_off_mask_[31:0];
assign bfp_result_d[31:0] = bfp_shift_data[31:0] | (rs1_in[31:0] & bfp_shift_mask[31:0]);
// * * * * * * * * * * * * * * * * * * BitManip : Common logic * * * * * * * * * * * * * * * * * *
assign bitmanip_sel_d = ap_bcompress | ap_bdecompress | ap_clmul | ap_clmulh | ap_clmulr | ap_grev | ap_gorc | ap_shfl | ap_unshfl | crc32_all | ap_bfp | ap_xperm_n | ap_xperm_b | ap_xperm_h;
assign bitmanip_d[31:0] = ( {32{ap_bcompress}} & bcompress_d[31:0] ) |
( {32{ap_bdecompress}} & bdecompress_d[31:0] ) |
( {32{ap_clmul}} & clmul_raw_d[31:0] ) |
( {32{ap_clmulh}} & {1'b0,clmul_raw_d[62:32]} ) |
( {32{ap_clmulr}} & clmul_raw_d[62:31] ) |
( {32{ap_grev}} & grev_d[31:0] ) |
( {32{ap_gorc}} & gorc_d[31:0] ) |
( {32{ap_shfl}} & shfl_d[31:0] ) |
( {32{ap_unshfl}} & unshfl_d[31:0] ) |
( {32{ap_crc32_b}} & crc32_bd[31:0] ) |
( {32{ap_crc32_h}} & crc32_hd[31:0] ) |
( {32{ap_crc32_w}} & crc32_wd[31:0] ) |
( {32{ap_crc32c_b}} & crc32c_bd[31:0] ) |
( {32{ap_crc32c_h}} & crc32c_hd[31:0] ) |
( {32{ap_crc32c_w}} & crc32c_wd[31:0] ) |
( {32{ap_bfp}} & bfp_result_d[31:0] ) |
( {32{ap_xperm_n}} & xperm_n[31:0] ) |
( {32{ap_xperm_b}} & xperm_b[31:0] ) |
( {32{ap_xperm_h}} & xperm_h[31:0] );
rvdffe #(33) i_bitmanip_ff (.*, .clk(clk), .din({bitmanip_sel_d,bitmanip_d[31:0]}), .dout({bitmanip_sel_x,bitmanip_x[31:0]}), .en(bit_x_enable));
assign result_x[31:0] = ( {32{~bitmanip_sel_x & ~low_x}} & prod_x[63:32] ) |
( {32{~bitmanip_sel_x & low_x}} & prod_x[31:0] ) |
bitmanip_x[31:0];
endmodule // el2_exu_mul_ctl

43
Flow/design/flist Normal file
View File

@ -0,0 +1,43 @@
$RV_ROOT/design/el2_swerv_wrapper.sv
$RV_ROOT/design/el2_mem.sv
$RV_ROOT/design/el2_pic_ctrl.sv
$RV_ROOT/design/el2_swerv.sv
$RV_ROOT/design/el2_dma_ctrl.sv
$RV_ROOT/design/ifu/el2_ifu_aln_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_compress_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_ifc_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_bp_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_ic_mem.sv
$RV_ROOT/design/ifu/el2_ifu_mem_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_iccm_mem.sv
$RV_ROOT/design/ifu/el2_ifu.sv
$RV_ROOT/design/dec/el2_dec_decode_ctl.sv
$RV_ROOT/design/dec/el2_dec_gpr_ctl.sv
$RV_ROOT/design/dec/el2_dec_ib_ctl.sv
$RV_ROOT/design/dec/el2_dec_tlu_ctl.sv
$RV_ROOT/design/dec/el2_dec_trigger.sv
$RV_ROOT/design/dec/el2_dec.sv
$RV_ROOT/design/exu/el2_exu_alu_ctl.sv
$RV_ROOT/design/exu/el2_exu_mul_ctl.sv
$RV_ROOT/design/exu/el2_exu_div_ctl.sv
$RV_ROOT/design/exu/el2_exu.sv
$RV_ROOT/design/lsu/el2_lsu.sv
$RV_ROOT/design/lsu/el2_lsu_clkdomain.sv
$RV_ROOT/design/lsu/el2_lsu_addrcheck.sv
$RV_ROOT/design/lsu/el2_lsu_lsc_ctl.sv
$RV_ROOT/design/lsu/el2_lsu_stbuf.sv
$RV_ROOT/design/lsu/el2_lsu_bus_buffer.sv
$RV_ROOT/design/lsu/el2_lsu_bus_intf.sv
$RV_ROOT/design/lsu/el2_lsu_ecc.sv
$RV_ROOT/design/lsu/el2_lsu_dccm_mem.sv
$RV_ROOT/design/lsu/el2_lsu_dccm_ctl.sv
$RV_ROOT/design/lsu/el2_lsu_trigger.sv
$RV_ROOT/design/dbg/el2_dbg.sv
$RV_ROOT/design/dmi/dmi_wrapper.v
$RV_ROOT/design/dmi/dmi_jtag_to_core_sync.v
$RV_ROOT/design/dmi/rvjtag_tap.v
$RV_ROOT/design/lib/el2_lib.sv
-v $RV_ROOT/design/lib/beh_lib.sv
-v $RV_ROOT/design/lib/mem_lib.sv
$RV_ROOT/design/lib/ahb_to_axi4.sv
$RV_ROOT/design/lib/axi4_to_ahb.sv

63
Flow/design/flist.formal Normal file
View File

@ -0,0 +1,63 @@
#-*-dotf-*-
$RV_ROOT/design/include/el2_def.sv
+incdir+$RV_ROOT/design/lib
+incdir+$RV_ROOT/design/include
+incdir+$RV_ROOT/design/dmi
//|+incdir+$SYNOPSYS_SYN_ROOT/dw/sim_ver
//|-y $SYNOPSYS_SYN_ROOT/dw/sim_ver
//|
//|$SYNOPSYS_SYN_ROOT/dw/sim_ver/DW01_addsub.v
//|$SYNOPSYS_SYN_ROOT/dw/sim_ver/DW_lzd.v
//|$SYNOPSYS_SYN_ROOT/dw/sim_ver/DW_minmax.v
//|$SYNOPSYS_SYN_ROOT/dw/sim_ver/DW02_mult.v
+incdir+/wdc/apps/mentor/questa/formal/2019.2/share/MODIFIED/dw
+incdir+/wdc/apps/mentor/questa/formal/2019.2/share/MODIFIED/dw/dw_datapath
/wdc/apps/mentor/questa/formal/2019.2/share/MODIFIED/dw/dw.remodel.v
$RV_ROOT/design/el2_swerv_wrapper.sv
$RV_ROOT/design/el2_mem.sv
$RV_ROOT/design/el2_pic_ctrl.sv
$RV_ROOT/design/el2_swerv.sv
$RV_ROOT/design/el2_dma_ctrl.sv
$RV_ROOT/design/ifu/el2_ifu_aln_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_compress_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_ifc_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_bp_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_ic_mem.sv
$RV_ROOT/design/ifu/el2_ifu_mem_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_iccm_mem.sv
$RV_ROOT/design/ifu/el2_ifu.sv
$RV_ROOT/design/dec/el2_dec_decode_ctl.sv
$RV_ROOT/design/dec/el2_dec_gpr_ctl.sv
$RV_ROOT/design/dec/el2_dec_ib_ctl.sv
$RV_ROOT/design/dec/el2_dec_tlu_ctl.sv
$RV_ROOT/design/dec/el2_dec_trigger.sv
$RV_ROOT/design/dec/el2_dec.sv
$RV_ROOT/design/exu/el2_exu_alu_ctl.sv
$RV_ROOT/design/exu/el2_exu_mul_ctl.sv
$RV_ROOT/design/exu/el2_exu_div_ctl.sv
$RV_ROOT/design/exu/el2_exu.sv
$RV_ROOT/design/lsu/el2_lsu.sv
$RV_ROOT/design/lsu/el2_lsu_clkdomain.sv
$RV_ROOT/design/lsu/el2_lsu_addrcheck.sv
$RV_ROOT/design/lsu/el2_lsu_lsc_ctl.sv
$RV_ROOT/design/lsu/el2_lsu_stbuf.sv
$RV_ROOT/design/lsu/el2_lsu_bus_buffer.sv
$RV_ROOT/design/lsu/el2_lsu_bus_intf.sv
$RV_ROOT/design/lsu/el2_lsu_ecc.sv
$RV_ROOT/design/lsu/el2_lsu_dccm_mem.sv
$RV_ROOT/design/lsu/el2_lsu_dccm_ctl.sv
$RV_ROOT/design/lsu/el2_lsu_trigger.sv
$RV_ROOT/design/dbg/el2_dbg.sv
$RV_ROOT/design/dmi/dmi_wrapper.v
$RV_ROOT/design/dmi/dmi_jtag_to_core_sync.v
$RV_ROOT/design/dmi/rvjtag_tap.v
$RV_ROOT/design/lib/el2_lib.sv
$RV_ROOT/design/lib/beh_lib.sv
$RV_ROOT/design/lib/mem_lib.sv
$RV_ROOT/design/lib/ahb_to_axi4.sv
$RV_ROOT/design/lib/axi4_to_ahb.sv

61
Flow/design/flist.questa Normal file
View File

@ -0,0 +1,61 @@
#-*-dotf-*-
# $RV_ROOT/workspace/work/snapshots/default/common_defines.vh
# $RV_ROOT/configs/snapshots/default/common_defines.vh
$RV_ROOT/workspace/work/snapshots/default/common_defines.vh
$RV_ROOT/design/include/el2_def.sv
# +incdir+$RV_ROOT/workspace/work/snapshots/default
# +incdir+$RV_ROOT/configs/snapshots/default
+incdir+$RV_ROOT/workspace/work/snapshots/default
+incdir+$RV_ROOT/design/lib
+incdir+$RV_ROOT/design/include
+incdir+$RV_ROOT/design/dmi
+incdir+$SYNOPSYS_SYN_ROOT/dw/sim_ver
-y $SYNOPSYS_SYN_ROOT/dw/sim_ver
$RV_ROOT/design/el2_swerv_wrapper.sv
$RV_ROOT/design/el2_mem.sv
$RV_ROOT/design/el2_pic_ctrl.sv
$RV_ROOT/design/el2_swerv.sv
$RV_ROOT/design/el2_dma_ctrl.sv
$RV_ROOT/design/ifu/el2_ifu_aln_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_compress_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_ifc_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_bp_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_ic_mem.sv
$RV_ROOT/design/ifu/el2_ifu_mem_ctl.sv
$RV_ROOT/design/ifu/el2_ifu_iccm_mem.sv
$RV_ROOT/design/ifu/el2_ifu.sv
$RV_ROOT/design/dec/el2_dec_decode_ctl.sv
$RV_ROOT/design/dec/el2_dec_gpr_ctl.sv
$RV_ROOT/design/dec/el2_dec_ib_ctl.sv
$RV_ROOT/design/dec/el2_dec_tlu_ctl.sv
$RV_ROOT/design/dec/el2_dec_trigger.sv
$RV_ROOT/design/dec/el2_dec.sv
$RV_ROOT/design/exu/el2_exu_alu_ctl.sv
$RV_ROOT/design/exu/el2_exu_mul_ctl.sv
$RV_ROOT/design/exu/el2_exu_div_ctl.sv
$RV_ROOT/design/exu/el2_exu.sv
$RV_ROOT/design/lsu/el2_lsu.sv
$RV_ROOT/design/lsu/el2_lsu_clkdomain.sv
$RV_ROOT/design/lsu/el2_lsu_addrcheck.sv
$RV_ROOT/design/lsu/el2_lsu_lsc_ctl.sv
$RV_ROOT/design/lsu/el2_lsu_stbuf.sv
$RV_ROOT/design/lsu/el2_lsu_bus_buffer.sv
$RV_ROOT/design/lsu/el2_lsu_bus_intf.sv
$RV_ROOT/design/lsu/el2_lsu_ecc.sv
$RV_ROOT/design/lsu/el2_lsu_dccm_mem.sv
$RV_ROOT/design/lsu/el2_lsu_dccm_ctl.sv
$RV_ROOT/design/lsu/el2_lsu_trigger.sv
$RV_ROOT/design/dbg/el2_dbg.sv
$RV_ROOT/design/dmi/dmi_wrapper.v
$RV_ROOT/design/dmi/dmi_jtag_to_core_sync.v
$RV_ROOT/design/dmi/rvjtag_tap.v
$RV_ROOT/design/lib/el2_lib.sv
$RV_ROOT/design/lib/beh_lib.sv
$RV_ROOT/design/lib/mem_lib.sv
$RV_ROOT/design/lib/ahb_to_axi4.sv
$RV_ROOT/design/lib/axi4_to_ahb.sv

371
Flow/design/ifu/el2_ifu.sv Normal file
View File

@ -0,0 +1,371 @@
//********************************************************************************
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
//********************************************************************************
// Function: Top level file for Icache, Fetch, Branch prediction & Aligner
// BFF -> F1 -> F2 -> A
//********************************************************************************
module el2_ifu
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic rst_l, // reset, active low
input logic dec_i0_decode_d, // Valid instruction at D and not blocked
input logic exu_flush_final, // flush, includes upper and lower
input logic dec_tlu_i0_commit_cmt , // committed i0
input logic dec_tlu_flush_err_wb , // flush due to parity error.
input logic dec_tlu_flush_noredir_wb, // don't fetch, validated with exu_flush_final
input logic [31:1] exu_flush_path_final, // flush fetch address
input logic [31:0] dec_tlu_mrac_ff ,// Side_effect , cacheable for each region
input logic dec_tlu_fence_i_wb, // fence.i, invalidate icache, validated with exu_flush_final
input logic dec_tlu_flush_leak_one_wb, // ignore bp for leak one fetches
input logic dec_tlu_bpred_disable, // disable all branch prediction
input logic dec_tlu_core_ecc_disable, // disable ecc checking and flagging
input logic dec_tlu_force_halt, // force halt
//-------------------------- IFU AXI signals--------------------------
// AXI Write Channels
output logic ifu_axi_awvalid,
output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid,
output logic [31:0] ifu_axi_awaddr,
output logic [3:0] ifu_axi_awregion,
output logic [7:0] ifu_axi_awlen,
output logic [2:0] ifu_axi_awsize,
output logic [1:0] ifu_axi_awburst,
output logic ifu_axi_awlock,
output logic [3:0] ifu_axi_awcache,
output logic [2:0] ifu_axi_awprot,
output logic [3:0] ifu_axi_awqos,
output logic ifu_axi_wvalid,
output logic [63:0] ifu_axi_wdata,
output logic [7:0] ifu_axi_wstrb,
output logic ifu_axi_wlast,
output logic ifu_axi_bready,
// AXI Read Channels
output logic ifu_axi_arvalid,
input logic ifu_axi_arready,
output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid,
output logic [31:0] ifu_axi_araddr,
output logic [3:0] ifu_axi_arregion,
output logic [7:0] ifu_axi_arlen,
output logic [2:0] ifu_axi_arsize,
output logic [1:0] ifu_axi_arburst,
output logic ifu_axi_arlock,
output logic [3:0] ifu_axi_arcache,
output logic [2:0] ifu_axi_arprot,
output logic [3:0] ifu_axi_arqos,
input logic ifu_axi_rvalid,
output logic ifu_axi_rready,
input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid,
input logic [63:0] ifu_axi_rdata,
input logic [1:0] ifu_axi_rresp,
input logic ifu_bus_clk_en,
input logic dma_iccm_req,
input logic [31:0] dma_mem_addr,
input logic [2:0] dma_mem_sz,
input logic dma_mem_write,
input logic [63:0] dma_mem_wdata,
input logic [2:0] dma_mem_tag, // DMA Buffer entry number
input logic dma_iccm_stall_any,
output logic iccm_dma_ecc_error,
output logic iccm_dma_rvalid,
output logic [63:0] iccm_dma_rdata,
output logic [2:0] iccm_dma_rtag, // Tag of the DMA req
output logic iccm_ready,
output logic ifu_pmu_instr_aligned,
output logic ifu_pmu_fetch_stall,
output logic ifu_ic_error_start, // has all of the I$ ecc/parity for data/tag
// I$ & ITAG Ports
output logic [31:1] ic_rw_addr, // Read/Write addresss to the Icache.
output logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, // Icache write enable, when filling the Icache.
output logic ic_rd_en, // Icache read enable.
output logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC
input logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
input logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
input logic [25:0] ictag_debug_rd_data,// Debug icache tag.
output logic [70:0] ic_debug_wr_data, // Debug wr cache.
output logic [70:0] ifu_ic_debug_rd_data,
input logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, //
input logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr,
output logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache.
output logic ic_sel_premux_data, // Select the premux data.
output logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache.
output logic ic_debug_rd_en, // Icache debug rd
output logic ic_debug_wr_en, // Icache debug wr
output logic ic_debug_tag_array, // Debug tag array
output logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr.
output logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, // Valid bits when accessing the Icache. One valid bit per way. F2 stage
input logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, // Compare hits from Icache tags. Per way. F2 stage
input logic ic_tag_perr, // Icache Tag parity error
// ICCM ports
output logic [pt.ICCM_BITS-1:1] iccm_rw_addr, // ICCM read/write address.
output logic iccm_wren, // ICCM write enable (through the DMA)
output logic iccm_rden, // ICCM read enable.
output logic [77:0] iccm_wr_data, // ICCM write data.
output logic [2:0] iccm_wr_size, // ICCM write location within DW.
input logic [63:0] iccm_rd_data, // Data read from ICCM.
input logic [77:0] iccm_rd_data_ecc, // Data + ECC read from ICCM.
output logic ifu_iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc error.
// Perf counter sigs
output logic ifu_pmu_ic_miss, // ic miss
output logic ifu_pmu_ic_hit, // ic hit
output logic ifu_pmu_bus_error, // iside bus error
output logic ifu_pmu_bus_busy, // iside bus busy
output logic ifu_pmu_bus_trxn, // iside bus transactions
output logic ifu_i0_icaf, // Instruction 0 access fault. From Aligner to Decode
output logic [1:0] ifu_i0_icaf_type, // Instruction 0 access fault type
output logic ifu_i0_valid, // Instruction 0 valid. From Aligner to Decode
output logic ifu_i0_icaf_second, // Instruction 0 has access fault on second 2B of 4B inst
output logic ifu_i0_dbecc, // Instruction 0 has double bit ecc error
output logic iccm_dma_sb_error, // Single Bit ECC error from a DMA access
output logic[31:0] ifu_i0_instr, // Instruction 0 . From Aligner to Decode
output logic[31:1] ifu_i0_pc, // Instruction 0 pc. From Aligner to Decode
output logic ifu_i0_pc4, // Instruction 0 is 4 byte. From Aligner to Decode
output logic ifu_miss_state_idle, // There is no outstanding miss. Cache miss state is idle.
output el2_br_pkt_t i0_brp, // Instruction 0 branch packet. From Aligner to Decode
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
output logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
output logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
output logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index
input el2_predict_pkt_t exu_mp_pkt, // mispredict packet
input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr
input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index
input logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag
input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot0 update/error pkt
input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index
input logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associt btb error index
input dec_tlu_flush_lower_wb,
output logic [15:0] ifu_i0_cinst,
/// Icache debug
input el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt ,
output logic ifu_ic_debug_rd_data_valid,
output logic iccm_buf_correct_ecc,
output logic iccm_correction_state,
input logic scan_mode
);
localparam TAGWIDTH = 2 ;
localparam IDWIDTH = 2 ;
logic ifu_fb_consume1, ifu_fb_consume2;
logic [31:1] ifc_fetch_addr_f;
logic [31:1] ifc_fetch_addr_bf;
logic [1:0] ifu_fetch_val; // valids on a 2B boundary, left justified [7] implies valid fetch
logic [31:1] ifu_fetch_pc; // starting pc of fetch
logic iccm_rd_ecc_single_err, ic_error_start;
assign ifu_iccm_rd_ecc_single_err = iccm_rd_ecc_single_err;
assign ifu_ic_error_start = ic_error_start;
logic ic_write_stall;
logic ic_dma_active;
logic ifc_dma_access_ok;
logic [1:0] ic_access_fault_f;
logic [1:0] ic_access_fault_type_f;
logic ifu_ic_mb_empty;
logic ic_hit_f;
logic [1:0] ifu_bp_way_f; // way indication; right justified
logic ifu_bp_hit_taken_f; // kill next fetch; taken target found
logic [31:1] ifu_bp_btb_target_f; // predicted target PC
logic ifu_bp_inst_mask_f; // tell ic which valids to kill because of a taken branch; right justified
logic [1:0] ifu_bp_hist1_f; // history counters for all 4 potential branches; right justified
logic [1:0] ifu_bp_hist0_f; // history counters for all 4 potential branches; right justified
logic [11:0] ifu_bp_poffset_f; // predicted target
logic [1:0] ifu_bp_ret_f; // predicted ret ; right justified
logic [1:0] ifu_bp_pc4_f; // pc4 indication; right justified
logic [1:0] ifu_bp_valid_f; // branch valid, right justified
logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f;
logic [1:0] [$clog2(pt.BTB_SIZE)-1:0] ifu_bp_fa_index_f;
// fetch control
el2_ifu_ifc_ctl #(.pt(pt)) ifc (.*
);
// branch predictor
if (pt.BTB_ENABLE==1) begin : bpred
el2_ifu_bp_ctl #(.pt(pt)) bp (.*);
end
else begin : bpred
assign ifu_bp_hit_taken_f = '0;
// verif wires
logic btb_wr_en_way0, btb_wr_en_way1,dec_tlu_error_wb;
logic [16+pt.BTB_BTAG_SIZE:0] btb_wr_data;
assign btb_wr_en_way0 = '0;
assign btb_wr_en_way1 = '0;
assign btb_wr_data = '0;
assign dec_tlu_error_wb ='0;
assign ifu_bp_inst_mask_f = 1'b1;
end
logic [1:0] ic_fetch_val_f;
logic [31:0] ic_data_f;
logic [31:0] ifu_fetch_data_f;
logic ifc_fetch_req_f;
logic ifc_fetch_req_f_raw;
logic [1:0] iccm_rd_ecc_double_err; // This fetch has an iccm double error.
logic ifu_async_error_start;
assign ifu_fetch_data_f[31:0] = ic_data_f[31:0];
assign ifu_fetch_val[1:0] = ic_fetch_val_f[1:0];
assign ifu_fetch_pc[31:1] = ifc_fetch_addr_f[31:1];
logic ifc_fetch_uncacheable_bf; // The fetch request is uncacheable space. BF stage
logic ifc_fetch_req_bf; // Fetch request. Comes with the address. BF stage
logic ifc_fetch_req_bf_raw; // Fetch request without some qualifications. Used for clock-gating. BF stage
logic ifc_iccm_access_bf; // This request is to the ICCM. Do not generate misses to the bus.
logic ifc_region_acc_fault_bf; // Access fault. in ICCM region but offset is outside defined ICCM.
// aligner
el2_ifu_aln_ctl #(.pt(pt)) aln (
.*
);
// icache
el2_ifu_mem_ctl #(.pt(pt)) mem_ctl
(.*,
.ic_data_f(ic_data_f[31:0])
);
// Performance debug info
//
//
`ifdef DUMP_BTB_ON
logic exu_mp_valid; // conditional branch mispredict
logic exu_mp_way; // conditional branch mispredict
logic exu_mp_ataken; // direction is actual taken
logic exu_mp_boffset; // branch offsett
logic exu_mp_pc4; // branch is a 4B inst
logic exu_mp_call; // branch is a call inst
logic exu_mp_ret; // branch is a ret inst
logic exu_mp_ja; // branch is a jump always
logic [1:0] exu_mp_hist; // new history
logic [11:0] exu_mp_tgt; // target offset
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address
assign exu_mp_valid = exu_mp_pkt.misp; // conditional branch mispredict
assign exu_mp_ataken = exu_mp_pkt.ataken; // direction is actual taken
assign exu_mp_boffset = exu_mp_pkt.boffset; // branch offset
assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst
assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst
assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst
assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always
assign exu_mp_way = exu_mp_pkt.way; // branch is a jump always
assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history
assign exu_mp_tgt[11:0] = exu_mp_pkt.toffset[11:0] ; // target offset
assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ; // BTB/BHT address
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_f;
`define DEC `CPU_TOP.dec
`define EXU `CPU_TOP.exu
el2_btb_addr_hash f2hash(.pc(ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
logic [31:0] mppc_ns, mppc;
logic exu_flush_final_d1;
assign mppc_ns[31:1] = `EXU.i0_flush_upper_x ? `EXU.exu_i0_pc_x : `EXU.dec_i0_pc_d;
assign mppc_ns[0] = 1'b0;
rvdff #(33) junk_ff (.*, .clk(active_clk), .din({mppc_ns[31:0], exu_flush_final}), .dout({mppc[31:0], exu_flush_final_d1}));
logic tmp_bnk;
assign tmp_bnk = bpred.bp.btb_sel_f[1];
always @(negedge clk) begin
if(`DEC.tlu.mcyclel[31:0] == 32'h0000_0010) begin
$display("BTB_CONFIG: %d",pt.BTB_SIZE);
`ifndef BP_NOGSHARE
$display("BHT_CONFIG: %d gshare: 1",pt.BHT_SIZE);
`else
$display("BHT_CONFIG: %d gshare: 0",pt.BHT_SIZE);
`endif
$display("RS_CONFIG: %d", pt.RET_STACK_SIZE);
end
if(exu_flush_final_d1 & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error) & (exu_mp_pkt.misp | exu_mp_pkt.ataken))
$display("%7d BTB_MP : index: %0h bank: %0h call: %b ret: %b ataken: %b hist: %h valid: %b tag: %h targ: %h eghr: %b pred: %b ghr_index: %h brpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha, exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO], 1'b0, exu_mp_call, exu_mp_ret, exu_mp_ataken, exu_mp_hist[1:0], exu_mp_valid, exu_mp_btag[pt.BTB_BTAG_SIZE-1:0], {exu_flush_path_final[31:1], 1'b0}, exu_mp_eghr[pt.BHT_GHR_SIZE-1:0], exu_mp_valid, bpred.bp.bht_wr_addr0, mppc[31:0], exu_mp_pkt.way);
for(int i = 0; i < 8; i++) begin
if(ifu_bp_valid_f[i] & ifc_fetch_req_f)
$display("%7d BTB_HIT : index: %0h bank: %0h call: %b ret: %b taken: %b strength: %b tag: %h targ: %0h ghr: %4b ghr_index: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],bpred.bp.btb_sel_f[1], bpred.bp.btb_rd_call_f, bpred.bp.btb_rd_ret_f, ifu_bp_hist1_f[tmp_bnk], ifu_bp_hist0_f[tmp_bnk], bpred.bp.fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0], {ifu_bp_btb_target_f[31:1], 1'b0}, bpred.bp.fghr[pt.BHT_GHR_SIZE-1:0], bpred.bp.bht_rd_addr_f, ifu_bp_way_f[tmp_bnk]);
end
if(dec_tlu_br0_r_pkt.valid & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error))
$display("%7d BTB_UPD0: ghr_index: %0h bank: %0h hist: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,bpred.bp.br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO],{dec_tlu_br0_r_pkt.middle}, dec_tlu_br0_r_pkt.hist, dec_tlu_br0_r_pkt.way);
if(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error)
$display("%7d BTB_ERR0: index: %0h bank: %0h start: %b rfpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],1'b0, dec_tlu_br0_r_pkt.br_start_error, {exu_flush_path_final[31:1], 1'b0}, dec_tlu_br0_r_pkt.way);
end // always @ (negedge clk)
function [1:0] encode4_2;
input [3:0] in;
encode4_2[1] = in[3] | in[2];
encode4_2[0] = in[3] | in[1];
endfunction
`endif
endmodule // el2_ifu

View File

@ -0,0 +1,701 @@
//********************************************************************************
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
//********************************************************************************
// Function: Instruction aligner
//********************************************************************************
module el2_ifu_aln_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic scan_mode, // Flop scan mode control
input logic rst_l, // reset, active low
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic ifu_async_error_start, // ecc/parity related errors with current fetch - not sent down the pipe
input logic [1:0] iccm_rd_ecc_double_err, // This fetch has a double ICCM ecc error.
input logic [1:0] ic_access_fault_f, // Instruction access fault for the current fetch.
input logic [1:0] ic_access_fault_type_f, // Instruction access fault types
input logic exu_flush_final, // Flush from the pipeline.
input logic dec_i0_decode_d, // Valid instruction at D-stage and not blocked
input logic [31:0] ifu_fetch_data_f, // fetch data in memory format - not right justified
input logic [1:0] ifu_fetch_val, // valids on a 2B boundary, right justified
input logic [31:1] ifu_fetch_pc, // starting pc of fetch
output logic ifu_i0_valid, // Instruction 0 is valid
output logic ifu_i0_icaf, // Instruction 0 has access fault
output logic [1:0] ifu_i0_icaf_type, // Instruction 0 access fault type
output logic ifu_i0_icaf_second, // Instruction 0 has access fault on second 2B of 4B inst
output logic ifu_i0_dbecc, // Instruction 0 has double bit ecc error
output logic [31:0] ifu_i0_instr, // Instruction 0
output logic [31:1] ifu_i0_pc, // Instruction 0 PC
output logic ifu_i0_pc4,
output logic ifu_fb_consume1, // Consumed one buffer. To fetch control fetch for buffer mass balance
output logic ifu_fb_consume2, // Consumed two buffers.To fetch control fetch for buffer mass balance
input logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f, // fetch GHR
input logic [31:1] ifu_bp_btb_target_f, // predicted RET target
input logic [11:0] ifu_bp_poffset_f, // predicted target offset
input logic [1:0] [$clog2(pt.BTB_SIZE)-1:0] ifu_bp_fa_index_f, // predicted branch index (fully associative option)
input logic [1:0] ifu_bp_hist0_f, // history counters for all 4 potential branches, bit 1, right justified
input logic [1:0] ifu_bp_hist1_f, // history counters for all 4 potential branches, bit 1, right justified
input logic [1:0] ifu_bp_pc4_f, // pc4 indication, right justified
input logic [1:0] ifu_bp_way_f, // way indication, right justified
input logic [1:0] ifu_bp_valid_f, // branch valid, right justified
input logic [1:0] ifu_bp_ret_f, // predicted ret indication, right justified
output el2_br_pkt_t i0_brp, // Branch packet for I0.
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
output logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
output logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
output logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index
output logic ifu_pmu_instr_aligned, // number of inst aligned this cycle
output logic [15:0] ifu_i0_cinst // 16b compress inst for i0
);
logic ifvalid;
logic shift_f1_f0, shift_f2_f0, shift_f2_f1;
logic fetch_to_f0, fetch_to_f1, fetch_to_f2;
logic [1:0] f2val_in, f2val;
logic [1:0] f1val_in, f1val;
logic [1:0] f0val_in, f0val;
logic [1:0] sf1val, sf0val;
logic [31:0] aligndata;
logic first4B, first2B;
logic [31:0] uncompress0;
logic i0_shift;
logic shift_2B, shift_4B;
logic f1_shift_2B;
logic f2_valid, sf1_valid, sf0_valid;
logic [31:0] ifirst;
logic [1:0] alignval;
logic [31:1] firstpc, secondpc;
logic [11:0] f1poffset;
logic [11:0] f0poffset;
logic [pt.BHT_GHR_SIZE-1:0] f1fghr;
logic [pt.BHT_GHR_SIZE-1:0] f0fghr;
logic [1:0] f1hist1;
logic [1:0] f0hist1;
logic [1:0] f1hist0;
logic [1:0] f0hist0;
logic [1:0][$clog2(pt.BTB_SIZE)-1:0] f0index, f1index, alignindex;
logic [1:0] f1ictype;
logic [1:0] f0ictype;
logic [1:0] f1pc4;
logic [1:0] f0pc4;
logic [1:0] f1ret;
logic [1:0] f0ret;
logic [1:0] f1way;
logic [1:0] f0way;
logic [1:0] f1brend;
logic [1:0] f0brend;
logic [1:0] alignbrend;
logic [1:0] alignpc4;
logic [1:0] alignret;
logic [1:0] alignway;
logic [1:0] alignhist1;
logic [1:0] alignhist0;
logic [1:1] alignfromf1;
logic i0_ends_f1;
logic i0_br_start_error;
logic [31:1] f1prett;
logic [31:1] f0prett;
logic [1:0] f1dbecc;
logic [1:0] f0dbecc;
logic [1:0] f1icaf;
logic [1:0] f0icaf;
logic [1:0] aligndbecc;
logic [1:0] alignicaf;
logic i0_brp_pc4;
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] firstpc_hash, secondpc_hash;
logic first_legal;
logic [1:0] wrptr, wrptr_in;
logic [1:0] rdptr, rdptr_in;
logic [2:0] qwen;
logic [31:0] q2,q1,q0;
logic q2off_in, q2off;
logic q1off_in, q1off;
logic q0off_in, q0off;
logic f0_shift_2B;
logic [31:0] q0eff;
logic [31:0] q0final;
logic q0ptr;
logic [1:0] q0sel;
logic [31:0] q1eff;
logic [15:0] q1final;
logic q1ptr;
logic [1:0] q1sel;
logic [2:0] qren;
logic consume_fb1, consume_fb0;
logic [1:0] icaf_eff;
localparam BRDATA_SIZE = pt.BTB_ENABLE ? 16+($clog2(pt.BTB_SIZE)*2*pt.BTB_FULLYA) : 4;
localparam BRDATA_WIDTH = pt.BTB_ENABLE ? 8+($clog2(pt.BTB_SIZE)*pt.BTB_FULLYA) : 2;
logic [BRDATA_SIZE-1:0] brdata_in, brdata2, brdata1, brdata0;
logic [BRDATA_SIZE-1:0] brdata1eff, brdata0eff;
logic [BRDATA_SIZE-1:0] brdata1final, brdata0final;
localparam MHI = 1+(pt.BTB_ENABLE * (43+pt.BHT_GHR_SIZE));
localparam MSIZE = 2+(pt.BTB_ENABLE * (43+pt.BHT_GHR_SIZE));
logic [MHI:0] misc_data_in, misc2, misc1, misc0;
logic [MHI:0] misc1eff, misc0eff;
logic [pt.BTB_BTAG_SIZE-1:0] firstbrtag_hash, secondbrtag_hash;
logic error_stall_in, error_stall;
assign error_stall_in = (error_stall | ifu_async_error_start) & ~exu_flush_final;
rvdff #(.WIDTH(7)) bundle1ff (.*,
.clk(active_clk),
.din ({wrptr_in[1:0],rdptr_in[1:0],q2off_in,q1off_in,q0off_in}),
.dout({wrptr[1:0], rdptr[1:0], q2off, q1off, q0off})
);
rvdffie #(.WIDTH(7),.OVERRIDE(1)) bundle2ff (.*,
.din ({error_stall_in,f2val_in[1:0],f1val_in[1:0],f0val_in[1:0]}),
.dout({error_stall, f2val[1:0], f1val[1:0], f0val[1:0] })
);
if(pt.BTB_ENABLE==1) begin
rvdffe #(BRDATA_SIZE) brdata2ff (.*, .clk(clk), .en(qwen[2]), .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata2[BRDATA_SIZE-1:0]));
rvdffe #(BRDATA_SIZE) brdata1ff (.*, .clk(clk), .en(qwen[1]), .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata1[BRDATA_SIZE-1:0]));
rvdffe #(BRDATA_SIZE) brdata0ff (.*, .clk(clk), .en(qwen[0]), .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata0[BRDATA_SIZE-1:0]));
rvdffe #(MSIZE) misc2ff (.*, .clk(clk), .en(qwen[2]), .din(misc_data_in[MHI:0]), .dout(misc2[MHI:0]));
rvdffe #(MSIZE) misc1ff (.*, .clk(clk), .en(qwen[1]), .din(misc_data_in[MHI:0]), .dout(misc1[MHI:0]));
rvdffe #(MSIZE) misc0ff (.*, .clk(clk), .en(qwen[0]), .din(misc_data_in[MHI:0]), .dout(misc0[MHI:0]));
end
else begin
rvdffie #((MSIZE*3)+(BRDATA_SIZE*3)) miscff (.*,
.din({qwen[2] ? {misc_data_in[MHI:0], brdata_in[BRDATA_SIZE-1:0]} : {misc2[MHI:0], brdata2[BRDATA_SIZE-1:0]},
qwen[1] ? {misc_data_in[MHI:0], brdata_in[BRDATA_SIZE-1:0]} : {misc1[MHI:0], brdata1[BRDATA_SIZE-1:0]},
qwen[0] ? {misc_data_in[MHI:0], brdata_in[BRDATA_SIZE-1:0]} : {misc0[MHI:0], brdata0[BRDATA_SIZE-1:0]}}),
.dout({misc2[MHI:0], brdata2[BRDATA_SIZE-1:0],
misc1[MHI:0], brdata1[BRDATA_SIZE-1:0],
misc0[MHI:0], brdata0[BRDATA_SIZE-1:0]})
);
end
logic [31:1] q2pc, q1pc, q0pc;
rvdffe #(31) q2pcff (.*, .clk(clk), .en(qwen[2]), .din(ifu_fetch_pc[31:1]), .dout(q2pc[31:1]));
rvdffe #(31) q1pcff (.*, .clk(clk), .en(qwen[1]), .din(ifu_fetch_pc[31:1]), .dout(q1pc[31:1]));
rvdffe #(31) q0pcff (.*, .clk(clk), .en(qwen[0]), .din(ifu_fetch_pc[31:1]), .dout(q0pc[31:1]));
rvdffe #(32) q2ff (.*, .clk(clk), .en(qwen[2]), .din(ifu_fetch_data_f[31:0]), .dout(q2[31:0]));
rvdffe #(32) q1ff (.*, .clk(clk), .en(qwen[1]), .din(ifu_fetch_data_f[31:0]), .dout(q1[31:0]));
rvdffe #(32) q0ff (.*, .clk(clk), .en(qwen[0]), .din(ifu_fetch_data_f[31:0]), .dout(q0[31:0]));
// new queue control logic
assign qren[2:0] = { rdptr[1:0] == 2'b10,
rdptr[1:0] == 2'b01,
rdptr[1:0] == 2'b00 };
assign qwen[2:0] = { (wrptr[1:0] == 2'b10) & ifvalid,
(wrptr[1:0] == 2'b01) & ifvalid,
(wrptr[1:0] == 2'b00) & ifvalid };
assign rdptr_in[1:0] = ({2{ qren[0] & ifu_fb_consume1 & ~exu_flush_final}} & 2'b01 ) |
({2{ qren[1] & ifu_fb_consume1 & ~exu_flush_final}} & 2'b10 ) |
({2{ qren[2] & ifu_fb_consume1 & ~exu_flush_final}} & 2'b00 ) |
({2{ qren[0] & ifu_fb_consume2 & ~exu_flush_final}} & 2'b10 ) |
({2{ qren[1] & ifu_fb_consume2 & ~exu_flush_final}} & 2'b00 ) |
({2{ qren[2] & ifu_fb_consume2 & ~exu_flush_final}} & 2'b01 ) |
({2{~ifu_fb_consume1 & ~ifu_fb_consume2 & ~exu_flush_final}} & rdptr[1:0]);
assign wrptr_in[1:0] = ({2{ qwen[0] & ~exu_flush_final}} & 2'b01 ) |
({2{ qwen[1] & ~exu_flush_final}} & 2'b10 ) |
({2{ qwen[2] & ~exu_flush_final}} & 2'b00 ) |
({2{~ifvalid & ~exu_flush_final}} & wrptr[1:0]);
assign q2off_in = ( ~qwen[2] & (rdptr[1:0]==2'd2) & (q2off | f0_shift_2B) ) |
( ~qwen[2] & (rdptr[1:0]==2'd1) & (q2off | f1_shift_2B) ) |
( ~qwen[2] & (rdptr[1:0]==2'd0) & q2off );
assign q1off_in = ( ~qwen[1] & (rdptr[1:0]==2'd1) & (q1off | f0_shift_2B) ) |
( ~qwen[1] & (rdptr[1:0]==2'd0) & (q1off | f1_shift_2B) ) |
( ~qwen[1] & (rdptr[1:0]==2'd2) & q1off );
assign q0off_in = ( ~qwen[0] & (rdptr[1:0]==2'd0) & (q0off | f0_shift_2B) ) |
( ~qwen[0] & (rdptr[1:0]==2'd2) & (q0off | f1_shift_2B) ) |
( ~qwen[0] & (rdptr[1:0]==2'd1) & q0off );
assign q0ptr = ( (rdptr[1:0]==2'b00) & q0off ) |
( (rdptr[1:0]==2'b01) & q1off ) |
( (rdptr[1:0]==2'b10) & q2off );
assign q1ptr = ( (rdptr[1:0]==2'b00) & q1off ) |
( (rdptr[1:0]==2'b01) & q2off ) |
( (rdptr[1:0]==2'b10) & q0off );
assign q0sel[1:0] = {q0ptr,~q0ptr};
assign q1sel[1:0] = {q1ptr,~q1ptr};
// end new queue control logic
// misc data that is associated with each fetch buffer
if(pt.BTB_ENABLE==1)
assign misc_data_in[MHI:0] = {
ic_access_fault_type_f[1:0],
ifu_bp_btb_target_f[31:1],
ifu_bp_poffset_f[11:0],
ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0]
};
else
assign misc_data_in[MHI:0] = {
ic_access_fault_type_f[1:0]
};
assign {misc1eff[MHI:0],misc0eff[MHI:0]} = (({MSIZE*2{qren[0]}} & {misc1[MHI:0],misc0[MHI:0]}) |
({MSIZE*2{qren[1]}} & {misc2[MHI:0],misc1[MHI:0]}) |
({MSIZE*2{qren[2]}} & {misc0[MHI:0],misc2[MHI:0]}));
if(pt.BTB_ENABLE==1) begin
assign {
f1ictype[1:0],
f1prett[31:1],
f1poffset[11:0],
f1fghr[pt.BHT_GHR_SIZE-1:0]
} = misc1eff[MHI:0];
assign {
f0ictype[1:0],
f0prett[31:1],
f0poffset[11:0],
f0fghr[pt.BHT_GHR_SIZE-1:0]
} = misc0eff[MHI:0];
if(pt.BTB_FULLYA) begin
assign brdata_in[BRDATA_SIZE-1:0] = {
ifu_bp_fa_index_f[1], iccm_rd_ecc_double_err[1],ic_access_fault_f[1],ifu_bp_hist1_f[1],ifu_bp_hist0_f[1],ifu_bp_pc4_f[1],ifu_bp_way_f[1],ifu_bp_valid_f[1],ifu_bp_ret_f[1],
ifu_bp_fa_index_f[0], iccm_rd_ecc_double_err[0],ic_access_fault_f[0],ifu_bp_hist1_f[0],ifu_bp_hist0_f[0],ifu_bp_pc4_f[0],ifu_bp_way_f[0],ifu_bp_valid_f[0],ifu_bp_ret_f[0]
};
assign {f0index[1],f0dbecc[1],f0icaf[1],f0hist1[1],f0hist0[1],f0pc4[1],f0way[1],f0brend[1],f0ret[1],
f0index[0],f0dbecc[0],f0icaf[0],f0hist1[0],f0hist0[0],f0pc4[0],f0way[0],f0brend[0],f0ret[0]} = brdata0final[BRDATA_SIZE-1:0];
assign {f1index[1],f1dbecc[1],f1icaf[1],f1hist1[1],f1hist0[1],f1pc4[1],f1way[1],f1brend[1],f1ret[1],
f1index[0],f1dbecc[0],f1icaf[0],f1hist1[0],f1hist0[0],f1pc4[0],f1way[0],f1brend[0],f1ret[0]} = brdata1final[BRDATA_SIZE-1:0];
end
else begin
assign brdata_in[BRDATA_SIZE-1:0] = {
iccm_rd_ecc_double_err[1],ic_access_fault_f[1],ifu_bp_hist1_f[1],ifu_bp_hist0_f[1],ifu_bp_pc4_f[1],ifu_bp_way_f[1],ifu_bp_valid_f[1],ifu_bp_ret_f[1],
iccm_rd_ecc_double_err[0],ic_access_fault_f[0],ifu_bp_hist1_f[0],ifu_bp_hist0_f[0],ifu_bp_pc4_f[0],ifu_bp_way_f[0],ifu_bp_valid_f[0],ifu_bp_ret_f[0]
};
assign {f0dbecc[1],f0icaf[1],f0hist1[1],f0hist0[1],f0pc4[1],f0way[1],f0brend[1],f0ret[1],
f0dbecc[0],f0icaf[0],f0hist1[0],f0hist0[0],f0pc4[0],f0way[0],f0brend[0],f0ret[0]} = brdata0final[BRDATA_SIZE-1:0];
assign {f1dbecc[1],f1icaf[1],f1hist1[1],f1hist0[1],f1pc4[1],f1way[1],f1brend[1],f1ret[1],
f1dbecc[0],f1icaf[0],f1hist1[0],f1hist0[0],f1pc4[0],f1way[0],f1brend[0],f1ret[0]} = brdata1final[BRDATA_SIZE-1:0];
end
assign {brdata1eff[BRDATA_SIZE-1:0],brdata0eff[BRDATA_SIZE-1:0]} = (({BRDATA_SIZE*2{qren[0]}} & {brdata1[BRDATA_SIZE-1:0],brdata0[BRDATA_SIZE-1:0]}) |
({BRDATA_SIZE*2{qren[1]}} & {brdata2[BRDATA_SIZE-1:0],brdata1[BRDATA_SIZE-1:0]}) |
({BRDATA_SIZE*2{qren[2]}} & {brdata0[BRDATA_SIZE-1:0],brdata2[BRDATA_SIZE-1:0]}));
assign brdata0final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q0sel[0]}} & { brdata0eff[2*BRDATA_WIDTH-1:0]}) |
({BRDATA_SIZE{q0sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:BRDATA_WIDTH]}));
assign brdata1final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q1sel[0]}} & { brdata1eff[2*BRDATA_WIDTH-1:0]}) |
({BRDATA_SIZE{q1sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:BRDATA_WIDTH]}));
end // if (pt.BTB_ENABLE==1)
else begin
assign {
f1ictype[1:0]
} = misc1eff[MHI:0];
assign {
f0ictype[1:0]
} = misc0eff[MHI:0];
assign brdata_in[BRDATA_SIZE-1:0] = {
iccm_rd_ecc_double_err[1],ic_access_fault_f[1],
iccm_rd_ecc_double_err[0],ic_access_fault_f[0]
};
assign {f0dbecc[1],f0icaf[1],
f0dbecc[0],f0icaf[0]} = brdata0final[BRDATA_SIZE-1:0];
assign {f1dbecc[1],f1icaf[1],
f1dbecc[0],f1icaf[0]} = brdata1final[BRDATA_SIZE-1:0];
assign {brdata1eff[BRDATA_SIZE-1:0],brdata0eff[BRDATA_SIZE-1:0]} = (({BRDATA_SIZE*2{qren[0]}} & {brdata1[BRDATA_SIZE-1:0],brdata0[BRDATA_SIZE-1:0]}) |
({BRDATA_SIZE*2{qren[1]}} & {brdata2[BRDATA_SIZE-1:0],brdata1[BRDATA_SIZE-1:0]}) |
({BRDATA_SIZE*2{qren[2]}} & {brdata0[BRDATA_SIZE-1:0],brdata2[BRDATA_SIZE-1:0]}));
assign brdata0final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q0sel[0]}} & { brdata0eff[2*BRDATA_WIDTH-1:0]}) |
({BRDATA_SIZE{q0sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:BRDATA_WIDTH]}));
assign brdata1final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q1sel[0]}} & { brdata1eff[2*BRDATA_WIDTH-1:0]}) |
({BRDATA_SIZE{q1sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:BRDATA_WIDTH]}));
end // else: !if(pt.BTB_ENABLE==1)
// possible states of { sf0_valid, sf1_valid, f2_valid }
//
// 000 if->f0
// 100 if->f1
// 101 illegal
// 010 if->f1, f1->f0
// 110 if->f2
// 001 if->f1, f2->f0
// 011 if->f2, f2->f1, f1->f0
// 111 !if, no shift
assign f2_valid = f2val[0];
assign sf1_valid = sf1val[0];
assign sf0_valid = sf0val[0];
// interface to fetch
assign consume_fb0 = ~sf0val[0] & f0val[0];
assign consume_fb1 = ~sf1val[0] & f1val[0];
assign ifu_fb_consume1 = consume_fb0 & ~consume_fb1 & ~exu_flush_final;
assign ifu_fb_consume2 = consume_fb0 & consume_fb1 & ~exu_flush_final;
assign ifvalid = ifu_fetch_val[0];
assign shift_f1_f0 = ~sf0_valid & sf1_valid;
assign shift_f2_f0 = ~sf0_valid & ~sf1_valid & f2_valid;
assign shift_f2_f1 = ~sf0_valid & sf1_valid & f2_valid;
assign fetch_to_f0 = ~sf0_valid & ~sf1_valid & ~f2_valid & ifvalid;
assign fetch_to_f1 = (~sf0_valid & ~sf1_valid & f2_valid & ifvalid) |
(~sf0_valid & sf1_valid & ~f2_valid & ifvalid) |
( sf0_valid & ~sf1_valid & ~f2_valid & ifvalid);
assign fetch_to_f2 = (~sf0_valid & sf1_valid & f2_valid & ifvalid) |
( sf0_valid & sf1_valid & ~f2_valid & ifvalid);
assign f2val_in[1:0] = ({2{ fetch_to_f2 & ~exu_flush_final}} & ifu_fetch_val[1:0]) |
({2{~fetch_to_f2 & ~shift_f2_f1 & ~shift_f2_f0 & ~exu_flush_final}} & f2val[1:0] );
assign sf1val[1:0] = ({2{ f1_shift_2B}} & {1'b0,f1val[1]}) |
({2{~f1_shift_2B}} & f1val[1:0] );
assign f1val_in[1:0] = ({2{ fetch_to_f1 & ~exu_flush_final}} & ifu_fetch_val[1:0]) |
({2{ shift_f2_f1 & ~exu_flush_final}} & f2val[1:0] ) |
({2{~fetch_to_f1 & ~shift_f2_f1 & ~shift_f1_f0 & ~exu_flush_final}} & sf1val[1:0] );
assign sf0val[1:0] = ({2{ shift_2B }} & {1'b0,f0val[1]}) |
({2{~shift_2B & ~shift_4B}} & f0val[1:0]);
assign f0val_in[1:0] = ({2{fetch_to_f0 & ~exu_flush_final}} & ifu_fetch_val[1:0]) |
({2{ shift_f2_f0 & ~exu_flush_final}} & f2val[1:0] ) |
({2{ shift_f1_f0 & ~exu_flush_final}} & sf1val[1:0] ) |
({2{~fetch_to_f0 & ~shift_f2_f0 & ~shift_f1_f0 & ~exu_flush_final}} & sf0val[1:0] );
assign {q1eff[31:0],q0eff[31:0]} = (({64{qren[0]}} & {q1[31:0],q0[31:0]}) |
({64{qren[1]}} & {q2[31:0],q1[31:0]}) |
({64{qren[2]}} & {q0[31:0],q2[31:0]}));
assign q0final[31:0] = ({32{q0sel[0]}} & { q0eff[31:0]}) |
({32{q0sel[1]}} & {16'b0,q0eff[31:16]});
assign q1final[15:0] = ({16{q1sel[0]}} & q1eff[15:0] ) |
({16{q1sel[1]}} & q1eff[31:16]);
logic [31:1] q0pceff, q0pcfinal;
logic [31:1] q1pceff;
assign {q1pceff[31:1],q0pceff[31:1]} = (({62{qren[0]}} & {q1pc[31:1],q0pc[31:1]}) |
({62{qren[1]}} & {q2pc[31:1],q1pc[31:1]}) |
({62{qren[2]}} & {q0pc[31:1],q2pc[31:1]}));
assign q0pcfinal[31:1] = ({31{q0sel[0]}} & ( q0pceff[31:1])) |
({31{q0sel[1]}} & ( q0pceff[31:1] + 31'd1));
assign aligndata[31:0] = ({32{ f0val[1] }} & {q0final[31:0]}) |
({32{~f0val[1] & f0val[0]}} & {q1final[15:0],q0final[15:0]});
assign alignval[1:0] = ({ 2{ f0val[1] }} & {2'b11}) |
({ 2{~f0val[1] & f0val[0]}} & {f1val[0],1'b1});
assign alignicaf[1:0] = ({ 2{ f0val[1] }} & f0icaf[1:0] ) |
({ 2{~f0val[1] & f0val[0]}} & {f1icaf[0],f0icaf[0]});
assign aligndbecc[1:0] = ({ 2{ f0val[1] }} & f0dbecc[1:0] ) |
({ 2{~f0val[1] & f0val[0]}} & {f1dbecc[0],f0dbecc[0]});
if (pt.BTB_ENABLE==1) begin
// for branch prediction
assign alignbrend[1:0] = ({ 2{ f0val[1] }} & f0brend[1:0] ) |
({ 2{~f0val[1] & f0val[0]}} & {f1brend[0],f0brend[0]});
assign alignpc4[1:0] = ({ 2{ f0val[1] }} & f0pc4[1:0] ) |
({ 2{~f0val[1] & f0val[0]}} & {f1pc4[0],f0pc4[0]});
if(pt.BTB_FULLYA) begin
assign alignindex[0] = f0index[0];
assign alignindex[1] = f0val[1] ? f0index[1] : f1index[0];
end
assign alignret[1:0] = ({ 2{ f0val[1] }} & f0ret[1:0] ) |
({ 2{~f0val[1] & f0val[0]}} & {f1ret[0],f0ret[0]});
assign alignway[1:0] = ({ 2{ f0val[1] }} & f0way[1:0] ) |
({ 2{~f0val[1] & f0val[0]}} & {f1way[0],f0way[0]});
assign alignhist1[1:0] = ({ 2{ f0val[1] }} & f0hist1[1:0] ) |
({ 2{~f0val[1] & f0val[0]}} & {f1hist1[0],f0hist1[0]});
assign alignhist0[1:0] = ({ 2{ f0val[1] }} & f0hist0[1:0] ) |
({ 2{~f0val[1] & f0val[0]}} & {f1hist0[0],f0hist0[0]});
assign secondpc[31:1] = ({31{ f0val[1] }} & (q0pceff[31:1] + 31'd1)) |
// you need the base pc for 2nd one only (4B max, 2B for the 1st and 2B for the 2nd)
({31{~f0val[1] & f0val[0]}} & q1pceff[31:1] );
assign firstpc[31:1] = q0pcfinal[31:1];
end // if (pt.BTB_ENABLE==1)
assign alignfromf1[1] = ~f0val[1] & f0val[0];
assign ifu_i0_pc[31:1] = q0pcfinal[31:1];
assign ifu_i0_pc4 = first4B;
assign ifu_i0_cinst[15:0] = aligndata[15:0];
assign first4B = (aligndata[1:0] == 2'b11);
assign first2B = ~first4B;
assign ifu_i0_valid = (first4B & alignval[1]) |
(first2B & alignval[0]);
// inst access fault on any byte of inst results in access fault for the inst
assign ifu_i0_icaf = (first4B & (|alignicaf[1:0])) |
(first2B & alignicaf[0] );
assign ifu_i0_icaf_type[1:0] = (first4B & ~f0val[1] & f0val[0] & ~alignicaf[0] & ~aligndbecc[0]) ? f1ictype[1:0] : f0ictype[1:0];
assign icaf_eff[1:0] = alignicaf[1:0] | aligndbecc[1:0];
assign ifu_i0_icaf_second = first4B & ~icaf_eff[0] & icaf_eff[1];
assign ifu_i0_dbecc = (first4B & (|aligndbecc[1:0])) |
(first2B & aligndbecc[0] );
assign ifirst[31:0] = aligndata[31:0];
assign ifu_i0_instr[31:0] = ({32{first4B & alignval[1]}} & ifirst[31:0]) |
({32{first2B & alignval[0]}} & uncompress0[31:0]);
if(pt.BTB_ENABLE==1) begin
// if you detect br does not start on instruction boundary
el2_btb_addr_hash #(.pt(pt)) firsthash (.pc(firstpc [pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]),
.hash(firstpc_hash [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
el2_btb_addr_hash #(.pt(pt)) secondhash(.pc(secondpc[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]),
.hash(secondpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
if(pt.BTB_FULLYA) begin
assign firstbrtag_hash = firstpc;
assign secondbrtag_hash = secondpc;
end
else begin
if(pt.BTB_BTAG_FOLD) begin : btbfold
el2_btb_tag_hash_fold #(.pt(pt)) first_brhash (.pc(firstpc [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]),
.hash(firstbrtag_hash [pt.BTB_BTAG_SIZE-1:0]));
el2_btb_tag_hash_fold #(.pt(pt)) second_brhash(.pc(secondpc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]),
.hash(secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0]));
end
else begin
el2_btb_tag_hash #(.pt(pt)) first_brhash (.pc(firstpc [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]),
.hash(firstbrtag_hash [pt.BTB_BTAG_SIZE-1:0]));
el2_btb_tag_hash #(.pt(pt)) second_brhash(.pc(secondpc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]),
.hash(secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0]));
end
end // else: !if(pt.BTB_FULLYA)
// start_indexing - you want pc to be based on where the end of branch is prediction
// normal indexing pc based that's incorrect now for pc4 cases it's pc4 + 2
always_comb begin
i0_brp = '0;
i0_br_start_error = (first4B & alignval[1] & alignbrend[0]);
i0_brp.valid = (first2B & alignbrend[0]) |
(first4B & alignbrend[1]) |
i0_br_start_error;
i0_brp_pc4 = (first2B & alignpc4[0]) |
(first4B & alignpc4[1]);
i0_brp.ret = (first2B & alignret[0]) |
(first4B & alignret[1]);
i0_brp.way = (first2B | alignbrend[0]) ? alignway[0] : alignway[1];
i0_brp.hist[1] = (first2B & alignhist1[0]) |
(first4B & alignhist1[1]);
i0_brp.hist[0] = (first2B & alignhist0[0]) |
(first4B & alignhist0[1]);
i0_ends_f1 = first4B & alignfromf1[1];
i0_brp.toffset[11:0] = (i0_ends_f1) ? f1poffset[11:0] : f0poffset[11:0];
i0_brp.prett[31:1] = (i0_ends_f1) ? f1prett[31:1] : f0prett[31:1];
i0_brp.br_start_error = i0_br_start_error;
i0_brp.bank = (first2B | alignbrend[0]) ? firstpc[1] : secondpc[1];
i0_brp.br_error = (i0_brp.valid & i0_brp_pc4 & first2B) |
(i0_brp.valid & ~i0_brp_pc4 & first4B);
if(pt.BTB_FULLYA)
ifu_i0_fa_index = (first2B | alignbrend[0]) ? alignindex[0] : alignindex[1];
else
ifu_i0_fa_index = '0;
end
assign ifu_i0_bp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = (first2B | alignbrend[0]) ? firstpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] :
secondpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
assign ifu_i0_bp_fghr[pt.BHT_GHR_SIZE-1:0] = (i0_ends_f1) ? f1fghr[pt.BHT_GHR_SIZE-1:0] :
f0fghr[pt.BHT_GHR_SIZE-1:0];
assign ifu_i0_bp_btag[pt.BTB_BTAG_SIZE-1:0] = (first2B | alignbrend[0]) ? firstbrtag_hash[pt.BTB_BTAG_SIZE-1:0] :
secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0];
end
else begin
assign i0_brp = '0;
assign ifu_i0_bp_index = '0;
assign ifu_i0_bp_fghr = '0;
assign ifu_i0_bp_btag = '0;
end // else: !if(pt.BTB_ENABLE==1)
// decompress
// quiet inputs for 4B inst
el2_ifu_compress_ctl compress0 (.din((first2B) ? aligndata[15:0] : '0), .dout(uncompress0[31:0]));
assign i0_shift = dec_i0_decode_d & ~error_stall;
assign ifu_pmu_instr_aligned = i0_shift;
// compute how many bytes are being shifted from f0
assign shift_2B = i0_shift & first2B;
assign shift_4B = i0_shift & first4B;
// exact equations for the queue logic
assign f0_shift_2B = (shift_2B & f0val[0] ) |
(shift_4B & f0val[0] & ~f0val[1]);
// f0 valid states
// 11
// 10
// 00
assign f1_shift_2B = f0val[0] & ~f0val[1] & shift_4B;
endmodule

View File

@ -0,0 +1,898 @@
//********************************************************************************
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
//********************************************************************************
// Function: Branch predictor
// Comments:
//
//
// Bank3 : Bank2 : Bank1 : Bank0
// FA C 8 4 0
//********************************************************************************
module el2_ifu_bp_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk,
input logic rst_l,
input logic ic_hit_f, // Icache hit, enables F address capture
input logic [31:1] ifc_fetch_addr_f, // look up btb address
input logic ifc_fetch_req_f, // F1 valid
input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // BP commit update packet, includes errors
input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index
input logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associative btb error index
input logic dec_tlu_flush_lower_wb, // used to move EX4 RS to EX1 and F
input logic dec_tlu_flush_leak_one_wb, // don't hit for leak one fetches
input logic dec_tlu_bpred_disable, // disable all branch prediction
input el2_predict_pkt_t exu_mp_pkt, // mispredict packet
input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr (for patching fghr)
input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index
input logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag
input logic exu_flush_final, // all flushes
output logic ifu_bp_hit_taken_f, // btb hit, select target
output logic [31:1] ifu_bp_btb_target_f, // predicted target PC
output logic ifu_bp_inst_mask_f, // tell ic which valids to kill because of a taken branch, right justified
output logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f, // fetch ghr
output logic [1:0] ifu_bp_way_f, // way
output logic [1:0] ifu_bp_ret_f, // predicted ret
output logic [1:0] ifu_bp_hist1_f, // history counters for all 4 potential branches, bit 1, right justified
output logic [1:0] ifu_bp_hist0_f, // history counters for all 4 potential branches, bit 0, right justified
output logic [1:0] ifu_bp_pc4_f, // pc4 indication, right justified
output logic [1:0] ifu_bp_valid_f, // branch valid, right justified
output logic [11:0] ifu_bp_poffset_f, // predicted target
output logic [1:0] [$clog2(pt.BTB_SIZE)-1:0] ifu_bp_fa_index_f, // predicted branch index (fully associative option)
input logic scan_mode
);
localparam BTB_DWIDTH = pt.BTB_TOFFSET_SIZE+pt.BTB_BTAG_SIZE+5;
localparam BTB_DWIDTH_TOP = int'(pt.BTB_TOFFSET_SIZE)+int'(pt.BTB_BTAG_SIZE)+4;
localparam BTB_FA_INDEX = $clog2(pt.BTB_SIZE)-1;
localparam FA_CMP_LOWER = $clog2(pt.ICACHE_LN_SZ);
localparam FA_TAG_END_UPPER= 5+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER)-1; // must cast to int or vcs build fails
localparam FA_TAG_START_LOWER = 3+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER);
localparam FA_TAG_END_LOWER = 5+int'(pt.BTB_TOFFSET_SIZE);
localparam TAG_START=BTB_DWIDTH-1;
localparam PC4=4;
localparam BOFF=3;
localparam CALL=2;
localparam RET=1;
localparam BV=0;
localparam LRU_SIZE=pt.BTB_ARRAY_DEPTH;
localparam NUM_BHT_LOOP = (pt.BHT_ARRAY_DEPTH > 16 ) ? 16 : pt.BHT_ARRAY_DEPTH;
localparam NUM_BHT_LOOP_INNER_HI = (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+3 : pt.BHT_ADDR_HI;
localparam NUM_BHT_LOOP_OUTER_LO = (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+4 : pt.BHT_ADDR_LO;
localparam BHT_NO_ADDR_MATCH = ( pt.BHT_ARRAY_DEPTH <= 16 );
logic exu_mp_valid_write;
logic exu_mp_ataken;
logic exu_mp_valid; // conditional branch mispredict
logic exu_mp_boffset; // branch offsett
logic exu_mp_pc4; // branch is a 4B inst
logic exu_mp_call; // branch is a call inst
logic exu_mp_ret; // branch is a ret inst
logic exu_mp_ja; // branch is a jump always
logic [1:0] exu_mp_hist; // new history
logic [11:0] exu_mp_tgt; // target offset
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address
logic dec_tlu_br0_v_wb; // WB stage history update
logic [1:0] dec_tlu_br0_hist_wb; // new history
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_tlu_br0_addr_wb; // addr
logic dec_tlu_br0_error_wb; // error; invalidate bank
logic dec_tlu_br0_start_error_wb; // error; invalidate all 4 banks in fg
logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_wb;
logic use_mp_way, use_mp_way_p1;
logic [pt.RET_STACK_SIZE-1:0][31:0] rets_out, rets_in;
logic [pt.RET_STACK_SIZE-1:0] rsenable;
logic [11:0] btb_rd_tgt_f;
logic btb_rd_pc4_f, btb_rd_call_f, btb_rd_ret_f;
logic [1:1] bp_total_branch_offset_f;
logic [31:1] bp_btb_target_adder_f;
logic [31:1] bp_rs_call_target_f;
logic rs_push, rs_pop, rs_hold;
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_p1_f, btb_wr_addr, btb_rd_addr_f;
logic [pt.BTB_BTAG_SIZE-1:0] btb_wr_tag, fetch_rd_tag_f, fetch_rd_tag_p1_f;
logic [BTB_DWIDTH-1:0] btb_wr_data;
logic btb_wr_en_way0, btb_wr_en_way1;
logic dec_tlu_error_wb, btb_valid, dec_tlu_br0_middle_wb;
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_error_addr_wb;
logic branch_error_collision_f, fetch_mp_collision_f, branch_error_collision_p1_f, fetch_mp_collision_p1_f;
logic branch_error_bank_conflict_f;
logic [pt.BHT_GHR_SIZE-1:0] merged_ghr, fghr_ns, fghr;
logic [1:0] num_valids;
logic [LRU_SIZE-1:0] btb_lru_b0_f, btb_lru_b0_hold, btb_lru_b0_ns,
fetch_wrindex_dec, fetch_wrindex_p1_dec, fetch_wrlru_b0, fetch_wrlru_p1_b0,
mp_wrindex_dec, mp_wrlru_b0;
logic btb_lru_rd_f, btb_lru_rd_p1_f, lru_update_valid_f;
logic tag_match_way0_f, tag_match_way1_f;
logic [1:0] way_raw, bht_dir_f, btb_sel_f, wayhit_f, vwayhit_f, wayhit_p1_f;
logic [1:0] bht_valid_f, bht_force_taken_f;
logic leak_one_f, leak_one_f_d1;
logic [LRU_SIZE-1:0][BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_out ;
logic [LRU_SIZE-1:0][BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_out ;
logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_f ;
logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_f ;
logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_p1_f ;
logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_p1_f ;
logic [BTB_DWIDTH-1:0] btb_vbank0_rd_data_f, btb_vbank1_rd_data_f;
logic final_h;
logic btb_fg_crossing_f;
logic middle_of_bank;
logic [1:0] bht_vbank0_rd_data_f, bht_vbank1_rd_data_f;
logic branch_error_bank_conflict_p1_f;
logic tag_match_way0_p1_f, tag_match_way1_p1_f;
logic [1:0] btb_vlru_rd_f, fetch_start_f, tag_match_vway1_expanded_f, tag_match_way0_expanded_p1_f, tag_match_way1_expanded_p1_f;
logic [31:2] fetch_addr_p1_f;
logic exu_mp_way, exu_mp_way_f, dec_tlu_br0_way_wb, dec_tlu_way_wb;
logic [BTB_DWIDTH-1:0] btb_bank0e_rd_data_f, btb_bank0e_rd_data_p1_f;
logic [BTB_DWIDTH-1:0] btb_bank0o_rd_data_f;
logic [1:0] tag_match_way0_expanded_f, tag_match_way1_expanded_f;
logic [1:0] bht_bank0_rd_data_f;
logic [1:0] bht_bank1_rd_data_f;
logic [1:0] bht_bank0_rd_data_p1_f;
genvar j, i;
assign exu_mp_valid = exu_mp_pkt.misp & ~leak_one_f; // conditional branch mispredict
assign exu_mp_boffset = exu_mp_pkt.boffset; // branch offset
assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst
assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst
assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst
assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always
assign exu_mp_way = exu_mp_pkt.way; // repl way
assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history
assign exu_mp_tgt[11:0] = exu_mp_pkt.toffset[11:0] ; // target offset
assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ; // BTB/BHT address
assign exu_mp_ataken = exu_mp_pkt.ataken;
assign dec_tlu_br0_v_wb = dec_tlu_br0_r_pkt.valid;
assign dec_tlu_br0_hist_wb[1:0] = dec_tlu_br0_r_pkt.hist[1:0];
assign dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
assign dec_tlu_br0_error_wb = dec_tlu_br0_r_pkt.br_error;
assign dec_tlu_br0_middle_wb = dec_tlu_br0_r_pkt.middle;
assign dec_tlu_br0_way_wb = dec_tlu_br0_r_pkt.way;
assign dec_tlu_br0_start_error_wb = dec_tlu_br0_r_pkt.br_start_error;
assign exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0] = exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0];
// ----------------------------------------------------------------------
// READ
// ----------------------------------------------------------------------
// hash the incoming fetch PC, first guess at hashing algorithm
el2_btb_addr_hash #(.pt(pt)) f1hash(.pc(ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
assign fetch_addr_p1_f[31:2] = ifc_fetch_addr_f[31:2] + 30'b1;
el2_btb_addr_hash #(.pt(pt)) f1hash_p1(.pc(fetch_addr_p1_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
assign btb_sel_f[1] = ~bht_dir_f[0];
assign btb_sel_f[0] = bht_dir_f[0];
assign fetch_start_f[1:0] = {ifc_fetch_addr_f[1], ~ifc_fetch_addr_f[1]};
// Errors colliding with fetches must kill the btb/bht hit.
assign branch_error_collision_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]);
assign branch_error_collision_p1_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]);
assign branch_error_bank_conflict_f = branch_error_collision_f & dec_tlu_error_wb;
assign branch_error_bank_conflict_p1_f = branch_error_collision_p1_f & dec_tlu_error_wb;
// set on leak one, hold until next flush without leak one
assign leak_one_f = (dec_tlu_flush_leak_one_wb & dec_tlu_flush_lower_wb) | (leak_one_f_d1 & ~dec_tlu_flush_lower_wb);
logic exu_flush_final_d1;
if(!pt.BTB_FULLYA) begin
assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
exu_mp_valid & ifc_fetch_req_f &
(exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])
);
assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
exu_mp_valid & ifc_fetch_req_f &
(exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])
);
// 2 -way SA, figure out the way hit and mux accordingly
assign tag_match_way0_f = btb_bank0_rd_data_way0_f[BV] & (btb_bank0_rd_data_way0_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
~(dec_tlu_way_wb & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f;
assign tag_match_way1_f = btb_bank0_rd_data_way1_f[BV] & (btb_bank0_rd_data_way1_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
~(dec_tlu_way_wb & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f;
assign tag_match_way0_p1_f = btb_bank0_rd_data_way0_p1_f[BV] & (btb_bank0_rd_data_way0_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
~(dec_tlu_way_wb & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f;
assign tag_match_way1_p1_f = btb_bank0_rd_data_way1_p1_f[BV] & (btb_bank0_rd_data_way1_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
~(dec_tlu_way_wb & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f;
// Both ways could hit, use the offset bit to reorder
assign tag_match_way0_expanded_f[1:0] = {tag_match_way0_f & (btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4]),
tag_match_way0_f & ~(btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4])};
assign tag_match_way1_expanded_f[1:0] = {tag_match_way1_f & (btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4]),
tag_match_way1_f & ~(btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4])};
assign tag_match_way0_expanded_p1_f[1:0] = {tag_match_way0_p1_f & (btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4]),
tag_match_way0_p1_f & ~(btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4])};
assign tag_match_way1_expanded_p1_f[1:0] = {tag_match_way1_p1_f & (btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4]),
tag_match_way1_p1_f & ~(btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4])};
assign wayhit_f[1:0] = tag_match_way0_expanded_f[1:0] | tag_match_way1_expanded_f[1:0];
assign wayhit_p1_f[1:0] = tag_match_way0_expanded_p1_f[1:0] | tag_match_way1_expanded_p1_f[1:0];
assign btb_bank0o_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[1]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) |
({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[1]}} & btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0]) );
assign btb_bank0e_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[0]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) |
({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[0]}} & btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0]) );
assign btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_p1_f[0]}} & btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0]) |
({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_p1_f[0]}} & btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0]) );
// virtual bank order
assign btb_vbank0_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} & btb_bank0e_rd_data_f[BTB_DWIDTH-1:0]) |
({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} & btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) );
assign btb_vbank1_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} & btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) |
({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} & btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0]) );
assign way_raw[1:0] = tag_match_vway1_expanded_f[1:0] | (~vwayhit_f[1:0] & btb_vlru_rd_f[1:0]);
// --------------------------------------------------------------------------------
// --------------------------------------------------------------------------------
// update lru
// mp
// create a onehot lru write vector
assign mp_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
// fetch
assign fetch_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
assign fetch_wrindex_p1_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
assign mp_wrlru_b0[LRU_SIZE-1:0] = mp_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{exu_mp_valid}};
assign btb_lru_b0_hold[LRU_SIZE-1:0] = ~mp_wrlru_b0[LRU_SIZE-1:0] & ~fetch_wrlru_b0[LRU_SIZE-1:0];
// Forward the mp lru information to the fetch, avoids multiple way hits later
assign use_mp_way = fetch_mp_collision_f;
assign use_mp_way_p1 = fetch_mp_collision_p1_f;
assign lru_update_valid_f = (vwayhit_f[0] | vwayhit_f[1]) & ifc_fetch_req_f & ~leak_one_f;
assign fetch_wrlru_b0[LRU_SIZE-1:0] = fetch_wrindex_dec[LRU_SIZE-1:0] &
{LRU_SIZE{lru_update_valid_f}};
assign fetch_wrlru_p1_b0[LRU_SIZE-1:0] = fetch_wrindex_p1_dec[LRU_SIZE-1:0] &
{LRU_SIZE{lru_update_valid_f}};
assign btb_lru_b0_ns[LRU_SIZE-1:0] = ( (btb_lru_b0_hold[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]) |
(mp_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{~exu_mp_way}}) |
(fetch_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_f}}) |
(fetch_wrlru_p1_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_p1_f}}) );
assign btb_lru_rd_f = use_mp_way ? exu_mp_way_f : |(fetch_wrindex_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]);
assign btb_lru_rd_p1_f = use_mp_way_p1 ? exu_mp_way_f : |(fetch_wrindex_p1_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]);
// rotated
assign btb_vlru_rd_f[1:0] = ( ({2{fetch_start_f[0]}} & {btb_lru_rd_f, btb_lru_rd_f}) |
({2{fetch_start_f[1]}} & {btb_lru_rd_p1_f, btb_lru_rd_f}));
assign tag_match_vway1_expanded_f[1:0] = ( ({2{fetch_start_f[0]}} & {tag_match_way1_expanded_f[1:0]}) |
({2{fetch_start_f[1]}} & {tag_match_way1_expanded_p1_f[0], tag_match_way1_expanded_f[1]}) );
rvdffe #(LRU_SIZE) btb_lru_ff (.*, .en(ifc_fetch_req_f | exu_mp_valid),
.din(btb_lru_b0_ns[(LRU_SIZE)-1:0]),
.dout(btb_lru_b0_f[(LRU_SIZE)-1:0]));
end // if (!pt.BTB_FULLYA)
// Detect end of cache line and mask as needed
logic eoc_near;
logic eoc_mask;
assign eoc_near = &ifc_fetch_addr_f[pt.ICACHE_BEAT_ADDR_HI:3];
assign eoc_mask = ~eoc_near| (|(~ifc_fetch_addr_f[2:1]));
// --------------------------------------------------------------------------------
// --------------------------------------------------------------------------------
// mux out critical hit bank for pc computation
// This is only useful for the first taken branch in the fetch group
logic [16:1] btb_sel_data_f;
assign btb_rd_tgt_f[11:0] = btb_sel_data_f[16:5];
assign btb_rd_pc4_f = btb_sel_data_f[4];
assign btb_rd_call_f = btb_sel_data_f[2];
assign btb_rd_ret_f = btb_sel_data_f[1];
assign btb_sel_data_f[16:1] = ( ({16{btb_sel_f[1]}} & btb_vbank1_rd_data_f[16:1]) |
({16{btb_sel_f[0]}} & btb_vbank0_rd_data_f[16:1]) );
logic [1:0] hist0_raw, hist1_raw, pc4_raw, pret_raw;
// a valid taken target needs to kill the next fetch as we compute the target address
assign ifu_bp_hit_taken_f = |(vwayhit_f[1:0] & hist1_raw[1:0]) & ifc_fetch_req_f & ~leak_one_f_d1 & ~dec_tlu_bpred_disable;
// Don't put calls/rets/ja in the predictor, force the bht taken instead
assign bht_force_taken_f[1:0] = {(btb_vbank1_rd_data_f[CALL] | btb_vbank1_rd_data_f[RET]),
(btb_vbank0_rd_data_f[CALL] | btb_vbank0_rd_data_f[RET])};
// taken and valid, otherwise, branch errors must clear the bht
assign bht_valid_f[1:0] = vwayhit_f[1:0];
assign bht_vbank0_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank0_rd_data_f[1:0]) |
({2{fetch_start_f[1]}} & bht_bank1_rd_data_f[1:0]) );
assign bht_vbank1_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank1_rd_data_f[1:0]) |
({2{fetch_start_f[1]}} & bht_bank0_rd_data_p1_f[1:0]) );
assign bht_dir_f[1:0] = {(bht_force_taken_f[1] | bht_vbank1_rd_data_f[1]) & bht_valid_f[1],
(bht_force_taken_f[0] | bht_vbank0_rd_data_f[1]) & bht_valid_f[0]};
assign ifu_bp_inst_mask_f = (ifu_bp_hit_taken_f & btb_sel_f[1]) | ~ifu_bp_hit_taken_f;
// Branch prediction info is sent with the 2byte lane associated with the end of the branch.
// Cases
// BANK1 BANK0
// -------------------------------
// | : | : |
// -------------------------------
// <------------> : PC4 branch, offset, should be in B1 (indicated on [2])
// <------------> : PC4 branch, no offset, indicate PC4, VALID, HIST on [1]
// <------------> : PC4 branch, offset, indicate PC4, VALID, HIST on [0]
// <------> : PC2 branch, offset, indicate VALID, HIST on [1]
// <------> : PC2 branch, no offset, indicate VALID, HIST on [0]
//
assign hist1_raw[1:0] = bht_force_taken_f[1:0] | {bht_vbank1_rd_data_f[1],
bht_vbank0_rd_data_f[1]};
assign hist0_raw[1:0] = {bht_vbank1_rd_data_f[0],
bht_vbank0_rd_data_f[0]};
assign pc4_raw[1:0] = {vwayhit_f[1] & btb_vbank1_rd_data_f[PC4],
vwayhit_f[0] & btb_vbank0_rd_data_f[PC4]};
assign pret_raw[1:0] = {vwayhit_f[1] & ~btb_vbank1_rd_data_f[CALL] & btb_vbank1_rd_data_f[RET],
vwayhit_f[0] & ~btb_vbank0_rd_data_f[CALL] & btb_vbank0_rd_data_f[RET]};
// GHR
// count the valids with masking based on first taken
assign num_valids[1:0] = countones(bht_valid_f[1:0]);
// Note that the following property holds
// P: prior ghr, H: history bit of last valid branch in line (could be 1 or 0)
// Num valid branches What new GHR must be
// 2 0H
// 1 PH
// 0 PP
assign final_h = |(btb_sel_f[1:0] & bht_dir_f[1:0]);
assign merged_ghr[pt.BHT_GHR_SIZE-1:0] = (
({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h2}} & {fghr[pt.BHT_GHR_SIZE-3:0], 1'b0, final_h}) | // 0H
({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h1}} & {fghr[pt.BHT_GHR_SIZE-2:0], final_h}) | // PH
({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h0}} & {fghr[pt.BHT_GHR_SIZE-1:0]}) ); // PP
logic [pt.BHT_GHR_SIZE-1:0] exu_flush_ghr;
assign exu_flush_ghr[pt.BHT_GHR_SIZE-1:0] = exu_mp_fghr[pt.BHT_GHR_SIZE-1:0];
assign fghr_ns[pt.BHT_GHR_SIZE-1:0] = ( ({pt.BHT_GHR_SIZE{exu_flush_final_d1}} & exu_flush_ghr[pt.BHT_GHR_SIZE-1:0]) |
({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1}} & merged_ghr[pt.BHT_GHR_SIZE-1:0]) |
({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ~(ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1)}} & fghr[pt.BHT_GHR_SIZE-1:0]));
rvdffie #(.WIDTH(pt.BHT_GHR_SIZE+3),.OVERRIDE(1)) fetchghr (.*,
.din ({exu_flush_final, exu_mp_way, leak_one_f, fghr_ns[pt.BHT_GHR_SIZE-1:0]}),
.dout({exu_flush_final_d1, exu_mp_way_f, leak_one_f_d1, fghr[pt.BHT_GHR_SIZE-1:0]}));
assign ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0] = fghr[pt.BHT_GHR_SIZE-1:0];
assign ifu_bp_way_f[1:0] = way_raw[1:0];
assign ifu_bp_hist1_f[1:0] = hist1_raw[1:0];
assign ifu_bp_hist0_f[1:0] = hist0_raw[1:0];
assign ifu_bp_pc4_f[1:0] = pc4_raw[1:0];
assign ifu_bp_valid_f[1:0] = vwayhit_f[1:0] & ~{2{dec_tlu_bpred_disable}};
assign ifu_bp_ret_f[1:0] = pret_raw[1:0];
// compute target
// Form the fetch group offset based on the btb hit location and the location of the branch within the 4 byte chunk
// .i 5
// .o 3
// .ilb bht_dir_f[1] bht_dir_f[0] fetch_start_f[1] fetch_start_f[0] btb_rd_pc4_f
// .ob bloc_f[1] bloc_f[0] use_fa_plus
// .type fr
//
//
// ## rotdir[1:0] fs pc4 off fapl
// -1 01 - 01 0
// 10 01 - 10 0
//
// -1 10 - 10 0
// 10 10 0 01 1
// 10 10 1 01 0
logic [1:0] bloc_f;
logic use_fa_plus;
assign bloc_f[1] = (bht_dir_f[0] & ~fetch_start_f[0]) | (~bht_dir_f[0]
& fetch_start_f[0]);
assign bloc_f[0] = (bht_dir_f[0] & fetch_start_f[0]) | (~bht_dir_f[0]
& ~fetch_start_f[0]);
assign use_fa_plus = (~bht_dir_f[0] & ~fetch_start_f[0] & ~btb_rd_pc4_f);
assign btb_fg_crossing_f = fetch_start_f[0] & btb_sel_f[0] & btb_rd_pc4_f;
assign bp_total_branch_offset_f = bloc_f[1] ^ btb_rd_pc4_f;
logic [31:2] adder_pc_in_f, ifc_fetch_adder_prior;
rvdfflie #(.WIDTH(30), .LEFT(19)) faddrf_ff (.*, .en(ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f), .din(ifc_fetch_addr_f[31:2]), .dout(ifc_fetch_adder_prior[31:2]));
assign ifu_bp_poffset_f[11:0] = btb_rd_tgt_f[11:0];
assign adder_pc_in_f[31:2] = ( ({30{ use_fa_plus}} & fetch_addr_p1_f[31:2]) |
({30{ btb_fg_crossing_f}} & ifc_fetch_adder_prior[31:2]) |
({30{~btb_fg_crossing_f & ~use_fa_plus}} & ifc_fetch_addr_f[31:2]));
rvbradder predtgt_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}),
.offset(btb_rd_tgt_f[11:0]),
.dout(bp_btb_target_adder_f[31:1])
);
// mux in the return stack address here for a predicted return assuming the RS is valid, quite if no prediction
assign ifu_bp_btb_target_f[31:1] = (({31{btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0] & ifu_bp_hit_taken_f}} & rets_out[0][31:1]) |
({31{~(btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0]) & ifu_bp_hit_taken_f}} & bp_btb_target_adder_f[31:1]) );
// ----------------------------------------------------------------------
// Return Stack
// ----------------------------------------------------------------------
rvbradder rs_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}),
.offset({11'b0, ~btb_rd_pc4_f}),
.dout(bp_rs_call_target_f[31:1])
);
assign rs_push = (btb_rd_call_f & ~btb_rd_ret_f & ifu_bp_hit_taken_f);
assign rs_pop = (btb_rd_ret_f & ~btb_rd_call_f & ifu_bp_hit_taken_f);
assign rs_hold = ~rs_push & ~rs_pop;
// Fetch based (bit 0 is a valid)
assign rets_in[0][31:0] = ( ({32{rs_push}} & {bp_rs_call_target_f[31:1], 1'b1}) | // target[31:1], valid
({32{rs_pop}} & rets_out[1][31:0]) );
assign rsenable[0] = ~rs_hold;
for (i=0; i<pt.RET_STACK_SIZE; i++) begin : retstack
// for the last entry in the stack, we don't have a pop position
if(i==pt.RET_STACK_SIZE-1) begin
assign rets_in[i][31:0] = rets_out[i-1][31:0];
assign rsenable[i] = rs_push;
end
else if(i>0) begin
assign rets_in[i][31:0] = ( ({32{rs_push}} & rets_out[i-1][31:0]) |
({32{rs_pop}} & rets_out[i+1][31:0]) );
assign rsenable[i] = rs_push | rs_pop;
end
rvdffe #(32) rets_ff (.*, .en(rsenable[i]), .din(rets_in[i][31:0]), .dout(rets_out[i][31:0]));
end : retstack
// ----------------------------------------------------------------------
// WRITE
// ----------------------------------------------------------------------
assign dec_tlu_error_wb = dec_tlu_br0_start_error_wb | dec_tlu_br0_error_wb;
assign btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
assign dec_tlu_way_wb = dec_tlu_br0_way_wb;
assign btb_valid = exu_mp_valid & ~dec_tlu_error_wb;
assign btb_wr_tag[pt.BTB_BTAG_SIZE-1:0] = exu_mp_btag[pt.BTB_BTAG_SIZE-1:0];
if(!pt.BTB_FULLYA) begin
if(pt.BTB_BTAG_FOLD) begin : btbfold
el2_btb_tag_hash_fold #(.pt(pt)) rdtagf (.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]),
.pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
el2_btb_tag_hash_fold #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]),
.pc({fetch_addr_p1_f[ pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
end
else begin
el2_btb_tag_hash #(.pt(pt)) rdtagf(.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]),
.pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
el2_btb_tag_hash #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]),
.pc({fetch_addr_p1_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
end
assign btb_wr_en_way0 = ( ({{~exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) |
({{~dec_tlu_way_wb & dec_tlu_error_wb}}));
assign btb_wr_en_way1 = ( ({{exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) |
({{dec_tlu_way_wb & dec_tlu_error_wb}}));
assign btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_error_wb ? btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] : exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
assign vwayhit_f[1:0] = ( ({2{fetch_start_f[0]}} & {wayhit_f[1:0]}) |
({2{fetch_start_f[1]}} & {wayhit_p1_f[0], wayhit_f[1]})) & {eoc_mask, 1'b1};
end // if (!pt.BTB_FULLYA)
assign btb_wr_data[BTB_DWIDTH-1:0] = {btb_wr_tag[pt.BTB_BTAG_SIZE-1:0], exu_mp_tgt[pt.BTB_TOFFSET_SIZE-1:0], exu_mp_pc4, exu_mp_boffset,
exu_mp_call | exu_mp_ja, exu_mp_ret | exu_mp_ja, btb_valid} ;
assign exu_mp_valid_write = exu_mp_valid & exu_mp_ataken & ~exu_mp_pkt.valid;
logic [1:0] bht_wr_data0, bht_wr_data2;
logic [1:0] bht_wr_en0, bht_wr_en2;
assign middle_of_bank = exu_mp_pc4 ^ exu_mp_boffset;
assign bht_wr_en0[1:0] = {2{exu_mp_valid & ~exu_mp_call & ~exu_mp_ret & ~exu_mp_ja}} & {middle_of_bank, ~middle_of_bank};
assign bht_wr_en2[1:0] = {2{dec_tlu_br0_v_wb}} & {dec_tlu_br0_middle_wb, ~dec_tlu_br0_middle_wb} ;
// Experiments show this is the best priority scheme for same bank/index writes at the same time.
assign bht_wr_data0[1:0] = exu_mp_hist[1:0]; // lowest priority
assign bht_wr_data2[1:0] = dec_tlu_br0_hist_wb[1:0]; // highest priority
logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] bht_rd_addr_f, bht_rd_addr_p1_f, bht_wr_addr0, bht_wr_addr2;
logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] mp_hashed, br0_hashed_wb, bht_rd_addr_hashed_f, bht_rd_addr_hashed_p1_f;
el2_btb_ghr_hash #(.pt(pt)) mpghrhs (.hashin(exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_mp_eghr[pt.BHT_GHR_SIZE-1:0]), .hash(mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
el2_btb_ghr_hash #(.pt(pt)) br0ghrhs (.hashin(dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0]), .hash(br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
el2_btb_ghr_hash #(.pt(pt)) fghrhs (.hashin(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
el2_btb_ghr_hash #(.pt(pt)) fghrhs_p1 (.hashin(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
assign bht_wr_addr0[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
assign bht_wr_addr2[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
assign bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
assign bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
// ----------------------------------------------------------------------
// Structures. Using FLOPS
// ----------------------------------------------------------------------
// BTB
// Entry -> tag[pt.BTB_BTAG_SIZE-1:0], toffset[11:0], pc4, boffset, call, ret, valid
if(!pt.BTB_FULLYA) begin
for (j=0 ; j<LRU_SIZE ; j++) begin : BTB_FLOPS
// Way 0
rvdffe #(17+pt.BTB_BTAG_SIZE) btb_bank0_way0 (.*,
.en(((btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == j) & btb_wr_en_way0)),
.din (btb_wr_data[BTB_DWIDTH-1:0]),
.dout (btb_bank0_rd_data_way0_out[j]));
// Way 1
rvdffe #(17+pt.BTB_BTAG_SIZE) btb_bank0_way1 (.*,
.en(((btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == j) & btb_wr_en_way1)),
.din (btb_wr_data[BTB_DWIDTH-1:0]),
.dout (btb_bank0_rd_data_way1_out[j]));
end
always_comb begin : BTB_rd_mux
btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0] = '0 ;
btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0] = '0 ;
btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0] = '0 ;
btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0] = '0 ;
for (int j=0; j< LRU_SIZE; j++) begin
if (btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == (pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1)'(j)) begin
btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0] = btb_bank0_rd_data_way0_out[j];
btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0] = btb_bank0_rd_data_way1_out[j];
end
end
for (int j=0; j< LRU_SIZE; j++) begin
if (btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == (pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1)'(j)) begin
btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0] = btb_bank0_rd_data_way0_out[j];
btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0] = btb_bank0_rd_data_way1_out[j];
end
end
end
end // if (!pt.BTB_FULLYA)
if(pt.BTB_FULLYA) begin : fa
logic found1, hit0, hit1;
logic btb_used_reset, write_used;
logic [$clog2(pt.BTB_SIZE)-1:0] btb_fa_wr_addr0, hit0_index, hit1_index;
logic [pt.BTB_SIZE-1:0] btb_tag_hit, btb_offset_0, btb_offset_1, btb_used_ns, btb_used,
wr0_en, btb_upper_hit;
logic [pt.BTB_SIZE-1:0][BTB_DWIDTH-1:0] btbdata;
// Fully Associative tag hash uses bits 31:3. Bits 2:1 are the offset bits used for the 4 tag comp banks
// Full tag used to speed up lookup. There is one 31:3 cmp per entry, and 4 2:1 cmps per entry.
logic [FA_CMP_LOWER-1:1] ifc_fetch_addr_p1_f;
assign ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1] = ifc_fetch_addr_f[FA_CMP_LOWER-1:1] + 1'b1;
assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == ifc_fetch_addr_f[31:1]) &
exu_mp_valid & ifc_fetch_req_f & ~exu_mp_pkt.way);
assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == {ifc_fetch_addr_f[31:FA_CMP_LOWER], ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]}) &
exu_mp_valid & ifc_fetch_req_f & ~exu_mp_pkt.way);
always_comb begin
btb_vbank0_rd_data_f = '0;
btb_vbank1_rd_data_f = '0;
// btb_tag_hit = '0;
// btb_upper_hit = '0;
// btb_offset_0 = '0;
// btb_offset_1 = '0;
found1 = 1'b0;
hit0 = 1'b0;
hit1 = 1'b0;
hit0_index = '0;
hit1_index = '0;
btb_fa_wr_addr0 = '0;
for(int i=0; i<pt.BTB_SIZE; i++) begin
logic upper_hit, offset_0, offset_1;
// Break the cmp into chunks for lower area.
// Chunk1: FA 31:6 or 31:5 depending on icache line size
// Chunk2: FA 5:1 or 4:1 depending on icache line size
// btb_upper_hit[i] = (btbdata[i][BTB_DWIDTH_TOP:FA_TAG_END_UPPER] == ifc_fetch_addr_f[31:FA_CMP_LOWER]) & btbdata[i][0] & ~wr0_en[i];
// btb_offset_0[i] = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_f[FA_CMP_LOWER-1:1]) & btb_upper_hit[i];
// btb_offset_1[i] = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]) & btb_upper_hit[i];
upper_hit = (btbdata[i][BTB_DWIDTH_TOP:FA_TAG_END_UPPER] == ifc_fetch_addr_f[31:FA_CMP_LOWER]) & btbdata[i][0] & ~wr0_en[i];
offset_0 = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_f[FA_CMP_LOWER-1:1]) & upper_hit;
offset_1 = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]) & upper_hit;
if(~hit0) begin
if(offset_0) begin
hit0_index[BTB_FA_INDEX:0] = (BTB_FA_INDEX+1)'(i);
// hit unless we are also writing this entry at the same time
hit0 = 1'b1;
end
end
if(~hit1) begin
if(offset_1) begin
hit1_index[BTB_FA_INDEX:0] = (BTB_FA_INDEX+1)'(i);
hit1 = 1'b1;
end
end
// Mux out the 2 potential branches
if(offset_0)
btb_vbank0_rd_data_f[BTB_DWIDTH-1:0] = fetch_mp_collision_f ? btb_wr_data : btbdata[i];
if(offset_1)
btb_vbank1_rd_data_f[BTB_DWIDTH-1:0] = fetch_mp_collision_p1_f ? btb_wr_data : btbdata[i];
// find the first zero from bit zero in the used vector, this is the write address
if(~found1 & ((exu_mp_valid_write & ~exu_mp_pkt.way) | dec_tlu_error_wb)) begin
if(~btb_used[i]) begin
btb_fa_wr_addr0[BTB_FA_INDEX:0] = i[BTB_FA_INDEX:0];
found1 = 1'b1;
end
end
end
end // always_comb begin
//`ifdef RV_ASSERT_ON
// btbhitonehot0: assert #0 ($onehot0(btb_offset_0));
// btbhitonehot1: assert #0 ($onehot0(btb_offset_1));
//`endif
assign vwayhit_f[1:0] = {hit1, hit0} & {eoc_mask, 1'b1};
// way bit is reused as the predicted bit
assign way_raw[1:0] = vwayhit_f[1:0] | {fetch_mp_collision_p1_f, fetch_mp_collision_f};
for (j=0 ; j<pt.BTB_SIZE ; j++) begin : BTB_FAFLOPS
assign wr0_en[j] = ((btb_fa_wr_addr0[BTB_FA_INDEX:0] == j) & (exu_mp_valid_write & ~exu_mp_pkt.way)) |
((dec_fa_error_index == j) & dec_tlu_error_wb);
rvdffe #(BTB_DWIDTH) btb_fa (.*, .clk(clk),
.en (wr0_en[j]),
.din (btb_wr_data[BTB_DWIDTH-1:0]),
.dout(btbdata[j]));
end // block: BTB_FAFLOPS
assign ifu_bp_fa_index_f[1] = hit1 ? hit1_index : '0;
assign ifu_bp_fa_index_f[0] = hit0 ? hit0_index : '0;
assign btb_used_reset = &btb_used[pt.BTB_SIZE-1:0];
assign btb_used_ns[pt.BTB_SIZE-1:0] = ({pt.BTB_SIZE{vwayhit_f[1]}} & (32'b1 << hit1_index[BTB_FA_INDEX:0])) |
({pt.BTB_SIZE{vwayhit_f[0]}} & (32'b1 << hit0_index[BTB_FA_INDEX:0])) |
({pt.BTB_SIZE{exu_mp_valid_write & ~exu_mp_pkt.way & ~dec_tlu_error_wb}} & (32'b1 << btb_fa_wr_addr0[BTB_FA_INDEX:0])) |
({pt.BTB_SIZE{btb_used_reset}} & {pt.BTB_SIZE{1'b0}}) |
({pt.BTB_SIZE{~btb_used_reset & dec_tlu_error_wb}} & (btb_used[pt.BTB_SIZE-1:0] & ~(32'b1 << dec_fa_error_index[BTB_FA_INDEX:0]))) |
(~{pt.BTB_SIZE{btb_used_reset | dec_tlu_error_wb}} & btb_used[pt.BTB_SIZE-1:0]);
assign write_used = btb_used_reset | ifu_bp_hit_taken_f | exu_mp_valid_write | dec_tlu_error_wb;
rvdffe #(pt.BTB_SIZE) btb_usedf (.*, .clk(clk),
.en (write_used),
.din (btb_used_ns[pt.BTB_SIZE-1:0]),
.dout(btb_used[pt.BTB_SIZE-1:0]));
end // block: fa
//-----------------------------------------------------------------------------
// BHT
// 2 bit Entry -> direction, strength
//
//-----------------------------------------------------------------------------
// logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0][1:0] bht_bank_wr_data ;
logic [1:0] [pt.BHT_ARRAY_DEPTH-1:0] [1:0] bht_bank_rd_data_out ;
logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0] bht_bank_clken ;
logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0] bht_bank_clk ;
// logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0] bht_bank_sel ;
for ( i=0; i<2; i++) begin : BANKS
wire[pt.BHT_ARRAY_DEPTH-1:0] wr0, wr1;
assign wr0 = bht_wr_en0[i] << bht_wr_addr0;
assign wr1 = bht_wr_en2[i] << bht_wr_addr2;
for (genvar k=0 ; k < (pt.BHT_ARRAY_DEPTH)/NUM_BHT_LOOP ; k++) begin : BHT_CLK_GROUP
assign bht_bank_clken[i][k] = (bht_wr_en0[i] & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) |
(bht_wr_en2[i] & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH));
`ifndef RV_FPGA_OPTIMIZE
rvclkhdr bht_bank_grp_cgc ( .en(bht_bank_clken[i][k]), .l1clk(bht_bank_clk[i][k]), .* ); // ifndef RV_FPGA_OPTIMIZE
`endif
for (j=0 ; j<NUM_BHT_LOOP ; j++) begin : BHT_FLOPS
wire[1:0] wdata;
wire bank_sel = wr1[NUM_BHT_LOOP*k+j] | wr0[NUM_BHT_LOOP*k+j];
// assign bht_bank_sel[i][k][j] = (bht_wr_en0[i] & (bht_wr_addr0[NUM_BHT_LOOP_INNER_HI :pt.BHT_ADDR_LO] == j) & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) |
// (bht_wr_en2[i] & (bht_wr_addr2[NUM_BHT_LOOP_INNER_HI :pt.BHT_ADDR_LO] == j) & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) ;
// assign bht_bank_wr_data[i][k][j] = (bht_wr_en2[i] & (bht_wr_addr2[NUM_BHT_LOOP_INNER_HI:pt.BHT_ADDR_LO] == j) & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) ? bht_wr_data2[1:0] :
// bht_wr_data0[1:0] ;
assign wdata = wr1[NUM_BHT_LOOP*k+j] ? bht_wr_data2[1:0] :bht_wr_data0;
rvdffs_fpga #(2) bht_bank (.*,
.clk (bht_bank_clk[i][k]),
.en (bank_sel),
.rawclk (clk),
.clken (bank_sel),
.din (wdata),
.dout (bht_bank_rd_data_out[i][(16*k)+j]));
end // block: BHT_FLOPS
end // block: BHT_CLK_GROUP
end // block: BANKS
always_comb begin : BHT_rd_mux
bht_bank0_rd_data_f[1:0] = '0 ;
bht_bank1_rd_data_f[1:0] = '0 ;
bht_bank0_rd_data_p1_f[1:0] = '0 ;
for (int j=0; j< pt.BHT_ARRAY_DEPTH; j++) begin
if (bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] == (pt.BHT_ADDR_HI-pt.BHT_ADDR_LO+1)'(j)) begin
bht_bank0_rd_data_f[1:0] = bht_bank_rd_data_out[0][j];
bht_bank1_rd_data_f[1:0] = bht_bank_rd_data_out[1][j];
end
if (bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] == (pt.BHT_ADDR_HI-pt.BHT_ADDR_LO+1)'(j)) begin
bht_bank0_rd_data_p1_f[1:0] = bht_bank_rd_data_out[0][j];
end
end
end // block: BHT_rd_mux
function [1:0] countones;
input [1:0] valid;
begin
countones[1:0] = {2'b0, valid[1]} +
{2'b0, valid[0]};
end
endfunction
endmodule // el2_ifu_bp_ctl

View File

@ -0,0 +1,383 @@
//********************************************************************************
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// purpose of this file is to convert 16b RISCV compressed instruction into 32b equivalent
module el2_ifu_compress_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic [15:0] din, // 16-bit compressed instruction
output logic [31:0] dout // 32-bit uncompressed instruction
);
logic legal;
logic [15:0] i;
logic [31:0] o,l1,l2,l3;
assign i[15:0] = din[15:0];
logic [4:0] rs2d,rdd,rdpd,rs2pd;
logic rdrd;
logic rdrs1;
logic rs2rs2;
logic rdprd;
logic rdprs1;
logic rs2prs2;
logic rs2prd;
logic uimm9_2;
logic ulwimm6_2;
logic ulwspimm7_2;
logic rdeq2;
logic rdeq1;
logic rs1eq2;
logic sbroffset8_1;
logic simm9_4;
logic simm5_0;
logic sjaloffset11_1;
logic sluimm17_12;
logic uimm5_0;
logic uswimm6_2;
logic uswspimm7_2;
// form the opcodes
// formats
//
// c.add rd 11:7 rs2 6:2
// c.and rdp 9:7 rs2p 4:2
//
// add rs2 24:20 rs1 19:15 rd 11:7
assign rs2d[4:0] = i[6:2];
assign rdd[4:0] = i[11:7];
assign rdpd[4:0] = {2'b01, i[9:7]};
assign rs2pd[4:0] = {2'b01, i[4:2]};
// merge in rd, rs1, rs2
// rd
assign l1[6:0] = o[6:0];
assign l1[11:7] = o[11:7] |
({5{rdrd}} & rdd[4:0]) |
({5{rdprd}} & rdpd[4:0]) |
({5{rs2prd}} & rs2pd[4:0]) |
({5{rdeq1}} & 5'd1) |
({5{rdeq2}} & 5'd2);
// rs1
assign l1[14:12] = o[14:12];
assign l1[19:15] = o[19:15] |
({5{rdrs1}} & rdd[4:0]) |
({5{rdprs1}} & rdpd[4:0]) |
({5{rs1eq2}} & 5'd2);
// rs2
assign l1[24:20] = o[24:20] |
({5{rs2rs2}} & rs2d[4:0]) |
({5{rs2prs2}} & rs2pd[4:0]);
assign l1[31:25] = o[31:25];
logic [5:0] simm5d;
logic [9:2] uimm9d;
logic [9:4] simm9d;
logic [6:2] ulwimm6d;
logic [7:2] ulwspimm7d;
logic [5:0] uimm5d;
logic [20:1] sjald;
logic [31:12] sluimmd;
// merge in immediates + jal offset
assign simm5d[5:0] = { i[12], i[6:2] };
assign uimm9d[9:2] = { i[10:7], i[12:11], i[5], i[6] };
assign simm9d[9:4] = { i[12], i[4:3], i[5], i[2], i[6] };
assign ulwimm6d[6:2] = { i[5], i[12:10], i[6] };
assign ulwspimm7d[7:2] = { i[3:2], i[12], i[6:4] };
assign uimm5d[5:0] = { i[12], i[6:2] };
assign sjald[11:1] = { i[12], i[8], i[10:9], i[6], i[7], i[2], i[11], i[5:4], i[3] };
assign sjald[20:12] = {9{i[12]}};
assign sluimmd[31:12] = { {15{i[12]}}, i[6:2] };
assign l2[31:20] = ( l1[31:20] ) |
( {12{simm5_0}} & {{7{simm5d[5]}},simm5d[4:0]} ) |
( {12{uimm9_2}} & {2'b0,uimm9d[9:2],2'b0} ) |
( {12{simm9_4}} & {{3{simm9d[9]}},simm9d[8:4],4'b0} ) |
( {12{ulwimm6_2}} & {5'b0,ulwimm6d[6:2],2'b0} ) |
( {12{ulwspimm7_2}} & {4'b0,ulwspimm7d[7:2],2'b0} ) |
( {12{uimm5_0}} & {6'b0,uimm5d[5:0]} ) |
( {12{sjaloffset11_1}} & {sjald[20],sjald[10:1],sjald[11]} ) |
( {12{sluimm17_12}} & sluimmd[31:20] );
assign l2[19:12] = ( l1[19:12] ) |
( {8{sjaloffset11_1}} & sjald[19:12] ) |
( {8{sluimm17_12}} & sluimmd[19:12] );
assign l2[11:0] = l1[11:0];
// merge in branch offset and store immediates
logic [8:1] sbr8d;
logic [6:2] uswimm6d;
logic [7:2] uswspimm7d;
assign sbr8d[8:1] = { i[12], i[6], i[5], i[2], i[11], i[10], i[4], i[3] };
assign uswimm6d[6:2] = { i[5], i[12:10], i[6] };
assign uswspimm7d[7:2] = { i[8:7], i[12:9] };
assign l3[31:25] = ( l2[31:25] ) |
( {7{sbroffset8_1}} & { {4{sbr8d[8]}},sbr8d[7:5] } ) |
( {7{uswimm6_2}} & { 5'b0, uswimm6d[6:5] } ) |
( {7{uswspimm7_2}} & { 4'b0, uswspimm7d[7:5] } );
assign l3[24:12] = l2[24:12];
assign l3[11:7] = ( l2[11:7] ) |
( {5{sbroffset8_1}} & { sbr8d[4:1], sbr8d[8] } ) |
( {5{uswimm6_2}} & { uswimm6d[4:2], 2'b0 } ) |
( {5{uswspimm7_2}} & { uswspimm7d[4:2], 2'b0 } );
assign l3[6:0] = l2[6:0];
assign dout[31:0] = l3[31:0] & {32{legal}};
// file "cdecode" is human readable file that has all of the compressed instruction decodes defined and is part of git repo
// modify this file as needed
// to generate all the equations below from "cdecode" except legal equation:
// 1) coredecode -in cdecode > cdecode.e
// 2) espresso -Dso -oeqntott cdecode.e | addassign > compress_equations
// to generate the legal (16b compressed instruction is legal) equation below:
// 1) coredecode -in cdecode -legal > clegal.e
// 2) espresso -Dso -oeqntott clegal.e | addassign > clegal_equation
// espresso decodes
assign rdrd = (!i[14]&i[6]&i[1]) | (!i[15]&i[14]&i[11]&i[0]) | (!i[14]&i[5]&i[1]) | (
!i[15]&i[14]&i[10]&i[0]) | (!i[14]&i[4]&i[1]) | (!i[15]&i[14]&i[9]
&i[0]) | (!i[14]&i[3]&i[1]) | (!i[15]&i[14]&!i[8]&i[0]) | (!i[14]
&i[2]&i[1]) | (!i[15]&i[14]&i[7]&i[0]) | (!i[15]&i[1]) | (!i[15]
&!i[13]&i[0]);
assign rdrs1 = (!i[14]&i[12]&i[11]&i[1]) | (!i[14]&i[12]&i[10]&i[1]) | (!i[14]
&i[12]&i[9]&i[1]) | (!i[14]&i[12]&i[8]&i[1]) | (!i[14]&i[12]&i[7]
&i[1]) | (!i[14]&!i[12]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
&i[12]&i[6]&i[1]) | (!i[14]&i[12]&i[5]&i[1]) | (!i[14]&i[12]&i[4]
&i[1]) | (!i[14]&i[12]&i[3]&i[1]) | (!i[14]&i[12]&i[2]&i[1]) | (
!i[15]&!i[14]&!i[13]&i[0]) | (!i[15]&!i[14]&i[1]);
assign rs2rs2 = (i[15]&i[6]&i[1]) | (i[15]&i[5]&i[1]) | (i[15]&i[4]&i[1]) | (
i[15]&i[3]&i[1]) | (i[15]&i[2]&i[1]) | (i[15]&i[14]&i[1]);
assign rdprd = (i[15]&!i[14]&!i[13]&i[0]);
assign rdprs1 = (i[15]&!i[13]&i[0]) | (i[15]&i[14]&i[0]) | (i[14]&!i[1]&!i[0]);
assign rs2prs2 = (i[15]&!i[14]&!i[13]&i[11]&i[10]&i[0]) | (i[15]&!i[1]&!i[0]);
assign rs2prd = (!i[15]&!i[1]&!i[0]);
assign uimm9_2 = (!i[14]&!i[1]&!i[0]);
assign ulwimm6_2 = (!i[15]&i[14]&!i[1]&!i[0]);
assign ulwspimm7_2 = (!i[15]&i[14]&i[1]);
assign rdeq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]);
assign rdeq1 = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
&i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9]
&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5]
&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3]
&!i[2]&i[1]) | (!i[15]&!i[14]&i[13]);
assign rs1eq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]) | (i[14]
&i[1]) | (!i[14]&!i[1]&!i[0]);
assign sbroffset8_1 = (i[15]&i[14]&i[0]);
assign simm9_4 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]);
assign simm5_0 = (!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (!i[15]&!i[13]&i[0]);
assign sjaloffset11_1 = (!i[14]&i[13]);
assign sluimm17_12 = (!i[15]&i[14]&i[13]&i[7]) | (!i[15]&i[14]&i[13]&!i[8]) | (
!i[15]&i[14]&i[13]&i[9]) | (!i[15]&i[14]&i[13]&i[10]) | (!i[15]&i[14]
&i[13]&i[11]);
assign uimm5_0 = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (!i[15]&!i[14]&i[1]);
assign uswimm6_2 = (i[15]&!i[1]&!i[0]);
assign uswspimm7_2 = (i[15]&i[14]&i[1]);
assign o[31] = 1'b0;
assign o[30] = (i[15]&!i[14]&!i[13]&i[10]&!i[6]&!i[5]&i[0]) | (i[15]&!i[14]
&!i[13]&!i[11]&i[10]&i[0]);
assign o[29] = 1'b0;
assign o[28] = 1'b0;
assign o[27] = 1'b0;
assign o[26] = 1'b0;
assign o[25] = 1'b0;
assign o[24] = 1'b0;
assign o[23] = 1'b0;
assign o[22] = 1'b0;
assign o[21] = 1'b0;
assign o[20] = (!i[14]&i[12]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[6]&!i[5]&!i[4]
&!i[3]&!i[2]&i[1]);
assign o[19] = 1'b0;
assign o[18] = 1'b0;
assign o[17] = 1'b0;
assign o[16] = 1'b0;
assign o[15] = 1'b0;
assign o[14] = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (i[15]&!i[14]&!i[13]&!i[10]
&i[0]) | (i[15]&!i[14]&!i[13]&i[6]&i[0]) | (i[15]&!i[14]&!i[13]&i[5]
&i[0]);
assign o[13] = (i[15]&!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (i[15]&!i[14]&!i[13]
&i[11]&i[6]&i[0]) | (i[14]&!i[0]);
assign o[12] = (i[15]&!i[14]&!i[13]&i[6]&i[5]&i[0]) | (i[15]&!i[14]&!i[13]&!i[11]
&i[0]) | (i[15]&!i[14]&!i[13]&!i[10]&i[0]) | (!i[15]&!i[14]&i[1]) | (
i[15]&i[14]&i[13]);
assign o[11] = 1'b0;
assign o[10] = 1'b0;
assign o[9] = 1'b0;
assign o[8] = 1'b0;
assign o[7] = 1'b0;
assign o[6] = (i[15]&!i[14]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&!i[0]) | (!i[14]&i[13]) | (
i[15]&i[14]&i[0]);
assign o[5] = (i[15]&!i[0]) | (i[15]&i[11]&i[10]) | (i[13]&!i[8]) | (i[13]&i[7]) | (
i[13]&i[9]) | (i[13]&i[10]) | (i[13]&i[11]) | (!i[14]&i[13]) | (
i[15]&i[14]);
assign o[4] = (!i[14]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[0]) | (!i[15]&!i[14]
&!i[0]) | (!i[14]&i[6]&!i[0]) | (!i[15]&i[14]&i[0]) | (!i[14]&i[5]
&!i[0]) | (!i[14]&i[4]&!i[0]) | (!i[14]&!i[13]&i[0]) | (!i[14]&i[3]
&!i[0]) | (!i[14]&i[2]&!i[0]);
assign o[3] = (!i[14]&i[13]);
assign o[2] = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
&i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9]
&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5]
&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3]
&!i[2]&i[1]) | (i[15]&!i[14]&!i[12]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]
&!i[0]) | (!i[15]&i[13]&!i[8]) | (!i[15]&i[13]&i[7]) | (!i[15]&i[13]
&i[9]) | (!i[15]&i[13]&i[10]) | (!i[15]&i[13]&i[11]) | (!i[14]&i[13]);
// 32b instruction has lower two bits 2'b11
assign o[1] = 1'b1;
assign o[0] = 1'b1;
assign legal = (!i[13]&!i[12]&i[11]&i[1]&!i[0]) | (!i[13]&!i[12]&i[6]&i[1]&!i[0]) | (
!i[15]&!i[13]&i[11]&!i[1]) | (!i[13]&!i[12]&i[5]&i[1]&!i[0]) | (
!i[13]&!i[12]&i[10]&i[1]&!i[0]) | (!i[15]&!i[13]&i[6]&!i[1]) | (
i[15]&!i[12]&!i[1]&i[0]) | (!i[13]&!i[12]&i[9]&i[1]&!i[0]) | (!i[12]
&i[6]&!i[1]&i[0]) | (!i[15]&!i[13]&i[5]&!i[1]) | (!i[13]&!i[12]&i[8]
&i[1]&!i[0]) | (!i[12]&i[5]&!i[1]&i[0]) | (!i[15]&!i[13]&i[10]&!i[1]) | (
!i[13]&!i[12]&i[7]&i[1]&!i[0]) | (i[12]&i[11]&!i[10]&!i[1]&i[0]) | (
!i[15]&!i[13]&i[9]&!i[1]) | (!i[13]&!i[12]&i[4]&i[1]&!i[0]) | (i[13]
&i[12]&!i[1]&i[0]) | (!i[15]&!i[13]&i[8]&!i[1]) | (!i[13]&!i[12]&i[3]
&i[1]&!i[0]) | (i[13]&i[4]&!i[1]&i[0]) | (!i[13]&!i[12]&i[2]&i[1]
&!i[0]) | (!i[15]&!i[13]&i[7]&!i[1]) | (i[13]&i[3]&!i[1]&i[0]) | (
i[13]&i[2]&!i[1]&i[0]) | (i[14]&!i[13]&!i[1]) | (!i[14]&!i[12]&!i[1]
&i[0]) | (i[15]&!i[13]&i[12]&i[1]&!i[0]) | (!i[15]&!i[13]&!i[12]&i[1]
&!i[0]) | (!i[15]&!i[13]&i[12]&!i[1]) | (i[14]&!i[13]&!i[0]);
endmodule

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,469 @@
//********************************************************************************
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
//********************************************************************************
// Icache closely coupled memory --- ICCM
//********************************************************************************
module el2_ifu_iccm_mem
import el2_pkg::*;
#(
`include "el2_param.vh"
)(
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic rst_l, // reset, active low
input logic clk_override, // Override non-functional clock gating
input logic iccm_wren, // ICCM write enable
input logic iccm_rden, // ICCM read enable
input logic [pt.ICCM_BITS-1:1] iccm_rw_addr, // ICCM read/write address
input logic iccm_buf_correct_ecc, // ICCM is doing a single bit error correct cycle
input logic iccm_correction_state, // ICCM under a correction - This is needed to guard replacements when hit
input logic [2:0] iccm_wr_size, // ICCM write size
input logic [77:0] iccm_wr_data, // ICCM write data
input el2_ccm_ext_in_pkt_t [pt.ICCM_NUM_BANKS-1:0] iccm_ext_in_pkt, // External packet
output logic [63:0] iccm_rd_data, // ICCM read data
output logic [77:0] iccm_rd_data_ecc, // ICCM read ecc
input logic scan_mode // Scan mode control
);
logic [pt.ICCM_NUM_BANKS-1:0] wren_bank;
logic [pt.ICCM_NUM_BANKS-1:0] rden_bank;
logic [pt.ICCM_NUM_BANKS-1:0] iccm_clken;
logic [pt.ICCM_NUM_BANKS-1:0] [pt.ICCM_BITS-1:pt.ICCM_BANK_INDEX_LO] addr_bank;
logic [pt.ICCM_NUM_BANKS-1:0] [38:0] iccm_bank_dout, iccm_bank_dout_fn;
logic [pt.ICCM_NUM_BANKS-1:0] [38:0] iccm_bank_wr_data;
logic [pt.ICCM_BITS-1:1] addr_bank_inc;
logic [pt.ICCM_BANK_HI : 2] iccm_rd_addr_hi_q;
logic [pt.ICCM_BANK_HI : 1] iccm_rd_addr_lo_q;
logic [63:0] iccm_rd_data_pre;
logic [63:0] iccm_data;
logic [1:0] addr_incr;
logic [pt.ICCM_NUM_BANKS-1:0] [38:0] iccm_bank_wr_data_vec;
// logic to handle hard persisten faults
logic [1:0] [pt.ICCM_BITS-1:2] redundant_address;
logic [1:0] [38:0] redundant_data;
logic [1:0] redundant_valid;
logic [pt.ICCM_NUM_BANKS-1:0] sel_red1, sel_red0, sel_red1_q, sel_red0_q;
logic [38:0] redundant_data0_in, redundant_data1_in;
logic redundant_lru, redundant_lru_in, redundant_lru_en;
logic redundant_data0_en;
logic redundant_data1_en;
logic r0_addr_en, r1_addr_en;
// Testing persistent flip
// logic [3:0] not_iccm_bank_dout;
// logic [15:3] ecc_insert_flip_in, ecc_insert_flip;
// logic flip_en, flip_match, flip_match_q;
//
// assign flip_in = (iccm_rw_addr[3:2] != 2'b00); // dont flip when bank0 - this is to make some progress in DMA streaming cases
// assign flip_en = iccm_rden;
//
// rvdffs #(1) flipmatch (.*,
// .clk(clk),
// .din(flip_in),
// .en(flip_en),
// .dout(flip_match_q));
//
// end of testing flip
assign addr_incr[1:0] = (iccm_wr_size[1:0] == 2'b11) ? 2'b10: 2'b01;
assign addr_bank_inc[pt.ICCM_BITS-1 : 1] = iccm_rw_addr[pt.ICCM_BITS-1 : 1] + addr_incr[1:0];
for (genvar i=0; i<pt.ICCM_NUM_BANKS/2; i++) begin: mem_bank_data
assign iccm_bank_wr_data_vec[(2*i)] = iccm_wr_data[38:0];
assign iccm_bank_wr_data_vec[(2*i)+1] = iccm_wr_data[77:39];
end
for (genvar i=0; i<pt.ICCM_NUM_BANKS; i++) begin: mem_bank
assign wren_bank[i] = iccm_wren & ((iccm_rw_addr[pt.ICCM_BANK_HI:2] == i) | (addr_bank_inc[pt.ICCM_BANK_HI:2] == i));
assign iccm_bank_wr_data[i] = iccm_bank_wr_data_vec[i];
assign rden_bank[i] = iccm_rden & ( (iccm_rw_addr[pt.ICCM_BANK_HI:2] == i) | (addr_bank_inc[pt.ICCM_BANK_HI:2] == i));
assign iccm_clken[i] = wren_bank[i] | rden_bank[i] | clk_override;
assign addr_bank[i][pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] = wren_bank[i] ? iccm_rw_addr[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] :
((addr_bank_inc[pt.ICCM_BANK_HI:2] == i) ?
addr_bank_inc[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] :
iccm_rw_addr[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO]);
`ifdef VERILATOR
el2_ram #(.depth(1<<pt.ICCM_INDEX_BITS), .width(39)) iccm_bank (
// Primary ports
.ME(iccm_clken[i]),
.CLK(clk),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(iccm_bank_wr_data[i][38:0]),
.Q(iccm_bank_dout[i][38:0]),
.ROP ( ),
// These are used by SoC
.TEST1(iccm_ext_in_pkt[i].TEST1),
.RME(iccm_ext_in_pkt[i].RME),
.RM(iccm_ext_in_pkt[i].RM),
.LS(iccm_ext_in_pkt[i].LS),
.DS(iccm_ext_in_pkt[i].DS),
.SD(iccm_ext_in_pkt[i].SD) ,
.TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
.BC1(iccm_ext_in_pkt[i].BC1),
.BC2(iccm_ext_in_pkt[i].BC2)
);
`else
if (pt.ICCM_INDEX_BITS == 6 ) begin : iccm
ram_64x39 iccm_bank (
// Primary ports
.CLK(clk),
.ME(iccm_clken[i]),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(iccm_bank_wr_data[i][38:0]),
.Q(iccm_bank_dout[i][38:0]),
.ROP ( ),
// These are used by SoC
.TEST1(iccm_ext_in_pkt[i].TEST1),
.RME(iccm_ext_in_pkt[i].RME),
.RM(iccm_ext_in_pkt[i].RM),
.LS(iccm_ext_in_pkt[i].LS),
.DS(iccm_ext_in_pkt[i].DS),
.SD(iccm_ext_in_pkt[i].SD) ,
.TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
.BC1(iccm_ext_in_pkt[i].BC1),
.BC2(iccm_ext_in_pkt[i].BC2)
);
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 7 ) begin : iccm
ram_128x39 iccm_bank (
// Primary ports
.CLK(clk),
.ME(iccm_clken[i]),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(iccm_bank_wr_data[i][38:0]),
.Q(iccm_bank_dout[i][38:0]),
.ROP ( ),
// These are used by SoC
.TEST1(iccm_ext_in_pkt[i].TEST1),
.RME(iccm_ext_in_pkt[i].RME),
.RM(iccm_ext_in_pkt[i].RM),
.LS(iccm_ext_in_pkt[i].LS),
.DS(iccm_ext_in_pkt[i].DS),
.SD(iccm_ext_in_pkt[i].SD) ,
.TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
.BC1(iccm_ext_in_pkt[i].BC1),
.BC2(iccm_ext_in_pkt[i].BC2)
);
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 8 ) begin : iccm
ram_256x39 iccm_bank (
// Primary ports
.CLK(clk),
.ME(iccm_clken[i]),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(iccm_bank_wr_data[i][38:0]),
.Q(iccm_bank_dout[i][38:0]),
.ROP ( ),
// These are used by SoC
.TEST1(iccm_ext_in_pkt[i].TEST1),
.RME(iccm_ext_in_pkt[i].RME),
.RM(iccm_ext_in_pkt[i].RM),
.LS(iccm_ext_in_pkt[i].LS),
.DS(iccm_ext_in_pkt[i].DS),
.SD(iccm_ext_in_pkt[i].SD) ,
.TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
.BC1(iccm_ext_in_pkt[i].BC1),
.BC2(iccm_ext_in_pkt[i].BC2)
);
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 9 ) begin : iccm
ram_512x39 iccm_bank (
// Primary ports
.CLK(clk),
.ME(iccm_clken[i]),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(iccm_bank_wr_data[i][38:0]),
.Q(iccm_bank_dout[i][38:0]),
.ROP ( ),
// These are used by SoC
.TEST1(iccm_ext_in_pkt[i].TEST1),
.RME(iccm_ext_in_pkt[i].RME),
.RM(iccm_ext_in_pkt[i].RM),
.LS(iccm_ext_in_pkt[i].LS),
.DS(iccm_ext_in_pkt[i].DS),
.SD(iccm_ext_in_pkt[i].SD) ,
.TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
.BC1(iccm_ext_in_pkt[i].BC1),
.BC2(iccm_ext_in_pkt[i].BC2)
);
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 10 ) begin : iccm
ram_1024x39 iccm_bank (
// Primary ports
.CLK(clk),
.ME(iccm_clken[i]),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(iccm_bank_wr_data[i][38:0]),
.Q(iccm_bank_dout[i][38:0]),
.ROP ( ),
// These are used by SoC
.TEST1(iccm_ext_in_pkt[i].TEST1),
.RME(iccm_ext_in_pkt[i].RME),
.RM(iccm_ext_in_pkt[i].RM),
.LS(iccm_ext_in_pkt[i].LS),
.DS(iccm_ext_in_pkt[i].DS),
.SD(iccm_ext_in_pkt[i].SD) ,
.TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
.BC1(iccm_ext_in_pkt[i].BC1),
.BC2(iccm_ext_in_pkt[i].BC2)
);
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 11 ) begin : iccm
ram_2048x39 iccm_bank (
// Primary ports
.CLK(clk),
.ME(iccm_clken[i]),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(iccm_bank_wr_data[i][38:0]),
.Q(iccm_bank_dout[i][38:0]),
.ROP ( ),
// These are used by SoC
.TEST1(iccm_ext_in_pkt[i].TEST1),
.RME(iccm_ext_in_pkt[i].RME),
.RM(iccm_ext_in_pkt[i].RM),
.LS(iccm_ext_in_pkt[i].LS),
.DS(iccm_ext_in_pkt[i].DS),
.SD(iccm_ext_in_pkt[i].SD) ,
.TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
.BC1(iccm_ext_in_pkt[i].BC1),
.BC2(iccm_ext_in_pkt[i].BC2)
);
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 12 ) begin : iccm
ram_4096x39 iccm_bank (
// Primary ports
.CLK(clk),
.ME(iccm_clken[i]),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(iccm_bank_wr_data[i][38:0]),
.Q(iccm_bank_dout[i][38:0]),
.ROP ( ),
// These are used by SoC
.TEST1(iccm_ext_in_pkt[i].TEST1),
.RME(iccm_ext_in_pkt[i].RME),
.RM(iccm_ext_in_pkt[i].RM),
.LS(iccm_ext_in_pkt[i].LS),
.DS(iccm_ext_in_pkt[i].DS),
.SD(iccm_ext_in_pkt[i].SD) ,
.TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
.BC1(iccm_ext_in_pkt[i].BC1),
.BC2(iccm_ext_in_pkt[i].BC2)
);
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 13 ) begin : iccm
ram_8192x39 iccm_bank (
// Primary ports
.CLK(clk),
.ME(iccm_clken[i]),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(iccm_bank_wr_data[i][38:0]),
.Q(iccm_bank_dout[i][38:0]),
.ROP ( ),
// These are used by SoC
.TEST1(iccm_ext_in_pkt[i].TEST1),
.RME(iccm_ext_in_pkt[i].RME),
.RM(iccm_ext_in_pkt[i].RM),
.LS(iccm_ext_in_pkt[i].LS),
.DS(iccm_ext_in_pkt[i].DS),
.SD(iccm_ext_in_pkt[i].SD) ,
.TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
.BC1(iccm_ext_in_pkt[i].BC1),
.BC2(iccm_ext_in_pkt[i].BC2)
);
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 14 ) begin : iccm
ram_16384x39 iccm_bank (
// Primary ports
.CLK(clk),
.ME(iccm_clken[i]),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(iccm_bank_wr_data[i][38:0]),
.Q(iccm_bank_dout[i][38:0]),
.ROP ( ),
// These are used by SoC
.TEST1(iccm_ext_in_pkt[i].TEST1),
.RME(iccm_ext_in_pkt[i].RME),
.RM(iccm_ext_in_pkt[i].RM),
.LS(iccm_ext_in_pkt[i].LS),
.DS(iccm_ext_in_pkt[i].DS),
.SD(iccm_ext_in_pkt[i].SD) ,
.TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
.BC1(iccm_ext_in_pkt[i].BC1),
.BC2(iccm_ext_in_pkt[i].BC2)
);
end // block: iccm
else begin : iccm
ram_32768x39 iccm_bank (
// Primary ports
.CLK(clk),
.ME(iccm_clken[i]),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(iccm_bank_wr_data[i][38:0]),
.Q(iccm_bank_dout[i][38:0]),
.ROP ( ),
// These are used by SoC
.TEST1(iccm_ext_in_pkt[i].TEST1),
.RME(iccm_ext_in_pkt[i].RME),
.RM(iccm_ext_in_pkt[i].RM),
.LS(iccm_ext_in_pkt[i].LS),
.DS(iccm_ext_in_pkt[i].DS),
.SD(iccm_ext_in_pkt[i].SD) ,
.TEST_RNM(iccm_ext_in_pkt[i].TEST_RNM),
.BC1(iccm_ext_in_pkt[i].BC1),
.BC2(iccm_ext_in_pkt[i].BC2)
);
end // block: iccm
`endif
// match the redundant rows
assign sel_red1[i] = (redundant_valid[1] & (((iccm_rw_addr[pt.ICCM_BITS-1:2] == redundant_address[1][pt.ICCM_BITS-1:2]) & (iccm_rw_addr[3:2] == i)) |
((addr_bank_inc[pt.ICCM_BITS-1:2]== redundant_address[1][pt.ICCM_BITS-1:2]) & (addr_bank_inc[3:2] == i))));
assign sel_red0[i] = (redundant_valid[0] & (((iccm_rw_addr[pt.ICCM_BITS-1:2] == redundant_address[0][pt.ICCM_BITS-1:2]) & (iccm_rw_addr[3:2] == i)) |
((addr_bank_inc[pt.ICCM_BITS-1:2]== redundant_address[0][pt.ICCM_BITS-1:2]) & (addr_bank_inc[3:2] == i))));
rvdff #(1) selred0 (.*,
.clk(active_clk),
.din(sel_red0[i]),
.dout(sel_red0_q[i]));
rvdff #(1) selred1 (.*,
.clk(active_clk),
.din(sel_red1[i]),
.dout(sel_red1_q[i]));
// muxing out the memory data with the redundant data if the address matches
assign iccm_bank_dout_fn[i][38:0] = ({39{sel_red1_q[i]}} & redundant_data[1][38:0]) |
({39{sel_red0_q[i]}} & redundant_data[0][38:0]) |
({39{~sel_red0_q[i] & ~sel_red1_q[i]}} & iccm_bank_dout[i][38:0]);
end : mem_bank
// This section does the redundancy for tolerating single bit errors
// 2x 39 bit data values with address[hi:2] and a valid bit is needed to CAM and sub out the reads/writes to the particular locations
// Also a LRU flop is kept to decide which of the redundant element to replace.
assign r0_addr_en = ~redundant_lru & iccm_buf_correct_ecc;
assign r1_addr_en = redundant_lru & iccm_buf_correct_ecc;
assign redundant_lru_en = iccm_buf_correct_ecc | (((|sel_red0[pt.ICCM_NUM_BANKS-1:0]) | (|sel_red1[pt.ICCM_NUM_BANKS-1:0])) & iccm_rden & iccm_correction_state);
assign redundant_lru_in = iccm_buf_correct_ecc ? ~redundant_lru : (|sel_red0[pt.ICCM_NUM_BANKS-1:0]) ? 1'b1 : 1'b0;
rvdffs #() red_lru (.*, // LRU flop for the redundant replacements
.clk(active_clk),
.en(redundant_lru_en),
.din(redundant_lru_in),
.dout(redundant_lru));
rvdffs #(pt.ICCM_BITS-2) r0_address (.*, // Redundant Row 0 address
.clk(active_clk),
.en(r0_addr_en),
.din(iccm_rw_addr[pt.ICCM_BITS-1:2]),
.dout(redundant_address[0][pt.ICCM_BITS-1:2]));
rvdffs #(pt.ICCM_BITS-2) r1_address (.*, // Redundant Row 0 address
.clk(active_clk),
.en(r1_addr_en),
.din(iccm_rw_addr[pt.ICCM_BITS-1:2]),
.dout(redundant_address[1][pt.ICCM_BITS-1:2]));
rvdffs #(1) r0_valid (.*,
.clk(active_clk), // Redundant Row 0 Valid
.en(r0_addr_en),
.din(1'b1),
.dout(redundant_valid[0]));
rvdffs #(1) r1_valid (.*, // Redundant Row 1 Valid
.clk(active_clk),
.en(r1_addr_en),
.din(1'b1),
.dout(redundant_valid[1]));
// We will have to update the Redundant copies in addition to the memory on subsequent writes to this memory location.
// The data gets updated on : 1) correction cycle, 2) Future writes - this could be W writes from DMA ( match up till addr[2]) or DW writes ( match till address[3])
// The data to pick also depends on the current address[2], size and the addr[2] stored in the address field of the redundant flop. Correction cycle is always W write and the data is splat on both legs, so choosing lower Word
assign redundant_data0_en = ((iccm_rw_addr[pt.ICCM_BITS-1:3] == redundant_address[0][pt.ICCM_BITS-1:3]) & ((iccm_rw_addr[2] == redundant_address[0][2]) | (iccm_wr_size[1:0] == 2'b11)) & redundant_valid[0] & iccm_wren) |
(~redundant_lru & iccm_buf_correct_ecc);
assign redundant_data0_in[38:0] = (((iccm_rw_addr[2] == redundant_address[0][2]) & iccm_rw_addr[2]) | (redundant_address[0][2] & (iccm_wr_size[1:0] == 2'b11))) ? iccm_wr_data[77:39] : iccm_wr_data[38:0];
rvdffs #(39) r0_data (.*, // Redundant Row 1 data
.clk(active_clk),
.en(redundant_data0_en),
.din(redundant_data0_in[38:0]),
.dout(redundant_data[0][38:0]));
assign redundant_data1_en = ((iccm_rw_addr[pt.ICCM_BITS-1:3] == redundant_address[1][pt.ICCM_BITS-1:3]) & ((iccm_rw_addr[2] == redundant_address[1][2]) | (iccm_wr_size[1:0] == 2'b11)) & redundant_valid[1] & iccm_wren) |
(redundant_lru & iccm_buf_correct_ecc);
assign redundant_data1_in[38:0] = (((iccm_rw_addr[2] == redundant_address[1][2]) & iccm_rw_addr[2]) | (redundant_address[1][2] & (iccm_wr_size[1:0] == 2'b11))) ? iccm_wr_data[77:39] : iccm_wr_data[38:0];
rvdffs #(39) r1_data (.*, // Redundant Row 1 data
.clk(active_clk),
.en(redundant_data1_en),
.din(redundant_data1_in[38:0]),
.dout(redundant_data[1][38:0]));
rvdffs #(pt.ICCM_BANK_HI) rd_addr_lo_ff (.*, .clk(active_clk), .din(iccm_rw_addr [pt.ICCM_BANK_HI:1]), .dout(iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:1]), .en(1'b1)); // bit 0 of address is always 0
rvdffs #(pt.ICCM_BANK_BITS) rd_addr_hi_ff (.*, .clk(active_clk), .din(addr_bank_inc[pt.ICCM_BANK_HI:2]), .dout(iccm_rd_addr_hi_q[pt.ICCM_BANK_HI:2]), .en(1'b1));
assign iccm_rd_data_pre[63:0] = {iccm_bank_dout_fn[iccm_rd_addr_hi_q][31:0], iccm_bank_dout_fn[iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:2]][31:0]};
assign iccm_data[63:0] = 64'({16'b0, (iccm_rd_data_pre[63:0] >> (16*iccm_rd_addr_lo_q[1]))});
assign iccm_rd_data[63:0] = {iccm_data[63:0]};
assign iccm_rd_data_ecc[77:0] = {iccm_bank_dout_fn[iccm_rd_addr_hi_q][38:0], iccm_bank_dout_fn[iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:2]][38:0]};
endmodule // el2_ifu_iccm_mem

View File

@ -0,0 +1,246 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// el2_ifu_ifc_ctl.sv
// Function: Fetch pipe control
//
// Comments:
//********************************************************************************
module el2_ifu_ifc_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in.
input logic rst_l, // reset enable, from core pin
input logic scan_mode, // scan
input logic ic_hit_f, // Icache hit
input logic ifu_ic_mb_empty, // Miss buffer empty
input logic ifu_fb_consume1, // Aligner consumed 1 fetch buffer
input logic ifu_fb_consume2, // Aligner consumed 2 fetch buffers
input logic dec_tlu_flush_noredir_wb, // Don't fetch on flush
input logic exu_flush_final, // FLush
input logic [31:1] exu_flush_path_final, // Flush path
input logic ifu_bp_hit_taken_f, // btb hit, select the target path
input logic [31:1] ifu_bp_btb_target_f, // predicted target PC
input logic ic_dma_active, // IC DMA active, stop fetching
input logic ic_write_stall, // IC is writing, stop fetching
input logic dma_iccm_stall_any, // force a stall in the fetch pipe for DMA ICCM access
input logic [31:0] dec_tlu_mrac_ff , // side_effect and cacheable for each region
output logic [31:1] ifc_fetch_addr_f, // fetch addr F
output logic [31:1] ifc_fetch_addr_bf, // fetch addr BF
output logic ifc_fetch_req_f, // fetch request valid F
output logic ifu_pmu_fetch_stall, // pmu event measuring fetch stall
output logic ifc_fetch_uncacheable_bf, // The fetch request is uncacheable space. BF stage
output logic ifc_fetch_req_bf, // Fetch request. Comes with the address. BF stage
output logic ifc_fetch_req_bf_raw, // Fetch request without some qualifications. Used for clock-gating. BF stage
output logic ifc_iccm_access_bf, // This request is to the ICCM. Do not generate misses to the bus.
output logic ifc_region_acc_fault_bf, // Access fault. in ICCM region but offset is outside defined ICCM.
output logic ifc_dma_access_ok // fetch is not accessing the ICCM, DMA can proceed
);
logic [31:1] fetch_addr_bf;
logic [31:1] fetch_addr_next;
logic [3:0] fb_write_f, fb_write_ns;
logic fb_full_f_ns, fb_full_f;
logic fb_right, fb_right2, fb_left, wfm, idle;
logic sel_last_addr_bf, sel_next_addr_bf;
logic miss_f, miss_a;
logic flush_fb, dma_iccm_stall_any_f;
logic mb_empty_mod, goto_idle, leave_idle;
logic fetch_bf_en;
logic line_wrap;
logic fetch_addr_next_1;
// FSM assignment
typedef enum logic [1:0] { IDLE = 2'b00 ,
FETCH = 2'b01 ,
STALL = 2'b10 ,
WFM = 2'b11 } state_t ;
state_t state ;
state_t next_state ;
logic dma_stall;
assign dma_stall = ic_dma_active | dma_iccm_stall_any_f;
// Fetch address mux
// - flush
// - Miss *or* flush during WFM (icache miss buffer is blocking)
// - Sequential
if(pt.BTB_ENABLE==1) begin
logic sel_btb_addr_bf;
assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f);
assign sel_btb_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ifu_bp_hit_taken_f & ic_hit_f;
assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f;
assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} & exu_flush_path_final[31:1]) | // FLUSH path
({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path
({31{sel_btb_addr_bf}} & {ifu_bp_btb_target_f[31:1]})| // BTB target
({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path
end // if (pt.BTB_ENABLE=1)
else begin
assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f);
assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ic_hit_f;
assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} & exu_flush_path_final[31:1]) | // FLUSH path
({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path
({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path
end
assign fetch_addr_next[31:1] = {({ifc_fetch_addr_f[31:2]} + 31'b1), fetch_addr_next_1 };
assign line_wrap = (fetch_addr_next[pt.ICACHE_TAG_INDEX_LO] ^ ifc_fetch_addr_f[pt.ICACHE_TAG_INDEX_LO]);
assign fetch_addr_next_1 = line_wrap ? 1'b0 : ifc_fetch_addr_f[1];
assign ifc_fetch_req_bf_raw = ~idle;
assign ifc_fetch_req_bf = ifc_fetch_req_bf_raw &
~(fb_full_f_ns & ~(ifu_fb_consume2 | ifu_fb_consume1)) &
~dma_stall &
~ic_write_stall &
~dec_tlu_flush_noredir_wb;
assign fetch_bf_en = exu_flush_final | ifc_fetch_req_f;
assign miss_f = ifc_fetch_req_f & ~ic_hit_f & ~exu_flush_final;
assign mb_empty_mod = (ifu_ic_mb_empty | exu_flush_final) & ~dma_stall & ~miss_f & ~miss_a;
// Halt flushes and takes us to IDLE
assign goto_idle = exu_flush_final & dec_tlu_flush_noredir_wb;
// If we're in IDLE, and we get a flush, goto FETCH
assign leave_idle = exu_flush_final & ~dec_tlu_flush_noredir_wb & idle;
//.i 7
//.o 2
//.ilb state[1] state[0] reset_delayed miss_f mb_empty_mod goto_idle leave_idle
//.ob next_state[1] next_state[0]
//.type fr
//
//# fetch 01, stall 10, wfm 11, idle 00
//-- 1---- 01
//-- 0--1- 00
//00 0--00 00
//00 0--01 01
//
//01 01-0- 11
//01 00-0- 01
//
//11 0-10- 01
//11 0-00- 11
assign next_state[1] = (~state[1] & state[0] & miss_f & ~goto_idle) |
(state[1] & ~mb_empty_mod & ~goto_idle);
assign next_state[0] = (~goto_idle & leave_idle) | (state[0] & ~goto_idle);
assign flush_fb = exu_flush_final;
// model fb write logic to mass balance the fetch buffers
assign fb_right = ( ifu_fb_consume1 & ~ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)) | // Consumed and no new fetch
(ifu_fb_consume2 & ifc_fetch_req_f); // Consumed 2 and new fetch
assign fb_right2 = (ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)); // Consumed 2 and no new fetch
assign fb_left = ifc_fetch_req_f & ~(ifu_fb_consume1 | ifu_fb_consume2) & ~miss_f;
// CBH
assign fb_write_ns[3:0] = ( ({4{(flush_fb)}} & 4'b0001) |
({4{~flush_fb & fb_right }} & {1'b0, fb_write_f[3:1]}) |
({4{~flush_fb & fb_right2}} & {2'b0, fb_write_f[3:2]}) |
({4{~flush_fb & fb_left }} & {fb_write_f[2:0], 1'b0}) |
({4{~flush_fb & ~fb_right & ~fb_right2 & ~fb_left}} & fb_write_f[3:0]));
assign fb_full_f_ns = fb_write_ns[3];
assign idle = state == IDLE ;
assign wfm = state == WFM ;
rvdffie #(10) fbwrite_ff (.*, .clk(free_l2clk),
.din( {dma_iccm_stall_any, miss_f, ifc_fetch_req_bf, next_state[1:0], fb_full_f_ns, fb_write_ns[3:0]}),
.dout({dma_iccm_stall_any_f, miss_a, ifc_fetch_req_f, state[1:0], fb_full_f, fb_write_f[3:0]}));
assign ifu_pmu_fetch_stall = wfm |
(ifc_fetch_req_bf_raw &
( (fb_full_f & ~(ifu_fb_consume2 | ifu_fb_consume1 | exu_flush_final)) |
dma_stall));
assign ifc_fetch_addr_bf[31:1] = fetch_addr_bf[31:1];
rvdffpcie #(31) faddrf1_ff (.*, .en(fetch_bf_en), .din(fetch_addr_bf[31:1]), .dout(ifc_fetch_addr_f[31:1]));
if (pt.ICCM_ENABLE) begin
logic iccm_acc_in_region_bf;
logic iccm_acc_in_range_bf;
rvrangecheck #( .CCM_SADR (pt.ICCM_SADR),
.CCM_SIZE (pt.ICCM_SIZE) ) iccm_rangecheck (
.addr ({ifc_fetch_addr_bf[31:1],1'b0}) ,
.in_range (iccm_acc_in_range_bf) ,
.in_region(iccm_acc_in_region_bf)
);
assign ifc_iccm_access_bf = iccm_acc_in_range_bf ;
assign ifc_dma_access_ok = ( (~ifc_iccm_access_bf |
(fb_full_f & ~(ifu_fb_consume2 | ifu_fb_consume1)) |
(wfm & ~ifc_fetch_req_bf) |
idle ) & ~exu_flush_final) |
dma_iccm_stall_any_f;
assign ifc_region_acc_fault_bf = ~iccm_acc_in_range_bf & iccm_acc_in_region_bf ;
end
else begin
assign ifc_iccm_access_bf = 1'b0 ;
assign ifc_dma_access_ok = 1'b0 ;
assign ifc_region_acc_fault_bf = 1'b0 ;
end
assign ifc_fetch_uncacheable_bf = ~dec_tlu_mrac_ff[{ifc_fetch_addr_bf[31:28] , 1'b0 }] ; // bit 0 of each region description is the cacheable bit
endmodule // el2_ifu_ifc_ctl

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,89 @@
//********************************************************************************
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
module el2_ifu_tb_memread;
logic [15:0] compressed [0:128000]; // vector of compressed instructions
logic [31:0] expected [0:128000]; // vector of correspoding expected instruction
logic rst_l;
logic clk;
int clk_count;
logic [31:0] expected_val;
logic [15:0] compressed_din;
logic [31:0] actual;
logic error;
integer i;
initial begin
clk=0;
rst_l=0;
// initialize the reads and populate the instruction arrays
$readmemh ("left64k", compressed );
$readmemh ("right64k", expected );
$dumpfile ("top.vcd");
$dumpvars;
$dumpon;
end
always #50 clk =~clk;
always @(posedge clk) begin
clk_count = clk_count +1;
if (clk_count>=1 & clk_count<=3) rst_l <= 1'b0;
else rst_l <= 1'b1;
if (clk_count > 3) begin
compressed_din[15:0] <= compressed[clk_count-3]; // c.mv
expected_val[31:0] <= expected[clk_count-3];
end
if (clk_count == 65000) begin
$dumpoff;
$finish;
end
end // always @ (posedge clk)
always @(negedge clk) begin
if (clk_count > 3 & error) begin
$display("clock: %d compressed %h error actual %h expected %h",clk_count,compressed_din,actual,expected_val);
end
end
el2_ifu_compress_ctl align (.*,.din(compressed_din[15:0]),.dout(actual[31:0]));
assign error = actual[31:0] != expected_val[31:0];
endmodule // el2_ifu_tb_memread

View File

@ -0,0 +1,407 @@
// performance monitor stuff
//`ifndef EL2_DEF_SV
//`define EL2_DEF_SV
package el2_pkg;
typedef struct packed {
logic trace_rv_i_valid_ip;
logic [31:0] trace_rv_i_insn_ip;
logic [31:0] trace_rv_i_address_ip;
logic trace_rv_i_exception_ip;
logic [4:0] trace_rv_i_ecause_ip;
logic trace_rv_i_interrupt_ip;
logic [31:0] trace_rv_i_tval_ip;
} el2_trace_pkt_t;
typedef enum logic [3:0] {
NULL = 4'b0000,
MUL = 4'b0001,
LOAD = 4'b0010,
STORE = 4'b0011,
ALU = 4'b0100,
CSRREAD = 4'b0101,
CSRWRITE = 4'b0110,
CSRRW = 4'b0111,
EBREAK = 4'b1000,
ECALL = 4'b1001,
FENCE = 4'b1010,
FENCEI = 4'b1011,
MRET = 4'b1100,
CONDBR = 4'b1101,
JAL = 4'b1110,
BITMANIPU = 4'b1111
} el2_inst_pkt_t;
typedef struct packed {
logic valid;
logic wb;
logic [2:0] tag;
logic [4:0] rd;
} el2_load_cam_pkt_t;
typedef struct packed {
logic pc0_call;
logic pc0_ret;
logic pc0_pc4;
} el2_rets_pkt_t;
typedef struct packed {
logic valid;
logic [11:0] toffset;
logic [1:0] hist;
logic br_error;
logic br_start_error;
logic bank;
logic [31:1] prett; // predicted ret target
logic way;
logic ret;
} el2_br_pkt_t;
typedef struct packed {
logic valid;
logic [1:0] hist;
logic br_error;
logic br_start_error;
logic way;
logic middle;
} el2_br_tlu_pkt_t;
typedef struct packed {
logic misp;
logic ataken;
logic boffset;
logic pc4;
logic [1:0] hist;
logic [11:0] toffset;
logic valid;
logic br_error;
logic br_start_error;
logic pcall;
logic pja;
logic way;
logic pret;
// for power use the pret bit to clock the prett field
logic [31:1] prett;
} el2_predict_pkt_t;
typedef struct packed {
// unlikely to change
logic icaf;
logic icaf_second;
logic [1:0] icaf_type;
logic fence_i;
logic [3:0] i0trigger;
logic pmu_i0_br_unpred; // pmu
logic pmu_divide;
// likely to change
logic legal;
logic pmu_lsu_misaligned;
el2_inst_pkt_t pmu_i0_itype; // pmu - instruction type
} el2_trap_pkt_t;
typedef struct packed {
// unlikely to change
logic i0div;
logic csrwen;
logic csrwonly;
logic [11:0] csrwaddr;
// likely to change
logic [4:0] i0rd;
logic i0load;
logic i0store;
logic i0v;
logic i0valid;
} el2_dest_pkt_t;
typedef struct packed {
logic mul;
logic load;
logic alu;
} el2_class_pkt_t;
typedef struct packed {
logic [4:0] rs1;
logic [4:0] rs2;
logic [4:0] rd;
} el2_reg_pkt_t;
typedef struct packed {
logic clz;
logic ctz;
logic cpop;
logic sext_b;
logic sext_h;
logic min;
logic max;
logic pack;
logic packu;
logic packh;
logic rol;
logic ror;
logic grev;
logic gorc;
logic zbb;
logic bset;
logic bclr;
logic binv;
logic bext;
logic sh1add;
logic sh2add;
logic sh3add;
logic zba;
logic land;
logic lor;
logic lxor;
logic sll;
logic srl;
logic sra;
logic beq;
logic bne;
logic blt;
logic bge;
logic add;
logic sub;
logic slt;
logic unsign;
logic jal;
logic predict_t;
logic predict_nt;
logic csr_write;
logic csr_imm;
} el2_alu_pkt_t;
typedef struct packed {
logic fast_int;
/* verilator lint_off SYMRSVDWORD */
logic stack;
/* verilator lint_on SYMRSVDWORD */
logic by;
logic half;
logic word;
logic dword; // for dma
logic load;
logic store;
logic unsign;
logic dma; // dma pkt
logic store_data_bypass_d;
logic load_ldst_bypass_d;
logic store_data_bypass_m;
logic valid;
} el2_lsu_pkt_t;
typedef struct packed {
logic inst_type; //0: Load, 1: Store
//logic dma_valid;
logic exc_type; //0: MisAligned, 1: Access Fault
logic [3:0] mscause;
logic [31:0] addr;
logic single_ecc_error;
logic exc_valid;
} el2_lsu_error_pkt_t;
typedef struct packed {
logic clz;
logic ctz;
logic cpop;
logic sext_b;
logic sext_h;
logic min;
logic max;
logic pack;
logic packu;
logic packh;
logic rol;
logic ror;
logic grev;
logic gorc;
logic zbb;
logic bset;
logic bclr;
logic binv;
logic bext;
logic zbs;
logic bcompress;
logic bdecompress;
logic zbe;
logic clmul;
logic clmulh;
logic clmulr;
logic zbc;
logic shfl;
logic unshfl;
logic xperm_n;
logic xperm_b;
logic xperm_h;
logic zbp;
logic crc32_b;
logic crc32_h;
logic crc32_w;
logic crc32c_b;
logic crc32c_h;
logic crc32c_w;
logic zbr;
logic bfp;
logic zbf;
logic sh1add;
logic sh2add;
logic sh3add;
logic zba;
logic alu;
logic rs1;
logic rs2;
logic imm12;
logic rd;
logic shimm5;
logic imm20;
logic pc;
logic load;
logic store;
logic lsu;
logic add;
logic sub;
logic land;
logic lor;
logic lxor;
logic sll;
logic sra;
logic srl;
logic slt;
logic unsign;
logic condbr;
logic beq;
logic bne;
logic bge;
logic blt;
logic jal;
logic by;
logic half;
logic word;
logic csr_read;
logic csr_clr;
logic csr_set;
logic csr_write;
logic csr_imm;
logic presync;
logic postsync;
logic ebreak;
logic ecall;
logic mret;
logic mul;
logic rs1_sign;
logic rs2_sign;
logic low;
logic div;
logic rem;
logic fence;
logic fence_i;
logic pm_alu;
logic legal;
} el2_dec_pkt_t;
typedef struct packed {
logic valid;
logic rs1_sign;
logic rs2_sign;
logic low;
logic bcompress;
logic bdecompress;
logic clmul;
logic clmulh;
logic clmulr;
logic grev;
logic gorc;
logic shfl;
logic unshfl;
logic crc32_b;
logic crc32_h;
logic crc32_w;
logic crc32c_b;
logic crc32c_h;
logic crc32c_w;
logic bfp;
logic xperm_n;
logic xperm_b;
logic xperm_h;
} el2_mul_pkt_t;
typedef struct packed {
logic valid;
logic unsign;
logic rem;
} el2_div_pkt_t;
typedef struct packed {
logic TEST1;
logic RME;
logic [3:0] RM;
logic LS;
logic DS;
logic SD;
logic TEST_RNM;
logic BC1;
logic BC2;
} el2_ccm_ext_in_pkt_t;
typedef struct packed {
logic TEST1;
logic RME;
logic [3:0] RM;
logic LS;
logic DS;
logic SD;
logic TEST_RNM;
logic BC1;
logic BC2;
} el2_dccm_ext_in_pkt_t;
typedef struct packed {
logic TEST1;
logic RME;
logic [3:0] RM;
logic LS;
logic DS;
logic SD;
logic TEST_RNM;
logic BC1;
logic BC2;
} el2_ic_data_ext_in_pkt_t;
typedef struct packed {
logic TEST1;
logic RME;
logic [3:0] RM;
logic LS;
logic DS;
logic SD;
logic TEST_RNM;
logic BC1;
logic BC2;
} el2_ic_tag_ext_in_pkt_t;
typedef struct packed {
logic select;
logic match;
logic store;
logic load;
logic execute;
logic m;
logic [31:0] tdata2;
} el2_trigger_pkt_t;
typedef struct packed {
logic [70:0] icache_wrdata; // {dicad1[1:0], dicad0h[31:0], dicad0[31:0]}
logic [16:0] icache_dicawics; // Arraysel:24, Waysel:21:20, Index:16:3
logic icache_rd_valid;
logic icache_wr_valid;
} el2_cache_debug_pkt_t;
//`endif
endpackage // el2_pkg

View File

@ -0,0 +1,289 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
// Owner:
// Function: AHB to AXI4 Bridge
// Comments:
//
//********************************************************************************
module ahb_to_axi4
import el2_pkg::*;
#(
TAG = 1,
`include "el2_param.vh"
)
// ,TAG = 1)
(
input clk,
input rst_l,
input scan_mode,
input bus_clk_en,
input clk_override,
// AXI signals
// AXI Write Channels
output logic axi_awvalid,
input logic axi_awready,
output logic [TAG-1:0] axi_awid,
output logic [31:0] axi_awaddr,
output logic [2:0] axi_awsize,
output logic [2:0] axi_awprot,
output logic [7:0] axi_awlen,
output logic [1:0] axi_awburst,
output logic axi_wvalid,
input logic axi_wready,
output logic [63:0] axi_wdata,
output logic [7:0] axi_wstrb,
output logic axi_wlast,
input logic axi_bvalid,
output logic axi_bready,
input logic [1:0] axi_bresp,
input logic [TAG-1:0] axi_bid,
// AXI Read Channels
output logic axi_arvalid,
input logic axi_arready,
output logic [TAG-1:0] axi_arid,
output logic [31:0] axi_araddr,
output logic [2:0] axi_arsize,
output logic [2:0] axi_arprot,
output logic [7:0] axi_arlen,
output logic [1:0] axi_arburst,
input logic axi_rvalid,
output logic axi_rready,
input logic [TAG-1:0] axi_rid,
input logic [63:0] axi_rdata,
input logic [1:0] axi_rresp,
// AHB-Lite signals
input logic [31:0] ahb_haddr, // ahb bus address
input logic [2:0] ahb_hburst, // tied to 0
input logic ahb_hmastlock, // tied to 0
input logic [3:0] ahb_hprot, // tied to 4'b0011
input logic [2:0] ahb_hsize, // size of bus transaction (possible values 0,1,2,3)
input logic [1:0] ahb_htrans, // Transaction type (possible values 0,2 only right now)
input logic ahb_hwrite, // ahb bus write
input logic [63:0] ahb_hwdata, // ahb bus write data
input logic ahb_hsel, // this slave was selected
input logic ahb_hreadyin, // previous hready was accepted or not
output logic [63:0] ahb_hrdata, // ahb bus read data
output logic ahb_hreadyout, // slave ready to accept transaction
output logic ahb_hresp // slave response (high indicates erro)
);
logic [7:0] master_wstrb;
typedef enum logic [1:0] { IDLE = 2'b00, // Nothing in the buffer. No commands yet recieved
WR = 2'b01, // Write Command recieved
RD = 2'b10, // Read Command recieved
PEND = 2'b11 // Waiting on Read Data from core
} state_t;
state_t buf_state, buf_nxtstate;
logic buf_state_en;
// Buffer signals (one entry buffer)
logic buf_read_error_in, buf_read_error;
logic [63:0] buf_rdata;
logic ahb_hready;
logic ahb_hready_q;
logic [1:0] ahb_htrans_in, ahb_htrans_q;
logic [2:0] ahb_hsize_q;
logic ahb_hwrite_q;
logic [31:0] ahb_haddr_q;
logic [63:0] ahb_hwdata_q;
logic ahb_hresp_q;
//Miscellaneous signals
logic ahb_addr_in_dccm, ahb_addr_in_iccm, ahb_addr_in_pic;
logic ahb_addr_in_dccm_region_nc, ahb_addr_in_iccm_region_nc, ahb_addr_in_pic_region_nc;
// signals needed for the read data coming back from the core and to block any further commands as AHB is a blocking bus
logic buf_rdata_en;
logic ahb_addr_clk_en, buf_rdata_clk_en;
logic bus_clk, ahb_addr_clk, buf_rdata_clk;
// Command buffer is the holding station where we convert to AXI and send to core
logic cmdbuf_wr_en, cmdbuf_rst;
logic cmdbuf_full;
logic cmdbuf_vld, cmdbuf_write;
logic [1:0] cmdbuf_size;
logic [7:0] cmdbuf_wstrb;
logic [31:0] cmdbuf_addr;
logic [63:0] cmdbuf_wdata;
// FSM to control the bus states and when to block the hready and load the command buffer
always_comb begin
buf_nxtstate = IDLE;
buf_state_en = 1'b0;
buf_rdata_en = 1'b0; // signal to load the buffer when the core sends read data back
buf_read_error_in = 1'b0; // signal indicating that an error came back with the read from the core
cmdbuf_wr_en = 1'b0; // all clear from the gasket to load the buffer with the command for reads, command/dat for writes
case (buf_state)
IDLE: begin // No commands recieved
buf_nxtstate = ahb_hwrite ? WR : RD;
buf_state_en = ahb_hready & ahb_htrans[1] & ahb_hsel; // only transition on a valid hrtans
end
WR: begin // Write command recieved last cycle
buf_nxtstate = (ahb_hresp | (ahb_htrans[1:0] == 2'b0) | ~ahb_hsel) ? IDLE : ahb_hwrite ? WR : RD;
buf_state_en = (~cmdbuf_full | ahb_hresp) ;
cmdbuf_wr_en = ~cmdbuf_full & ~(ahb_hresp | ((ahb_htrans[1:0] == 2'b01) & ahb_hsel)); // Dont send command to the buffer in case of an error or when the master is not ready with the data now.
end
RD: begin // Read command recieved last cycle.
buf_nxtstate = ahb_hresp ? IDLE :PEND; // If error go to idle, else wait for read data
buf_state_en = (~cmdbuf_full | ahb_hresp); // only when command can go, or if its an error
cmdbuf_wr_en = ~ahb_hresp & ~cmdbuf_full; // send command only when no error
end
PEND: begin // Read Command has been sent. Waiting on Data.
buf_nxtstate = IDLE; // go back for next command and present data next cycle
buf_state_en = axi_rvalid & ~cmdbuf_write; // read data is back
buf_rdata_en = buf_state_en; // buffer the read data coming back from core
buf_read_error_in = buf_state_en & |axi_rresp[1:0]; // buffer error flag if return has Error ( ECC )
end
endcase
end // always_comb begin
rvdffs_fpga #($bits(state_t)) state_reg (.*, .din(buf_nxtstate), .dout({buf_state}), .en(buf_state_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk));
assign master_wstrb[7:0] = ({8{ahb_hsize_q[2:0] == 3'b0}} & (8'b1 << ahb_haddr_q[2:0])) |
({8{ahb_hsize_q[2:0] == 3'b1}} & (8'b11 << ahb_haddr_q[2:0])) |
({8{ahb_hsize_q[2:0] == 3'b10}} & (8'b1111 << ahb_haddr_q[2:0])) |
({8{ahb_hsize_q[2:0] == 3'b11}} & 8'b1111_1111);
// AHB signals
assign ahb_hreadyout = ahb_hresp ? (ahb_hresp_q & ~ahb_hready_q) :
((~cmdbuf_full | (buf_state == IDLE)) & ~(buf_state == RD | buf_state == PEND) & ~buf_read_error);
assign ahb_hready = ahb_hreadyout & ahb_hreadyin;
assign ahb_htrans_in[1:0] = {2{ahb_hsel}} & ahb_htrans[1:0];
assign ahb_hrdata[63:0] = buf_rdata[63:0];
assign ahb_hresp = ((ahb_htrans_q[1:0] != 2'b0) & (buf_state != IDLE) &
((~(ahb_addr_in_dccm | ahb_addr_in_iccm)) | // request not for ICCM or DCCM
((ahb_addr_in_iccm | (ahb_addr_in_dccm & ahb_hwrite_q)) & ~((ahb_hsize_q[1:0] == 2'b10) | (ahb_hsize_q[1:0] == 2'b11))) | // ICCM Rd/Wr OR DCCM Wr not the right size
((ahb_hsize_q[2:0] == 3'h1) & ahb_haddr_q[0]) | // HW size but unaligned
((ahb_hsize_q[2:0] == 3'h2) & (|ahb_haddr_q[1:0])) | // W size but unaligned
((ahb_hsize_q[2:0] == 3'h3) & (|ahb_haddr_q[2:0])))) | // DW size but unaligned
buf_read_error | // Read ECC error
(ahb_hresp_q & ~ahb_hready_q);
// Buffer signals - needed for the read data and ECC error response
rvdff_fpga #(.WIDTH(64)) buf_rdata_ff (.din(axi_rdata[63:0]), .dout(buf_rdata[63:0]), .clk(buf_rdata_clk), .clken(buf_rdata_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) buf_read_error_ff(.din(buf_read_error_in), .dout(buf_read_error), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); // buf_read_error will be high only one cycle
// All the Master signals are captured before presenting it to the command buffer. We check for Hresp before sending it to the cmd buffer.
rvdff_fpga #(.WIDTH(1)) hresp_ff (.din(ahb_hresp), .dout(ahb_hresp_q), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) hready_ff (.din(ahb_hready), .dout(ahb_hready_q), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(2)) htrans_ff (.din(ahb_htrans_in[1:0]), .dout(ahb_htrans_q[1:0]), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(3)) hsize_ff (.din(ahb_hsize[2:0]), .dout(ahb_hsize_q[2:0]), .clk(ahb_addr_clk), .clken(ahb_addr_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) hwrite_ff (.din(ahb_hwrite), .dout(ahb_hwrite_q), .clk(ahb_addr_clk), .clken(ahb_addr_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(32)) haddr_ff (.din(ahb_haddr[31:0]), .dout(ahb_haddr_q[31:0]), .clk(ahb_addr_clk), .clken(ahb_addr_clk_en), .rawclk(clk), .*);
// Address check dccm
rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
.CCM_SIZE(pt.DCCM_SIZE)) addr_dccm_rangecheck (
.addr(ahb_haddr_q[31:0]),
.in_range(ahb_addr_in_dccm),
.in_region(ahb_addr_in_dccm_region_nc)
);
// Address check iccm
if (pt.ICCM_ENABLE == 1) begin: GenICCM
rvrangecheck #(.CCM_SADR(pt.ICCM_SADR),
.CCM_SIZE(pt.ICCM_SIZE)) addr_iccm_rangecheck (
.addr(ahb_haddr_q[31:0]),
.in_range(ahb_addr_in_iccm),
.in_region(ahb_addr_in_iccm_region_nc)
);
end else begin: GenNoICCM
assign ahb_addr_in_iccm = '0;
assign ahb_addr_in_iccm_region_nc = '0;
end
// PIC memory address check
rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
.CCM_SIZE(pt.PIC_SIZE)) addr_pic_rangecheck (
.addr(ahb_haddr_q[31:0]),
.in_range(ahb_addr_in_pic),
.in_region(ahb_addr_in_pic_region_nc)
);
// Command Buffer - Holding for the commands to be sent for the AXI. It will be converted to the AXI signals.
assign cmdbuf_rst = (((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)) & ~cmdbuf_wr_en) | (ahb_hresp & ~cmdbuf_write);
assign cmdbuf_full = (cmdbuf_vld & ~((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)));
rvdffsc_fpga #(.WIDTH(1)) cmdbuf_vldff (.din(1'b1), .dout(cmdbuf_vld), .en(cmdbuf_wr_en), .clear(cmdbuf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(1)) cmdbuf_writeff (.din(ahb_hwrite_q), .dout(cmdbuf_write), .en(cmdbuf_wr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(2)) cmdbuf_sizeff (.din(ahb_hsize_q[1:0]), .dout(cmdbuf_size[1:0]), .en(cmdbuf_wr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(8)) cmdbuf_wstrbff (.din(master_wstrb[7:0]), .dout(cmdbuf_wstrb[7:0]), .en(cmdbuf_wr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffe #(.WIDTH(32)) cmdbuf_addrff (.din(ahb_haddr_q[31:0]), .dout(cmdbuf_addr[31:0]), .en(cmdbuf_wr_en & bus_clk_en), .clk(clk), .*);
rvdffe #(.WIDTH(64)) cmdbuf_wdataff (.din(ahb_hwdata[63:0]), .dout(cmdbuf_wdata[63:0]), .en(cmdbuf_wr_en & bus_clk_en), .clk(clk), .*);
// AXI Write Command Channel
assign axi_awvalid = cmdbuf_vld & cmdbuf_write;
assign axi_awid[TAG-1:0] = '0;
assign axi_awaddr[31:0] = cmdbuf_addr[31:0];
assign axi_awsize[2:0] = {1'b0, cmdbuf_size[1:0]};
assign axi_awprot[2:0] = 3'b0;
assign axi_awlen[7:0] = '0;
assign axi_awburst[1:0] = 2'b01;
// AXI Write Data Channel - This is tied to the command channel as we only write the command buffer once we have the data.
assign axi_wvalid = cmdbuf_vld & cmdbuf_write;
assign axi_wdata[63:0] = cmdbuf_wdata[63:0];
assign axi_wstrb[7:0] = cmdbuf_wstrb[7:0];
assign axi_wlast = 1'b1;
// AXI Write Response - Always ready. AHB does not require a write response.
assign axi_bready = 1'b1;
// AXI Read Channels
assign axi_arvalid = cmdbuf_vld & ~cmdbuf_write;
assign axi_arid[TAG-1:0] = '0;
assign axi_araddr[31:0] = cmdbuf_addr[31:0];
assign axi_arsize[2:0] = {1'b0, cmdbuf_size[1:0]};
assign axi_arprot = 3'b0;
assign axi_arlen[7:0] = '0;
assign axi_arburst[1:0] = 2'b01;
// AXI Read Response Channel - Always ready as AHB reads are blocking and the the buffer is available for the read coming back always.
assign axi_rready = 1'b1;
// Clock header logic
assign ahb_addr_clk_en = bus_clk_en & (ahb_hready & ahb_htrans[1]);
assign buf_rdata_clk_en = bus_clk_en & buf_rdata_en;
`ifdef RV_FPGA_OPTIMIZE
assign bus_clk = 1'b0;
assign ahb_addr_clk = 1'b0;
assign buf_rdata_clk = 1'b0;
`else
rvclkhdr bus_cgc (.en(bus_clk_en), .l1clk(bus_clk), .*);
rvclkhdr ahb_addr_cgc (.en(ahb_addr_clk_en), .l1clk(ahb_addr_clk), .*);
rvclkhdr buf_rdata_cgc (.en(buf_rdata_clk_en), .l1clk(buf_rdata_clk), .*);
`endif
`ifdef RV_ASSERT_ON
property ahb_error_protocol;
@(posedge bus_clk) (ahb_hready & ahb_hresp) |-> (~$past(ahb_hready) & $past(ahb_hresp));
endproperty
assert_ahb_error_protocol: assert property (ahb_error_protocol) else
$display("Bus Error with hReady isn't preceded with Bus Error without hready");
`endif
endmodule // ahb_to_axi4

View File

@ -0,0 +1,477 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
// Owner:
// Function: AXI4 -> AHB Bridge
// Comments:
//
//********************************************************************************
module axi4_to_ahb
import el2_pkg::*;
#(
`include "el2_param.vh"
,parameter TAG = 1) (
input clk,
input free_clk,
input rst_l,
input scan_mode,
input bus_clk_en,
input clk_override,
input dec_tlu_force_halt,
// AXI signals
// AXI Write Channels
input logic axi_awvalid,
output logic axi_awready,
input logic [TAG-1:0] axi_awid,
input logic [31:0] axi_awaddr,
input logic [2:0] axi_awsize,
input logic [2:0] axi_awprot,
input logic axi_wvalid,
output logic axi_wready,
input logic [63:0] axi_wdata,
input logic [7:0] axi_wstrb,
input logic axi_wlast,
output logic axi_bvalid,
input logic axi_bready,
output logic [1:0] axi_bresp,
output logic [TAG-1:0] axi_bid,
// AXI Read Channels
input logic axi_arvalid,
output logic axi_arready,
input logic [TAG-1:0] axi_arid,
input logic [31:0] axi_araddr,
input logic [2:0] axi_arsize,
input logic [2:0] axi_arprot,
output logic axi_rvalid,
input logic axi_rready,
output logic [TAG-1:0] axi_rid,
output logic [63:0] axi_rdata,
output logic [1:0] axi_rresp,
output logic axi_rlast,
// AHB-Lite signals
output logic [31:0] ahb_haddr, // ahb bus address
output logic [2:0] ahb_hburst, // tied to 0
output logic ahb_hmastlock, // tied to 0
output logic [3:0] ahb_hprot, // tied to 4'b0011
output logic [2:0] ahb_hsize, // size of bus transaction (possible values 0,1,2,3)
output logic [1:0] ahb_htrans, // Transaction type (possible values 0,2 only right now)
output logic ahb_hwrite, // ahb bus write
output logic [63:0] ahb_hwdata, // ahb bus write data
input logic [63:0] ahb_hrdata, // ahb bus read data
input logic ahb_hready, // slave ready to accept transaction
input logic ahb_hresp // slave response (high indicates erro)
);
localparam ID = 1;
localparam PRTY = 1;
typedef enum logic [2:0] {IDLE=3'b000, CMD_RD=3'b001, CMD_WR=3'b010, DATA_RD=3'b011, DATA_WR=3'b100, DONE=3'b101, STREAM_RD=3'b110, STREAM_ERR_RD=3'b111} state_t;
state_t buf_state, buf_nxtstate;
logic slave_valid;
logic slave_ready;
logic [TAG-1:0] slave_tag;
logic [63:0] slave_rdata;
logic [3:0] slave_opc;
logic wrbuf_en, wrbuf_data_en;
logic wrbuf_cmd_sent, wrbuf_rst;
logic wrbuf_vld;
logic wrbuf_data_vld;
logic [TAG-1:0] wrbuf_tag;
logic [2:0] wrbuf_size;
logic [31:0] wrbuf_addr;
logic [63:0] wrbuf_data;
logic [7:0] wrbuf_byteen;
logic master_valid;
logic master_ready;
logic [TAG-1:0] master_tag;
logic [31:0] master_addr;
logic [63:0] master_wdata;
logic [2:0] master_size;
logic [2:0] master_opc;
logic [7:0] master_byteen;
// Buffer signals (one entry buffer)
logic [31:0] buf_addr;
logic [1:0] buf_size;
logic buf_write;
logic [7:0] buf_byteen;
logic buf_aligned;
logic [63:0] buf_data;
logic [TAG-1:0] buf_tag;
//Miscellaneous signals
logic buf_rst;
logic [TAG-1:0] buf_tag_in;
logic [31:0] buf_addr_in;
logic [7:0] buf_byteen_in;
logic [63:0] buf_data_in;
logic buf_write_in;
logic buf_aligned_in;
logic [2:0] buf_size_in;
logic buf_state_en;
logic buf_wr_en;
logic buf_data_wr_en;
logic slvbuf_error_en;
logic wr_cmd_vld;
logic cmd_done_rst, cmd_done, cmd_doneQ;
logic trxn_done;
logic [2:0] buf_cmd_byte_ptr, buf_cmd_byte_ptrQ, buf_cmd_nxtbyte_ptr;
logic buf_cmd_byte_ptr_en;
logic found;
logic slave_valid_pre;
logic ahb_hready_q;
logic ahb_hresp_q;
logic [1:0] ahb_htrans_q;
logic ahb_hwrite_q;
logic [63:0] ahb_hrdata_q;
logic slvbuf_write;
logic slvbuf_error;
logic [TAG-1:0] slvbuf_tag;
logic slvbuf_error_in;
logic slvbuf_wr_en;
logic bypass_en;
logic rd_bypass_idle;
logic last_addr_en;
logic [31:0] last_bus_addr;
// Clocks
logic buf_clken;
logic ahbm_data_clken;
logic buf_clk;
logic bus_clk;
logic ahbm_data_clk;
logic dec_tlu_force_halt_bus, dec_tlu_force_halt_bus_ns, dec_tlu_force_halt_bus_q;
// Function to get the length from byte enable
function automatic logic [1:0] get_write_size;
input logic [7:0] byteen;
logic [1:0] size;
size[1:0] = (2'b11 & {2{(byteen[7:0] == 8'hff)}}) |
(2'b10 & {2{((byteen[7:0] == 8'hf0) | (byteen[7:0] == 8'h0f))}}) |
(2'b01 & {2{((byteen[7:0] == 8'hc0) | (byteen[7:0] == 8'h30) | (byteen[7:0] == 8'h0c) | (byteen[7:0] == 8'h03))}});
return size[1:0];
endfunction // get_write_size
// Function to get the length from byte enable
function automatic logic [2:0] get_write_addr;
input logic [7:0] byteen;
logic [2:0] addr;
addr[2:0] = (3'h0 & {3{((byteen[7:0] == 8'hff) | (byteen[7:0] == 8'h0f) | (byteen[7:0] == 8'h03))}}) |
(3'h2 & {3{(byteen[7:0] == 8'h0c)}}) |
(3'h4 & {3{((byteen[7:0] == 8'hf0) | (byteen[7:0] == 8'h03))}}) |
(3'h6 & {3{(byteen[7:0] == 8'hc0)}});
return addr[2:0];
endfunction // get_write_addr
// Function to get the next byte pointer
function automatic logic [2:0] get_nxtbyte_ptr (logic [2:0] current_byte_ptr, logic [7:0] byteen, logic get_next);
logic [2:0] start_ptr;
logic found;
found = '0;
//get_nxtbyte_ptr[2:0] = current_byte_ptr[2:0];
start_ptr[2:0] = get_next ? (current_byte_ptr[2:0] + 3'b1) : current_byte_ptr[2:0];
for (int j=0; j<8; j++) begin
if (~found) begin
get_nxtbyte_ptr[2:0] = 3'(j);
found |= (byteen[j] & (3'(j) >= start_ptr[2:0])) ;
end
end
endfunction // get_nextbyte_ptr
// Create bus synchronized version of force halt
assign dec_tlu_force_halt_bus = dec_tlu_force_halt | dec_tlu_force_halt_bus_q;
assign dec_tlu_force_halt_bus_ns = ~bus_clk_en & dec_tlu_force_halt_bus;
rvdff #(.WIDTH(1)) force_halt_busff(.din(dec_tlu_force_halt_bus_ns), .dout(dec_tlu_force_halt_bus_q), .clk(free_clk), .*);
// Write buffer
assign wrbuf_en = axi_awvalid & axi_awready & master_ready;
assign wrbuf_data_en = axi_wvalid & axi_wready & master_ready;
assign wrbuf_cmd_sent = master_valid & master_ready & (master_opc[2:1] == 2'b01);
assign wrbuf_rst = (wrbuf_cmd_sent & ~wrbuf_en) | dec_tlu_force_halt_bus;
assign axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent) & master_ready;
assign axi_wready = ~(wrbuf_data_vld & ~wrbuf_cmd_sent) & master_ready;
assign axi_arready = ~(wrbuf_vld & wrbuf_data_vld) & master_ready;
assign axi_rlast = 1'b1;
assign wr_cmd_vld = (wrbuf_vld & wrbuf_data_vld);
assign master_valid = wr_cmd_vld | axi_arvalid;
assign master_tag[TAG-1:0] = wr_cmd_vld ? wrbuf_tag[TAG-1:0] : axi_arid[TAG-1:0];
assign master_opc[2:0] = wr_cmd_vld ? 3'b011 : 3'b0;
assign master_addr[31:0] = wr_cmd_vld ? wrbuf_addr[31:0] : axi_araddr[31:0];
assign master_size[2:0] = wr_cmd_vld ? wrbuf_size[2:0] : axi_arsize[2:0];
assign master_byteen[7:0] = wrbuf_byteen[7:0];
assign master_wdata[63:0] = wrbuf_data[63:0];
// AXI response channel signals
assign axi_bvalid = slave_valid & slave_ready & slave_opc[3];
assign axi_bresp[1:0] = slave_opc[0] ? 2'b10 : (slave_opc[1] ? 2'b11 : 2'b0);
assign axi_bid[TAG-1:0] = slave_tag[TAG-1:0];
assign axi_rvalid = slave_valid & slave_ready & (slave_opc[3:2] == 2'b0);
assign axi_rresp[1:0] = slave_opc[0] ? 2'b10 : (slave_opc[1] ? 2'b11 : 2'b0);
assign axi_rid[TAG-1:0] = slave_tag[TAG-1:0];
assign axi_rdata[63:0] = slave_rdata[63:0];
assign slave_ready = axi_bready & axi_rready;
// FIFO state machine
always_comb begin
buf_nxtstate = IDLE;
buf_state_en = 1'b0;
buf_wr_en = 1'b0;
buf_data_wr_en = 1'b0;
slvbuf_error_in = 1'b0;
slvbuf_error_en = 1'b0;
buf_write_in = 1'b0;
cmd_done = 1'b0;
trxn_done = 1'b0;
buf_cmd_byte_ptr_en = 1'b0;
buf_cmd_byte_ptr[2:0] = '0;
slave_valid_pre = 1'b0;
master_ready = 1'b0;
ahb_htrans[1:0] = 2'b0;
slvbuf_wr_en = 1'b0;
bypass_en = 1'b0;
rd_bypass_idle = 1'b0;
case (buf_state)
IDLE: begin
master_ready = 1'b1;
buf_write_in = (master_opc[2:1] == 2'b01);
buf_nxtstate = buf_write_in ? CMD_WR : CMD_RD;
buf_state_en = master_valid & master_ready;
buf_wr_en = buf_state_en;
buf_data_wr_en = buf_state_en & (buf_nxtstate == CMD_WR);
buf_cmd_byte_ptr_en = buf_state_en;
buf_cmd_byte_ptr[2:0] = buf_write_in ? get_nxtbyte_ptr(3'b0,buf_byteen_in[7:0],1'b0) : master_addr[2:0];
bypass_en = buf_state_en;
rd_bypass_idle = bypass_en & (buf_nxtstate == CMD_RD);
ahb_htrans[1:0] = {2{bypass_en}} & 2'b10;
end
CMD_RD: begin
buf_nxtstate = (master_valid & (master_opc[2:0] == 3'b000))? STREAM_RD : DATA_RD;
buf_state_en = ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & ~ahb_hwrite_q;
cmd_done = buf_state_en & ~master_valid;
slvbuf_wr_en = buf_state_en;
master_ready = buf_state_en & (buf_nxtstate == STREAM_RD);
buf_wr_en = master_ready;
bypass_en = master_ready & master_valid;
buf_cmd_byte_ptr[2:0] = bypass_en ? master_addr[2:0] : buf_addr[2:0];
ahb_htrans[1:0] = 2'b10 & {2{~buf_state_en | bypass_en}};
end
STREAM_RD: begin
master_ready = (ahb_hready_q & ~ahb_hresp_q) & ~(master_valid & master_opc[2:1] == 2'b01);
buf_wr_en = (master_valid & master_ready & (master_opc[2:0] == 3'b000)); // update the fifo if we are streaming the read commands
buf_nxtstate = ahb_hresp_q ? STREAM_ERR_RD : (buf_wr_en ? STREAM_RD : DATA_RD); // assuming that the master accpets the slave response right away.
buf_state_en = (ahb_hready_q | ahb_hresp_q);
buf_data_wr_en = buf_state_en;
slvbuf_error_in = ahb_hresp_q;
slvbuf_error_en = buf_state_en;
slave_valid_pre = buf_state_en & ~ahb_hresp_q; // send a response right away if we are not going through an error response.
cmd_done = buf_state_en & ~master_valid; // last one of the stream should not send a htrans
bypass_en = master_ready & master_valid & (buf_nxtstate == STREAM_RD) & buf_state_en;
buf_cmd_byte_ptr[2:0] = bypass_en ? master_addr[2:0] : buf_addr[2:0];
ahb_htrans[1:0] = 2'b10 & {2{~((buf_nxtstate != STREAM_RD) & buf_state_en)}};
slvbuf_wr_en = buf_wr_en; // shifting the contents from the buf to slv_buf for streaming cases
end // case: STREAM_RD
STREAM_ERR_RD: begin
buf_nxtstate = DATA_RD;
buf_state_en = ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & ~ahb_hwrite_q;
slave_valid_pre = buf_state_en;
slvbuf_wr_en = buf_state_en; // Overwrite slvbuf with buffer
buf_cmd_byte_ptr[2:0] = buf_addr[2:0];
ahb_htrans[1:0] = 2'b10 & {2{~buf_state_en}};
end
DATA_RD: begin
buf_nxtstate = DONE;
buf_state_en = (ahb_hready_q | ahb_hresp_q);
buf_data_wr_en = buf_state_en;
slvbuf_error_in= ahb_hresp_q;
slvbuf_error_en= buf_state_en;
slvbuf_wr_en = buf_state_en;
end
CMD_WR: begin
buf_nxtstate = DATA_WR;
trxn_done = ahb_hready_q & ahb_hwrite_q & (ahb_htrans_q[1:0] != 2'b0);
buf_state_en = trxn_done;
buf_cmd_byte_ptr_en = buf_state_en;
slvbuf_wr_en = buf_state_en;
buf_cmd_byte_ptr = trxn_done ? get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1) : buf_cmd_byte_ptrQ;
cmd_done = trxn_done & (buf_aligned | (buf_cmd_byte_ptrQ == 3'b111) |
(buf_byteen[get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1)] == 1'b0));
ahb_htrans[1:0] = {2{~(cmd_done | cmd_doneQ)}} & 2'b10;
end
DATA_WR: begin
buf_state_en = (cmd_doneQ & ahb_hready_q) | ahb_hresp_q;
master_ready = buf_state_en & ~ahb_hresp_q & slave_ready; // Ready to accept new command if current command done and no error
buf_nxtstate = (ahb_hresp_q | ~slave_ready) ? DONE :
((master_valid & master_ready) ? ((master_opc[2:1] == 2'b01) ? CMD_WR : CMD_RD) : IDLE);
slvbuf_error_in = ahb_hresp_q;
slvbuf_error_en = buf_state_en;
buf_write_in = (master_opc[2:1] == 2'b01);
buf_wr_en = buf_state_en & ((buf_nxtstate == CMD_WR) | (buf_nxtstate == CMD_RD));
buf_data_wr_en = buf_wr_en;
cmd_done = (ahb_hresp_q | (ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) &
((buf_cmd_byte_ptrQ == 3'b111) | (buf_byteen[get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1)] == 1'b0))));
bypass_en = buf_state_en & buf_write_in & (buf_nxtstate == CMD_WR); // Only bypass for writes for the time being
ahb_htrans[1:0] = {2{(~(cmd_done | cmd_doneQ) | bypass_en)}} & 2'b10;
slave_valid_pre = buf_state_en & (buf_nxtstate != DONE);
trxn_done = ahb_hready_q & ahb_hwrite_q & (ahb_htrans_q[1:0] != 2'b0);
buf_cmd_byte_ptr_en = trxn_done | bypass_en;
buf_cmd_byte_ptr = bypass_en ? get_nxtbyte_ptr(3'b0,buf_byteen_in[7:0],1'b0) :
trxn_done ? get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1) : buf_cmd_byte_ptrQ;
end
DONE: begin
buf_nxtstate = IDLE;
buf_state_en = slave_ready;
slvbuf_error_en = 1'b1;
slave_valid_pre = 1'b1;
end
endcase
end
assign buf_rst = dec_tlu_force_halt_bus;
assign cmd_done_rst = slave_valid_pre;
assign buf_addr_in[31:3] = master_addr[31:3];
assign buf_addr_in[2:0] = (buf_aligned_in & (master_opc[2:1] == 2'b01)) ? get_write_addr(master_byteen[7:0]) : master_addr[2:0];
assign buf_tag_in[TAG-1:0] = master_tag[TAG-1:0];
assign buf_byteen_in[7:0] = wrbuf_byteen[7:0];
assign buf_data_in[63:0] = (buf_state == DATA_RD) ? ahb_hrdata_q[63:0] : master_wdata[63:0];
assign buf_size_in[1:0] = (buf_aligned_in & (master_size[1:0] == 2'b11) & (master_opc[2:1] == 2'b01)) ? get_write_size(master_byteen[7:0]) : master_size[1:0];
assign buf_aligned_in = (master_opc[2:0] == 3'b0) | // reads are always aligned since they are either DW or sideeffects
(master_size[1:0] == 2'b0) | (master_size[1:0] == 2'b01) | (master_size[1:0] == 2'b10) | // Always aligned for Byte/HW/Word since they can be only for non-idempotent. IFU/SB are always aligned
((master_size[1:0] == 2'b11) &
((master_byteen[7:0] == 8'h3) | (master_byteen[7:0] == 8'hc) | (master_byteen[7:0] == 8'h30) | (master_byteen[7:0] == 8'hc0) |
(master_byteen[7:0] == 8'hf) | (master_byteen[7:0] == 8'hf0) | (master_byteen[7:0] == 8'hff)));
// Generate the ahb signals
assign ahb_haddr[31:3] = bypass_en ? master_addr[31:3] : buf_addr[31:3];
assign ahb_haddr[2:0] = {3{(ahb_htrans == 2'b10)}} & buf_cmd_byte_ptr[2:0]; // Trxn should be aligned during IDLE
assign ahb_hsize[2:0] = bypass_en ? {1'b0, ({2{buf_aligned_in}} & buf_size_in[1:0])} :
{1'b0, ({2{buf_aligned}} & buf_size[1:0])}; // Send the full size for aligned trxn
assign ahb_hburst[2:0] = 3'b0;
assign ahb_hmastlock = 1'b0;
assign ahb_hprot[3:0] = {3'b001,~axi_arprot[2]};
assign ahb_hwrite = bypass_en ? (master_opc[2:1] == 2'b01) : buf_write;
assign ahb_hwdata[63:0] = buf_data[63:0];
assign slave_valid = slave_valid_pre;// & (~slvbuf_posted_write | slvbuf_error);
assign slave_opc[3:2] = slvbuf_write ? 2'b11 : 2'b00;
assign slave_opc[1:0] = {2{slvbuf_error}} & 2'b10;
assign slave_rdata[63:0] = slvbuf_error ? {2{last_bus_addr[31:0]}} : ((buf_state == DONE) ? buf_data[63:0] : ahb_hrdata_q[63:0]);
assign slave_tag[TAG-1:0] = slvbuf_tag[TAG-1:0];
assign last_addr_en = (ahb_htrans[1:0] != 2'b0) & ahb_hready & ahb_hwrite ;
rvdffsc_fpga #(.WIDTH(1)) wrbuf_vldff (.din(1'b1), .dout(wrbuf_vld), .en(wrbuf_en), .clear(wrbuf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffsc_fpga #(.WIDTH(1)) wrbuf_data_vldff(.din(1'b1), .dout(wrbuf_data_vld), .en(wrbuf_data_en), .clear(wrbuf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(TAG)) wrbuf_tagff (.din(axi_awid[TAG-1:0]), .dout(wrbuf_tag[TAG-1:0]), .en(wrbuf_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(3)) wrbuf_sizeff (.din(axi_awsize[2:0]), .dout(wrbuf_size[2:0]), .en(wrbuf_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffe #(.WIDTH(32)) wrbuf_addrff (.din(axi_awaddr[31:0]), .dout(wrbuf_addr[31:0]), .en(wrbuf_en & bus_clk_en), .clk(clk), .*);
rvdffe #(.WIDTH(64)) wrbuf_dataff (.din(axi_wdata[63:0]), .dout(wrbuf_data[63:0]), .en(wrbuf_data_en & bus_clk_en), .clk(clk), .*);
rvdffs_fpga #(.WIDTH(8)) wrbuf_byteenff (.din(axi_wstrb[7:0]), .dout(wrbuf_byteen[7:0]), .en(wrbuf_data_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(32)) last_bus_addrff (.din(ahb_haddr[31:0]), .dout(last_bus_addr[31:0]), .en(last_addr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffsc_fpga #(.WIDTH($bits(state_t))) buf_state_ff (.din(buf_nxtstate), .dout({buf_state}), .en(buf_state_en), .clear(buf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(1)) buf_writeff (.din(buf_write_in), .dout(buf_write), .en(buf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(TAG)) buf_tagff (.din(buf_tag_in[TAG-1:0]), .dout(buf_tag[TAG-1:0]), .en(buf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*);
rvdffe #(.WIDTH(32)) buf_addrff (.din(buf_addr_in[31:0]), .dout(buf_addr[31:0]), .en(buf_wr_en & bus_clk_en), .clk(clk), .*);
rvdffs_fpga #(.WIDTH(2)) buf_sizeff (.din(buf_size_in[1:0]), .dout(buf_size[1:0]), .en(buf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(1)) buf_alignedff (.din(buf_aligned_in), .dout(buf_aligned), .en(buf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(8)) buf_byteenff (.din(buf_byteen_in[7:0]), .dout(buf_byteen[7:0]), .en(buf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*);
rvdffe #(.WIDTH(64)) buf_dataff (.din(buf_data_in[63:0]), .dout(buf_data[63:0]), .en(buf_data_wr_en & bus_clk_en), .clk(clk), .*);
rvdffs_fpga #(.WIDTH(1)) slvbuf_writeff (.din(buf_write), .dout(slvbuf_write), .en(slvbuf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(TAG)) slvbuf_tagff (.din(buf_tag[TAG-1:0]), .dout(slvbuf_tag[TAG-1:0]), .en(slvbuf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(1)) slvbuf_errorff (.din(slvbuf_error_in), .dout(slvbuf_error), .en(slvbuf_error_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffsc_fpga #(.WIDTH(1)) buf_cmd_doneff (.din(1'b1), .dout(cmd_doneQ), .en(cmd_done), .clear(cmd_done_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(3)) buf_cmd_byte_ptrff (.din(buf_cmd_byte_ptr[2:0]), .dout(buf_cmd_byte_ptrQ[2:0]), .en(buf_cmd_byte_ptr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) hready_ff (.din(ahb_hready), .dout(ahb_hready_q), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(2)) htrans_ff (.din(ahb_htrans[1:0]), .dout(ahb_htrans_q[1:0]), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) hwrite_ff (.din(ahb_hwrite), .dout(ahb_hwrite_q), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) hresp_ff (.din(ahb_hresp), .dout(ahb_hresp_q), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(64)) hrdata_ff (.din(ahb_hrdata[63:0]), .dout(ahb_hrdata_q[63:0]), .clk(ahbm_data_clk), .clken(ahbm_data_clken), .rawclk(clk), .*);
// Clock headers
// clock enables for ahbm addr/data
assign buf_clken = bus_clk_en & (buf_wr_en | slvbuf_wr_en | clk_override);
assign ahbm_data_clken = bus_clk_en & ((buf_state != IDLE) | clk_override);
`ifdef RV_FPGA_OPTIMIZE
assign bus_clk = 1'b0;
assign buf_clk = 1'b0;
assign ahbm_data_clk = 1'b0;
`else
rvclkhdr bus_cgc (.en(bus_clk_en), .l1clk(bus_clk), .*);
rvclkhdr buf_cgc (.en(buf_clken), .l1clk(buf_clk), .*);
rvclkhdr ahbm_data_cgc (.en(ahbm_data_clken), .l1clk(ahbm_data_clk), .*);
`endif
`ifdef RV_ASSERT_ON
property ahb_trxn_aligned;
@(posedge bus_clk) ahb_htrans[1] |-> ((ahb_hsize[2:0] == 3'h0) |
((ahb_hsize[2:0] == 3'h1) & (ahb_haddr[0] == 1'b0)) |
((ahb_hsize[2:0] == 3'h2) & (ahb_haddr[1:0] == 2'b0)) |
((ahb_hsize[2:0] == 3'h3) & (ahb_haddr[2:0] == 3'b0)));
endproperty
assert_ahb_trxn_aligned: assert property (ahb_trxn_aligned) else
$display("Assertion ahb_trxn_aligned failed: ahb_htrans=2'h%h, ahb_hsize=3'h%h, ahb_haddr=32'h%h",ahb_htrans[1:0], ahb_hsize[2:0], ahb_haddr[31:0]);
property ahb_error_protocol;
@(posedge bus_clk) (ahb_hready & ahb_hresp) |-> (~$past(ahb_hready) & $past(ahb_hresp));
endproperty
assert_ahb_error_protocol: assert property (ahb_error_protocol) else
$display("Bus Error with hReady isn't preceded with Bus Error without hready");
`endif
endmodule // axi4_to_ahb

814
Flow/design/lib/beh_lib.sv Normal file
View File

@ -0,0 +1,814 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// all flops call the rvdff flop
module rvdff #( parameter WIDTH=1, SHORT=0 )
(
input logic [WIDTH-1:0] din,
input logic clk,
input logic rst_l,
output logic [WIDTH-1:0] dout
);
if (SHORT == 1) begin
assign dout = din;
end
else begin
`ifdef RV_CLOCKGATE
always @(posedge tb_top.clk) begin
#0 $strobe("CG: %0t %m din %x dout %x clk %b width %d",$time,din,dout,clk,WIDTH);
end
`endif
always_ff @(posedge clk or negedge rst_l) begin
if (rst_l == 0)
dout[WIDTH-1:0] <= 0;
else
dout[WIDTH-1:0] <= din[WIDTH-1:0];
end
end
endmodule
// rvdff with 2:1 input mux to flop din iff sel==1
module rvdffs #( parameter WIDTH=1, SHORT=0 )
(
input logic [WIDTH-1:0] din,
input logic en,
input logic clk,
input logic rst_l,
output logic [WIDTH-1:0] dout
);
if (SHORT == 1) begin : genblock
assign dout = din;
end
else begin : genblock
rvdff #(WIDTH) dffs (.din((en) ? din[WIDTH-1:0] : dout[WIDTH-1:0]), .*);
end
endmodule
// rvdff with en and clear
module rvdffsc #( parameter WIDTH=1, SHORT=0 )
(
input logic [WIDTH-1:0] din,
input logic en,
input logic clear,
input logic clk,
input logic rst_l,
output logic [WIDTH-1:0] dout
);
logic [WIDTH-1:0] din_new;
if (SHORT == 1) begin
assign dout = din;
end
else begin
assign din_new = {WIDTH{~clear}} & (en ? din[WIDTH-1:0] : dout[WIDTH-1:0]);
rvdff #(WIDTH) dffsc (.din(din_new[WIDTH-1:0]), .*);
end
endmodule
// _fpga versions
module rvdff_fpga #( parameter WIDTH=1, SHORT=0 )
(
input logic [WIDTH-1:0] din,
input logic clk,
input logic clken,
input logic rawclk,
input logic rst_l,
output logic [WIDTH-1:0] dout
);
if (SHORT == 1) begin
assign dout = din;
end
else begin
`ifdef RV_FPGA_OPTIMIZE
rvdffs #(WIDTH) dffs (.clk(rawclk), .en(clken), .*);
`else
rvdff #(WIDTH) dff (.*);
`endif
end
endmodule
// rvdff with 2:1 input mux to flop din iff sel==1
module rvdffs_fpga #( parameter WIDTH=1, SHORT=0 )
(
input logic [WIDTH-1:0] din,
input logic en,
input logic clk,
input logic clken,
input logic rawclk,
input logic rst_l,
output logic [WIDTH-1:0] dout
);
if (SHORT == 1) begin : genblock
assign dout = din;
end
else begin : genblock
`ifdef RV_FPGA_OPTIMIZE
rvdffs #(WIDTH) dffs (.clk(rawclk), .en(clken & en), .*);
`else
rvdffs #(WIDTH) dffs (.*);
`endif
end
endmodule
// rvdff with en and clear
module rvdffsc_fpga #( parameter WIDTH=1, SHORT=0 )
(
input logic [WIDTH-1:0] din,
input logic en,
input logic clear,
input logic clk,
input logic clken,
input logic rawclk,
input logic rst_l,
output logic [WIDTH-1:0] dout
);
logic [WIDTH-1:0] din_new;
if (SHORT == 1) begin
assign dout = din;
end
else begin
`ifdef RV_FPGA_OPTIMIZE
rvdffs #(WIDTH) dffs (.clk(rawclk), .din(din[WIDTH-1:0] & {WIDTH{~clear}}),.en((en | clear) & clken), .*);
`else
rvdffsc #(WIDTH) dffsc (.*);
`endif
end
endmodule
module rvdffe #( parameter WIDTH=1, SHORT=0, OVERRIDE=0 )
(
input logic [WIDTH-1:0] din,
input logic en,
input logic clk,
input logic rst_l,
input logic scan_mode,
output logic [WIDTH-1:0] dout
);
logic l1clk;
if (SHORT == 1) begin : genblock
if (1) begin : genblock
assign dout = din;
end
end
else begin : genblock
`ifndef RV_PHYSICAL
if (WIDTH >= 8 || OVERRIDE==1) begin: genblock
`endif
`ifdef RV_FPGA_OPTIMIZE
rvdffs #(WIDTH) dff ( .* );
`else
rvclkhdr clkhdr ( .* );
rvdff #(WIDTH) dff (.*, .clk(l1clk));
`endif
`ifndef RV_PHYSICAL
end
else
$error("%m: rvdffe must be WIDTH >= 8");
`endif
end // else: !if(SHORT == 1)
endmodule // rvdffe
module rvdffpcie #( parameter WIDTH=31 )
(
input logic [WIDTH-1:0] din,
input logic clk,
input logic rst_l,
input logic en,
input logic scan_mode,
output logic [WIDTH-1:0] dout
);
`ifndef RV_PHYSICAL
if (WIDTH == 31) begin: genblock
`endif
`ifdef RV_FPGA_OPTIMIZE
rvdffs #(WIDTH) dff ( .* );
`else
rvdfflie #(.WIDTH(WIDTH), .LEFT(19)) dff (.*);
`endif
`ifndef RV_PHYSICAL
end
else
$error("%m: rvdffpcie width must be 31");
`endif
endmodule
// format: { LEFT, EXTRA }
// LEFT # of bits will be done with rvdffie, all else EXTRA with rvdffe
module rvdfflie #( parameter WIDTH=16, LEFT=8 )
(
input logic [WIDTH-1:0] din,
input logic clk,
input logic rst_l,
input logic en,
input logic scan_mode,
output logic [WIDTH-1:0] dout
);
localparam EXTRA = WIDTH-LEFT;
localparam LMSB = WIDTH-1;
localparam LLSB = LMSB-LEFT+1;
localparam XMSB = LLSB-1;
localparam XLSB = LLSB-EXTRA;
`ifndef RV_PHYSICAL
if (WIDTH >= 16 && LEFT >= 8 && EXTRA >= 8) begin: genblock
`endif
`ifdef RV_FPGA_OPTIMIZE
rvdffs #(WIDTH) dff ( .* );
`else
rvdffiee #(LEFT) dff_left (.*, .din(din[LMSB:LLSB]), .dout(dout[LMSB:LLSB]));
rvdffe #(EXTRA) dff_extra (.*, .din(din[XMSB:XLSB]), .dout(dout[XMSB:XLSB]));
`endif
`ifndef RV_PHYSICAL
end
else
$error("%m: rvdfflie musb be WIDTH >= 16 && LEFT >= 8 && EXTRA >= 8");
`endif
endmodule
// special power flop for predict packet
// format: { LEFT, RIGHT==31 }
// LEFT # of bits will be done with rvdffe; RIGHT is enabled by LEFT[LSB] & en
module rvdffppe #( parameter WIDTH=32 )
(
input logic [WIDTH-1:0] din,
input logic clk,
input logic rst_l,
input logic en,
input logic scan_mode,
output logic [WIDTH-1:0] dout
);
localparam RIGHT = 31;
localparam LEFT = WIDTH - RIGHT;
localparam LMSB = WIDTH-1;
localparam LLSB = LMSB-LEFT+1;
localparam RMSB = LLSB-1;
localparam RLSB = LLSB-RIGHT;
`ifndef RV_PHYSICAL
if (WIDTH>=32 && LEFT>=8 && RIGHT>=8) begin: genblock
`endif
`ifdef RV_FPGA_OPTIMIZE
rvdffs #(WIDTH) dff ( .* );
`else
rvdffe #(LEFT) dff_left (.*, .din(din[LMSB:LLSB]), .dout(dout[LMSB:LLSB]));
rvdffe #(RIGHT) dff_right (.*, .din(din[RMSB:RLSB]), .dout(dout[RMSB:RLSB]), .en(en & din[LLSB])); // qualify with pret
`endif
`ifndef RV_PHYSICAL
end
else
$error("%m: must be WIDTH>=32 && LEFT>=8 && RIGHT>=8");
`endif
endmodule
module rvdffie #( parameter WIDTH=1, OVERRIDE=0 )
(
input logic [WIDTH-1:0] din,
input logic clk,
input logic rst_l,
input logic scan_mode,
output logic [WIDTH-1:0] dout
);
logic l1clk;
logic en;
`ifndef RV_PHYSICAL
if (WIDTH >= 8 || OVERRIDE==1) begin: genblock
`endif
assign en = |(din ^ dout);
`ifdef RV_FPGA_OPTIMIZE
rvdffs #(WIDTH) dff ( .* );
`else
rvclkhdr clkhdr ( .* );
rvdff #(WIDTH) dff (.*, .clk(l1clk));
`endif
`ifndef RV_PHYSICAL
end
else
$error("%m: rvdffie must be WIDTH >= 8");
`endif
endmodule
// ie flop but it has an .en input
module rvdffiee #( parameter WIDTH=1, OVERRIDE=0 )
(
input logic [WIDTH-1:0] din,
input logic clk,
input logic rst_l,
input logic scan_mode,
input logic en,
output logic [WIDTH-1:0] dout
);
logic l1clk;
logic final_en;
`ifndef RV_PHYSICAL
if (WIDTH >= 8 || OVERRIDE==1) begin: genblock
`endif
assign final_en = (|(din ^ dout)) & en;
`ifdef RV_FPGA_OPTIMIZE
rvdffs #(WIDTH) dff ( .*, .en(final_en) );
`else
rvdffe #(WIDTH) dff (.*, .en(final_en));
`endif
`ifndef RV_PHYSICAL
end
else
$error("%m: rvdffie width must be >= 8");
`endif
endmodule
module rvsyncss #(parameter WIDTH = 251)
(
input logic clk,
input logic rst_l,
input logic [WIDTH-1:0] din,
output logic [WIDTH-1:0] dout
);
logic [WIDTH-1:0] din_ff1;
rvdff #(WIDTH) sync_ff1 (.*, .din (din[WIDTH-1:0]), .dout(din_ff1[WIDTH-1:0]));
rvdff #(WIDTH) sync_ff2 (.*, .din (din_ff1[WIDTH-1:0]), .dout(dout[WIDTH-1:0]));
endmodule // rvsyncss
module rvsyncss_fpga #(parameter WIDTH = 251)
(
input logic gw_clk,
input logic rawclk,
input logic clken,
input logic rst_l,
input logic [WIDTH-1:0] din,
output logic [WIDTH-1:0] dout
);
logic [WIDTH-1:0] din_ff1;
rvdff_fpga #(WIDTH) sync_ff1 (.*, .clk(gw_clk), .rawclk(rawclk), .clken(clken), .din (din[WIDTH-1:0]), .dout(din_ff1[WIDTH-1:0]));
rvdff_fpga #(WIDTH) sync_ff2 (.*, .clk(gw_clk), .rawclk(rawclk), .clken(clken), .din (din_ff1[WIDTH-1:0]), .dout(dout[WIDTH-1:0]));
endmodule // rvsyncss
module rvlsadder
(
input logic [31:0] rs1,
input logic [11:0] offset,
output logic [31:0] dout
);
logic cout;
logic sign;
logic [31:12] rs1_inc;
logic [31:12] rs1_dec;
assign {cout,dout[11:0]} = {1'b0,rs1[11:0]} + {1'b0,offset[11:0]};
assign rs1_inc[31:12] = rs1[31:12] + 1;
assign rs1_dec[31:12] = rs1[31:12] - 1;
assign sign = offset[11];
assign dout[31:12] = ({20{ sign ^~ cout}} & rs1[31:12]) |
({20{ ~sign & cout}} & rs1_inc[31:12]) |
({20{ sign & ~cout}} & rs1_dec[31:12]);
endmodule // rvlsadder
// assume we only maintain pc[31:1] in the pipe
module rvbradder
(
input [31:1] pc,
input [12:1] offset,
output [31:1] dout
);
logic cout;
logic sign;
logic [31:13] pc_inc;
logic [31:13] pc_dec;
assign {cout,dout[12:1]} = {1'b0,pc[12:1]} + {1'b0,offset[12:1]};
assign pc_inc[31:13] = pc[31:13] + 1;
assign pc_dec[31:13] = pc[31:13] - 1;
assign sign = offset[12];
assign dout[31:13] = ({19{ sign ^~ cout}} & pc[31:13]) |
({19{ ~sign & cout}} & pc_inc[31:13]) |
({19{ sign & ~cout}} & pc_dec[31:13]);
endmodule // rvbradder
// 2s complement circuit
module rvtwoscomp #( parameter WIDTH=32 )
(
input logic [WIDTH-1:0] din,
output logic [WIDTH-1:0] dout
);
logic [WIDTH-1:1] dout_temp; // holding for all other bits except for the lsb. LSB is always din
genvar i;
for ( i = 1; i < WIDTH; i++ ) begin : flip_after_first_one
assign dout_temp[i] = (|din[i-1:0]) ? ~din[i] : din[i];
end : flip_after_first_one
assign dout[WIDTH-1:0] = { dout_temp[WIDTH-1:1], din[0] };
endmodule // 2'scomp
// find first
module rvfindfirst1 #( parameter WIDTH=32, SHIFT=$clog2(WIDTH) )
(
input logic [WIDTH-1:0] din,
output logic [SHIFT-1:0] dout
);
logic done;
always_comb begin
dout[SHIFT-1:0] = {SHIFT{1'b0}};
done = 1'b0;
for ( int i = WIDTH-1; i > 0; i-- ) begin : find_first_one
done |= din[i];
dout[SHIFT-1:0] += done ? 1'b0 : 1'b1;
end : find_first_one
end
endmodule // rvfindfirst1
module rvfindfirst1hot #( parameter WIDTH=32 )
(
input logic [WIDTH-1:0] din,
output logic [WIDTH-1:0] dout
);
logic done;
always_comb begin
dout[WIDTH-1:0] = {WIDTH{1'b0}};
done = 1'b0;
for ( int i = 0; i < WIDTH; i++ ) begin : find_first_one
dout[i] = ~done & din[i];
done |= din[i];
end : find_first_one
end
endmodule // rvfindfirst1hot
// mask and match function matches bits after finding the first 0 position
// find first starting from LSB. Skip that location and match the rest of the bits
module rvmaskandmatch #( parameter WIDTH=32 )
(
input logic [WIDTH-1:0] mask, // this will have the mask in the lower bit positions
input logic [WIDTH-1:0] data, // this is what needs to be matched on the upper bits with the mask's upper bits
input logic masken, // when 1 : do mask. 0 : full match
output logic match
);
logic [WIDTH-1:0] matchvec;
logic masken_or_fullmask;
assign masken_or_fullmask = masken & ~(&mask[WIDTH-1:0]);
assign matchvec[0] = masken_or_fullmask | (mask[0] == data[0]);
genvar i;
for ( i = 1; i < WIDTH; i++ ) begin : match_after_first_zero
assign matchvec[i] = (&mask[i-1:0] & masken_or_fullmask) ? 1'b1 : (mask[i] == data[i]);
end : match_after_first_zero
assign match = &matchvec[WIDTH-1:0]; // all bits either matched or were masked off
endmodule // rvmaskandmatch
// Check if the S_ADDR <= addr < E_ADDR
module rvrangecheck #(CCM_SADR = 32'h0,
CCM_SIZE = 128) (
input logic [31:0] addr, // Address to be checked for range
output logic in_range, // S_ADDR <= start_addr < E_ADDR
output logic in_region
);
localparam REGION_BITS = 4;
localparam MASK_BITS = 10 + $clog2(CCM_SIZE);
logic [31:0] start_addr;
logic [3:0] region;
assign start_addr[31:0] = CCM_SADR;
assign region[REGION_BITS-1:0] = start_addr[31:(32-REGION_BITS)];
assign in_region = (addr[31:(32-REGION_BITS)] == region[REGION_BITS-1:0]);
if (CCM_SIZE == 48)
assign in_range = (addr[31:MASK_BITS] == start_addr[31:MASK_BITS]) & ~(&addr[MASK_BITS-1 : MASK_BITS-2]);
else
assign in_range = (addr[31:MASK_BITS] == start_addr[31:MASK_BITS]);
endmodule // rvrangechecker
// 16 bit even parity generator
module rveven_paritygen #(WIDTH = 16) (
input logic [WIDTH-1:0] data_in, // Data
output logic parity_out // generated even parity
);
assign parity_out = ^(data_in[WIDTH-1:0]) ;
endmodule // rveven_paritygen
module rveven_paritycheck #(WIDTH = 16) (
input logic [WIDTH-1:0] data_in, // Data
input logic parity_in,
output logic parity_err // Parity error
);
assign parity_err = ^(data_in[WIDTH-1:0]) ^ parity_in ;
endmodule // rveven_paritycheck
module rvecc_encode (
input [31:0] din,
output [6:0] ecc_out
);
logic [5:0] ecc_out_temp;
assign ecc_out_temp[0] = din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30];
assign ecc_out_temp[1] = din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31];
assign ecc_out_temp[2] = din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31];
assign ecc_out_temp[3] = din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25];
assign ecc_out_temp[4] = din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25];
assign ecc_out_temp[5] = din[26]^din[27]^din[28]^din[29]^din[30]^din[31];
assign ecc_out[6:0] = {(^din[31:0])^(^ecc_out_temp[5:0]),ecc_out_temp[5:0]};
endmodule // rvecc_encode
module rvecc_decode (
input en,
input [31:0] din,
input [6:0] ecc_in,
input sed_ded, // only do detection and no correction. Used for the I$
output [31:0] dout,
output [6:0] ecc_out,
output single_ecc_error,
output double_ecc_error
);
logic [6:0] ecc_check;
logic [38:0] error_mask;
logic [38:0] din_plus_parity, dout_plus_parity;
// Generate the ecc bits
assign ecc_check[0] = ecc_in[0]^din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30];
assign ecc_check[1] = ecc_in[1]^din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31];
assign ecc_check[2] = ecc_in[2]^din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31];
assign ecc_check[3] = ecc_in[3]^din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25];
assign ecc_check[4] = ecc_in[4]^din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25];
assign ecc_check[5] = ecc_in[5]^din[26]^din[27]^din[28]^din[29]^din[30]^din[31];
// This is the parity bit
assign ecc_check[6] = ((^din[31:0])^(^ecc_in[6:0])) & ~sed_ded;
assign single_ecc_error = en & (ecc_check[6:0] != 0) & ecc_check[6]; // this will never be on for sed_ded
assign double_ecc_error = en & (ecc_check[6:0] != 0) & ~ecc_check[6]; // all errors in the sed_ded case will be recorded as DE
// Generate the mask for error correctiong
for (genvar i=1; i<40; i++) begin
assign error_mask[i-1] = (ecc_check[5:0] == i);
end
// Generate the corrected data
assign din_plus_parity[38:0] = {ecc_in[6], din[31:26], ecc_in[5], din[25:11], ecc_in[4], din[10:4], ecc_in[3], din[3:1], ecc_in[2], din[0], ecc_in[1:0]};
assign dout_plus_parity[38:0] = single_ecc_error ? (error_mask[38:0] ^ din_plus_parity[38:0]) : din_plus_parity[38:0];
assign dout[31:0] = {dout_plus_parity[37:32], dout_plus_parity[30:16], dout_plus_parity[14:8], dout_plus_parity[6:4], dout_plus_parity[2]};
assign ecc_out[6:0] = {(dout_plus_parity[38] ^ (ecc_check[6:0] == 7'b1000000)), dout_plus_parity[31], dout_plus_parity[15], dout_plus_parity[7], dout_plus_parity[3], dout_plus_parity[1:0]};
endmodule // rvecc_decode
module rvecc_encode_64 (
input [63:0] din,
output [6:0] ecc_out
);
assign ecc_out[0] = din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]^din[32]^din[34]^din[36]^din[38]^din[40]^din[42]^din[44]^din[46]^din[48]^din[50]^din[52]^din[54]^din[56]^din[57]^din[59]^din[61]^din[63];
assign ecc_out[1] = din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]^din[32]^din[35]^din[36]^din[39]^din[40]^din[43]^din[44]^din[47]^din[48]^din[51]^din[52]^din[55]^din[56]^din[58]^din[59]^din[62]^din[63];
assign ecc_out[2] = din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]^din[32]^din[37]^din[38]^din[39]^din[40]^din[45]^din[46]^din[47]^din[48]^din[53]^din[54]^din[55]^din[56]^din[60]^din[61]^din[62]^din[63];
assign ecc_out[3] = din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
assign ecc_out[4] = din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
assign ecc_out[5] = din[26]^din[27]^din[28]^din[29]^din[30]^din[31]^din[32]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
assign ecc_out[6] = din[57]^din[58]^din[59]^din[60]^din[61]^din[62]^din[63];
endmodule // rvecc_encode_64
module rvecc_decode_64 (
input en,
input [63:0] din,
input [6:0] ecc_in,
output ecc_error
);
logic [6:0] ecc_check;
// Generate the ecc bits
assign ecc_check[0] = ecc_in[0]^din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]^din[32]^din[34]^din[36]^din[38]^din[40]^din[42]^din[44]^din[46]^din[48]^din[50]^din[52]^din[54]^din[56]^din[57]^din[59]^din[61]^din[63];
assign ecc_check[1] = ecc_in[1]^din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]^din[32]^din[35]^din[36]^din[39]^din[40]^din[43]^din[44]^din[47]^din[48]^din[51]^din[52]^din[55]^din[56]^din[58]^din[59]^din[62]^din[63];
assign ecc_check[2] = ecc_in[2]^din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]^din[32]^din[37]^din[38]^din[39]^din[40]^din[45]^din[46]^din[47]^din[48]^din[53]^din[54]^din[55]^din[56]^din[60]^din[61]^din[62]^din[63];
assign ecc_check[3] = ecc_in[3]^din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
assign ecc_check[4] = ecc_in[4]^din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
assign ecc_check[5] = ecc_in[5]^din[26]^din[27]^din[28]^din[29]^din[30]^din[31]^din[32]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
assign ecc_check[6] = ecc_in[6]^din[57]^din[58]^din[59]^din[60]^din[61]^din[62]^din[63];
assign ecc_error = en & (ecc_check[6:0] != 0); // all errors in the sed_ded case will be recorded as DE
endmodule // rvecc_decode_64
module `TEC_RV_ICG
(
input logic SE, EN, CK,
output Q
);
logic en_ff;
logic enable;
assign enable = EN | SE;
`ifdef VERILATOR
always @(negedge CK) begin
en_ff <= enable;
end
`else
always @(CK, enable) begin
if(!CK)
en_ff = enable;
end
`endif
assign Q = CK & en_ff;
endmodule
`ifndef RV_FPGA_OPTIMIZE
module rvclkhdr
(
input logic en,
input logic clk,
input logic scan_mode,
output logic l1clk
);
logic SE;
assign SE = 0;
`TEC_RV_ICG clkhdr ( .*, .EN(en), .CK(clk), .Q(l1clk));
endmodule // rvclkhdr
`endif
module rvoclkhdr
(
input logic en,
input logic clk,
input logic scan_mode,
output logic l1clk
);
logic SE;
assign SE = 0;
`ifdef RV_FPGA_OPTIMIZE
assign l1clk = clk;
`else
`TEC_RV_ICG clkhdr ( .*, .EN(en), .CK(clk), .Q(l1clk));
`endif
endmodule

View File

@ -0,0 +1,64 @@
module el2_btb_tag_hash #(
`include "el2_param.vh"
) (
input logic [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] pc,
output logic [pt.BTB_BTAG_SIZE-1:0] hash
);
assign hash = {(pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+1] ^
pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+1] ^
pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1])};
endmodule
module el2_btb_tag_hash_fold #(
`include "el2_param.vh"
)(
input logic [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] pc,
output logic [pt.BTB_BTAG_SIZE-1:0] hash
);
assign hash = {(
pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+1] ^
pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1])};
endmodule
module el2_btb_addr_hash #(
`include "el2_param.vh"
)(
input logic [pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO] pc,
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hash
);
if(pt.BTB_FOLD2_INDEX_HASH) begin : fold2
assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^
pc[pt.BTB_INDEX3_HI:pt.BTB_INDEX3_LO];
end
else begin
assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^
pc[pt.BTB_INDEX2_HI:pt.BTB_INDEX2_LO] ^
pc[pt.BTB_INDEX3_HI:pt.BTB_INDEX3_LO];
end
endmodule
module el2_btb_ghr_hash #(
`include "el2_param.vh"
)(
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hashin,
input logic [pt.BHT_GHR_SIZE-1:0] ghr,
output logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] hash
);
// The hash function is too complex to write in verilog for all cases.
// The config script generates the logic string based on the bp config.
if(pt.BHT_GHR_HASH_1) begin : ghrhash_cfg1
assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = { ghr[pt.BHT_GHR_SIZE-1:pt.BTB_INDEX1_HI-1], hashin[pt.BTB_INDEX1_HI:2]^ghr[pt.BTB_INDEX1_HI-2:0]};
end
else begin : ghrhash_cfg2
assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = { hashin[pt.BHT_GHR_SIZE+1:2]^ghr[pt.BHT_GHR_SIZE-1:0]};
end
endmodule

242
Flow/design/lib/mem_lib.sv Normal file
View File

@ -0,0 +1,242 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or it's affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`define EL2_LOCAL_RAM_TEST_IO \
input logic WE, \
input logic ME, \
input logic CLK, \
input logic TEST1, \
input logic RME, \
input logic [3:0] RM, \
input logic LS, \
input logic DS, \
input logic SD, \
input logic TEST_RNM, \
input logic BC1, \
input logic BC2, \
output logic ROP
`define EL2_RAM(depth, width) \
module ram_``depth``x``width( \
input logic [$clog2(depth)-1:0] ADR, \
input logic [(width-1):0] D, \
output logic [(width-1):0] Q, \
`EL2_LOCAL_RAM_TEST_IO \
); \
reg [(width-1):0] ram_core [(depth-1):0]; \
`ifdef GTLSIM \
integer i; \
initial begin \
for (i=0; i<depth; i=i+1) \
ram_core[i] = '0; \
end \
`endif \
always @(posedge CLK) begin \
`ifdef GTLSIM \
if (ME && WE) ram_core[ADR] <= D; \
`else \
if (ME && WE) begin ram_core[ADR] <= D; Q <= 'x; end \
`endif \
if (ME && ~WE) Q <= ram_core[ADR]; \
end \
assign ROP = ME; \
\
endmodule
`define EL2_RAM_BE(depth, width) \
module ram_be_``depth``x``width( \
input logic [$clog2(depth)-1:0] ADR, \
input logic [(width-1):0] D, WEM, \
output logic [(width-1):0] Q, \
`EL2_LOCAL_RAM_TEST_IO \
); \
reg [(width-1):0] ram_core [(depth-1):0]; \
`ifdef GTLSIM \
integer i; \
initial begin \
for (i=0; i<depth; i=i+1) \
ram_core[i] = '0; \
end \
`endif \
always @(posedge CLK) begin \
`ifdef GTLSIM \
if (ME && WE) ram_core[ADR] <= D & WEM | ~WEM & ram_core[ADR]; \
`else \
if (ME && WE) begin ram_core[ADR] <= D & WEM | ~WEM & ram_core[ADR]; Q <= 'x; end \
`endif \
if (ME && ~WE) Q <= ram_core[ADR]; \
end \
assign ROP = ME; \
\
endmodule
// parameterizable RAM for verilator sims
module el2_ram #(depth=4096, width=39) (
input logic [$clog2(depth)-1:0] ADR,
input logic [(width-1):0] D,
output logic [(width-1):0] Q,
`EL2_LOCAL_RAM_TEST_IO
);
reg [(width-1):0] ram_core [(depth-1):0];
always @(posedge CLK) begin
`ifdef GTLSIM
if (ME && WE) ram_core[ADR] <= D;
`else
if (ME && WE) begin ram_core[ADR] <= D; Q <= 'x; end
`endif
if (ME && ~WE) Q <= ram_core[ADR];
end
endmodule
//=========================================================================================================================
//=================================== START OF CCM =======================================================================
//============= Possible sram sizes for a 39 bit wide memory ( 4 bytes + 7 bits ECC ) =====================================
//-------------------------------------------------------------------------------------------------------------------------
`EL2_RAM(32768, 39)
`EL2_RAM(16384, 39)
`EL2_RAM(8192, 39)
`EL2_RAM(4096, 39)
`EL2_RAM(3072, 39)
`EL2_RAM(2048, 39)
`EL2_RAM(1536, 39) // need this for the 48KB DCCM option)
`EL2_RAM(1024, 39)
`EL2_RAM(768, 39)
`EL2_RAM(512, 39)
`EL2_RAM(256, 39)
`EL2_RAM(128, 39)
`EL2_RAM(1024, 20)
`EL2_RAM(512, 20)
`EL2_RAM(256, 20)
`EL2_RAM(128, 20)
`EL2_RAM(64, 20)
`EL2_RAM(4096, 34)
`EL2_RAM(2048, 34)
`EL2_RAM(1024, 34)
`EL2_RAM(512, 34)
`EL2_RAM(256, 34)
`EL2_RAM(128, 34)
`EL2_RAM(64, 34)
`EL2_RAM(8192, 68)
`EL2_RAM(4096, 68)
`EL2_RAM(2048, 68)
`EL2_RAM(1024, 68)
`EL2_RAM(512, 68)
`EL2_RAM(256, 68)
`EL2_RAM(128, 68)
`EL2_RAM(64, 68)
`EL2_RAM(8192, 71)
`EL2_RAM(4096, 71)
`EL2_RAM(2048, 71)
`EL2_RAM(1024, 71)
`EL2_RAM(512, 71)
`EL2_RAM(256, 71)
`EL2_RAM(128, 71)
`EL2_RAM(64, 71)
`EL2_RAM(4096, 42)
`EL2_RAM(2048, 42)
`EL2_RAM(1024, 42)
`EL2_RAM(512, 42)
`EL2_RAM(256, 42)
`EL2_RAM(128, 42)
`EL2_RAM(64, 42)
`EL2_RAM(4096, 22)
`EL2_RAM(2048, 22)
`EL2_RAM(1024, 22)
`EL2_RAM(512, 22)
`EL2_RAM(256, 22)
`EL2_RAM(128, 22)
`EL2_RAM(64, 22)
`EL2_RAM(1024, 26)
`EL2_RAM(4096, 26)
`EL2_RAM(2048, 26)
`EL2_RAM(512, 26)
`EL2_RAM(256, 26)
`EL2_RAM(128, 26)
`EL2_RAM(64, 26)
`EL2_RAM(32, 26)
`EL2_RAM(32, 22)
`EL2_RAM_BE(8192, 142)
`EL2_RAM_BE(4096, 142)
`EL2_RAM_BE(2048, 142)
`EL2_RAM_BE(1024, 142)
`EL2_RAM_BE(512, 142)
`EL2_RAM_BE(256, 142)
`EL2_RAM_BE(128, 142)
`EL2_RAM_BE(64, 142)
`EL2_RAM_BE(8192, 284)
`EL2_RAM_BE(4096, 284)
`EL2_RAM_BE(2048, 284)
`EL2_RAM_BE(1024, 284)
`EL2_RAM_BE(512, 284)
`EL2_RAM_BE(256, 284)
`EL2_RAM_BE(128, 284)
`EL2_RAM_BE(64, 284)
`EL2_RAM_BE(8192, 136)
`EL2_RAM_BE(4096, 136)
`EL2_RAM_BE(2048, 136)
`EL2_RAM_BE(1024, 136)
`EL2_RAM_BE(512, 136)
`EL2_RAM_BE(256, 136)
`EL2_RAM_BE(128, 136)
`EL2_RAM_BE(64, 136)
`EL2_RAM_BE(8192, 272)
`EL2_RAM_BE(4096, 272)
`EL2_RAM_BE(2048, 272)
`EL2_RAM_BE(1024, 272)
`EL2_RAM_BE(512, 272)
`EL2_RAM_BE(256, 272)
`EL2_RAM_BE(128, 272)
`EL2_RAM_BE(64, 272)
`EL2_RAM_BE(4096, 52)
`EL2_RAM_BE(2048, 52)
`EL2_RAM_BE(1024, 52)
`EL2_RAM_BE(512, 52)
`EL2_RAM_BE(256, 52)
`EL2_RAM_BE(128, 52)
`EL2_RAM_BE(64, 52)
`EL2_RAM_BE(32, 52)
`EL2_RAM_BE(4096, 104)
`EL2_RAM_BE(2048, 104)
`EL2_RAM_BE(1024, 104)
`EL2_RAM_BE(512, 104)
`EL2_RAM_BE(256, 104)
`EL2_RAM_BE(128, 104)
`EL2_RAM_BE(64, 104)
`EL2_RAM_BE(32, 104)
`EL2_RAM_BE(4096, 44)
`EL2_RAM_BE(2048, 44)
`EL2_RAM_BE(1024, 44)
`EL2_RAM_BE(512, 44)
`EL2_RAM_BE(256, 44)
`EL2_RAM_BE(128, 44)
`EL2_RAM_BE(64, 44)
`EL2_RAM_BE(32, 44)
`EL2_RAM_BE(4096, 88)
`EL2_RAM_BE(2048, 88)
`EL2_RAM_BE(1024, 88)
`EL2_RAM_BE(512, 88)
`EL2_RAM_BE(256, 88)
`EL2_RAM_BE(128, 88)
`EL2_RAM_BE(64, 88)
`EL2_RAM_BE(32, 88)
`EL2_RAM(64, 39)
`undef EL2_RAM
`undef EL2_RAM_BE
`undef EL2_LOCAL_RAM_TEST_IO

424
Flow/design/lsu/el2_lsu.sv Normal file
View File

@ -0,0 +1,424 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Function: Top level file for load store unit
// Comments:
//
//
// DC1 -> DC2 -> DC3 -> DC4 (Commit)
//
//********************************************************************************
module el2_lsu
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk_override, // Override non-functional clock gating
input logic dec_tlu_flush_lower_r, // I0/I1 writeback flush. This is used to flush the old packets only
input logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state
input logic dec_tlu_force_halt, // This will be high till TLU goes to debug halt
// chicken signals
input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals
input logic dec_tlu_wb_coalescing_disable, // disable the write buffer coalesce
input logic dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus
input logic dec_tlu_core_ecc_disable, // disable the generation of the ecc
input logic [31:0] exu_lsu_rs1_d, // address rs operand
input logic [31:0] exu_lsu_rs2_d, // store data
input logic [11:0] dec_lsu_offset_d, // address offset operand
input el2_lsu_pkt_t lsu_p, // lsu control packet
input logic dec_lsu_valid_raw_d, // Raw valid for address computation
input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control
output logic [31:0] lsu_result_m, // lsu load data
output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF
output logic lsu_load_stall_any, // This is for blocking loads in the decode
output logic lsu_store_stall_any, // This is for blocking stores in the decode
output logic lsu_fastint_stall_any, // Stall the fastint in decode-1 stage
output logic lsu_idle_any, // lsu buffers are empty and no instruction in the pipeline. Doesn't include DMA
output logic lsu_active, // Used to turn off top level clk
output logic [31:1] lsu_fir_addr, // fast interrupt address
output logic [1:0] lsu_fir_error, // Error during fast interrupt lookup
output logic lsu_single_ecc_error_incr, // Increment the ecc counter
output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet
output logic lsu_imprecise_error_load_any, // bus load imprecise error
output logic lsu_imprecise_error_store_any, // bus store imprecise error
output logic [31:0] lsu_imprecise_error_addr_any, // bus store imprecise error address
// Non-blocking loads
output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load
output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the enrty which needs to be invalidated
output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam
output logic lsu_nonblock_load_data_error, // non block load has an error
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error
output logic [31:0] lsu_nonblock_load_data, // Data of the non block load
output logic lsu_pmu_load_external_m, // PMU : Bus loads
output logic lsu_pmu_store_external_m, // PMU : Bus loads
output logic lsu_pmu_misaligned_m, // PMU : misaligned
output logic lsu_pmu_bus_trxn, // PMU : bus transaction
output logic lsu_pmu_bus_misaligned, // PMU : misaligned access going to the bus
output logic lsu_pmu_bus_error, // PMU : bus sending error back
output logic lsu_pmu_bus_busy, // PMU : bus is not ready
// Trigger signals
input el2_trigger_pkt_t [3:0] trigger_pkt_any, // Trigger info from the decode
output logic [3:0] lsu_trigger_match_m, // lsu trigger hit (one bit per trigger)
// DCCM ports
output logic dccm_wren, // DCCM write enable
output logic dccm_rden, // DCCM read enable
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // DCCM write address low bank
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // DCCM write address hi bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // DCCM read address low bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // DCCM read address hi bank (hi and low same if aligned read)
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // DCCM write data for lo bank
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // DCCM write data for hi bank
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // DCCM read data low bank
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // DCCM read data hi bank
// PIC ports
output logic picm_wren, // PIC memory write enable
output logic picm_rden, // PIC memory read enable
output logic picm_mken, // Need to read the mask for stores to determine which bits to write/forward
output logic [31:0] picm_rdaddr, // address for pic read access
output logic [31:0] picm_wraddr, // address for pic write access
output logic [31:0] picm_wr_data, // PIC memory write data
input logic [31:0] picm_rd_data, // PIC memory read/mask data
// AXI Write Channels
output logic lsu_axi_awvalid,
input logic lsu_axi_awready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid,
output logic [31:0] lsu_axi_awaddr,
output logic [3:0] lsu_axi_awregion,
output logic [7:0] lsu_axi_awlen,
output logic [2:0] lsu_axi_awsize,
output logic [1:0] lsu_axi_awburst,
output logic lsu_axi_awlock,
output logic [3:0] lsu_axi_awcache,
output logic [2:0] lsu_axi_awprot,
output logic [3:0] lsu_axi_awqos,
output logic lsu_axi_wvalid,
input logic lsu_axi_wready,
output logic [63:0] lsu_axi_wdata,
output logic [7:0] lsu_axi_wstrb,
output logic lsu_axi_wlast,
input logic lsu_axi_bvalid,
output logic lsu_axi_bready,
input logic [1:0] lsu_axi_bresp,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid,
// AXI Read Channels
output logic lsu_axi_arvalid,
input logic lsu_axi_arready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid,
output logic [31:0] lsu_axi_araddr,
output logic [3:0] lsu_axi_arregion,
output logic [7:0] lsu_axi_arlen,
output logic [2:0] lsu_axi_arsize,
output logic [1:0] lsu_axi_arburst,
output logic lsu_axi_arlock,
output logic [3:0] lsu_axi_arcache,
output logic [2:0] lsu_axi_arprot,
output logic [3:0] lsu_axi_arqos,
input logic lsu_axi_rvalid,
output logic lsu_axi_rready,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid,
input logic [63:0] lsu_axi_rdata,
input logic [1:0] lsu_axi_rresp,
input logic lsu_axi_rlast,
input logic lsu_bus_clk_en, // external drives a clock_en to control bus ratio
// DMA slave
input logic dma_dccm_req, // DMA read/write to dccm
input logic [2:0] dma_mem_tag, // DMA request tag
input logic [31:0] dma_mem_addr, // DMA address
input logic [2:0] dma_mem_sz, // DMA access size
input logic dma_mem_write, // DMA access is a write
input logic [63:0] dma_mem_wdata, // DMA write data
output logic dccm_dma_rvalid, // lsu data valid for DMA dccm read
output logic dccm_dma_ecc_error, // DMA load had ecc error
output logic [2:0] dccm_dma_rtag, // DMA request tag
output logic [63:0] dccm_dma_rdata, // lsu data for DMA dccm read
output logic dccm_ready, // lsu ready for DMA access
input logic scan_mode, // scan mode
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic rst_l // reset, active low
);
logic lsu_dccm_rden_m;
logic lsu_dccm_rden_r;
logic [31:0] store_data_m;
logic [31:0] store_data_r;
logic [31:0] store_data_hi_r, store_data_lo_r;
logic [31:0] store_datafn_hi_r, store_datafn_lo_r;
logic [31:0] sec_data_lo_m, sec_data_hi_m;
logic [31:0] sec_data_lo_r, sec_data_hi_r;
logic [31:0] lsu_ld_data_m;
logic [31:0] dccm_rdata_hi_m, dccm_rdata_lo_m;
logic [6:0] dccm_data_ecc_hi_m, dccm_data_ecc_lo_m;
logic lsu_single_ecc_error_m;
logic lsu_double_ecc_error_m;
logic [31:0] lsu_ld_data_r;
logic [31:0] lsu_ld_data_corr_r;
logic [31:0] dccm_rdata_hi_r, dccm_rdata_lo_r;
logic [6:0] dccm_data_ecc_hi_r, dccm_data_ecc_lo_r;
logic single_ecc_error_hi_r, single_ecc_error_lo_r;
logic lsu_single_ecc_error_r;
logic lsu_double_ecc_error_r;
logic ld_single_ecc_error_r, ld_single_ecc_error_r_ff;
logic [31:0] picm_mask_data_m;
logic [31:0] lsu_addr_d, lsu_addr_m, lsu_addr_r;
logic [31:0] end_addr_d, end_addr_m, end_addr_r;
el2_lsu_pkt_t lsu_pkt_d, lsu_pkt_m, lsu_pkt_r;
logic lsu_i0_valid_d, lsu_i0_valid_m, lsu_i0_valid_r;
// Store Buffer signals
logic store_stbuf_reqvld_r;
logic ldst_stbuf_reqvld_r;
logic lsu_commit_r;
logic lsu_exc_m;
logic addr_in_dccm_d, addr_in_dccm_m, addr_in_dccm_r;
logic addr_in_pic_d, addr_in_pic_m, addr_in_pic_r;
logic ldst_dual_d, ldst_dual_m, ldst_dual_r;
logic addr_external_m;
logic stbuf_reqvld_any;
logic stbuf_reqvld_flushed_any;
logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any;
logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any;
logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any;
logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, sec_data_hi_r_ff;
logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, sec_data_ecc_lo_r_ff;
logic lsu_cmpen_m;
logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m;
logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m;
logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m;
logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m;
logic lsu_stbuf_commit_any;
logic lsu_stbuf_empty_any; // This is for blocking loads
logic lsu_stbuf_full_any;
// Bus signals
logic lsu_busreq_r;
logic lsu_bus_buffer_pend_any;
logic lsu_bus_buffer_empty_any;
logic lsu_bus_buffer_full_any;
logic lsu_busreq_m;
logic [31:0] bus_read_data_m;
logic flush_m_up, flush_r;
logic is_sideeffects_m;
logic [2:0] dma_mem_tag_d, dma_mem_tag_m;
logic ldst_nodma_mtor;
logic dma_dccm_wen, dma_pic_wen;
logic [31:0] dma_dccm_wdata_lo, dma_dccm_wdata_hi;
logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, dma_dccm_wdata_ecc_hi;
// Clocks
logic lsu_busm_clken;
logic lsu_bus_obuf_c1_clken;
logic lsu_c1_m_clk, lsu_c1_r_clk;
logic lsu_c2_m_clk, lsu_c2_r_clk;
logic lsu_store_c1_m_clk, lsu_store_c1_r_clk;
logic lsu_stbuf_c1_clk;
logic lsu_bus_ibuf_c1_clk, lsu_bus_obuf_c1_clk, lsu_bus_buf_c1_clk;
logic lsu_busm_clk;
logic lsu_free_c2_clk;
logic lsu_raw_fwd_lo_m, lsu_raw_fwd_hi_m;
logic lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r;
assign lsu_raw_fwd_lo_m = (|stbuf_fwdbyteen_lo_m[pt.DCCM_BYTE_WIDTH-1:0]);
assign lsu_raw_fwd_hi_m = (|stbuf_fwdbyteen_hi_m[pt.DCCM_BYTE_WIDTH-1:0]);
el2_lsu_lsc_ctl #(.pt(pt)) lsu_lsc_ctl (.*);
// block stores in decode - for either bus or stbuf reasons
assign lsu_store_stall_any = lsu_stbuf_full_any | lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff;
assign lsu_load_stall_any = lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff;
assign lsu_fastint_stall_any = ld_single_ecc_error_r; // Stall the fastint in decode-1 stage
// Ready to accept dma trxns
// There can't be any inpipe forwarding from non-dma packet to dma packet since they can be flushed so we can't have st in r when dma is in m
assign dma_mem_tag_d[2:0] = dma_mem_tag[2:0];
assign ldst_nodma_mtor = (lsu_pkt_m.valid & ~lsu_pkt_m.dma & (addr_in_dccm_m | addr_in_pic_m) & lsu_pkt_m.store);
assign dccm_ready = ~(dec_lsu_valid_raw_d | ldst_nodma_mtor | ld_single_ecc_error_r_ff);
assign dma_dccm_wen = dma_dccm_req & dma_mem_write & addr_in_dccm_d & dma_mem_sz[1]; // Perform DMA writes only for word/dword
assign dma_pic_wen = dma_dccm_req & dma_mem_write & addr_in_pic_d;
assign {dma_dccm_wdata_hi[31:0], dma_dccm_wdata_lo[31:0]} = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores
// Generate per cycle flush signals
assign flush_m_up = dec_tlu_flush_lower_r;
assign flush_r = dec_tlu_i0_kill_writeb_r;
// lsu idle
// lsu halt idle. This is used for entering the halt mode. Also, DMA accesses are allowed during fence.
// Indicates non-idle if there is a instruction valid in d-r or read/write buffers are non-empty since they can come with error
// Store buffer now have only non-dma dccm stores
// stbuf_empty not needed since it has only dccm stores
assign lsu_idle_any = ~((lsu_pkt_m.valid & ~lsu_pkt_m.dma) |
(lsu_pkt_r.valid & ~lsu_pkt_r.dma)) &
lsu_bus_buffer_empty_any;
assign lsu_active = (lsu_pkt_m.valid | lsu_pkt_r.valid | ld_single_ecc_error_r_ff) | ~lsu_bus_buffer_empty_any; // This includes DMA. Used for gating top clock
// Instantiate the store buffer
assign store_stbuf_reqvld_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~flush_r & (~lsu_pkt_r.dma | ((lsu_pkt_r.by | lsu_pkt_r.half) & ~lsu_double_ecc_error_r));
// Disable Forwarding for now
assign lsu_cmpen_m = lsu_pkt_m.valid & (lsu_pkt_m.load | lsu_pkt_m.store) & (addr_in_dccm_m | addr_in_pic_m);
// Bus signals
assign lsu_busreq_m = lsu_pkt_m.valid & ((lsu_pkt_m.load | lsu_pkt_m.store) & addr_external_m) & ~flush_m_up & ~lsu_exc_m & ~lsu_pkt_m.fast_int;
// Dual signals
assign ldst_dual_d = (lsu_addr_d[2] != end_addr_d[2]);
assign ldst_dual_m = (lsu_addr_m[2] != end_addr_m[2]);
assign ldst_dual_r = (lsu_addr_r[2] != end_addr_r[2]);
// PMU signals
assign lsu_pmu_misaligned_m = lsu_pkt_m.valid & ((lsu_pkt_m.half & lsu_addr_m[0]) | (lsu_pkt_m.word & (|lsu_addr_m[1:0])));
assign lsu_pmu_load_external_m = lsu_pkt_m.valid & lsu_pkt_m.load & addr_external_m;
assign lsu_pmu_store_external_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_external_m;
el2_lsu_dccm_ctl #(.pt(pt)) dccm_ctl (
.lsu_addr_d(lsu_addr_d[31:0]),
.end_addr_d(end_addr_d[pt.DCCM_BITS-1:0]),
.lsu_addr_m(lsu_addr_m[pt.DCCM_BITS-1:0]),
.lsu_addr_r(lsu_addr_r[31:0]),
.end_addr_m(end_addr_m[pt.DCCM_BITS-1:0]),
.end_addr_r(end_addr_r[pt.DCCM_BITS-1:0]),
.*
);
el2_lsu_stbuf #(.pt(pt)) stbuf (
.lsu_addr_d(lsu_addr_d[pt.LSU_SB_BITS-1:0]),
.end_addr_d(end_addr_d[pt.LSU_SB_BITS-1:0]),
.*
);
el2_lsu_ecc #(.pt(pt)) ecc (
.lsu_addr_r(lsu_addr_r[pt.DCCM_BITS-1:0]),
.end_addr_r(end_addr_r[pt.DCCM_BITS-1:0]),
.lsu_addr_m(lsu_addr_m[pt.DCCM_BITS-1:0]),
.end_addr_m(end_addr_m[pt.DCCM_BITS-1:0]),
.*
);
el2_lsu_trigger #(.pt(pt)) trigger (
.store_data_m(store_data_m[31:0]),
.*
);
// Clk domain
el2_lsu_clkdomain #(.pt(pt)) clkdomain (.*);
// Bus interface
el2_lsu_bus_intf #(.pt(pt)) bus_intf (
.lsu_addr_m(lsu_addr_m[31:0] & {32{addr_external_m & lsu_pkt_m.valid}}),
.lsu_addr_r(lsu_addr_r[31:0] & {32{lsu_busreq_r}}),
.end_addr_m(end_addr_m[31:0] & {32{addr_external_m & lsu_pkt_m.valid}}),
.end_addr_r(end_addr_r[31:0] & {32{lsu_busreq_r}}),
.store_data_r(store_data_r[31:0] & {32{lsu_busreq_r}}),
.*
);
//Flops
rvdff #(3) dma_mem_tag_mff (.*, .din(dma_mem_tag_d[2:0]), .dout(dma_mem_tag_m[2:0]), .clk(lsu_c1_m_clk));
rvdff #(2) lsu_raw_fwd_r_ff (.*, .din({lsu_raw_fwd_hi_m, lsu_raw_fwd_lo_m}), .dout({lsu_raw_fwd_hi_r, lsu_raw_fwd_lo_r}), .clk(lsu_c2_r_clk));
`ifdef RV_ASSERT_ON
logic [1:0] store_data_bypass_sel;
assign store_data_bypass_sel[1:0] = {lsu_p.store_data_bypass_d, lsu_p.store_data_bypass_m};
property exception_no_lsu_flush;
@(posedge clk) disable iff(~rst_l) lsu_lsc_ctl.lsu_error_pkt_m.exc_valid |-> ##[1:2] (flush_r );
endproperty
assert_exception_no_lsu_flush: assert property (exception_no_lsu_flush) else
$display("No flush within 2 cycles of exception");
// offset should be zero for fast interrupt
property offset_0_fastint;
@(posedge clk) disable iff(~rst_l) (lsu_p.valid & lsu_p.fast_int) |-> (dec_lsu_offset_d[11:0] == 12'b0);
endproperty
assert_offset_0_fastint: assert property (offset_0_fastint) else
$display("dec_tlu_offset_d not zero for fast interrupt redirect");
// DMA req should assert dccm rden/wren
property dmareq_dccm_wren_or_rden;
@(posedge clk) disable iff(~rst_l) dma_dccm_req |-> (dccm_rden | dccm_wren | addr_in_pic_d);
endproperty
assert_dmareq_dccm_wren_or_rden: assert property(dmareq_dccm_wren_or_rden) else
$display("dccm rden or wren not asserted during DMA request");
// fastint_stall should cause load/store stall next cycle
property fastint_stall_imply_loadstore_stall;
@(posedge clk) disable iff(~rst_l) (lsu_fastint_stall_any & (lsu_commit_r | lsu_pkt_r.dma)) |-> ##1 ((lsu_load_stall_any | lsu_store_stall_any) | ~ld_single_ecc_error_r_ff);
endproperty
assert_fastint_stall_imply_loadstore_stall: assert property (fastint_stall_imply_loadstore_stall) else
$display("fastint_stall should be followed by lsu_load/store_stall_any");
// Single ECC error implies rfnpc flush
property single_ecc_error_rfnpc_flush;
@(posedge clk) disable iff(~rst_l) (lsu_error_pkt_r.single_ecc_error & lsu_pkt_r.load) |=> ~lsu_commit_r;
endproperty
assert_single_ecc_error_rfnpc_flush: assert property (single_ecc_error_rfnpc_flush) else
$display("LSU commit next cycle after single ecc error");
`endif
endmodule // el2_lsu

View File

@ -0,0 +1,191 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: Checks the memory map for the address
// Comments:
//
//********************************************************************************
module el2_lsu_addrcheck
import el2_pkg::*;
#(
`include "el2_param.vh"
)(
input logic lsu_c2_m_clk, // clock
input logic rst_l, // reset
input logic [31:0] start_addr_d, // start address for lsu
input logic [31:0] end_addr_d, // end address for lsu
input el2_lsu_pkt_t lsu_pkt_d, // packet in d
input logic [31:0] dec_tlu_mrac_ff, // CSR read
input logic [3:0] rs1_region_d, // address rs operand [31:28]
input logic [31:0] rs1_d, // address rs operand
output logic is_sideeffects_m, // is sideffects space
output logic addr_in_dccm_d, // address in dccm
output logic addr_in_pic_d, // address in pic
output logic addr_external_d, // address in external
output logic access_fault_d, // access fault
output logic misaligned_fault_d, // misaligned
output logic [3:0] exc_mscause_d, // mscause for access/misaligned faults
output logic fir_dccm_access_error_d, // Fast interrupt dccm access error
output logic fir_nondccm_access_error_d,// Fast interrupt dccm access error
input logic scan_mode // Scan mode
);
logic non_dccm_access_ok;
logic is_sideeffects_d, is_aligned_d;
logic start_addr_in_dccm_d, end_addr_in_dccm_d;
logic start_addr_in_dccm_region_d, end_addr_in_dccm_region_d;
logic start_addr_in_pic_d, end_addr_in_pic_d;
logic start_addr_in_pic_region_d, end_addr_in_pic_region_d;
logic [4:0] csr_idx;
logic addr_in_iccm;
logic start_addr_dccm_or_pic;
logic base_reg_dccm_or_pic;
logic unmapped_access_fault_d, mpu_access_fault_d, picm_access_fault_d, regpred_access_fault_d;
logic regcross_misaligned_fault_d, sideeffect_misaligned_fault_d;
logic [3:0] access_fault_mscause_d;
logic [3:0] misaligned_fault_mscause_d;
if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
// Start address check
rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
.CCM_SIZE(pt.DCCM_SIZE)) start_addr_dccm_rangecheck (
.addr(start_addr_d[31:0]),
.in_range(start_addr_in_dccm_d),
.in_region(start_addr_in_dccm_region_d)
);
// End address check
rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
.CCM_SIZE(pt.DCCM_SIZE)) end_addr_dccm_rangecheck (
.addr(end_addr_d[31:0]),
.in_range(end_addr_in_dccm_d),
.in_region(end_addr_in_dccm_region_d)
);
end else begin: Gen_dccm_disable // block: Gen_dccm_enable
assign start_addr_in_dccm_d = '0;
assign start_addr_in_dccm_region_d = '0;
assign end_addr_in_dccm_d = '0;
assign end_addr_in_dccm_region_d = '0;
end
if (pt.ICCM_ENABLE == 1) begin : check_iccm
assign addr_in_iccm = (start_addr_d[31:28] == pt.ICCM_REGION);
end else begin
assign addr_in_iccm = 1'b0;
end
// PIC memory check
// Start address check
rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
.CCM_SIZE(pt.PIC_SIZE)) start_addr_pic_rangecheck (
.addr(start_addr_d[31:0]),
.in_range(start_addr_in_pic_d),
.in_region(start_addr_in_pic_region_d)
);
// End address check
rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
.CCM_SIZE(pt.PIC_SIZE)) end_addr_pic_rangecheck (
.addr(end_addr_d[31:0]),
.in_range(end_addr_in_pic_d),
.in_region(end_addr_in_pic_region_d)
);
assign start_addr_dccm_or_pic = start_addr_in_dccm_region_d | start_addr_in_pic_region_d;
assign base_reg_dccm_or_pic = ((rs1_region_d[3:0] == pt.DCCM_REGION) & pt.DCCM_ENABLE) | (rs1_region_d[3:0] == pt.PIC_REGION);
assign addr_in_dccm_d = (start_addr_in_dccm_d & end_addr_in_dccm_d);
assign addr_in_pic_d = (start_addr_in_pic_d & end_addr_in_pic_d);
assign addr_external_d = ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d);
assign csr_idx[4:0] = {start_addr_d[31:28], 1'b1};
assign is_sideeffects_d = dec_tlu_mrac_ff[csr_idx] & ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d | addr_in_iccm) & lsu_pkt_d.valid & (lsu_pkt_d.store | lsu_pkt_d.load); //every region has the 2 LSB indicating ( 1: sideeffects/no_side effects, and 0: cacheable ). Ignored in internal regions
assign is_aligned_d = (lsu_pkt_d.word & (start_addr_d[1:0] == 2'b0)) |
(lsu_pkt_d.half & (start_addr_d[0] == 1'b0)) |
lsu_pkt_d.by;
assign non_dccm_access_ok = (~(|{pt.DATA_ACCESS_ENABLE0,pt.DATA_ACCESS_ENABLE1,pt.DATA_ACCESS_ENABLE2,pt.DATA_ACCESS_ENABLE3,pt.DATA_ACCESS_ENABLE4,pt.DATA_ACCESS_ENABLE5,pt.DATA_ACCESS_ENABLE6,pt.DATA_ACCESS_ENABLE7})) |
(((pt.DATA_ACCESS_ENABLE0 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK0)) == (pt.DATA_ACCESS_ADDR0 | pt.DATA_ACCESS_MASK0)) |
(pt.DATA_ACCESS_ENABLE1 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK1)) == (pt.DATA_ACCESS_ADDR1 | pt.DATA_ACCESS_MASK1)) |
(pt.DATA_ACCESS_ENABLE2 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK2)) == (pt.DATA_ACCESS_ADDR2 | pt.DATA_ACCESS_MASK2)) |
(pt.DATA_ACCESS_ENABLE3 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK3)) == (pt.DATA_ACCESS_ADDR3 | pt.DATA_ACCESS_MASK3)) |
(pt.DATA_ACCESS_ENABLE4 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK4)) == (pt.DATA_ACCESS_ADDR4 | pt.DATA_ACCESS_MASK4)) |
(pt.DATA_ACCESS_ENABLE5 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK5)) == (pt.DATA_ACCESS_ADDR5 | pt.DATA_ACCESS_MASK5)) |
(pt.DATA_ACCESS_ENABLE6 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK6)) == (pt.DATA_ACCESS_ADDR6 | pt.DATA_ACCESS_MASK6)) |
(pt.DATA_ACCESS_ENABLE7 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK7)) == (pt.DATA_ACCESS_ADDR7 | pt.DATA_ACCESS_MASK7))) &
((pt.DATA_ACCESS_ENABLE0 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK0)) == (pt.DATA_ACCESS_ADDR0 | pt.DATA_ACCESS_MASK0)) |
(pt.DATA_ACCESS_ENABLE1 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK1)) == (pt.DATA_ACCESS_ADDR1 | pt.DATA_ACCESS_MASK1)) |
(pt.DATA_ACCESS_ENABLE2 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK2)) == (pt.DATA_ACCESS_ADDR2 | pt.DATA_ACCESS_MASK2)) |
(pt.DATA_ACCESS_ENABLE3 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK3)) == (pt.DATA_ACCESS_ADDR3 | pt.DATA_ACCESS_MASK3)) |
(pt.DATA_ACCESS_ENABLE4 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK4)) == (pt.DATA_ACCESS_ADDR4 | pt.DATA_ACCESS_MASK4)) |
(pt.DATA_ACCESS_ENABLE5 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK5)) == (pt.DATA_ACCESS_ADDR5 | pt.DATA_ACCESS_MASK5)) |
(pt.DATA_ACCESS_ENABLE6 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK6)) == (pt.DATA_ACCESS_ADDR6 | pt.DATA_ACCESS_MASK6)) |
(pt.DATA_ACCESS_ENABLE7 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK7)) == (pt.DATA_ACCESS_ADDR7 | pt.DATA_ACCESS_MASK7))));
// Access fault logic
// 0. Unmapped local memory : Addr in dccm region but not in dccm offset OR Addr in picm region but not in picm offset OR DCCM -> PIC cross when DCCM/PIC in same region
// 1. Uncorrectable (double bit) ECC error
// 3. Address is not in a populated non-dccm region
// 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa
// 6. Ld/St access to picm are not word aligned or word size
assign regpred_access_fault_d = (start_addr_dccm_or_pic ^ base_reg_dccm_or_pic); // 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa
assign picm_access_fault_d = (addr_in_pic_d & ((start_addr_d[1:0] != 2'b0) | ~lsu_pkt_d.word)); // 6. Ld/St access to picm are not word aligned or word size
if (pt.DCCM_ENABLE & (pt.DCCM_REGION == pt.PIC_REGION)) begin
assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~(start_addr_in_dccm_d | start_addr_in_pic_d)) | // 0. Addr in dccm/pic region but not in dccm/pic offset
(end_addr_in_dccm_region_d & ~(end_addr_in_dccm_d | end_addr_in_pic_d)) | // 0. Addr in dccm/pic region but not in dccm/pic offset
(start_addr_in_dccm_d & end_addr_in_pic_d) | // 0. DCCM -> PIC cross when DCCM/PIC in same region
(start_addr_in_pic_d & end_addr_in_dccm_d)); // 0. DCCM -> PIC cross when DCCM/PIC in same region
assign mpu_access_fault_d = (~start_addr_in_dccm_region_d & ~non_dccm_access_ok); // 3. Address is not in a populated non-dccm region
end else begin
assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) | // 0. Addr in dccm region but not in dccm offset
(end_addr_in_dccm_region_d & ~end_addr_in_dccm_d) | // 0. Addr in dccm region but not in dccm offset
(start_addr_in_pic_region_d & ~start_addr_in_pic_d) | // 0. Addr in picm region but not in picm offset
(end_addr_in_pic_region_d & ~end_addr_in_pic_d)); // 0. Addr in picm region but not in picm offset
assign mpu_access_fault_d = (~start_addr_in_pic_region_d & ~start_addr_in_dccm_region_d & ~non_dccm_access_ok); // 3. Address is not in a populated non-dccm region
end
assign access_fault_d = (unmapped_access_fault_d | mpu_access_fault_d | picm_access_fault_d | regpred_access_fault_d) & lsu_pkt_d.valid & ~lsu_pkt_d.dma;
assign access_fault_mscause_d[3:0] = unmapped_access_fault_d ? 4'h2 : mpu_access_fault_d ? 4'h3 : regpred_access_fault_d ? 4'h5 : picm_access_fault_d ? 4'h6 : 4'h0;
// Misaligned happens due to 2 reasons
// 0. Region cross
// 1. sideeffects access which are not aligned
assign regcross_misaligned_fault_d = (start_addr_d[31:28] != end_addr_d[31:28]);
assign sideeffect_misaligned_fault_d = (is_sideeffects_d & ~is_aligned_d);
assign misaligned_fault_d = (regcross_misaligned_fault_d | (sideeffect_misaligned_fault_d & addr_external_d)) & lsu_pkt_d.valid & ~lsu_pkt_d.dma;
assign misaligned_fault_mscause_d[3:0] = regcross_misaligned_fault_d ? 4'h2 : sideeffect_misaligned_fault_d ? 4'h1 : 4'h0;
assign exc_mscause_d[3:0] = misaligned_fault_d ? misaligned_fault_mscause_d[3:0] : access_fault_mscause_d[3:0];
// Fast interrupt error logic
assign fir_dccm_access_error_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) |
(end_addr_in_dccm_region_d & ~end_addr_in_dccm_d)) & lsu_pkt_d.valid & lsu_pkt_d.fast_int;
assign fir_nondccm_access_error_d = ~(start_addr_in_dccm_region_d & end_addr_in_dccm_region_d) & lsu_pkt_d.valid & lsu_pkt_d.fast_int;
rvdff #(.WIDTH(1)) is_sideeffects_mff (.din(is_sideeffects_d), .dout(is_sideeffects_m), .clk(lsu_c2_m_clk), .*);
endmodule // el2_lsu_addrcheck

View File

@ -0,0 +1,936 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: lsu interface with interface queue
// Comments:
//
//********************************************************************************
module el2_lsu_bus_buffer
import el2_pkg::*;
#(
`include "el2_param.vh"
)(
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk_override, // Override non-functional clock gating
input logic rst_l, // reset, active low
input logic scan_mode, // scan mode
input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals
input logic dec_tlu_wb_coalescing_disable, // disable write buffer coalescing
input logic dec_tlu_sideeffect_posted_disable, // Don't block the sideeffect load store to the bus
input logic dec_tlu_force_halt,
// various clocks needed for the bus reads and writes
input logic lsu_bus_obuf_c1_clken,
input logic lsu_busm_clken,
input logic lsu_c2_r_clk,
input logic lsu_bus_ibuf_c1_clk,
input logic lsu_bus_obuf_c1_clk,
input logic lsu_bus_buf_c1_clk,
input logic lsu_free_c2_clk,
input logic lsu_busm_clk,
input logic dec_lsu_valid_raw_d, // Raw valid for address computation
input el2_lsu_pkt_t lsu_pkt_m, // lsu packet flowing down the pipe
input el2_lsu_pkt_t lsu_pkt_r, // lsu packet flowing down the pipe
input logic [31:0] lsu_addr_m, // lsu address flowing down the pipe
input logic [31:0] end_addr_m, // lsu address flowing down the pipe
input logic [31:0] lsu_addr_r, // lsu address flowing down the pipe
input logic [31:0] end_addr_r, // lsu address flowing down the pipe
input logic [31:0] store_data_r, // store data flowing down the pipe
input logic no_word_merge_r, // r store doesn't need to wait in ibuf since it will not coalesce
input logic no_dword_merge_r, // r store doesn't need to wait in ibuf since it will not coalesce
input logic lsu_busreq_m, // bus request is in m
output logic lsu_busreq_r, // bus request is in r
input logic ld_full_hit_m, // load can get all its byte from a write buffer entry
input logic flush_m_up, // flush
input logic flush_r, // flush
input logic lsu_commit_r, // lsu instruction in r commits
input logic is_sideeffects_r, // lsu attribute is side_effects
input logic ldst_dual_d, // load/store is unaligned at 32 bit boundary
input logic ldst_dual_m, // load/store is unaligned at 32 bit boundary
input logic ldst_dual_r, // load/store is unaligned at 32 bit boundary
input logic [7:0] ldst_byteen_ext_m, // HI and LO signals
output logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry
output logic lsu_bus_buffer_full_any, // bus buffer is full
output logic lsu_bus_buffer_empty_any, // bus buffer is empty
output logic [3:0] ld_byte_hit_buf_lo, ld_byte_hit_buf_hi, // Byte enables for forwarding data
output logic [31:0] ld_fwddata_buf_lo, ld_fwddata_buf_hi, // load forwarding data
output logic lsu_imprecise_error_load_any, // imprecise load bus error
output logic lsu_imprecise_error_store_any, // imprecise store bus error
output logic [31:0] lsu_imprecise_error_addr_any, // address of the imprecise error
// Non-blocking loads
output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load
output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the enrty which needs to be invalidated
output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam
output logic lsu_nonblock_load_data_error, // non block load has an error
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error
output logic [31:0] lsu_nonblock_load_data, // Data of the non block load
// PMU events
output logic lsu_pmu_bus_trxn,
output logic lsu_pmu_bus_misaligned,
output logic lsu_pmu_bus_error,
output logic lsu_pmu_bus_busy,
// AXI Write Channels
output logic lsu_axi_awvalid,
input logic lsu_axi_awready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid,
output logic [31:0] lsu_axi_awaddr,
output logic [3:0] lsu_axi_awregion,
output logic [7:0] lsu_axi_awlen,
output logic [2:0] lsu_axi_awsize,
output logic [1:0] lsu_axi_awburst,
output logic lsu_axi_awlock,
output logic [3:0] lsu_axi_awcache,
output logic [2:0] lsu_axi_awprot,
output logic [3:0] lsu_axi_awqos,
output logic lsu_axi_wvalid,
input logic lsu_axi_wready,
output logic [63:0] lsu_axi_wdata,
output logic [7:0] lsu_axi_wstrb,
output logic lsu_axi_wlast,
input logic lsu_axi_bvalid,
output logic lsu_axi_bready,
input logic [1:0] lsu_axi_bresp,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid,
// AXI Read Channels
output logic lsu_axi_arvalid,
input logic lsu_axi_arready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid,
output logic [31:0] lsu_axi_araddr,
output logic [3:0] lsu_axi_arregion,
output logic [7:0] lsu_axi_arlen,
output logic [2:0] lsu_axi_arsize,
output logic [1:0] lsu_axi_arburst,
output logic lsu_axi_arlock,
output logic [3:0] lsu_axi_arcache,
output logic [2:0] lsu_axi_arprot,
output logic [3:0] lsu_axi_arqos,
input logic lsu_axi_rvalid,
output logic lsu_axi_rready,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid,
input logic [63:0] lsu_axi_rdata,
input logic [1:0] lsu_axi_rresp,
input logic lsu_bus_clk_en,
input logic lsu_bus_clk_en_q
);
// For Ld: IDLE -> WAIT -> CMD -> RESP -> DONE_PARTIAL(?) -> DONE_WAIT(?) -> DONE -> IDLE
// For St: IDLE -> WAIT -> CMD -> RESP(?) -> IDLE
typedef enum logic [2:0] {IDLE=3'b000, WAIT=3'b001, CMD=3'b010, RESP=3'b011, DONE_PARTIAL=3'b100, DONE_WAIT=3'b101, DONE=3'b110} state_t;
localparam DEPTH = pt.LSU_NUM_NBLOAD;
localparam DEPTH_LOG2 = pt.LSU_NUM_NBLOAD_WIDTH;
localparam TIMER = 8; // This can be only power of 2
localparam TIMER_MAX = TIMER - 1; // Maximum value of timer
localparam TIMER_LOG2 = (TIMER < 2) ? 1 : $clog2(TIMER);
logic [3:0] ldst_byteen_hi_m, ldst_byteen_lo_m;
logic [DEPTH-1:0] ld_addr_hitvec_lo, ld_addr_hitvec_hi;
logic [3:0][DEPTH-1:0] ld_byte_hitvec_lo, ld_byte_hitvec_hi;
logic [3:0][DEPTH-1:0] ld_byte_hitvecfn_lo, ld_byte_hitvecfn_hi;
logic ld_addr_ibuf_hit_lo, ld_addr_ibuf_hit_hi;
logic [3:0] ld_byte_ibuf_hit_lo, ld_byte_ibuf_hit_hi;
logic [3:0] ldst_byteen_r;
logic [3:0] ldst_byteen_hi_r, ldst_byteen_lo_r;
logic [31:0] store_data_hi_r, store_data_lo_r;
logic is_aligned_r; // Aligned load/store
logic ldst_samedw_r;
logic lsu_nonblock_load_valid_r;
logic [31:0] lsu_nonblock_load_data_hi, lsu_nonblock_load_data_lo, lsu_nonblock_data_unalgn;
logic [1:0] lsu_nonblock_addr_offset;
logic [1:0] lsu_nonblock_sz;
logic lsu_nonblock_unsign;
logic lsu_nonblock_load_data_ready;
logic [DEPTH-1:0] CmdPtr0Dec, CmdPtr1Dec;
logic [DEPTH-1:0] RspPtrDec;
logic [DEPTH_LOG2-1:0] CmdPtr0, CmdPtr1;
logic [DEPTH_LOG2-1:0] RspPtr;
logic [DEPTH_LOG2-1:0] WrPtr0_m, WrPtr0_r;
logic [DEPTH_LOG2-1:0] WrPtr1_m, WrPtr1_r;
logic found_wrptr0, found_wrptr1, found_cmdptr0, found_cmdptr1;
logic [3:0] buf_numvld_any, buf_numvld_wrcmd_any, buf_numvld_cmd_any, buf_numvld_pend_any;
logic any_done_wait_state;
logic bus_sideeffect_pend;
logic bus_coalescing_disable;
logic bus_addr_match_pending;
logic bus_cmd_sent, bus_cmd_ready;
logic bus_wcmd_sent, bus_wdata_sent;
logic bus_rsp_read, bus_rsp_write;
logic [pt.LSU_BUS_TAG-1:0] bus_rsp_read_tag, bus_rsp_write_tag;
logic bus_rsp_read_error, bus_rsp_write_error;
logic [63:0] bus_rsp_rdata;
// Bus buffer signals
state_t [DEPTH-1:0] buf_state;
logic [DEPTH-1:0][1:0] buf_sz;
logic [DEPTH-1:0][31:0] buf_addr;
logic [DEPTH-1:0][3:0] buf_byteen;
logic [DEPTH-1:0] buf_sideeffect;
logic [DEPTH-1:0] buf_write;
logic [DEPTH-1:0] buf_unsign;
logic [DEPTH-1:0] buf_dual;
logic [DEPTH-1:0] buf_samedw;
logic [DEPTH-1:0] buf_nomerge;
logic [DEPTH-1:0] buf_dualhi;
logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_dualtag;
logic [DEPTH-1:0] buf_ldfwd;
logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_ldfwdtag;
logic [DEPTH-1:0] buf_error;
logic [DEPTH-1:0][31:0] buf_data;
logic [DEPTH-1:0][DEPTH-1:0] buf_age, buf_age_younger;
logic [DEPTH-1:0][DEPTH-1:0] buf_rspage, buf_rsp_pickage;
state_t [DEPTH-1:0] buf_nxtstate;
logic [DEPTH-1:0] buf_rst;
logic [DEPTH-1:0] buf_state_en;
logic [DEPTH-1:0] buf_cmd_state_bus_en;
logic [DEPTH-1:0] buf_resp_state_bus_en;
logic [DEPTH-1:0] buf_state_bus_en;
logic [DEPTH-1:0] buf_dual_in;
logic [DEPTH-1:0] buf_samedw_in;
logic [DEPTH-1:0] buf_nomerge_in;
logic [DEPTH-1:0] buf_sideeffect_in;
logic [DEPTH-1:0] buf_unsign_in;
logic [DEPTH-1:0][1:0] buf_sz_in;
logic [DEPTH-1:0] buf_write_in;
logic [DEPTH-1:0] buf_wr_en;
logic [DEPTH-1:0] buf_dualhi_in;
logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_dualtag_in;
logic [DEPTH-1:0] buf_ldfwd_en;
logic [DEPTH-1:0] buf_ldfwd_in;
logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_ldfwdtag_in;
logic [DEPTH-1:0][3:0] buf_byteen_in;
logic [DEPTH-1:0][31:0] buf_addr_in;
logic [DEPTH-1:0][31:0] buf_data_in;
logic [DEPTH-1:0] buf_error_en;
logic [DEPTH-1:0] buf_data_en;
logic [DEPTH-1:0][DEPTH-1:0] buf_age_in;
logic [DEPTH-1:0][DEPTH-1:0] buf_ageQ;
logic [DEPTH-1:0][DEPTH-1:0] buf_rspage_set;
logic [DEPTH-1:0][DEPTH-1:0] buf_rspage_in;
logic [DEPTH-1:0][DEPTH-1:0] buf_rspageQ;
// Input buffer signals
logic ibuf_valid;
logic ibuf_dual;
logic ibuf_samedw;
logic ibuf_nomerge;
logic [DEPTH_LOG2-1:0] ibuf_tag;
logic [DEPTH_LOG2-1:0] ibuf_dualtag;
logic ibuf_sideeffect;
logic ibuf_unsign;
logic ibuf_write;
logic [1:0] ibuf_sz;
logic [3:0] ibuf_byteen;
logic [31:0] ibuf_addr;
logic [31:0] ibuf_data;
logic [TIMER_LOG2-1:0] ibuf_timer;
logic ibuf_byp;
logic ibuf_wr_en;
logic ibuf_rst;
logic ibuf_force_drain;
logic ibuf_drain_vld;
logic [DEPTH-1:0] ibuf_drainvec_vld;
logic [DEPTH_LOG2-1:0] ibuf_tag_in;
logic [DEPTH_LOG2-1:0] ibuf_dualtag_in;
logic [1:0] ibuf_sz_in;
logic [31:0] ibuf_addr_in;
logic [3:0] ibuf_byteen_in;
logic [31:0] ibuf_data_in;
logic [TIMER_LOG2-1:0] ibuf_timer_in;
logic [3:0] ibuf_byteen_out;
logic [31:0] ibuf_data_out;
logic ibuf_merge_en, ibuf_merge_in;
// Output buffer signals
logic obuf_valid;
logic obuf_write;
logic obuf_nosend;
logic obuf_rdrsp_pend;
logic obuf_sideeffect;
logic [31:0] obuf_addr;
logic [63:0] obuf_data;
logic [1:0] obuf_sz;
logic [7:0] obuf_byteen;
logic obuf_merge;
logic obuf_cmd_done, obuf_data_done;
logic [pt.LSU_BUS_TAG-1:0] obuf_tag0;
logic [pt.LSU_BUS_TAG-1:0] obuf_tag1;
logic [pt.LSU_BUS_TAG-1:0] obuf_rdrsp_tag;
logic ibuf_buf_byp;
logic obuf_force_wr_en;
logic obuf_wr_wait;
logic obuf_wr_en, obuf_wr_enQ;
logic obuf_rst;
logic obuf_write_in;
logic obuf_nosend_in;
logic obuf_rdrsp_pend_en;
logic obuf_rdrsp_pend_in;
logic obuf_sideeffect_in;
logic obuf_aligned_in;
logic [31:0] obuf_addr_in;
logic [63:0] obuf_data_in;
logic [1:0] obuf_sz_in;
logic [7:0] obuf_byteen_in;
logic obuf_merge_in;
logic obuf_cmd_done_in, obuf_data_done_in;
logic [pt.LSU_BUS_TAG-1:0] obuf_tag0_in;
logic [pt.LSU_BUS_TAG-1:0] obuf_tag1_in;
logic [pt.LSU_BUS_TAG-1:0] obuf_rdrsp_tag_in;
logic obuf_merge_en;
logic [TIMER_LOG2-1:0] obuf_wr_timer, obuf_wr_timer_in;
logic [7:0] obuf_byteen0_in, obuf_byteen1_in;
logic [63:0] obuf_data0_in, obuf_data1_in;
logic lsu_axi_awvalid_q, lsu_axi_awready_q;
logic lsu_axi_wvalid_q, lsu_axi_wready_q;
logic lsu_axi_arvalid_q, lsu_axi_arready_q;
logic lsu_axi_bvalid_q, lsu_axi_bready_q;
logic lsu_axi_rvalid_q, lsu_axi_rready_q;
logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid_q, lsu_axi_rid_q;
logic [1:0] lsu_axi_bresp_q, lsu_axi_rresp_q;
logic [pt.LSU_BUS_TAG-1:0] lsu_imprecise_error_store_tag;
logic [63:0] lsu_axi_rdata_q;
//------------------------------------------------------------------------------
// Load forwarding logic start
//------------------------------------------------------------------------------
// Function to do 8 to 3 bit encoding
function automatic logic [2:0] f_Enc8to3;
input logic [7:0] Dec_value;
logic [2:0] Enc_value;
Enc_value[0] = Dec_value[1] | Dec_value[3] | Dec_value[5] | Dec_value[7];
Enc_value[1] = Dec_value[2] | Dec_value[3] | Dec_value[6] | Dec_value[7];
Enc_value[2] = Dec_value[4] | Dec_value[5] | Dec_value[6] | Dec_value[7];
return Enc_value[2:0];
endfunction // f_Enc8to3
// Buffer hit logic for bus load forwarding
assign ldst_byteen_hi_m[3:0] = ldst_byteen_ext_m[7:4];
assign ldst_byteen_lo_m[3:0] = ldst_byteen_ext_m[3:0];
for (genvar i=0; i<DEPTH; i++) begin
assign ld_addr_hitvec_lo[i] = (lsu_addr_m[31:2] == buf_addr[i][31:2]) & buf_write[i] & (buf_state[i] != IDLE) & lsu_busreq_m;
assign ld_addr_hitvec_hi[i] = (end_addr_m[31:2] == buf_addr[i][31:2]) & buf_write[i] & (buf_state[i] != IDLE) & lsu_busreq_m;
end
for (genvar j=0; j<4; j++) begin
assign ld_byte_hit_buf_lo[j] = |(ld_byte_hitvecfn_lo[j]) | ld_byte_ibuf_hit_lo[j];
assign ld_byte_hit_buf_hi[j] = |(ld_byte_hitvecfn_hi[j]) | ld_byte_ibuf_hit_hi[j];
for (genvar i=0; i<DEPTH; i++) begin
assign ld_byte_hitvec_lo[j][i] = ld_addr_hitvec_lo[i] & buf_byteen[i][j] & ldst_byteen_lo_m[j];
assign ld_byte_hitvec_hi[j][i] = ld_addr_hitvec_hi[i] & buf_byteen[i][j] & ldst_byteen_hi_m[j];
assign ld_byte_hitvecfn_lo[j][i] = ld_byte_hitvec_lo[j][i] & ~(|(ld_byte_hitvec_lo[j] & buf_age_younger[i])) & ~ld_byte_ibuf_hit_lo[j]; // Kill the byte enable if younger entry exists or byte exists in ibuf
assign ld_byte_hitvecfn_hi[j][i] = ld_byte_hitvec_hi[j][i] & ~(|(ld_byte_hitvec_hi[j] & buf_age_younger[i])) & ~ld_byte_ibuf_hit_hi[j]; // Kill the byte enable if younger entry exists or byte exists in ibuf
end
end
// Hit in the ibuf
assign ld_addr_ibuf_hit_lo = (lsu_addr_m[31:2] == ibuf_addr[31:2]) & ibuf_write & ibuf_valid & lsu_busreq_m;
assign ld_addr_ibuf_hit_hi = (end_addr_m[31:2] == ibuf_addr[31:2]) & ibuf_write & ibuf_valid & lsu_busreq_m;
for (genvar i=0; i<4; i++) begin
assign ld_byte_ibuf_hit_lo[i] = ld_addr_ibuf_hit_lo & ibuf_byteen[i] & ldst_byteen_lo_m[i];
assign ld_byte_ibuf_hit_hi[i] = ld_addr_ibuf_hit_hi & ibuf_byteen[i] & ldst_byteen_hi_m[i];
end
always_comb begin
ld_fwddata_buf_lo[31:0] = {{8{ld_byte_ibuf_hit_lo[3]}},{8{ld_byte_ibuf_hit_lo[2]}},{8{ld_byte_ibuf_hit_lo[1]}},{8{ld_byte_ibuf_hit_lo[0]}}} & ibuf_data[31:0];
ld_fwddata_buf_hi[31:0] = {{8{ld_byte_ibuf_hit_hi[3]}},{8{ld_byte_ibuf_hit_hi[2]}},{8{ld_byte_ibuf_hit_hi[1]}},{8{ld_byte_ibuf_hit_hi[0]}}} & ibuf_data[31:0];
for (int i=0; i<DEPTH; i++) begin
ld_fwddata_buf_lo[7:0] |= {8{ld_byte_hitvecfn_lo[0][i]}} & buf_data[i][7:0];
ld_fwddata_buf_lo[15:8] |= {8{ld_byte_hitvecfn_lo[1][i]}} & buf_data[i][15:8];
ld_fwddata_buf_lo[23:16] |= {8{ld_byte_hitvecfn_lo[2][i]}} & buf_data[i][23:16];
ld_fwddata_buf_lo[31:24] |= {8{ld_byte_hitvecfn_lo[3][i]}} & buf_data[i][31:24];
ld_fwddata_buf_hi[7:0] |= {8{ld_byte_hitvecfn_hi[0][i]}} & buf_data[i][7:0];
ld_fwddata_buf_hi[15:8] |= {8{ld_byte_hitvecfn_hi[1][i]}} & buf_data[i][15:8];
ld_fwddata_buf_hi[23:16] |= {8{ld_byte_hitvecfn_hi[2][i]}} & buf_data[i][23:16];
ld_fwddata_buf_hi[31:24] |= {8{ld_byte_hitvecfn_hi[3][i]}} & buf_data[i][31:24];
end
end
//------------------------------------------------------------------------------
// Load forwarding logic end
//------------------------------------------------------------------------------
assign bus_coalescing_disable = dec_tlu_wb_coalescing_disable | pt.BUILD_AHB_LITE;
// Get the hi/lo byte enable
assign ldst_byteen_r[3:0] = ({4{lsu_pkt_r.by}} & 4'b0001) |
({4{lsu_pkt_r.half}} & 4'b0011) |
({4{lsu_pkt_r.word}} & 4'b1111);
assign {ldst_byteen_hi_r[3:0], ldst_byteen_lo_r[3:0]} = {4'b0,ldst_byteen_r[3:0]} << lsu_addr_r[1:0];
assign {store_data_hi_r[31:0], store_data_lo_r[31:0]} = {32'b0,store_data_r[31:0]} << 8*lsu_addr_r[1:0];
assign ldst_samedw_r = (lsu_addr_r[3] == end_addr_r[3]);
assign is_aligned_r = (lsu_pkt_r.word & (lsu_addr_r[1:0] == 2'b0)) |
(lsu_pkt_r.half & (lsu_addr_r[0] == 1'b0)) |
lsu_pkt_r.by;
//------------------------------------------------------------------------------
// Input buffer logic starts here
//------------------------------------------------------------------------------
assign ibuf_byp = lsu_busreq_r & (lsu_pkt_r.load | no_word_merge_r) & ~ibuf_valid;
assign ibuf_wr_en = lsu_busreq_r & lsu_commit_r & ~ibuf_byp;
assign ibuf_rst = (ibuf_drain_vld & ~ibuf_wr_en) | dec_tlu_force_halt;
assign ibuf_force_drain = lsu_busreq_m & ~lsu_busreq_r & ibuf_valid & (lsu_pkt_m.load | (ibuf_addr[31:2] != lsu_addr_m[31:2])); // Move the ibuf to buf if there is a non-colaescable ld/st in m but nothing in r
assign ibuf_drain_vld = ibuf_valid & (((ibuf_wr_en | (ibuf_timer == TIMER_MAX)) & ~(ibuf_merge_en & ibuf_merge_in)) | ibuf_byp | ibuf_force_drain | ibuf_sideeffect | ~ibuf_write | bus_coalescing_disable);
assign ibuf_tag_in[DEPTH_LOG2-1:0] = (ibuf_merge_en & ibuf_merge_in) ? ibuf_tag[DEPTH_LOG2-1:0] : (ldst_dual_r ? WrPtr1_r : WrPtr0_r);
assign ibuf_dualtag_in[DEPTH_LOG2-1:0] = WrPtr0_r;
assign ibuf_sz_in[1:0] = {lsu_pkt_r.word, lsu_pkt_r.half};
assign ibuf_addr_in[31:0] = ldst_dual_r ? end_addr_r[31:0] : lsu_addr_r[31:0];
assign ibuf_byteen_in[3:0] = (ibuf_merge_en & ibuf_merge_in) ? (ibuf_byteen[3:0] | ldst_byteen_lo_r[3:0]) : (ldst_dual_r ? ldst_byteen_hi_r[3:0] : ldst_byteen_lo_r[3:0]);
for (genvar i=0; i<4; i++) begin
assign ibuf_data_in[(8*i)+7:(8*i)] = (ibuf_merge_en & ibuf_merge_in) ? (ldst_byteen_lo_r[i] ? store_data_lo_r[(8*i)+7:(8*i)] : ibuf_data[(8*i)+7:(8*i)]) :
(ldst_dual_r ? store_data_hi_r[(8*i)+7:(8*i)] : store_data_lo_r[(8*i)+7:(8*i)]);
end
assign ibuf_timer_in = ibuf_wr_en ? '0 : (ibuf_timer < TIMER_MAX) ? (ibuf_timer + 1'b1) : ibuf_timer;
assign ibuf_merge_en = lsu_busreq_r & lsu_commit_r & lsu_pkt_r.store & ibuf_valid & ibuf_write & (lsu_addr_r[31:2] == ibuf_addr[31:2]) & ~is_sideeffects_r & ~bus_coalescing_disable;
assign ibuf_merge_in = ~ldst_dual_r; // If it's a unaligned store, merge needs to happen on the way out of ibuf
// ibuf signals going to bus buffer after merging
for (genvar i=0; i<4; i++) begin
assign ibuf_byteen_out[i] = (ibuf_merge_en & ~ibuf_merge_in) ? (ibuf_byteen[i] | ldst_byteen_lo_r[i]) : ibuf_byteen[i];
assign ibuf_data_out[(8*i)+7:(8*i)] = (ibuf_merge_en & ~ibuf_merge_in) ? (ldst_byteen_lo_r[i] ? store_data_lo_r[(8*i)+7:(8*i)] : ibuf_data[(8*i)+7:(8*i)]) :
ibuf_data[(8*i)+7:(8*i)];
end
rvdffsc #(.WIDTH(1)) ibuf_valid_ff (.din(1'b1), .dout(ibuf_valid), .en(ibuf_wr_en), .clear(ibuf_rst), .clk(lsu_free_c2_clk), .*);
rvdffs #(.WIDTH(DEPTH_LOG2)) ibuf_tagff (.din(ibuf_tag_in), .dout(ibuf_tag), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*);
rvdffs #(.WIDTH(DEPTH_LOG2)) ibuf_dualtagff (.din(ibuf_dualtag_in), .dout(ibuf_dualtag), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*);
rvdffs #(.WIDTH(1)) ibuf_dualff (.din(ldst_dual_r), .dout(ibuf_dual), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*);
rvdffs #(.WIDTH(1)) ibuf_samedwff (.din(ldst_samedw_r), .dout(ibuf_samedw), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*);
rvdffs #(.WIDTH(1)) ibuf_nomergeff (.din(no_dword_merge_r), .dout(ibuf_nomerge), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*);
rvdffs #(.WIDTH(1)) ibuf_sideeffectff (.din(is_sideeffects_r), .dout(ibuf_sideeffect), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*);
rvdffs #(.WIDTH(1)) ibuf_unsignff (.din(lsu_pkt_r.unsign), .dout(ibuf_unsign), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*);
rvdffs #(.WIDTH(1)) ibuf_writeff (.din(lsu_pkt_r.store), .dout(ibuf_write), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*);
rvdffs #(.WIDTH(2)) ibuf_szff (.din(ibuf_sz_in[1:0]), .dout(ibuf_sz), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*);
rvdffe #(.WIDTH(32)) ibuf_addrff (.din(ibuf_addr_in[31:0]), .dout(ibuf_addr), .en(ibuf_wr_en), .*);
rvdffs #(.WIDTH(4)) ibuf_byteenff (.din(ibuf_byteen_in[3:0]), .dout(ibuf_byteen), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*);
rvdffe #(.WIDTH(32)) ibuf_dataff (.din(ibuf_data_in[31:0]), .dout(ibuf_data), .en(ibuf_wr_en), .*);
rvdff #(.WIDTH(TIMER_LOG2)) ibuf_timerff (.din(ibuf_timer_in), .dout(ibuf_timer), .clk(lsu_free_c2_clk), .*);
//------------------------------------------------------------------------------
// Input buffer logic ends here
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// Output buffer logic starts here
//------------------------------------------------------------------------------
assign obuf_wr_wait = (buf_numvld_wrcmd_any[3:0] == 4'b1) & (buf_numvld_cmd_any[3:0] == 4'b1) & (obuf_wr_timer != TIMER_MAX) &
~bus_coalescing_disable & ~buf_nomerge[CmdPtr0] & ~buf_sideeffect[CmdPtr0] & ~obuf_force_wr_en;
assign obuf_wr_timer_in = obuf_wr_en ? 3'b0: (((buf_numvld_cmd_any > 4'b0) & (obuf_wr_timer < TIMER_MAX)) ? (obuf_wr_timer + 1'b1) : obuf_wr_timer);
assign obuf_force_wr_en = lsu_busreq_m & ~lsu_busreq_r & ~ibuf_valid & (buf_numvld_cmd_any[3:0] == 4'b1) & (lsu_addr_m[31:2] != buf_addr[CmdPtr0][31:2]); // Entry in m can't merge with entry going to obuf and there is no entry in between
assign ibuf_buf_byp = ibuf_byp & (buf_numvld_pend_any[3:0] == 4'b0) & (~lsu_pkt_r.store | no_dword_merge_r);
assign obuf_wr_en = ((ibuf_buf_byp & lsu_commit_r & ~(is_sideeffects_r & bus_sideeffect_pend)) |
((buf_state[CmdPtr0] == CMD) & found_cmdptr0 & ~buf_cmd_state_bus_en[CmdPtr0] & ~(buf_sideeffect[CmdPtr0] & bus_sideeffect_pend) &
(~(buf_dual[CmdPtr0] & buf_samedw[CmdPtr0] & ~buf_write[CmdPtr0]) | found_cmdptr1 | buf_nomerge[CmdPtr0] | obuf_force_wr_en))) &
(bus_cmd_ready | ~obuf_valid | obuf_nosend) & ~obuf_wr_wait & ~bus_addr_match_pending & lsu_bus_clk_en;
assign obuf_rst = ((bus_cmd_sent | (obuf_valid & obuf_nosend)) & ~obuf_wr_en & lsu_bus_clk_en) | dec_tlu_force_halt;
assign obuf_write_in = ibuf_buf_byp ? lsu_pkt_r.store : buf_write[CmdPtr0];
assign obuf_sideeffect_in = ibuf_buf_byp ? is_sideeffects_r : buf_sideeffect[CmdPtr0];
assign obuf_addr_in[31:0] = ibuf_buf_byp ? lsu_addr_r[31:0] : buf_addr[CmdPtr0];
assign obuf_sz_in[1:0] = ibuf_buf_byp ? {lsu_pkt_r.word, lsu_pkt_r.half} : buf_sz[CmdPtr0];
assign obuf_merge_in = obuf_merge_en;
assign obuf_tag0_in[pt.LSU_BUS_TAG-1:0] = ibuf_buf_byp ? (pt.LSU_BUS_TAG)'(WrPtr0_r) : (pt.LSU_BUS_TAG)'(CmdPtr0);
assign obuf_tag1_in[pt.LSU_BUS_TAG-1:0] = ibuf_buf_byp ? (pt.LSU_BUS_TAG)'(WrPtr1_r) : (pt.LSU_BUS_TAG)'(CmdPtr1);
assign obuf_cmd_done_in = ~(obuf_wr_en | obuf_rst) & (obuf_cmd_done | bus_wcmd_sent);
assign obuf_data_done_in = ~(obuf_wr_en | obuf_rst) & (obuf_data_done | bus_wdata_sent);
assign obuf_aligned_in = ibuf_buf_byp ? is_aligned_r : ((obuf_sz_in[1:0] == 2'b0) |
(obuf_sz_in[0] & ~obuf_addr_in[0]) |
(obuf_sz_in[1] & ~(|obuf_addr_in[1:0])));
assign obuf_rdrsp_pend_in = ((~(obuf_wr_en & ~obuf_nosend_in) & obuf_rdrsp_pend & ~(bus_rsp_read & (bus_rsp_read_tag == obuf_rdrsp_tag))) | (bus_cmd_sent & ~obuf_write)) & ~dec_tlu_force_halt;
assign obuf_rdrsp_pend_en = lsu_bus_clk_en | dec_tlu_force_halt;
assign obuf_rdrsp_tag_in[pt.LSU_BUS_TAG-1:0] = (bus_cmd_sent & ~obuf_write) ? obuf_tag0[pt.LSU_BUS_TAG-1:0] : obuf_rdrsp_tag[pt.LSU_BUS_TAG-1:0];
// No ld to ld fwd for aligned
assign obuf_nosend_in = (obuf_addr_in[31:3] == obuf_addr[31:3]) & obuf_aligned_in & ~obuf_sideeffect & ~obuf_write & ~obuf_write_in & ~dec_tlu_external_ldfwd_disable &
((obuf_valid & ~obuf_nosend) | (obuf_rdrsp_pend & ~(bus_rsp_read & (bus_rsp_read_tag == obuf_rdrsp_tag))));
assign obuf_byteen0_in[7:0] = ibuf_buf_byp ? (lsu_addr_r[2] ? {ldst_byteen_lo_r[3:0],4'b0} : {4'b0,ldst_byteen_lo_r[3:0]}) :
(buf_addr[CmdPtr0][2] ? {buf_byteen[CmdPtr0],4'b0} : {4'b0,buf_byteen[CmdPtr0]});
assign obuf_byteen1_in[7:0] = ibuf_buf_byp ? (end_addr_r[2] ? {ldst_byteen_hi_r[3:0],4'b0} : {4'b0,ldst_byteen_hi_r[3:0]}) :
(buf_addr[CmdPtr1][2] ? {buf_byteen[CmdPtr1],4'b0} : {4'b0,buf_byteen[CmdPtr1]});
assign obuf_data0_in[63:0] = ibuf_buf_byp ? (lsu_addr_r[2] ? {store_data_lo_r[31:0],32'b0} : {32'b0,store_data_lo_r[31:0]}) :
(buf_addr[CmdPtr0][2] ? {buf_data[CmdPtr0],32'b0} : {32'b0,buf_data[CmdPtr0]});
assign obuf_data1_in[63:0] = ibuf_buf_byp ? (end_addr_r[2] ? {store_data_hi_r[31:0],32'b0} :{32'b0,store_data_hi_r[31:0]}) :
(buf_addr[CmdPtr1][2] ? {buf_data[CmdPtr1],32'b0} : {32'b0,buf_data[CmdPtr1]});
for (genvar i=0 ;i<8; i++) begin
assign obuf_byteen_in[i] = obuf_byteen0_in[i] | (obuf_merge_en & obuf_byteen1_in[i]);
assign obuf_data_in[(8*i)+7:(8*i)] = (obuf_merge_en & obuf_byteen1_in[i]) ? obuf_data1_in[(8*i)+7:(8*i)] : obuf_data0_in[(8*i)+7:(8*i)];
end
// No store obuf merging for AXI since all stores are sent non-posted. Can't track the second id right now
assign obuf_merge_en = ((CmdPtr0 != CmdPtr1) & found_cmdptr0 & found_cmdptr1 & (buf_state[CmdPtr0] == CMD) & (buf_state[CmdPtr1] == CMD) &
~buf_cmd_state_bus_en[CmdPtr0] & ~buf_sideeffect[CmdPtr0] &
(~buf_write[CmdPtr0] & buf_dual[CmdPtr0] & ~buf_dualhi[CmdPtr0] & buf_samedw[CmdPtr0])) | // CmdPtr0/CmdPtr1 are for same load which is within a DW
(ibuf_buf_byp & ldst_samedw_r & ldst_dual_r);
rvdff_fpga #(.WIDTH(1)) obuf_wren_ff (.din(obuf_wr_en), .dout(obuf_wr_enQ), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdffsc #(.WIDTH(1)) obuf_valid_ff (.din(1'b1), .dout(obuf_valid), .en(obuf_wr_en), .clear(obuf_rst), .clk(lsu_free_c2_clk), .*);
rvdffs #(.WIDTH(1)) obuf_nosend_ff (.din(obuf_nosend_in), .dout(obuf_nosend), .en(obuf_wr_en), .clk(lsu_free_c2_clk), .*);
rvdffs #(.WIDTH(1)) obuf_rdrsp_pend_ff(.din(obuf_rdrsp_pend_in), .dout(obuf_rdrsp_pend), .en(obuf_rdrsp_pend_en), .clk(lsu_free_c2_clk), .*);
rvdff_fpga #(.WIDTH(1)) obuf_cmd_done_ff (.din(obuf_cmd_done_in), .dout(obuf_cmd_done), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) obuf_data_done_ff (.din(obuf_data_done_in), .dout(obuf_data_done), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(pt.LSU_BUS_TAG)) obuf_rdrsp_tagff (.din(obuf_rdrsp_tag_in), .dout(obuf_rdrsp_tag), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(pt.LSU_BUS_TAG)) obuf_tag0ff (.din(obuf_tag0_in), .dout(obuf_tag0), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(pt.LSU_BUS_TAG)) obuf_tag1ff (.din(obuf_tag1_in), .dout(obuf_tag1), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(1)) obuf_mergeff (.din(obuf_merge_in), .dout(obuf_merge), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(1)) obuf_writeff (.din(obuf_write_in), .dout(obuf_write), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(1)) obuf_sideeffectff (.din(obuf_sideeffect_in), .dout(obuf_sideeffect), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(2)) obuf_szff (.din(obuf_sz_in[1:0]), .dout(obuf_sz), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(8)) obuf_byteenff (.din(obuf_byteen_in[7:0]), .dout(obuf_byteen), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*);
rvdffe #(.WIDTH(32)) obuf_addrff (.din(obuf_addr_in[31:0]), .dout(obuf_addr), .en(obuf_wr_en), .*);
rvdffe #(.WIDTH(64)) obuf_dataff (.din(obuf_data_in[63:0]), .dout(obuf_data), .en(obuf_wr_en), .*);
rvdff_fpga #(.WIDTH(TIMER_LOG2)) obuf_timerff (.din(obuf_wr_timer_in), .dout(obuf_wr_timer), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
//------------------------------------------------------------------------------
// Output buffer logic ends here
//------------------------------------------------------------------------------
// Find the entry to allocate and entry to send
always_comb begin
WrPtr0_m[DEPTH_LOG2-1:0] = '0;
WrPtr1_m[DEPTH_LOG2-1:0] = '0;
found_wrptr0 = '0;
found_wrptr1 = '0;
// Find first write pointer
for (int i=0; i<DEPTH; i++) begin
if (~found_wrptr0) begin
WrPtr0_m[DEPTH_LOG2-1:0] = DEPTH_LOG2'(i);
found_wrptr0 = (buf_state[i] == IDLE) & ~((ibuf_valid & (ibuf_tag == i)) |
(lsu_busreq_r & ((WrPtr0_r == i) | (ldst_dual_r & (WrPtr1_r == i)))));
end
end
// Find second write pointer
for (int i=0; i<DEPTH; i++) begin
if (~found_wrptr1) begin
WrPtr1_m[DEPTH_LOG2-1:0] = DEPTH_LOG2'(i);
found_wrptr1 = (buf_state[i] == IDLE) & ~((ibuf_valid & (ibuf_tag == i)) |
(lsu_busreq_m & (WrPtr0_m == i)) |
(lsu_busreq_r & ((WrPtr0_r == i) | (ldst_dual_r & (WrPtr1_r == i)))));
end
end
end
// Get the command ptr
for (genvar i=0; i<DEPTH; i++) begin
// These should be one-hot
assign CmdPtr0Dec[i] = ~(|buf_age[i]) & (buf_state[i] == CMD) & ~buf_cmd_state_bus_en[i];
assign CmdPtr1Dec[i] = ~(|(buf_age[i] & ~CmdPtr0Dec)) & ~CmdPtr0Dec[i] & (buf_state[i] == CMD) & ~buf_cmd_state_bus_en[i];
assign RspPtrDec[i] = ~(|buf_rsp_pickage[i]) & (buf_state[i] == DONE_WAIT);
end
assign found_cmdptr0 = |CmdPtr0Dec;
assign found_cmdptr1 = |CmdPtr1Dec;
assign CmdPtr0 = f_Enc8to3(8'(CmdPtr0Dec[DEPTH-1:0]));
assign CmdPtr1 = f_Enc8to3(8'(CmdPtr1Dec[DEPTH-1:0]));
assign RspPtr = f_Enc8to3(8'(RspPtrDec[DEPTH-1:0]));
// Age vector
for (genvar i=0; i<DEPTH; i++) begin: GenAgeVec
for (genvar j=0; j<DEPTH; j++) begin
assign buf_age_in[i][j] = (((buf_state[i] == IDLE) & buf_state_en[i]) &
(((buf_state[j] == WAIT) | ((buf_state[j] == CMD) & ~buf_cmd_state_bus_en[j])) | // Set age bit for older entries
(ibuf_drain_vld & lsu_busreq_r & (ibuf_byp | ldst_dual_r) & (i == WrPtr0_r) & (j == ibuf_tag)) | // Set case for dual lo
(ibuf_byp & lsu_busreq_r & ldst_dual_r & (i == WrPtr1_r) & (j == WrPtr0_r)))) | // ibuf bypass case
buf_age[i][j];
assign buf_age[i][j] = buf_ageQ[i][j] & ~((buf_state[j] == CMD) & buf_cmd_state_bus_en[j]) & ~dec_tlu_force_halt; // Reset case
assign buf_age_younger[i][j] = (i == j) ? 1'b0: (~buf_age[i][j] & (buf_state[j] != IDLE)); // Younger entries
end
end
// Age vector for responses
for (genvar i=0; i<DEPTH; i++) begin: GenRspAgeVec
for (genvar j=0; j<DEPTH; j++) begin
assign buf_rspage_set[i][j] = ((buf_state[i] == IDLE) & buf_state_en[i]) &
(~((buf_state[j] == IDLE) | (buf_state[j] == DONE)) | // Set age bit for older entries
(ibuf_drain_vld & lsu_busreq_r & (ibuf_byp | ldst_dual_r) & (DEPTH_LOG2'(i) == WrPtr0_r) & (DEPTH_LOG2'(j) == ibuf_tag)) | // Set case for dual lo
(ibuf_byp & lsu_busreq_r & ldst_dual_r & (DEPTH_LOG2'(i) == WrPtr1_r) & (DEPTH_LOG2'(j) == WrPtr0_r)));
assign buf_rspage_in[i][j] = buf_rspage_set[i][j] | buf_rspage[i][j];
assign buf_rspage[i][j] = buf_rspageQ[i][j] & ~((buf_state[j] == DONE) | (buf_state[j] == IDLE)) & ~dec_tlu_force_halt; // Reset case
assign buf_rsp_pickage[i][j] = buf_rspageQ[i][j] & (buf_state[j] == DONE_WAIT);
end
end
//------------------------------------------------------------------------------
// Buffer logic
//------------------------------------------------------------------------------
for (genvar i=0; i<DEPTH; i++) begin
assign ibuf_drainvec_vld[i] = (ibuf_drain_vld & (i == ibuf_tag));
assign buf_byteen_in[i] = ibuf_drainvec_vld[i] ? ibuf_byteen_out[3:0] : ((ibuf_byp & ldst_dual_r & (i == WrPtr1_r)) ? ldst_byteen_hi_r[3:0] : ldst_byteen_lo_r[3:0]);
assign buf_addr_in[i] = ibuf_drainvec_vld[i] ? ibuf_addr[31:0] : ((ibuf_byp & ldst_dual_r & (i == WrPtr1_r)) ? end_addr_r[31:0] : lsu_addr_r[31:0]);
assign buf_dual_in[i] = ibuf_drainvec_vld[i] ? ibuf_dual : ldst_dual_r;
assign buf_samedw_in[i] = ibuf_drainvec_vld[i] ? ibuf_samedw : ldst_samedw_r;
assign buf_nomerge_in[i] = ibuf_drainvec_vld[i] ? (ibuf_nomerge | ibuf_force_drain) : no_dword_merge_r;
assign buf_dualhi_in[i] = ibuf_drainvec_vld[i] ? ibuf_dual : (ibuf_byp & ldst_dual_r & (i == WrPtr1_r)); // If it's dual, ibuf will always have the high
assign buf_dualtag_in[i] = ibuf_drainvec_vld[i] ? ibuf_dualtag : ((ibuf_byp & ldst_dual_r & (i == WrPtr1_r)) ? WrPtr0_r : WrPtr1_r);
assign buf_sideeffect_in[i] = ibuf_drainvec_vld[i] ? ibuf_sideeffect : is_sideeffects_r;
assign buf_unsign_in[i] = ibuf_drainvec_vld[i] ? ibuf_unsign : lsu_pkt_r.unsign;
assign buf_sz_in[i] = ibuf_drainvec_vld[i] ? ibuf_sz : {lsu_pkt_r.word, lsu_pkt_r.half};
assign buf_write_in[i] = ibuf_drainvec_vld[i] ? ibuf_write : lsu_pkt_r.store;
// Buffer entry state machine
always_comb begin
buf_nxtstate[i] = IDLE;
buf_state_en[i] = '0;
buf_resp_state_bus_en[i] = '0;
buf_state_bus_en[i] = '0;
buf_wr_en[i] = '0;
buf_data_in[i] = '0;
buf_data_en[i] = '0;
buf_error_en[i] = '0;
buf_rst[i] = dec_tlu_force_halt;
buf_ldfwd_en[i] = dec_tlu_force_halt;
buf_ldfwd_in[i] = '0;
buf_ldfwdtag_in[i] = '0;
case (buf_state[i])
IDLE: begin
buf_nxtstate[i] = lsu_bus_clk_en ? CMD : WAIT;
buf_state_en[i] = (lsu_busreq_r & lsu_commit_r & (((ibuf_byp | ldst_dual_r) & ~ibuf_merge_en & (i == WrPtr0_r)) | (ibuf_byp & ldst_dual_r & (i == WrPtr1_r)))) |
(ibuf_drain_vld & (i == ibuf_tag));
buf_wr_en[i] = buf_state_en[i];
buf_data_en[i] = buf_state_en[i];
buf_data_in[i] = (ibuf_drain_vld & (i == ibuf_tag)) ? ibuf_data_out[31:0] : store_data_lo_r[31:0];
buf_cmd_state_bus_en[i] = '0;
end
WAIT: begin
buf_nxtstate[i] = dec_tlu_force_halt ? IDLE : CMD;
buf_state_en[i] = lsu_bus_clk_en | dec_tlu_force_halt;
buf_cmd_state_bus_en[i] = '0;
end
CMD: begin
buf_nxtstate[i] = dec_tlu_force_halt ? IDLE : (obuf_nosend & bus_rsp_read & (bus_rsp_read_tag == obuf_rdrsp_tag)) ? DONE_WAIT : RESP;
buf_cmd_state_bus_en[i] = ((obuf_tag0 == i) | (obuf_merge & (obuf_tag1 == i))) & obuf_valid & obuf_wr_enQ; // Just use the recently written obuf_valid
buf_state_bus_en[i] = buf_cmd_state_bus_en[i];
buf_state_en[i] = (buf_state_bus_en[i] & lsu_bus_clk_en) | dec_tlu_force_halt;
buf_ldfwd_in[i] = 1'b1;
buf_ldfwd_en[i] = buf_state_en[i] & ~buf_write[i] & obuf_nosend & ~dec_tlu_force_halt;
buf_ldfwdtag_in[i] = DEPTH_LOG2'(obuf_rdrsp_tag[pt.LSU_BUS_TAG-2:0]);
buf_data_en[i] = buf_state_bus_en[i] & lsu_bus_clk_en & obuf_nosend & bus_rsp_read;
buf_error_en[i] = buf_state_bus_en[i] & lsu_bus_clk_en & obuf_nosend & bus_rsp_read_error;
buf_data_in[i] = buf_error_en[i] ? bus_rsp_rdata[31:0] : (buf_addr[i][2] ? bus_rsp_rdata[63:32] : bus_rsp_rdata[31:0]);
end
RESP: begin
buf_nxtstate[i] = (dec_tlu_force_halt | (buf_write[i] & ~bus_rsp_write_error)) ? IDLE : // Side-effect writes will be non-posted
(buf_dual[i] & ~buf_samedw[i] & ~buf_write[i] & (buf_state[buf_dualtag[i]] != DONE_PARTIAL)) ? DONE_PARTIAL : // Goto DONE_PARTIAL if this is the first return of dual
(buf_ldfwd[i] | any_done_wait_state |
(buf_dual[i] & ~buf_samedw[i] & ~buf_write[i] & buf_ldfwd[buf_dualtag[i]] &
(buf_state[buf_dualtag[i]] == DONE_PARTIAL) & any_done_wait_state)) ? DONE_WAIT : DONE;
buf_resp_state_bus_en[i] = (bus_rsp_write & (bus_rsp_write_tag == (pt.LSU_BUS_TAG)'(i))) |
(bus_rsp_read & ((bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(i)) |
(buf_ldfwd[i] & (bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(buf_ldfwdtag[i]))) |
(buf_dual[i] & buf_dualhi[i] & ~buf_write[i] & buf_samedw[i] & (bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(buf_dualtag[i])))));
buf_state_bus_en[i] = buf_resp_state_bus_en[i];
buf_state_en[i] = (buf_state_bus_en[i] & lsu_bus_clk_en) | dec_tlu_force_halt;
buf_data_en[i] = buf_state_bus_en[i] & bus_rsp_read & lsu_bus_clk_en;
// Need to capture the error for stores as well for AXI
buf_error_en[i] = buf_state_bus_en[i] & lsu_bus_clk_en & ((bus_rsp_read_error & (bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(i))) |
(bus_rsp_read_error & buf_ldfwd[i] & (bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(buf_ldfwdtag[i]))) |
(bus_rsp_write_error & (bus_rsp_write_tag == (pt.LSU_BUS_TAG)'(i))));
buf_data_in[i][31:0] = (buf_state_en[i] & ~buf_error_en[i]) ? (buf_addr[i][2] ? bus_rsp_rdata[63:32] : bus_rsp_rdata[31:0]) : bus_rsp_rdata[31:0];
buf_cmd_state_bus_en[i] = '0;
end
DONE_PARTIAL: begin // Other part of dual load hasn't returned
buf_nxtstate[i] = dec_tlu_force_halt ? IDLE : (buf_ldfwd[i] | buf_ldfwd[buf_dualtag[i]] | any_done_wait_state) ? DONE_WAIT : DONE;
buf_state_bus_en[i] = bus_rsp_read & ((bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(buf_dualtag[i])) |
(buf_ldfwd[buf_dualtag[i]] & (bus_rsp_read_tag == (pt.LSU_BUS_TAG)'(buf_ldfwdtag[buf_dualtag[i]]))));
buf_state_en[i] = (buf_state_bus_en[i] & lsu_bus_clk_en) | dec_tlu_force_halt;
buf_cmd_state_bus_en[i] = '0;
end
DONE_WAIT: begin // WAIT state if there are multiple outstanding nb returns
buf_nxtstate[i] = dec_tlu_force_halt ? IDLE : DONE;
buf_state_en[i] = ((RspPtr == DEPTH_LOG2'(i)) | (buf_dual[i] & (buf_dualtag[i] == RspPtr))) | dec_tlu_force_halt;
buf_cmd_state_bus_en[i] = '0;
end
DONE: begin
buf_nxtstate[i] = IDLE;
buf_rst[i] = 1'b1;
buf_state_en[i] = 1'b1;
buf_ldfwd_in[i] = 1'b0;
buf_ldfwd_en[i] = buf_state_en[i];
buf_cmd_state_bus_en[i] = '0;
end
default : begin
buf_nxtstate[i] = IDLE;
buf_state_en[i] = '0;
buf_resp_state_bus_en[i] = '0;
buf_state_bus_en[i] = '0;
buf_wr_en[i] = '0;
buf_data_in[i] = '0;
buf_data_en[i] = '0;
buf_error_en[i] = '0;
buf_rst[i] = '0;
buf_cmd_state_bus_en[i] = '0;
end
endcase
end
rvdffs #(.WIDTH($bits(state_t))) buf_state_ff (.din(buf_nxtstate[i]), .dout({buf_state[i]}), .en(buf_state_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdff #(.WIDTH(DEPTH)) buf_ageff (.din(buf_age_in[i]), .dout(buf_ageQ[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdff #(.WIDTH(DEPTH)) buf_rspageff (.din(buf_rspage_in[i]), .dout(buf_rspageQ[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffs #(.WIDTH(DEPTH_LOG2)) buf_dualtagff (.din(buf_dualtag_in[i]), .dout(buf_dualtag[i]), .en(buf_wr_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffs #(.WIDTH(1)) buf_dualff (.din(buf_dual_in[i]), .dout(buf_dual[i]), .en(buf_wr_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffs #(.WIDTH(1)) buf_samedwff (.din(buf_samedw_in[i]), .dout(buf_samedw[i]), .en(buf_wr_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffs #(.WIDTH(1)) buf_nomergeff (.din(buf_nomerge_in[i]), .dout(buf_nomerge[i]), .en(buf_wr_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffs #(.WIDTH(1)) buf_dualhiff (.din(buf_dualhi_in[i]), .dout(buf_dualhi[i]), .en(buf_wr_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffs #(.WIDTH(1)) buf_ldfwdff (.din(buf_ldfwd_in[i]), .dout(buf_ldfwd[i]), .en(buf_ldfwd_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffs #(.WIDTH(DEPTH_LOG2)) buf_ldfwdtagff (.din(buf_ldfwdtag_in[i]), .dout(buf_ldfwdtag[i]), .en(buf_ldfwd_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffs #(.WIDTH(1)) buf_sideeffectff (.din(buf_sideeffect_in[i]), .dout(buf_sideeffect[i]), .en(buf_wr_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffs #(.WIDTH(1)) buf_unsignff (.din(buf_unsign_in[i]), .dout(buf_unsign[i]), .en(buf_wr_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffs #(.WIDTH(1)) buf_writeff (.din(buf_write_in[i]), .dout(buf_write[i]), .en(buf_wr_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffs #(.WIDTH(2)) buf_szff (.din(buf_sz_in[i]), .dout(buf_sz[i]), .en(buf_wr_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffe #(.WIDTH(32)) buf_addrff (.din(buf_addr_in[i][31:0]), .dout(buf_addr[i]), .en(buf_wr_en[i]), .*);
rvdffs #(.WIDTH(4)) buf_byteenff (.din(buf_byteen_in[i][3:0]), .dout(buf_byteen[i]), .en(buf_wr_en[i]), .clk(lsu_bus_buf_c1_clk), .*);
rvdffe #(.WIDTH(32)) buf_dataff (.din(buf_data_in[i][31:0]), .dout(buf_data[i]), .en(buf_data_en[i]), .*);
rvdffsc #(.WIDTH(1)) buf_errorff (.din(1'b1), .dout(buf_error[i]), .en(buf_error_en[i]), .clear(buf_rst[i]), .clk(lsu_bus_buf_c1_clk), .*);
end
// buffer full logic
always_comb begin
buf_numvld_any[3:0] = ({1'b0,lsu_busreq_m} << ldst_dual_m) +
({1'b0,lsu_busreq_r} << ldst_dual_r) +
ibuf_valid;
buf_numvld_wrcmd_any[3:0] = 4'b0;
buf_numvld_cmd_any[3:0] = 4'b0;
buf_numvld_pend_any[3:0] = 4'b0;
any_done_wait_state = 1'b0;
for (int i=0; i<DEPTH; i++) begin
buf_numvld_any[3:0] += {3'b0, (buf_state[i] != IDLE)};
buf_numvld_wrcmd_any[3:0] += {3'b0, (buf_write[i] & (buf_state[i] == CMD) & ~buf_cmd_state_bus_en[i])};
buf_numvld_cmd_any[3:0] += {3'b0, ((buf_state[i] == CMD) & ~buf_cmd_state_bus_en[i])};
buf_numvld_pend_any[3:0] += {3'b0, ((buf_state[i] == WAIT) | ((buf_state[i] == CMD) & ~buf_cmd_state_bus_en[i]))};
any_done_wait_state |= (buf_state[i] == DONE_WAIT);
end
end
assign lsu_bus_buffer_pend_any = (buf_numvld_pend_any != 0);
assign lsu_bus_buffer_full_any = (ldst_dual_d & dec_lsu_valid_raw_d) ? (buf_numvld_any[3:0] >= (DEPTH-1)) : (buf_numvld_any[3:0] == DEPTH);
assign lsu_bus_buffer_empty_any = ~(|buf_state[DEPTH-1:0]) & ~ibuf_valid & ~obuf_valid;
// Non blocking ports
assign lsu_nonblock_load_valid_m = lsu_busreq_m & lsu_pkt_m.valid & lsu_pkt_m.load & ~flush_m_up & ~ld_full_hit_m;
assign lsu_nonblock_load_tag_m[DEPTH_LOG2-1:0] = WrPtr0_m[DEPTH_LOG2-1:0];
assign lsu_nonblock_load_inv_r = lsu_nonblock_load_valid_r & ~lsu_commit_r;
assign lsu_nonblock_load_inv_tag_r[DEPTH_LOG2-1:0] = WrPtr0_r[DEPTH_LOG2-1:0]; // r tag needs to be accurate even if there is no invalidate
always_comb begin
lsu_nonblock_load_data_ready = '0;
lsu_nonblock_load_data_error = '0;
lsu_nonblock_load_data_tag[DEPTH_LOG2-1:0] = '0;
lsu_nonblock_load_data_lo[31:0] = '0;
lsu_nonblock_load_data_hi[31:0] = '0;
for (int i=0; i<DEPTH; i++) begin
// Use buf_rst[i] instead of buf_state_en[i] for timing
lsu_nonblock_load_data_ready |= (buf_state[i] == DONE) & ~buf_write[i];
lsu_nonblock_load_data_error |= (buf_state[i] == DONE) & buf_error[i] & ~buf_write[i];
lsu_nonblock_load_data_tag[DEPTH_LOG2-1:0] |= DEPTH_LOG2'(i) & {DEPTH_LOG2{((buf_state[i] == DONE) & ~buf_write[i] & (~buf_dual[i] | ~buf_dualhi[i]))}};
lsu_nonblock_load_data_lo[31:0] |= buf_data[i][31:0] & {32{((buf_state[i] == DONE) & ~buf_write[i] & (~buf_dual[i] | ~buf_dualhi[i]))}};
lsu_nonblock_load_data_hi[31:0] |= buf_data[i][31:0] & {32{((buf_state[i] == DONE) & ~buf_write[i] & (buf_dual[i] & buf_dualhi[i]))}};
end
end
assign lsu_nonblock_addr_offset[1:0] = buf_addr[lsu_nonblock_load_data_tag][1:0];
assign lsu_nonblock_sz[1:0] = buf_sz[lsu_nonblock_load_data_tag][1:0];
assign lsu_nonblock_unsign = buf_unsign[lsu_nonblock_load_data_tag];
assign lsu_nonblock_data_unalgn[31:0] = 32'({lsu_nonblock_load_data_hi[31:0], lsu_nonblock_load_data_lo[31:0]} >> 8*lsu_nonblock_addr_offset[1:0]);
assign lsu_nonblock_load_data_valid = lsu_nonblock_load_data_ready & ~lsu_nonblock_load_data_error;
assign lsu_nonblock_load_data[31:0] = ({32{ lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b00)}} & {24'b0,lsu_nonblock_data_unalgn[7:0]}) |
({32{ lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b01)}} & {16'b0,lsu_nonblock_data_unalgn[15:0]}) |
({32{~lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b00)}} & {{24{lsu_nonblock_data_unalgn[7]}}, lsu_nonblock_data_unalgn[7:0]}) |
({32{~lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b01)}} & {{16{lsu_nonblock_data_unalgn[15]}},lsu_nonblock_data_unalgn[15:0]}) |
({32{(lsu_nonblock_sz[1:0] == 2'b10)}} & lsu_nonblock_data_unalgn[31:0]);
// Determine if there is a pending return to sideeffect load/store
always_comb begin
bus_sideeffect_pend = obuf_valid & obuf_sideeffect & dec_tlu_sideeffect_posted_disable;
for (int i=0; i<DEPTH; i++) begin
bus_sideeffect_pend |= ((buf_state[i] == RESP) & buf_sideeffect[i] & dec_tlu_sideeffect_posted_disable);
end
end
// We have no ordering rules for AXI. Need to check outstanding trxns to same address for AXI
always_comb begin
bus_addr_match_pending = '0;
for (int i=0; i<DEPTH; i++) begin
bus_addr_match_pending |= (obuf_valid & (obuf_addr[31:3] == buf_addr[i][31:3]) & (buf_state[i] == RESP) & ~((obuf_tag0 == (pt.LSU_BUS_TAG)'(i)) | (obuf_merge & (obuf_tag1 == (pt.LSU_BUS_TAG)'(i)))));
end
end
// Generic bus signals
assign bus_cmd_ready = obuf_write ? ((obuf_cmd_done | obuf_data_done) ? (obuf_cmd_done ? lsu_axi_wready : lsu_axi_awready) : (lsu_axi_awready & lsu_axi_wready)) : lsu_axi_arready;
assign bus_wcmd_sent = lsu_axi_awvalid & lsu_axi_awready;
assign bus_wdata_sent = lsu_axi_wvalid & lsu_axi_wready;
assign bus_cmd_sent = ((obuf_cmd_done | bus_wcmd_sent) & (obuf_data_done | bus_wdata_sent)) | (lsu_axi_arvalid & lsu_axi_arready);
assign bus_rsp_read = lsu_axi_rvalid & lsu_axi_rready;
assign bus_rsp_write = lsu_axi_bvalid & lsu_axi_bready;
assign bus_rsp_read_tag[pt.LSU_BUS_TAG-1:0] = lsu_axi_rid[pt.LSU_BUS_TAG-1:0];
assign bus_rsp_write_tag[pt.LSU_BUS_TAG-1:0] = lsu_axi_bid[pt.LSU_BUS_TAG-1:0];
assign bus_rsp_write_error = bus_rsp_write & (lsu_axi_bresp[1:0] != 2'b0);
assign bus_rsp_read_error = bus_rsp_read & (lsu_axi_rresp[1:0] != 2'b0);
assign bus_rsp_rdata[63:0] = lsu_axi_rdata[63:0];
// AXI command signals
assign lsu_axi_awvalid = obuf_valid & obuf_write & ~obuf_cmd_done & ~bus_addr_match_pending;
assign lsu_axi_awid[pt.LSU_BUS_TAG-1:0] = (pt.LSU_BUS_TAG)'(obuf_tag0);
assign lsu_axi_awaddr[31:0] = obuf_sideeffect ? obuf_addr[31:0] : {obuf_addr[31:3],3'b0};
assign lsu_axi_awsize[2:0] = obuf_sideeffect ? {1'b0, obuf_sz[1:0]} : 3'b011;
assign lsu_axi_awprot[2:0] = 3'b001;
assign lsu_axi_awcache[3:0] = obuf_sideeffect ? 4'b0 : 4'b1111;
assign lsu_axi_awregion[3:0] = obuf_addr[31:28];
assign lsu_axi_awlen[7:0] = '0;
assign lsu_axi_awburst[1:0] = 2'b01;
assign lsu_axi_awqos[3:0] = '0;
assign lsu_axi_awlock = '0;
assign lsu_axi_wvalid = obuf_valid & obuf_write & ~obuf_data_done & ~bus_addr_match_pending;
assign lsu_axi_wstrb[7:0] = obuf_byteen[7:0] & {8{obuf_write}};
assign lsu_axi_wdata[63:0] = obuf_data[63:0];
assign lsu_axi_wlast = '1;
assign lsu_axi_arvalid = obuf_valid & ~obuf_write & ~obuf_nosend & ~bus_addr_match_pending;
assign lsu_axi_arid[pt.LSU_BUS_TAG-1:0] = (pt.LSU_BUS_TAG)'(obuf_tag0);
assign lsu_axi_araddr[31:0] = obuf_sideeffect ? obuf_addr[31:0] : {obuf_addr[31:3],3'b0};
assign lsu_axi_arsize[2:0] = obuf_sideeffect ? {1'b0, obuf_sz[1:0]} : 3'b011;
assign lsu_axi_arprot[2:0] = 3'b001;
assign lsu_axi_arcache[3:0] = obuf_sideeffect ? 4'b0 : 4'b1111;
assign lsu_axi_arregion[3:0] = obuf_addr[31:28];
assign lsu_axi_arlen[7:0] = '0;
assign lsu_axi_arburst[1:0] = 2'b01;
assign lsu_axi_arqos[3:0] = '0;
assign lsu_axi_arlock = '0;
assign lsu_axi_bready = 1;
assign lsu_axi_rready = 1;
always_comb begin
lsu_imprecise_error_store_any = '0;
lsu_imprecise_error_store_tag = '0;
for (int i=0; i<DEPTH; i++) begin
lsu_imprecise_error_store_any |= lsu_bus_clk_en_q & (buf_state[i] == DONE) & buf_error[i] & buf_write[i];
lsu_imprecise_error_store_tag |= DEPTH_LOG2'(i) & {DEPTH_LOG2{((buf_state[i] == DONE) & buf_error[i] & buf_write[i])}};
end
end
assign lsu_imprecise_error_load_any = lsu_nonblock_load_data_error & ~lsu_imprecise_error_store_any; // This is to make sure we send only one imprecise error for load/store
assign lsu_imprecise_error_addr_any[31:0] = lsu_imprecise_error_store_any ? buf_addr[lsu_imprecise_error_store_tag] : buf_addr[lsu_nonblock_load_data_tag];
// PMU signals
assign lsu_pmu_bus_trxn = (lsu_axi_awvalid & lsu_axi_awready) | (lsu_axi_wvalid & lsu_axi_wready) | (lsu_axi_arvalid & lsu_axi_arready);
assign lsu_pmu_bus_misaligned = lsu_busreq_r & ldst_dual_r & lsu_commit_r;
assign lsu_pmu_bus_error = lsu_imprecise_error_load_any | lsu_imprecise_error_store_any;
assign lsu_pmu_bus_busy = (lsu_axi_awvalid & ~lsu_axi_awready) | (lsu_axi_wvalid & ~lsu_axi_wready) | (lsu_axi_arvalid & ~lsu_axi_arready);
rvdff_fpga #(.WIDTH(1)) lsu_axi_awvalid_ff (.din(lsu_axi_awvalid), .dout(lsu_axi_awvalid_q), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) lsu_axi_awready_ff (.din(lsu_axi_awready), .dout(lsu_axi_awready_q), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) lsu_axi_wvalid_ff (.din(lsu_axi_wvalid), .dout(lsu_axi_wvalid_q), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) lsu_axi_wready_ff (.din(lsu_axi_wready), .dout(lsu_axi_wready_q), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) lsu_axi_arvalid_ff (.din(lsu_axi_arvalid), .dout(lsu_axi_arvalid_q), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) lsu_axi_arready_ff (.din(lsu_axi_arready), .dout(lsu_axi_arready_q), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) lsu_axi_bvalid_ff (.din(lsu_axi_bvalid), .dout(lsu_axi_bvalid_q), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) lsu_axi_bready_ff (.din(lsu_axi_bready), .dout(lsu_axi_bready_q), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(2)) lsu_axi_bresp_ff (.din(lsu_axi_bresp[1:0]), .dout(lsu_axi_bresp_q[1:0]), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(pt.LSU_BUS_TAG)) lsu_axi_bid_ff (.din(lsu_axi_bid[pt.LSU_BUS_TAG-1:0]),.dout(lsu_axi_bid_q[pt.LSU_BUS_TAG-1:0]),.clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdffe #(.WIDTH(64)) lsu_axi_rdata_ff (.din(lsu_axi_rdata[63:0]), .dout(lsu_axi_rdata_q[63:0]), .en((lsu_axi_rvalid | clk_override) & lsu_bus_clk_en), .*);
rvdff_fpga #(.WIDTH(1)) lsu_axi_rvalid_ff (.din(lsu_axi_rvalid), .dout(lsu_axi_rvalid_q), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) lsu_axi_rready_ff (.din(lsu_axi_rready), .dout(lsu_axi_rready_q), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(2)) lsu_axi_rresp_ff (.din(lsu_axi_rresp[1:0]), .dout(lsu_axi_rresp_q[1:0]), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(pt.LSU_BUS_TAG)) lsu_axi_rid_ff (.din(lsu_axi_rid[pt.LSU_BUS_TAG-1:0]),.dout(lsu_axi_rid_q[pt.LSU_BUS_TAG-1:0]),.clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*);
rvdff #(.WIDTH(DEPTH_LOG2)) lsu_WrPtr0_rff (.din(WrPtr0_m), .dout(WrPtr0_r), .clk(lsu_c2_r_clk), .*);
rvdff #(.WIDTH(DEPTH_LOG2)) lsu_WrPtr1_rff (.din(WrPtr1_m), .dout(WrPtr1_r), .clk(lsu_c2_r_clk), .*);
rvdff #(.WIDTH(1)) lsu_busreq_rff (.din(lsu_busreq_m & ~flush_r & ~ld_full_hit_m), .dout(lsu_busreq_r), .clk(lsu_c2_r_clk), .*);
rvdff #(.WIDTH(1)) lsu_nonblock_load_valid_rff (.din(lsu_nonblock_load_valid_m), .dout(lsu_nonblock_load_valid_r), .clk(lsu_c2_r_clk), .*);
`ifdef RV_ASSERT_ON
for (genvar i=0; i<4; i++) begin: GenByte
assert_ld_byte_hitvecfn_lo_onehot: assert #0 ($onehot0(ld_byte_hitvecfn_lo[i][DEPTH-1:0]));
assert_ld_byte_hitvecfn_hi_onehot: assert #0 ($onehot0(ld_byte_hitvecfn_hi[i][DEPTH-1:0]));
end
for (genvar i=0; i<DEPTH; i++) begin: GenAssertAge
assert_bufempty_agevec: assert #0 (~(lsu_bus_buffer_empty_any & |(buf_age[i])));
end
assert_CmdPtr0Dec_onehot: assert #0 ($onehot0(CmdPtr0Dec[DEPTH-1:0] & ~{DEPTH{dec_tlu_force_halt}}));
assert_CmdPtr1Dec_onehot: assert #0 ($onehot0(CmdPtr1Dec[DEPTH-1:0] & ~{DEPTH{dec_tlu_force_halt}}));
`endif
endmodule // el2_lsu_bus_buffer

View File

@ -0,0 +1,365 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: lsu interface with interface queue
// Comments:
//
//********************************************************************************
module el2_lsu_bus_intf
import el2_pkg::*;
#(
`include "el2_param.vh"
)(
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk_override, // Override non-functional clock gating
input logic rst_l, // reset, active low
input logic scan_mode, // scan mode
input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals
input logic dec_tlu_wb_coalescing_disable, // disable write buffer coalescing
input logic dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus
// various clocks needed for the bus reads and writes
input logic lsu_bus_obuf_c1_clken, // obuf clock enable
input logic lsu_busm_clken, // bus clock enable
input logic lsu_c1_r_clk, // r pipe single pulse clock
input logic lsu_c2_r_clk, // r pipe double pulse clock
input logic lsu_bus_ibuf_c1_clk, // ibuf single pulse clock
input logic lsu_bus_obuf_c1_clk, // obuf single pulse clock
input logic lsu_bus_buf_c1_clk, // buf single pulse clock
input logic lsu_free_c2_clk, // free clock double pulse clock
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic lsu_busm_clk, // bus clock
input logic dec_lsu_valid_raw_d, // Raw valid for address computation
input logic lsu_busreq_m, // bus request is in m
input el2_lsu_pkt_t lsu_pkt_m, // lsu packet flowing down the pipe
input el2_lsu_pkt_t lsu_pkt_r, // lsu packet flowing down the pipe
input logic [31:0] lsu_addr_m, // lsu address flowing down the pipe
input logic [31:0] lsu_addr_r, // lsu address flowing down the pipe
input logic [31:0] end_addr_m, // lsu address flowing down the pipe
input logic [31:0] end_addr_r, // lsu address flowing down the pipe
input logic [31:0] store_data_r, // store data flowing down the pipe
input logic dec_tlu_force_halt,
input logic lsu_commit_r, // lsu instruction in r commits
input logic is_sideeffects_m, // lsu attribute is side_effects
input logic flush_m_up, // flush
input logic flush_r, // flush
input logic ldst_dual_d, ldst_dual_m, ldst_dual_r,
output logic lsu_busreq_r, // bus request is in r
output logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry
output logic lsu_bus_buffer_full_any, // write buffer is full
output logic lsu_bus_buffer_empty_any, // write buffer is empty
output logic [31:0] bus_read_data_m, // the bus return data
output logic lsu_imprecise_error_load_any, // imprecise load bus error
output logic lsu_imprecise_error_store_any, // imprecise store bus error
output logic [31:0] lsu_imprecise_error_addr_any, // address of the imprecise error
// Non-blocking loads
output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load
output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the enrty which needs to be invalidated
output logic lsu_nonblock_load_data_valid,// the non block is valid - sending information back to the cam
output logic lsu_nonblock_load_data_error,// non block load has an error
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error
output logic [31:0] lsu_nonblock_load_data, // Data of the non block load
// PMU events
output logic lsu_pmu_bus_trxn,
output logic lsu_pmu_bus_misaligned,
output logic lsu_pmu_bus_error,
output logic lsu_pmu_bus_busy,
// AXI Write Channels
output logic lsu_axi_awvalid,
input logic lsu_axi_awready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid,
output logic [31:0] lsu_axi_awaddr,
output logic [3:0] lsu_axi_awregion,
output logic [7:0] lsu_axi_awlen,
output logic [2:0] lsu_axi_awsize,
output logic [1:0] lsu_axi_awburst,
output logic lsu_axi_awlock,
output logic [3:0] lsu_axi_awcache,
output logic [2:0] lsu_axi_awprot,
output logic [3:0] lsu_axi_awqos,
output logic lsu_axi_wvalid,
input logic lsu_axi_wready,
output logic [63:0] lsu_axi_wdata,
output logic [7:0] lsu_axi_wstrb,
output logic lsu_axi_wlast,
input logic lsu_axi_bvalid,
output logic lsu_axi_bready,
input logic [1:0] lsu_axi_bresp,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid,
// AXI Read Channels
output logic lsu_axi_arvalid,
input logic lsu_axi_arready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid,
output logic [31:0] lsu_axi_araddr,
output logic [3:0] lsu_axi_arregion,
output logic [7:0] lsu_axi_arlen,
output logic [2:0] lsu_axi_arsize,
output logic [1:0] lsu_axi_arburst,
output logic lsu_axi_arlock,
output logic [3:0] lsu_axi_arcache,
output logic [2:0] lsu_axi_arprot,
output logic [3:0] lsu_axi_arqos,
input logic lsu_axi_rvalid,
output logic lsu_axi_rready,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid,
input logic [63:0] lsu_axi_rdata,
input logic [1:0] lsu_axi_rresp,
input logic lsu_bus_clk_en
);
logic lsu_bus_clk_en_q;
logic [3:0] ldst_byteen_m, ldst_byteen_r;
logic [7:0] ldst_byteen_ext_m, ldst_byteen_ext_r;
logic [3:0] ldst_byteen_hi_m, ldst_byteen_hi_r;
logic [3:0] ldst_byteen_lo_m, ldst_byteen_lo_r;
logic is_sideeffects_r;
logic [63:0] store_data_ext_r;
logic [31:0] store_data_hi_r;
logic [31:0] store_data_lo_r;
logic addr_match_dw_lo_r_m;
logic addr_match_word_lo_r_m;
logic no_word_merge_r, no_dword_merge_r;
logic ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi;
logic [3:0] ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi;
logic [3:0] ld_byte_hit_lo, ld_byte_rhit_lo;
logic [3:0] ld_byte_hit_hi, ld_byte_rhit_hi;
logic [31:0] ld_fwddata_rpipe_lo;
logic [31:0] ld_fwddata_rpipe_hi;
logic [3:0] ld_byte_hit_buf_lo, ld_byte_hit_buf_hi;
logic [31:0] ld_fwddata_buf_lo, ld_fwddata_buf_hi;
logic [63:0] ld_fwddata_lo, ld_fwddata_hi;
logic [63:0] ld_fwddata_m;
logic ld_full_hit_hi_m, ld_full_hit_lo_m;
logic ld_full_hit_m;
assign ldst_byteen_m[3:0] = ({4{lsu_pkt_m.by}} & 4'b0001) |
({4{lsu_pkt_m.half}} & 4'b0011) |
({4{lsu_pkt_m.word}} & 4'b1111);
// Read/Write Buffer
el2_lsu_bus_buffer #(.pt(pt)) bus_buffer (
.*
);
// Logic to determine if dc5 store can be coalesced or not with younger stores. Bypass ibuf if cannot colaesced
assign addr_match_dw_lo_r_m = (lsu_addr_r[31:3] == lsu_addr_m[31:3]);
assign addr_match_word_lo_r_m = addr_match_dw_lo_r_m & ~(lsu_addr_r[2]^lsu_addr_m[2]);
assign no_word_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_word_lo_r_m);
assign no_dword_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_dw_lo_r_m);
// Create Hi/Lo signals
assign ldst_byteen_ext_m[7:0] = {4'b0,ldst_byteen_m[3:0]} << lsu_addr_m[1:0];
assign ldst_byteen_ext_r[7:0] = {4'b0,ldst_byteen_r[3:0]} << lsu_addr_r[1:0];
assign store_data_ext_r[63:0] = {32'b0,store_data_r[31:0]} << {lsu_addr_r[1:0],3'b0};
assign ldst_byteen_hi_m[3:0] = ldst_byteen_ext_m[7:4];
assign ldst_byteen_lo_m[3:0] = ldst_byteen_ext_m[3:0];
assign ldst_byteen_hi_r[3:0] = ldst_byteen_ext_r[7:4];
assign ldst_byteen_lo_r[3:0] = ldst_byteen_ext_r[3:0];
assign store_data_hi_r[31:0] = store_data_ext_r[63:32];
assign store_data_lo_r[31:0] = store_data_ext_r[31:0];
assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m & lsu_busreq_r;
assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m & lsu_busreq_r;
assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m & lsu_busreq_r;
assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m & lsu_busreq_r;
for (genvar i=0; i<4; i++) begin: GenBusBufFwd
assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo & ldst_byteen_lo_r[i] & ldst_byteen_lo_m[i];
assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i] & ldst_byteen_hi_m[i];
assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i] & ldst_byteen_lo_m[i];
assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i] & ldst_byteen_hi_m[i];
assign ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i] |
ld_byte_hit_buf_lo[i];
assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i] |
ld_byte_hit_buf_hi[i];
assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
({8{ld_byte_rhit_hi_lo[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
({8{ld_byte_rhit_hi_hi[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
// Final muxing between m/r
assign ld_fwddata_lo[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i] ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : ld_fwddata_buf_lo[(8*i)+7:(8*i)];
assign ld_fwddata_hi[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i] ? ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : ld_fwddata_buf_hi[(8*i)+7:(8*i)];
end
always_comb begin
ld_full_hit_lo_m = 1'b1;
ld_full_hit_hi_m = 1'b1;
for (int i=0; i<4; i++) begin
ld_full_hit_lo_m &= (ld_byte_hit_lo[i] | ~ldst_byteen_lo_m[i]);
ld_full_hit_hi_m &= (ld_byte_hit_hi[i] | ~ldst_byteen_hi_m[i]);
end
end
// This will be high if all the bytes of load hit the stores in pipe/write buffer (m/r/wrbuf)
assign ld_full_hit_m = ld_full_hit_lo_m & ld_full_hit_hi_m & lsu_busreq_m & lsu_pkt_m.load & ~is_sideeffects_m;
assign ld_fwddata_m[63:0] = {ld_fwddata_hi[31:0], ld_fwddata_lo[31:0]} >> (8*lsu_addr_m[1:0]);
assign bus_read_data_m[31:0] = ld_fwddata_m[31:0];
// Fifo flops
rvdff #(.WIDTH(1)) clken_ff (.din(lsu_bus_clk_en), .dout(lsu_bus_clk_en_q), .clk(active_clk), .*);
rvdff #(.WIDTH(1)) is_sideeffects_rff (.din(is_sideeffects_m), .dout(is_sideeffects_r), .clk(lsu_c1_r_clk), .*);
rvdff #(4) lsu_byten_rff (.*, .din(ldst_byteen_m[3:0]), .dout(ldst_byteen_r[3:0]), .clk(lsu_c1_r_clk));
`ifdef RV_ASSERT_ON
// Assertion to check AXI write address is aligned to size
property lsu_axi_awaddr_aligned;
@(posedge lsu_busm_clk) disable iff(~rst_l) lsu_axi_awvalid |-> ((lsu_axi_awsize[2:0] == 3'h0) |
((lsu_axi_awsize[2:0] == 3'h1) & (lsu_axi_awaddr[0] == 1'b0)) |
((lsu_axi_awsize[2:0] == 3'h2) & (lsu_axi_awaddr[1:0] == 2'b0)) |
((lsu_axi_awsize[2:0] == 3'h3) & (lsu_axi_awaddr[2:0] == 3'b0)));
endproperty
assert_lsu_axi_awaddr_aligned: assert property (lsu_axi_awaddr_aligned) else
$display("Assertion lsu_axi_awaddr_aligned failed: lsu_axi_awvalid=1'b%b, lsu_axi_awsize=3'h%h, lsu_axi_awaddr=32'h%h",lsu_axi_awvalid, lsu_axi_awsize[2:0], lsu_axi_awaddr[31:0]);
// Assertion to check awvalid stays stable during entire bus clock
// Assertion to check AXI read address is aligned to size
property lsu_axi_araddr_aligned;
@(posedge lsu_busm_clk) disable iff(~rst_l) lsu_axi_arvalid |-> ((lsu_axi_arsize[2:0] == 3'h0) |
((lsu_axi_arsize[2:0] == 3'h1) & (lsu_axi_araddr[0] == 1'b0)) |
((lsu_axi_arsize[2:0] == 3'h2) & (lsu_axi_araddr[1:0] == 2'b0)) |
((lsu_axi_arsize[2:0] == 3'h3) & (lsu_axi_araddr[2:0] == 3'b0)));
endproperty
assert_lsu_axi_araddr_aligned: assert property (lsu_axi_araddr_aligned) else
$display("Assertion lsu_axi_araddr_aligned failed: lsu_axi_awvalid=1'b%b, lsu_axi_awsize=3'h%h, lsu_axi_araddr=32'h%h",lsu_axi_awvalid, lsu_axi_awsize[2:0], lsu_axi_araddr[31:0]);
// Assertion to check awvalid stays stable during entire bus clock
property lsu_axi_awvalid_stable;
@(posedge clk) disable iff(~rst_l) (lsu_axi_awvalid != $past(lsu_axi_awvalid)) |-> ($past(lsu_bus_clk_en) | dec_tlu_force_halt);
endproperty
assert_lsu_axi_awvalid_stable: assert property (lsu_axi_awvalid_stable) else
$display("LSU AXI awvalid changed in middle of bus clock");
// Assertion to check awid stays stable during entire bus clock
property lsu_axi_awid_stable;
@(posedge clk) disable iff(~rst_l) (lsu_axi_awvalid & (lsu_axi_awid[pt.LSU_BUS_TAG-1:0] != $past(lsu_axi_awid[pt.LSU_BUS_TAG-1:0]))) |-> $past(lsu_bus_clk_en);
endproperty
assert_lsu_axi_awid_stable: assert property (lsu_axi_awid_stable) else
$display("LSU AXI awid changed in middle of bus clock");
// Assertion to check awaddr stays stable during entire bus clock
property lsu_axi_awaddr_stable;
@(posedge clk) disable iff(~rst_l) (lsu_axi_awvalid & (lsu_axi_awaddr[31:0] != $past(lsu_axi_awaddr[31:0]))) |-> $past(lsu_bus_clk_en);
endproperty
assert_lsu_axi_awaddr_stable: assert property (lsu_axi_awaddr_stable) else
$display("LSU AXI awaddr changed in middle of bus clock");
// Assertion to check awsize stays stable during entire bus clock
property lsu_axi_awsize_stable;
@(posedge clk) disable iff(~rst_l) (lsu_axi_awvalid & (lsu_axi_awsize[2:0] != $past(lsu_axi_awsize[2:0]))) |-> $past(lsu_bus_clk_en);
endproperty
assert_lsu_axi_awsize_stable: assert property (lsu_axi_awsize_stable) else
$display("LSU AXI awsize changed in middle of bus clock");
// Assertion to check wstrb stays stable during entire bus clock
property lsu_axi_wstrb_stable;
@(posedge clk) disable iff(~rst_l) (lsu_axi_wvalid & (lsu_axi_wstrb[7:0] != $past(lsu_axi_wstrb[7:0]))) |-> $past(lsu_bus_clk_en);
endproperty
assert_lsu_axi_wstrb_stable: assert property (lsu_axi_wstrb_stable) else
$display("LSU AXI wstrb changed in middle of bus clock");
// Assertion to check wdata stays stable during entire bus clock
property lsu_axi_wdata_stable;
@(posedge clk) disable iff(~rst_l) (lsu_axi_wvalid & (lsu_axi_wdata[63:0] != $past(lsu_axi_wdata[63:0]))) |-> $past(lsu_bus_clk_en);
endproperty
assert_lsu_axi_wdata_stable: assert property (lsu_axi_wdata_stable) else
$display("LSU AXI wdata changed in middle of bus clock");
// Assertion to check awvalid stays stable during entire bus clock
property lsu_axi_arvalid_stable;
@(posedge clk) disable iff(~rst_l) (lsu_axi_arvalid != $past(lsu_axi_arvalid)) |-> ($past(lsu_bus_clk_en) | dec_tlu_force_halt);
endproperty
assert_lsu_axi_arvalid_stable: assert property (lsu_axi_arvalid_stable) else
$display("LSU AXI awvalid changed in middle of bus clock");
// Assertion to check awid stays stable during entire bus clock
property lsu_axi_arid_stable;
@(posedge clk) disable iff(~rst_l) (lsu_axi_arvalid & (lsu_axi_arid[pt.LSU_BUS_TAG-1:0] != $past(lsu_axi_arid[pt.LSU_BUS_TAG-1:0]))) |-> $past(lsu_bus_clk_en);
endproperty
assert_lsu_axi_arid_stable: assert property (lsu_axi_arid_stable) else
$display("LSU AXI awid changed in middle of bus clock");
// Assertion to check awaddr stays stable during entire bus clock
property lsu_axi_araddr_stable;
@(posedge clk) disable iff(~rst_l) (lsu_axi_arvalid & (lsu_axi_araddr[31:0] != $past(lsu_axi_araddr[31:0]))) |-> $past(lsu_bus_clk_en);
endproperty
assert_lsu_axi_araddr_stable: assert property (lsu_axi_araddr_stable) else
$display("LSU AXI awaddr changed in middle of bus clock");
// Assertion to check awsize stays stable during entire bus clock
property lsu_axi_arsize_stable;
@(posedge clk) disable iff(~rst_l) (lsu_axi_awvalid & (lsu_axi_arsize[2:0] != $past(lsu_axi_arsize[2:0]))) |-> $past(lsu_bus_clk_en);
endproperty
assert_lsu_axi_arsize_stable: assert property (lsu_axi_arsize_stable) else
$display("LSU AXI awsize changed in middle of bus clock");
`endif
endmodule // el2_lsu_bus_intf

View File

@ -0,0 +1,145 @@
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: Clock Generation Block
// Comments: All the clocks are generate here
//
// //********************************************************************************
module el2_lsu_clkdomain
import el2_pkg::*;
#(
`include "el2_param.vh"
)(
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic rst_l, // reset, active low
input logic dec_tlu_force_halt, // This will be high till TLU goes to debug halt
// Inputs
input logic clk_override, // chciken bit to turn off clock gating
input logic dma_dccm_req, // dma is active
input logic ldst_stbuf_reqvld_r, // allocating in to the store queue
input logic stbuf_reqvld_any, // stbuf is draining
input logic stbuf_reqvld_flushed_any, // instruction going to stbuf is flushed
input logic lsu_busreq_r, // busreq in r
input logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry
input logic lsu_bus_buffer_empty_any, // external bus buffer is empty
input logic lsu_stbuf_empty_any, // stbuf is empty
input logic lsu_bus_clk_en, // bus clock enable
input el2_lsu_pkt_t lsu_p, // lsu packet in decode
input el2_lsu_pkt_t lsu_pkt_d, // lsu packet in d
input el2_lsu_pkt_t lsu_pkt_m, // lsu packet in m
input el2_lsu_pkt_t lsu_pkt_r, // lsu packet in r
// Outputs
output logic lsu_bus_obuf_c1_clken, // obuf clock enable
output logic lsu_busm_clken, // bus clock enable
output logic lsu_c1_m_clk, // m pipe single pulse clock
output logic lsu_c1_r_clk, // r pipe single pulse clock
output logic lsu_c2_m_clk, // m pipe double pulse clock
output logic lsu_c2_r_clk, // r pipe double pulse clock
output logic lsu_store_c1_m_clk, // store in m
output logic lsu_store_c1_r_clk, // store in r
output logic lsu_stbuf_c1_clk,
output logic lsu_bus_obuf_c1_clk, // ibuf clock
output logic lsu_bus_ibuf_c1_clk, // ibuf clock
output logic lsu_bus_buf_c1_clk, // ibuf clock
output logic lsu_busm_clk, // bus clock
output logic lsu_free_c2_clk, // free double pulse clock
input logic scan_mode // Scan mode
);
logic lsu_c1_m_clken, lsu_c1_r_clken;
logic lsu_c2_m_clken, lsu_c2_r_clken;
logic lsu_c1_m_clken_q, lsu_c1_r_clken_q;
logic lsu_store_c1_m_clken, lsu_store_c1_r_clken;
logic lsu_stbuf_c1_clken;
logic lsu_bus_ibuf_c1_clken, lsu_bus_buf_c1_clken;
logic lsu_free_c1_clken, lsu_free_c1_clken_q, lsu_free_c2_clken;
//-------------------------------------------------------------------------------------------
// Clock Enable logic
//-------------------------------------------------------------------------------------------
assign lsu_c1_m_clken = lsu_p.valid | dma_dccm_req | clk_override;
assign lsu_c1_r_clken = lsu_pkt_m.valid | lsu_c1_m_clken_q | clk_override;
assign lsu_c2_m_clken = lsu_c1_m_clken | lsu_c1_m_clken_q | clk_override;
assign lsu_c2_r_clken = lsu_c1_r_clken | lsu_c1_r_clken_q | clk_override;
assign lsu_store_c1_m_clken = ((lsu_c1_m_clken & lsu_pkt_d.store) | clk_override) ;
assign lsu_store_c1_r_clken = ((lsu_c1_r_clken & lsu_pkt_m.store) | clk_override) ;
assign lsu_stbuf_c1_clken = ldst_stbuf_reqvld_r | stbuf_reqvld_any | stbuf_reqvld_flushed_any | clk_override;
assign lsu_bus_ibuf_c1_clken = lsu_busreq_r | clk_override;
assign lsu_bus_obuf_c1_clken = (lsu_bus_buffer_pend_any | lsu_busreq_r | clk_override) & lsu_bus_clk_en;
assign lsu_bus_buf_c1_clken = ~lsu_bus_buffer_empty_any | lsu_busreq_r | dec_tlu_force_halt | clk_override;
assign lsu_free_c1_clken = (lsu_p.valid | lsu_pkt_d.valid | lsu_pkt_m.valid | lsu_pkt_r.valid) |
~lsu_bus_buffer_empty_any | ~lsu_stbuf_empty_any | clk_override;
assign lsu_free_c2_clken = lsu_free_c1_clken | lsu_free_c1_clken_q | clk_override;
// Flops
rvdff #(1) lsu_free_c1_clkenff (.din(lsu_free_c1_clken), .dout(lsu_free_c1_clken_q), .clk(active_clk), .*);
rvdff #(1) lsu_c1_m_clkenff (.din(lsu_c1_m_clken), .dout(lsu_c1_m_clken_q), .clk(lsu_free_c2_clk), .*);
rvdff #(1) lsu_c1_r_clkenff (.din(lsu_c1_r_clken), .dout(lsu_c1_r_clken_q), .clk(lsu_free_c2_clk), .*);
// Clock Headers
rvoclkhdr lsu_c1m_cgc ( .en(lsu_c1_m_clken), .l1clk(lsu_c1_m_clk), .* );
rvoclkhdr lsu_c1r_cgc ( .en(lsu_c1_r_clken), .l1clk(lsu_c1_r_clk), .* );
rvoclkhdr lsu_c2m_cgc ( .en(lsu_c2_m_clken), .l1clk(lsu_c2_m_clk), .* );
rvoclkhdr lsu_c2r_cgc ( .en(lsu_c2_r_clken), .l1clk(lsu_c2_r_clk), .* );
rvoclkhdr lsu_store_c1m_cgc (.en(lsu_store_c1_m_clken), .l1clk(lsu_store_c1_m_clk), .*);
rvoclkhdr lsu_store_c1r_cgc (.en(lsu_store_c1_r_clken), .l1clk(lsu_store_c1_r_clk), .*);
rvoclkhdr lsu_stbuf_c1_cgc ( .en(lsu_stbuf_c1_clken), .l1clk(lsu_stbuf_c1_clk), .* );
rvoclkhdr lsu_bus_ibuf_c1_cgc ( .en(lsu_bus_ibuf_c1_clken), .l1clk(lsu_bus_ibuf_c1_clk), .* );
rvoclkhdr lsu_bus_buf_c1_cgc ( .en(lsu_bus_buf_c1_clken), .l1clk(lsu_bus_buf_c1_clk), .* );
assign lsu_busm_clken = (~lsu_bus_buffer_empty_any | lsu_busreq_r | clk_override) & lsu_bus_clk_en;
`ifdef RV_FPGA_OPTIMIZE
assign lsu_busm_clk = 1'b0;
assign lsu_bus_obuf_c1_clk = 1'b0;
`else
rvclkhdr lsu_bus_obuf_c1_cgc ( .en(lsu_bus_obuf_c1_clken), .l1clk(lsu_bus_obuf_c1_clk), .* );
rvclkhdr lsu_busm_cgc (.en(lsu_busm_clken), .l1clk(lsu_busm_clk), .*);
`endif
rvoclkhdr lsu_free_cgc (.en(lsu_free_c2_clken), .l1clk(lsu_free_c2_clk), .*);
endmodule

View File

@ -0,0 +1,425 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: DCCM for LSU pipe
// Comments: Single ported memory
//
//
// DC1 -> DC2 -> DC3 -> DC4 (Commit)
//
// //********************************************************************************
module el2_lsu_dccm_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic lsu_c2_m_clk, // clocks
input logic lsu_c2_r_clk, // clocks
input logic lsu_c1_r_clk, // clocks
input logic lsu_store_c1_r_clk, // clocks
input logic lsu_free_c2_clk, // clocks
input logic clk_override, // Override non-functional clock gating
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic rst_l, // reset, active low
input el2_lsu_pkt_t lsu_pkt_r,// lsu packets
input el2_lsu_pkt_t lsu_pkt_m,// lsu packets
input el2_lsu_pkt_t lsu_pkt_d,// lsu packets
input logic addr_in_dccm_d, // address maps to dccm
input logic addr_in_pic_d, // address maps to pic
input logic addr_in_pic_m, // address maps to pic
input logic addr_in_dccm_m, addr_in_dccm_r, // address in dccm per pipe stage
input logic addr_in_pic_r, // address in pic per pipe stage
input logic lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r,
input logic lsu_commit_r, // lsu instruction in r commits
input logic ldst_dual_m, ldst_dual_r,// load/store is unaligned at 32 bit boundary per pipe stage
// lsu address down the pipe
input logic [31:0] lsu_addr_d,
input logic [pt.DCCM_BITS-1:0] lsu_addr_m,
input logic [31:0] lsu_addr_r,
// lsu address down the pipe - needed to check unaligned
input logic [pt.DCCM_BITS-1:0] end_addr_d,
input logic [pt.DCCM_BITS-1:0] end_addr_m,
input logic [pt.DCCM_BITS-1:0] end_addr_r,
input logic stbuf_reqvld_any, // write enable
input logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any, // stbuf address (aligned)
input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, // the read out from stbuf
input logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, // the encoded data with ECC bits
input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m, // stbuf fowarding to load
input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m, // stbuf fowarding to load
input logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m, // stbuf fowarding to load
input logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m, // stbuf fowarding to load
output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r, // data from the dccm
output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r, // data from the dccm
output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_r, // data from the dccm + ecc
output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_r,
output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_r, // right justified, ie load byte will have data at 7:0
output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_corr_r, // right justified & ECC corrected, ie load byte will have data at 7:0
input logic lsu_double_ecc_error_r, // lsu has a DED
input logic single_ecc_error_hi_r, // sec detected on hi dccm bank
input logic single_ecc_error_lo_r, // sec detected on lower dccm bank
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, // corrected dccm data
input logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, // the encoded data with ECC bits
input logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_lo_r_ff, // the encoded data with ECC bits
output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m, // data from the dccm
output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m, // data from the dccm
output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_m, // data from the dccm + ecc
output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_m,
output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_m, // right justified, ie load byte will have data at 7:0
input logic lsu_double_ecc_error_m, // lsu has a DED
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m, // corrected dccm data
input logic [31:0] store_data_m, // Store data M-stage
input logic dma_dccm_wen, // Perform DMA writes only for word/dword
input logic dma_pic_wen, // Perform PIC writes
input logic [2:0] dma_mem_tag_m, // DMA Buffer entry number M-stage
input logic [31:0] dma_mem_addr, // DMA request address
input logic [63:0] dma_mem_wdata, // DMA write data
input logic [31:0] dma_dccm_wdata_lo, // Shift the dma data to lower bits to make it consistent to lsu stores
input logic [31:0] dma_dccm_wdata_hi, // Shift the dma data to lower bits to make it consistent to lsu stores
input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata
input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, // ECC bits for the DMA wdata
output logic [pt.DCCM_DATA_WIDTH-1:0] store_data_hi_r,
output logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r,
output logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_hi_r, // data from the dccm
output logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_lo_r, // data from the dccm
output logic [31:0] store_data_r, // raw store data to be sent to bus
output logic ld_single_ecc_error_r,
output logic ld_single_ecc_error_r_ff,
output logic [31:0] picm_mask_data_m, // pic data to stbuf
output logic lsu_stbuf_commit_any, // stbuf wins the dccm port or is to pic
output logic lsu_dccm_rden_m, // dccm read
output logic lsu_dccm_rden_r, // dccm read
output logic dccm_dma_rvalid, // dccm serviving the dma load
output logic dccm_dma_ecc_error, // DMA load had ecc error
output logic [2:0] dccm_dma_rtag, // DMA return tag
output logic [63:0] dccm_dma_rdata, // dccm data to dma request
// DCCM ports
output logic dccm_wren, // dccm interface -- write
output logic dccm_rden, // dccm interface -- write
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // dccm interface -- wr addr for lo bank
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // dccm interface -- wr addr for hi bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // dccm interface -- read address for lo bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // dccm interface -- read address for hi bank
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // dccm write data for lo bank
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // dccm write data for hi bank
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // dccm read data back from the dccm
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // dccm read data back from the dccm
// PIC ports
output logic picm_wren, // write to pic
output logic picm_rden, // read to pick
output logic picm_mken, // write to pic need a mask
output logic [31:0] picm_rdaddr, // address for pic read access
output logic [31:0] picm_wraddr, // address for pic write access
output logic [31:0] picm_wr_data, // write data
input logic [31:0] picm_rd_data, // read data
input logic scan_mode // scan mode
);
localparam DCCM_WIDTH_BITS = $clog2(pt.DCCM_BYTE_WIDTH);
logic lsu_dccm_rden_d, lsu_dccm_wren_d;
logic ld_single_ecc_error_lo_r, ld_single_ecc_error_hi_r;
logic ld_single_ecc_error_lo_r_ns, ld_single_ecc_error_hi_r_ns;
logic ld_single_ecc_error_lo_r_ff, ld_single_ecc_error_hi_r_ff;
logic lsu_double_ecc_error_r_ff;
logic [pt.DCCM_BITS-1:0] ld_sec_addr_lo_r_ff, ld_sec_addr_hi_r_ff;
logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r_in, store_data_hi_r_in ;
logic [63:0] picm_rd_data_m;
logic dccm_wr_bypass_d_m_hi, dccm_wr_bypass_d_r_hi;
logic dccm_wr_bypass_d_m_lo, dccm_wr_bypass_d_r_lo;
logic kill_ecc_corr_lo_r, kill_ecc_corr_hi_r;
// byte_en flowing down
logic [3:0] store_byteen_m ,store_byteen_r;
logic [7:0] store_byteen_ext_m, store_byteen_ext_r;
if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1
logic [63:0] lsu_rdata_r, lsu_rdata_corr_r;
logic [63:0] dccm_rdata_r, dccm_rdata_corr_r;
logic [63:0] stbuf_fwddata_r;
logic [7:0] stbuf_fwdbyteen_r;
logic [31:0] stbuf_fwddata_lo_r, stbuf_fwddata_hi_r;
logic [3:0] stbuf_fwdbyteen_lo_r, stbuf_fwdbyteen_hi_r;
logic [31:0] lsu_rdata_lo_r, lsu_rdata_hi_r;
logic [63:0] picm_rd_data_r;
logic [63:32] lsu_ld_data_r_nc, lsu_ld_data_corr_r_nc;
logic [2:0] dma_mem_tag_r;
logic stbuf_fwddata_en;
assign dccm_dma_rvalid = lsu_pkt_r.valid & lsu_pkt_r.load & lsu_pkt_r.dma;
assign dccm_dma_ecc_error = lsu_double_ecc_error_r;
assign dccm_dma_rtag[2:0] = dma_mem_tag_r[2:0];
assign dccm_dma_rdata[63:0] = ldst_dual_r ? lsu_rdata_corr_r[63:0] : {2{lsu_rdata_corr_r[31:0]}};
assign {lsu_ld_data_r_nc[63:32], lsu_ld_data_r[31:0]} = lsu_rdata_r[63:0] >> 8*lsu_addr_r[1:0];
assign {lsu_ld_data_corr_r_nc[63:32], lsu_ld_data_corr_r[31:0]} = lsu_rdata_corr_r[63:0] >> 8*lsu_addr_r[1:0];
assign picm_rd_data_r[63:32] = picm_rd_data_r[31:0];
assign dccm_rdata_r[63:0] = {dccm_rdata_hi_r[31:0],dccm_rdata_lo_r[31:0]};
assign dccm_rdata_corr_r[63:0] = {sec_data_hi_r[31:0],sec_data_lo_r[31:0]};
assign stbuf_fwddata_r[63:0] = {stbuf_fwddata_hi_r[31:0], stbuf_fwddata_lo_r[31:0]};
assign stbuf_fwdbyteen_r[7:0] = {stbuf_fwdbyteen_hi_r[3:0], stbuf_fwdbyteen_lo_r[3:0]};
assign stbuf_fwddata_en = (|stbuf_fwdbyteen_hi_m[3:0]) | (|stbuf_fwdbyteen_lo_m[3:0]) | clk_override;
for (genvar i=0; i<8; i++) begin: GenDMAData
assign lsu_rdata_corr_r[(8*i)+7:8*i] = stbuf_fwdbyteen_r[i] ? stbuf_fwddata_r[(8*i)+7:8*i] :
(addr_in_pic_r ? picm_rd_data_r[(8*i)+7:8*i] : ({8{addr_in_dccm_r}} & dccm_rdata_corr_r[(8*i)+7:8*i]));
assign lsu_rdata_r[(8*i)+7:8*i] = stbuf_fwdbyteen_r[i] ? stbuf_fwddata_r[(8*i)+7:8*i] :
(addr_in_pic_r ? picm_rd_data_r[(8*i)+7:8*i] : ({8{addr_in_dccm_r}} & dccm_rdata_r[(8*i)+7:8*i]));
end
rvdffe #(pt.DCCM_DATA_WIDTH) dccm_rdata_hi_r_ff (.*, .din(dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .en((lsu_dccm_rden_m & ldst_dual_m) | clk_override));
rvdffe #(pt.DCCM_DATA_WIDTH) dccm_rdata_lo_r_ff (.*, .din(dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_dccm_rden_m | clk_override));
rvdffe #(2*pt.DCCM_ECC_WIDTH) dccm_data_ecc_r_ff (.*, .din({dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0], dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0]}),
.dout({dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0], dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0]}), .en(lsu_dccm_rden_m | clk_override));
rvdff #(8) stbuf_fwdbyteen_ff (.*, .din({stbuf_fwdbyteen_hi_m[3:0], stbuf_fwdbyteen_lo_m[3:0]}), .dout({stbuf_fwdbyteen_hi_r[3:0], stbuf_fwdbyteen_lo_r[3:0]}), .clk(lsu_c2_r_clk));
rvdffe #(64) stbuf_fwddata_ff (.*, .din({stbuf_fwddata_hi_m[31:0], stbuf_fwddata_lo_m[31:0]}), .dout({stbuf_fwddata_hi_r[31:0], stbuf_fwddata_lo_r[31:0]}), .en(stbuf_fwddata_en));
rvdffe #(32) picm_rddata_rff (.*, .din(picm_rd_data_m[31:0]), .dout(picm_rd_data_r[31:0]), .en(addr_in_pic_m | clk_override));
rvdff #(3) dma_mem_tag_rff (.*, .din(dma_mem_tag_m[2:0]), .dout(dma_mem_tag_r[2:0]), .clk(lsu_c1_r_clk));
end else begin: L2U_Plus1_0
logic [63:0] lsu_rdata_m, lsu_rdata_corr_m;
logic [63:0] dccm_rdata_m, dccm_rdata_corr_m;
logic [63:0] stbuf_fwddata_m;
logic [7:0] stbuf_fwdbyteen_m;
logic [63:32] lsu_ld_data_m_nc, lsu_ld_data_corr_m_nc;
logic [31:0] lsu_ld_data_corr_m;
assign dccm_dma_rvalid = lsu_pkt_m.valid & lsu_pkt_m.load & lsu_pkt_m.dma;
assign dccm_dma_ecc_error = lsu_double_ecc_error_m;
assign dccm_dma_rtag[2:0] = dma_mem_tag_m[2:0];
assign dccm_dma_rdata[63:0] = ldst_dual_m ? lsu_rdata_corr_m[63:0] : {2{lsu_rdata_corr_m[31:0]}};
assign {lsu_ld_data_m_nc[63:32], lsu_ld_data_m[31:0]} = lsu_rdata_m[63:0] >> 8*lsu_addr_m[1:0];
assign {lsu_ld_data_corr_m_nc[63:32], lsu_ld_data_corr_m[31:0]} = lsu_rdata_corr_m[63:0] >> 8*lsu_addr_m[1:0];
assign dccm_rdata_m[63:0] = {dccm_rdata_hi_m[31:0],dccm_rdata_lo_m[31:0]};
assign dccm_rdata_corr_m[63:0] = {sec_data_hi_m[31:0],sec_data_lo_m[31:0]};
assign stbuf_fwddata_m[63:0] = {stbuf_fwddata_hi_m[31:0], stbuf_fwddata_lo_m[31:0]};
assign stbuf_fwdbyteen_m[7:0] = {stbuf_fwdbyteen_hi_m[3:0], stbuf_fwdbyteen_lo_m[3:0]};
for (genvar i=0; i<8; i++) begin: GenLoop
assign lsu_rdata_corr_m[(8*i)+7:8*i] = stbuf_fwdbyteen_m[i] ? stbuf_fwddata_m[(8*i)+7:8*i] :
(addr_in_pic_m ? picm_rd_data_m[(8*i)+7:8*i] : ({8{addr_in_dccm_m}} & dccm_rdata_corr_m[(8*i)+7:8*i]));
assign lsu_rdata_m[(8*i)+7:8*i] = stbuf_fwdbyteen_m[i] ? stbuf_fwddata_m[(8*i)+7:8*i] :
(addr_in_pic_m ? picm_rd_data_m[(8*i)+7:8*i] : ({8{addr_in_dccm_m}} & dccm_rdata_m[(8*i)+7:8*i]));
end
rvdffe #(32) lsu_ld_data_corr_rff(.*, .din(lsu_ld_data_corr_m[31:0]), .dout(lsu_ld_data_corr_r[31:0]), .en((lsu_pkt_m.valid & lsu_pkt_m.load & (addr_in_pic_m | addr_in_dccm_m)) | clk_override));
end
assign kill_ecc_corr_lo_r = (((lsu_addr_d[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_d[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_d.valid & lsu_pkt_d.store & lsu_pkt_d.dma & addr_in_dccm_d) |
(((lsu_addr_m[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_m[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_m.valid & lsu_pkt_m.store & lsu_pkt_m.dma & addr_in_dccm_m);
assign kill_ecc_corr_hi_r = (((lsu_addr_d[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_d[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_d.valid & lsu_pkt_d.store & lsu_pkt_d.dma & addr_in_dccm_d) |
(((lsu_addr_m[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_m[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_m.valid & lsu_pkt_m.store & lsu_pkt_m.dma & addr_in_dccm_m);
assign ld_single_ecc_error_lo_r = lsu_pkt_r.load & single_ecc_error_lo_r & ~lsu_raw_fwd_lo_r;
assign ld_single_ecc_error_hi_r = lsu_pkt_r.load & single_ecc_error_hi_r & ~lsu_raw_fwd_hi_r;
assign ld_single_ecc_error_r = (ld_single_ecc_error_lo_r | ld_single_ecc_error_hi_r) & ~lsu_double_ecc_error_r;
assign ld_single_ecc_error_lo_r_ns = ld_single_ecc_error_lo_r & (lsu_commit_r | lsu_pkt_r.dma) & ~kill_ecc_corr_lo_r;
assign ld_single_ecc_error_hi_r_ns = ld_single_ecc_error_hi_r & (lsu_commit_r | lsu_pkt_r.dma) & ~kill_ecc_corr_hi_r;
assign ld_single_ecc_error_r_ff = (ld_single_ecc_error_lo_r_ff | ld_single_ecc_error_hi_r_ff) & ~lsu_double_ecc_error_r_ff;
assign lsu_stbuf_commit_any = stbuf_reqvld_any &
(~(lsu_dccm_rden_d | lsu_dccm_wren_d | ld_single_ecc_error_r_ff) |
(lsu_dccm_rden_d & ~((stbuf_addr_any[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == lsu_addr_d[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]) |
(stbuf_addr_any[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == end_addr_d[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]))));
// No need to read for aligned word/dword stores since ECC will come by new data completely
assign lsu_dccm_rden_d = lsu_pkt_d.valid & (lsu_pkt_d.load | (lsu_pkt_d.store & (~(lsu_pkt_d.word | lsu_pkt_d.dword) | (lsu_addr_d[1:0] != 2'b0)))) & addr_in_dccm_d;
// DMA will read/write in decode stage
assign lsu_dccm_wren_d = dma_dccm_wen;
// DCCM inputs
assign dccm_wren = lsu_dccm_wren_d | lsu_stbuf_commit_any | ld_single_ecc_error_r_ff;
assign dccm_rden = lsu_dccm_rden_d & addr_in_dccm_d;
assign dccm_wr_addr_lo[pt.DCCM_BITS-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_lo_r_ff ? ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0] : ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0]) :
lsu_dccm_wren_d ? lsu_addr_d[pt.DCCM_BITS-1:0] : stbuf_addr_any[pt.DCCM_BITS-1:0];
assign dccm_wr_addr_hi[pt.DCCM_BITS-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_hi_r_ff ? ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0] : ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0]) :
lsu_dccm_wren_d ? end_addr_d[pt.DCCM_BITS-1:0] : stbuf_addr_any[pt.DCCM_BITS-1:0];
assign dccm_rd_addr_lo[pt.DCCM_BITS-1:0] = lsu_addr_d[pt.DCCM_BITS-1:0];
assign dccm_rd_addr_hi[pt.DCCM_BITS-1:0] = end_addr_d[pt.DCCM_BITS-1:0];
assign dccm_wr_data_lo[pt.DCCM_FDATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_lo_r_ff ? {sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]} :
{sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]}) :
(dma_dccm_wen ? {dma_dccm_wdata_ecc_lo[pt.DCCM_ECC_WIDTH-1:0],dma_dccm_wdata_lo[pt.DCCM_DATA_WIDTH-1:0]} :
{stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0],stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]});
assign dccm_wr_data_hi[pt.DCCM_FDATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_hi_r_ff ? {sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]} :
{sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]}) :
(dma_dccm_wen ? {dma_dccm_wdata_ecc_hi[pt.DCCM_ECC_WIDTH-1:0],dma_dccm_wdata_hi[pt.DCCM_DATA_WIDTH-1:0]} :
{stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0],stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]});
// DCCM outputs
assign store_byteen_m[3:0] = {4{lsu_pkt_m.store}} &
(({4{lsu_pkt_m.by}} & 4'b0001) |
({4{lsu_pkt_m.half}} & 4'b0011) |
({4{lsu_pkt_m.word}} & 4'b1111));
assign store_byteen_r[3:0] = {4{lsu_pkt_r.store}} &
(({4{lsu_pkt_r.by}} & 4'b0001) |
({4{lsu_pkt_r.half}} & 4'b0011) |
({4{lsu_pkt_r.word}} & 4'b1111));
assign store_byteen_ext_m[7:0] = {4'b0,store_byteen_m[3:0]} << lsu_addr_m[1:0]; // The packet in m
assign store_byteen_ext_r[7:0] = {4'b0,store_byteen_r[3:0]} << lsu_addr_r[1:0];
assign dccm_wr_bypass_d_m_lo = (stbuf_addr_any[pt.DCCM_BITS-1:2] == lsu_addr_m[pt.DCCM_BITS-1:2]) & addr_in_dccm_m;
assign dccm_wr_bypass_d_m_hi = (stbuf_addr_any[pt.DCCM_BITS-1:2] == end_addr_m[pt.DCCM_BITS-1:2]) & addr_in_dccm_m;
assign dccm_wr_bypass_d_r_lo = (stbuf_addr_any[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) & addr_in_dccm_r;
assign dccm_wr_bypass_d_r_hi = (stbuf_addr_any[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) & addr_in_dccm_r;
if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U1_Plus1_1
logic dccm_wren_Q;
logic [31:0] dccm_wr_data_Q;
logic dccm_wr_bypass_d_m_lo_Q, dccm_wr_bypass_d_m_hi_Q;
logic [31:0] store_data_pre_hi_r, store_data_pre_lo_r;
assign {store_data_pre_hi_r[31:0], store_data_pre_lo_r[31:0]} = {32'b0,store_data_r[31:0]} << 8*lsu_addr_r[1:0];
for (genvar i=0; i<4; i++) begin
assign store_data_lo_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i] ? store_data_pre_lo_r[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_lo_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_lo_r[(8*i)+7:(8*i)]);
assign store_data_hi_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i+4] ? store_data_pre_hi_r[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_hi_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_hi_r[(8*i)+7:(8*i)]);
assign store_datafn_lo_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i] ? store_data_pre_lo_r[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_r_lo) ? stbuf_data_any[(8*i)+7:(8*i)] :
((dccm_wren_Q & dccm_wr_bypass_d_m_lo_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_lo_r[(8*i)+7:(8*i)]));
assign store_datafn_hi_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i+4] ? store_data_pre_hi_r[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_r_hi) ? stbuf_data_any[(8*i)+7:(8*i)] :
((dccm_wren_Q & dccm_wr_bypass_d_m_hi_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_hi_r[(8*i)+7:(8*i)]));
end
rvdff #(1) dccm_wren_ff (.*, .din(lsu_stbuf_commit_any), .dout(dccm_wren_Q), .clk(lsu_free_c2_clk)); // ECC load errors writing to dccm shouldn't fwd to stores in pipe
rvdffe #(32) dccm_wrdata_ff (.*, .din(stbuf_data_any[31:0]), .dout(dccm_wr_data_Q[31:0]), .en(lsu_stbuf_commit_any | clk_override), .clk(clk));
rvdff #(1) dccm_wrbyp_dm_loff (.*, .din(dccm_wr_bypass_d_m_lo), .dout(dccm_wr_bypass_d_m_lo_Q), .clk(lsu_free_c2_clk));
rvdff #(1) dccm_wrbyp_dm_hiff (.*, .din(dccm_wr_bypass_d_m_hi), .dout(dccm_wr_bypass_d_m_hi_Q), .clk(lsu_free_c2_clk));
rvdff #(32) store_data_rff (.*, .din(store_data_m[31:0]), .dout(store_data_r[31:0]), .clk(lsu_store_c1_r_clk));
end else begin: L2U1_Plus1_0
logic [31:0] store_data_hi_m, store_data_lo_m;
logic [63:0] store_data_mask;
assign {store_data_hi_m[31:0] , store_data_lo_m[31:0]} = {32'b0,store_data_m[31:0]} << 8*lsu_addr_m[1:0];
for (genvar i=0; i<4; i++) begin
assign store_data_hi_r_in[(8*i)+7:(8*i)] = store_byteen_ext_m[i+4] ? store_data_hi_m[(8*i)+7:(8*i)] :
((lsu_stbuf_commit_any & dccm_wr_bypass_d_m_hi) ? stbuf_data_any[(8*i)+7:(8*i)] : sec_data_hi_m[(8*i)+7:(8*i)]);
assign store_data_lo_r_in[(8*i)+7:(8*i)] = store_byteen_ext_m[i] ? store_data_lo_m[(8*i)+7:(8*i)] :
((lsu_stbuf_commit_any & dccm_wr_bypass_d_m_lo) ? stbuf_data_any[(8*i)+7:(8*i)] : sec_data_lo_m[(8*i)+7:(8*i)]);
assign store_datafn_lo_r[(8*i)+7:(8*i)] = (lsu_stbuf_commit_any & dccm_wr_bypass_d_r_lo & ~store_byteen_ext_r[i]) ? stbuf_data_any[(8*i)+7:(8*i)] : store_data_lo_r[(8*i)+7:(8*i)];
assign store_datafn_hi_r[(8*i)+7:(8*i)] = (lsu_stbuf_commit_any & dccm_wr_bypass_d_r_hi & ~store_byteen_ext_r[i+4]) ? stbuf_data_any[(8*i)+7:(8*i)] : store_data_hi_r[(8*i)+7:(8*i)];
end // for (genvar i=0; i<BYTE_WIDTH; i++)
for (genvar i=0; i<4; i++) begin
assign store_data_mask[(8*i)+7:(8*i)] = {8{store_byteen_r[i]}};
end
assign store_data_r[31:0] = 32'({store_data_hi_r[31:0],store_data_lo_r[31:0]} >> 8*lsu_addr_r[1:0]) & store_data_mask[31:0];
rvdffe #(pt.DCCM_DATA_WIDTH) store_data_hi_rff (.*, .din(store_data_hi_r_in[pt.DCCM_DATA_WIDTH-1:0]), .dout(store_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .en((ldst_dual_m & lsu_pkt_m.valid & lsu_pkt_m.store) | clk_override), .clk(clk));
rvdff #(pt.DCCM_DATA_WIDTH) store_data_lo_rff (.*, .din(store_data_lo_r_in[pt.DCCM_DATA_WIDTH-1:0]), .dout(store_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .clk(lsu_store_c1_r_clk));
end
assign dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0] = dccm_rd_data_lo[pt.DCCM_DATA_WIDTH-1:0]; // for ld choose dccm_out
assign dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0] = dccm_rd_data_hi[pt.DCCM_DATA_WIDTH-1:0]; // for ld this is used for ecc
assign dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_lo[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH];
assign dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_hi[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH];
// PIC signals. PIC ignores the lower 2 bits of address since PIC memory registers are 32-bits
assign picm_wren = (lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_pic_r & lsu_commit_r) | dma_pic_wen;
assign picm_rden = lsu_pkt_d.valid & lsu_pkt_d.load & addr_in_pic_d;
assign picm_mken = lsu_pkt_d.valid & lsu_pkt_d.store & addr_in_pic_d; // Get the mask for stores
assign picm_rdaddr[31:0] = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},lsu_addr_d[pt.PIC_BITS-1:0]};
assign picm_wraddr[31:0] = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},(dma_pic_wen ? dma_mem_addr[pt.PIC_BITS-1:0] : lsu_addr_r[pt.PIC_BITS-1:0])};
assign picm_wr_data[31:0] = dma_pic_wen ? dma_mem_wdata[31:0] : store_datafn_lo_r[31:0];
assign picm_mask_data_m[31:0] = picm_rd_data_m[31:0];
assign picm_rd_data_m[63:0] = {picm_rd_data[31:0],picm_rd_data[31:0]};
if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
rvdff #(1) dccm_rden_mff (.*, .din(lsu_dccm_rden_d), .dout(lsu_dccm_rden_m), .clk(lsu_c2_m_clk));
rvdff #(1) dccm_rden_rff (.*, .din(lsu_dccm_rden_m), .dout(lsu_dccm_rden_r), .clk(lsu_c2_r_clk));
// ECC correction flops since dccm write happens next cycle
// We are writing to dccm in r+1 for ecc correction since fast_int needs to be blocked in decode - 1. We can probably write in r for plus0 configuration since we know ecc error in M.
// In that case these (_ff) flops are needed only in plus1 configuration
rvdff #(1) ld_double_ecc_error_rff (.*, .din(lsu_double_ecc_error_r), .dout(lsu_double_ecc_error_r_ff), .clk(lsu_free_c2_clk));
rvdff #(1) ld_single_ecc_error_hi_rff (.*, .din(ld_single_ecc_error_hi_r_ns), .dout(ld_single_ecc_error_hi_r_ff), .clk(lsu_free_c2_clk));
rvdff #(1) ld_single_ecc_error_lo_rff (.*, .din(ld_single_ecc_error_lo_r_ns), .dout(ld_single_ecc_error_lo_r_ff), .clk(lsu_free_c2_clk));
rvdffe #(pt.DCCM_BITS) ld_sec_addr_hi_rff (.*, .din(end_addr_r[pt.DCCM_BITS-1:0]), .dout(ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk));
rvdffe #(pt.DCCM_BITS) ld_sec_addr_lo_rff (.*, .din(lsu_addr_r[pt.DCCM_BITS-1:0]), .dout(ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk));
end else begin: Gen_dccm_disable
assign lsu_dccm_rden_m = '0;
assign lsu_dccm_rden_r = '0;
assign lsu_double_ecc_error_r_ff = 1'b0;
assign ld_single_ecc_error_hi_r_ff = 1'b0;
assign ld_single_ecc_error_lo_r_ff = 1'b0;
assign ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0] = '0;
assign ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0] = '0;
end
`ifdef RV_ASSERT_ON
// Load single ECC error correction implies commit/dma
property ld_single_ecc_error_commit;
@(posedge clk) disable iff(~rst_l) (ld_single_ecc_error_r_ff & dccm_wren) |-> ($past(lsu_commit_r | lsu_pkt_r.dma));
endproperty
assert_ld_single_ecc_error_commit: assert property (ld_single_ecc_error_commit) else
$display("No commit or DMA but ECC correction happened");
`endif
endmodule

View File

@ -0,0 +1,292 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: DCCM for LSU pipe
// Comments: Single ported memory
//
//
// DC1 -> DC2 -> DC3 -> DC4 (Commit)
//
// //********************************************************************************
`define EL2_LOCAL_DCCM_RAM_TEST_PORTS .TEST1(dccm_ext_in_pkt[i].TEST1), \
.RME(dccm_ext_in_pkt[i].RME), \
.RM(dccm_ext_in_pkt[i].RM), \
.LS(dccm_ext_in_pkt[i].LS), \
.DS(dccm_ext_in_pkt[i].DS), \
.SD(dccm_ext_in_pkt[i].SD), \
.TEST_RNM(dccm_ext_in_pkt[i].TEST_RNM), \
.BC1(dccm_ext_in_pkt[i].BC1), \
.BC2(dccm_ext_in_pkt[i].BC2), \
module el2_lsu_dccm_mem
import el2_pkg::*;
#(
`include "el2_param.vh"
)(
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic rst_l, // reset, active low
input logic clk_override, // Override non-functional clock gating
input logic dccm_wren, // write enable
input logic dccm_rden, // read enable
input logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // write address
input logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // write address
input logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // read address
input logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // read address for the upper bank in case of a misaligned access
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // write data
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // write data
input el2_dccm_ext_in_pkt_t [pt.DCCM_NUM_BANKS-1:0] dccm_ext_in_pkt, // the dccm packet from the soc
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // read data from the lo bank
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // read data from the hi bank
input logic scan_mode
);
localparam DCCM_WIDTH_BITS = $clog2(pt.DCCM_BYTE_WIDTH);
localparam DCCM_INDEX_BITS = (pt.DCCM_BITS - pt.DCCM_BANK_BITS - pt.DCCM_WIDTH_BITS);
localparam DCCM_INDEX_DEPTH = ((pt.DCCM_SIZE)*1024)/((pt.DCCM_BYTE_WIDTH)*(pt.DCCM_NUM_BANKS)); // Depth of memory bank
logic [pt.DCCM_NUM_BANKS-1:0] wren_bank;
logic [pt.DCCM_NUM_BANKS-1:0] rden_bank;
logic [pt.DCCM_NUM_BANKS-1:0] [pt.DCCM_BITS-1:(pt.DCCM_BANK_BITS+2)] addr_bank;
logic [pt.DCCM_BITS-1:(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)] rd_addr_even, rd_addr_odd;
logic rd_unaligned, wr_unaligned;
logic [pt.DCCM_NUM_BANKS-1:0] [pt.DCCM_FDATA_WIDTH-1:0] dccm_bank_dout;
logic [pt.DCCM_FDATA_WIDTH-1:0] wrdata;
logic [pt.DCCM_NUM_BANKS-1:0][pt.DCCM_FDATA_WIDTH-1:0] wr_data_bank;
logic [(DCCM_WIDTH_BITS+pt.DCCM_BANK_BITS-1):DCCM_WIDTH_BITS] dccm_rd_addr_lo_q;
logic [(DCCM_WIDTH_BITS+pt.DCCM_BANK_BITS-1):DCCM_WIDTH_BITS] dccm_rd_addr_hi_q;
logic [pt.DCCM_NUM_BANKS-1:0] dccm_clken;
assign rd_unaligned = (dccm_rd_addr_lo[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] != dccm_rd_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]);
assign wr_unaligned = (dccm_wr_addr_lo[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] != dccm_wr_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]);
// Align the read data
assign dccm_rd_data_lo[pt.DCCM_FDATA_WIDTH-1:0] = dccm_bank_dout[dccm_rd_addr_lo_q[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]][pt.DCCM_FDATA_WIDTH-1:0];
assign dccm_rd_data_hi[pt.DCCM_FDATA_WIDTH-1:0] = dccm_bank_dout[dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]][pt.DCCM_FDATA_WIDTH-1:0];
// 8 Banks, 16KB each (2048 x 72)
for (genvar i=0; i<pt.DCCM_NUM_BANKS; i++) begin: mem_bank
assign wren_bank[i] = dccm_wren & ((dccm_wr_addr_hi[2+:pt.DCCM_BANK_BITS] == i) | (dccm_wr_addr_lo[2+:pt.DCCM_BANK_BITS] == i));
assign rden_bank[i] = dccm_rden & ((dccm_rd_addr_hi[2+:pt.DCCM_BANK_BITS] == i) | (dccm_rd_addr_lo[2+:pt.DCCM_BANK_BITS] == i));
assign addr_bank[i][(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] = wren_bank[i] ? (((dccm_wr_addr_hi[2+:pt.DCCM_BANK_BITS] == i) & wr_unaligned) ?
dccm_wr_addr_hi[(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] :
dccm_wr_addr_lo[(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS]) :
(((dccm_rd_addr_hi[2+:pt.DCCM_BANK_BITS] == i) & rd_unaligned) ?
dccm_rd_addr_hi[(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] :
dccm_rd_addr_lo[(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS]);
assign wr_data_bank[i] = ((dccm_wr_addr_hi[2+:pt.DCCM_BANK_BITS] == i) & wr_unaligned) ? dccm_wr_data_hi[pt.DCCM_FDATA_WIDTH-1:0] : dccm_wr_data_lo[pt.DCCM_FDATA_WIDTH-1:0];
// clock gating section
assign dccm_clken[i] = (wren_bank[i] | rden_bank[i] | clk_override) ;
// end clock gating section
`ifdef VERILATOR
el2_ram #(DCCM_INDEX_DEPTH,39) ram (
// Primary ports
.ME(dccm_clken[i]),
.CLK(clk),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
.Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
.ROP ( ),
// These are used by SoC
`EL2_LOCAL_DCCM_RAM_TEST_PORTS
.*
);
`else
if (DCCM_INDEX_DEPTH == 32768) begin : dccm
ram_32768x39 dccm_bank (
// Primary ports
.ME(dccm_clken[i]),
.CLK(clk),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
.Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
.ROP ( ),
// These are used by SoC
`EL2_LOCAL_DCCM_RAM_TEST_PORTS
.*
);
end
else if (DCCM_INDEX_DEPTH == 16384) begin : dccm
ram_16384x39 dccm_bank (
// Primary ports
.ME(dccm_clken[i]),
.CLK(clk),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
.Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
.ROP ( ),
// These are used by SoC
`EL2_LOCAL_DCCM_RAM_TEST_PORTS
.*
);
end
else if (DCCM_INDEX_DEPTH == 8192) begin : dccm
ram_8192x39 dccm_bank (
// Primary ports
.ME(dccm_clken[i]),
.CLK(clk),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
.Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
.ROP ( ),
// These are used by SoC
`EL2_LOCAL_DCCM_RAM_TEST_PORTS
.*
);
end
else if (DCCM_INDEX_DEPTH == 4096) begin : dccm
ram_4096x39 dccm_bank (
// Primary ports
.ME(dccm_clken[i]),
.CLK(clk),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
.Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
.ROP ( ),
// These are used by SoC
`EL2_LOCAL_DCCM_RAM_TEST_PORTS
.*
);
end
else if (DCCM_INDEX_DEPTH == 3072) begin : dccm
ram_3072x39 dccm_bank (
// Primary ports
.ME(dccm_clken[i]),
.CLK(clk),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
.Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
.ROP ( ),
// These are used by SoC
`EL2_LOCAL_DCCM_RAM_TEST_PORTS
.*
);
end
else if (DCCM_INDEX_DEPTH == 2048) begin : dccm
ram_2048x39 dccm_bank (
// Primary ports
.ME(dccm_clken[i]),
.CLK(clk),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
.Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
.ROP ( ),
// These are used by SoC
`EL2_LOCAL_DCCM_RAM_TEST_PORTS
.*
);
end
else if (DCCM_INDEX_DEPTH == 1024) begin : dccm
ram_1024x39 dccm_bank (
// Primary ports
.ME(dccm_clken[i]),
.CLK(clk),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
.Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
.ROP ( ),
// These are used by SoC
`EL2_LOCAL_DCCM_RAM_TEST_PORTS
.*
);
end
else if (DCCM_INDEX_DEPTH == 512) begin : dccm
ram_512x39 dccm_bank (
// Primary ports
.ME(dccm_clken[i]),
.CLK(clk),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
.Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
.ROP ( ),
// These are used by SoC
`EL2_LOCAL_DCCM_RAM_TEST_PORTS
.*
);
end
else if (DCCM_INDEX_DEPTH == 256) begin : dccm
ram_256x39 dccm_bank (
// Primary ports
.ME(dccm_clken[i]),
.CLK(clk),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
.Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
.ROP ( ),
// These are used by SoC
`EL2_LOCAL_DCCM_RAM_TEST_PORTS
.*
);
end
else if (DCCM_INDEX_DEPTH == 128) begin : dccm
ram_128x39 dccm_bank (
// Primary ports
.ME(dccm_clken[i]),
.CLK(clk),
.WE(wren_bank[i]),
.ADR(addr_bank[i]),
.D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]),
.Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]),
.ROP ( ),
// These are used by SoC
`EL2_LOCAL_DCCM_RAM_TEST_PORTS
.*
);
end
`endif
end : mem_bank
// Flops
rvdff #(pt.DCCM_BANK_BITS) rd_addr_lo_ff (.*, .din(dccm_rd_addr_lo[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]), .dout(dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]), .clk(active_clk));
rvdff #(pt.DCCM_BANK_BITS) rd_addr_hi_ff (.*, .din(dccm_rd_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]), .dout(dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]), .clk(active_clk));
`undef EL2_LOCAL_DCCM_RAM_TEST_PORTS
endmodule // el2_lsu_dccm_mem

View File

@ -0,0 +1,241 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: Top level file for load store unit
// Comments:
//
//
// DC1 -> DC2 -> DC3 -> DC4 (Commit)
//
//********************************************************************************
module el2_lsu_ecc
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic lsu_c2_r_clk, // clock
input logic clk_override, // Override non-functional clock gating
input logic rst_l, // reset, active low
input logic scan_mode, // scan mode
input el2_lsu_pkt_t lsu_pkt_m, // packet in m
input el2_lsu_pkt_t lsu_pkt_r, // packet in r
input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any,
input logic dec_tlu_core_ecc_disable, // disables the ecc computation and error flagging
input logic lsu_dccm_rden_r, // dccm rden
input logic addr_in_dccm_r, // address in dccm
input logic [pt.DCCM_BITS-1:0] lsu_addr_r, // start address
input logic [pt.DCCM_BITS-1:0] end_addr_r, // end address
input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r, // data from the dccm
input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r, // data from the dccm
input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_r, // data from the dccm + ecc
input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_r, // data from the dccm + ecc
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r, // corrected dccm data R-stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r, // corrected dccm data R-stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff, // corrected dccm data R+1 stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, // corrected dccm data R+1 stage
input logic ld_single_ecc_error_r, // ld has a single ecc error
input logic ld_single_ecc_error_r_ff, // ld has a single ecc error
input logic lsu_dccm_rden_m, // dccm rden
input logic addr_in_dccm_m, // address in dccm
input logic [pt.DCCM_BITS-1:0] lsu_addr_m, // start address
input logic [pt.DCCM_BITS-1:0] end_addr_m, // end address
input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m, // raw data from mem
input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m, // raw data from mem
input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_m, // ecc read out from mem
input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_m, // ecc read out from mem
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m, // corrected dccm data M-stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m, // corrected dccm data M-stage
input logic dma_dccm_wen, // Perform DMA writes only for word/dword
input logic [31:0] dma_dccm_wdata_lo, // Shifted dma data to lower bits to make it consistent to lsu stores
input logic [31:0] dma_dccm_wdata_hi, // Shifted dma data to lower bits to make it consistent to lsu stores
output logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata
output logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, // ECC bits for the DMA wdata
output logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, // Encoded data with ECC bits
output logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, // Encoded data with ECC bits
output logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_lo_r_ff, // Encoded data with ECC bits
output logic single_ecc_error_hi_r, // sec detected
output logic single_ecc_error_lo_r, // sec detected on lower dccm bank
output logic lsu_single_ecc_error_r, // or of the 2
output logic lsu_double_ecc_error_r, // double error detected
output logic lsu_single_ecc_error_m, // or of the 2
output logic lsu_double_ecc_error_m // double error detected
);
logic is_ldst_r;
logic is_ldst_hi_any, is_ldst_lo_any;
logic [pt.DCCM_DATA_WIDTH-1:0] dccm_wdata_hi_any, dccm_wdata_lo_any;
logic [pt.DCCM_ECC_WIDTH-1:0] dccm_wdata_ecc_hi_any, dccm_wdata_ecc_lo_any;
logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_any, dccm_rdata_lo_any;
logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_any, dccm_data_ecc_lo_any;
logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_any, sec_data_lo_any;
logic single_ecc_error_hi_any, single_ecc_error_lo_any;
logic double_ecc_error_hi_any, double_ecc_error_lo_any;
logic double_ecc_error_hi_m, double_ecc_error_lo_m;
logic double_ecc_error_hi_r, double_ecc_error_lo_r;
logic [6:0] ecc_out_hi_nc, ecc_out_lo_nc;
if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1
logic ldst_dual_m, ldst_dual_r;
logic is_ldst_m;
logic is_ldst_hi_r, is_ldst_lo_r;
assign ldst_dual_r = (lsu_addr_r[2] != end_addr_r[2]);
assign is_ldst_r = lsu_pkt_r.valid & (lsu_pkt_r.load | lsu_pkt_r.store) & addr_in_dccm_r & lsu_dccm_rden_r;
assign is_ldst_lo_r = is_ldst_r & ~dec_tlu_core_ecc_disable;
assign is_ldst_hi_r = is_ldst_r & ldst_dual_r & ~dec_tlu_core_ecc_disable; // Always check the ECC Hi/Lo for DMA since we don't align for DMA
assign is_ldst_hi_any = is_ldst_hi_r;
assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0];
assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0];
assign is_ldst_lo_any = is_ldst_lo_r;
assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0];
assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0];
assign sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0] = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0];
assign single_ecc_error_hi_r = single_ecc_error_hi_any;
assign double_ecc_error_hi_r = double_ecc_error_hi_any;
assign sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0] = sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0];
assign single_ecc_error_lo_r = single_ecc_error_lo_any;
assign double_ecc_error_lo_r = double_ecc_error_lo_any;
assign lsu_single_ecc_error_r = single_ecc_error_hi_r | single_ecc_error_lo_r;
assign lsu_double_ecc_error_r = double_ecc_error_hi_r | double_ecc_error_lo_r;
end else begin: L2U_Plus1_0
logic ldst_dual_m;
logic is_ldst_m;
logic is_ldst_hi_m, is_ldst_lo_m;
assign ldst_dual_m = (lsu_addr_m[2] != end_addr_m[2]);
assign is_ldst_m = lsu_pkt_m.valid & (lsu_pkt_m.load | lsu_pkt_m.store) & addr_in_dccm_m & lsu_dccm_rden_m;
assign is_ldst_lo_m = is_ldst_m & ~dec_tlu_core_ecc_disable;
assign is_ldst_hi_m = is_ldst_m & (ldst_dual_m | lsu_pkt_m.dma) & ~dec_tlu_core_ecc_disable; // Always check the ECC Hi/Lo for DMA since we don't align for DMA
assign is_ldst_hi_any = is_ldst_hi_m;
assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0];
assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0];
assign is_ldst_lo_any = is_ldst_lo_m;
assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0];
assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0];
assign sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0] = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0];
assign double_ecc_error_hi_m = double_ecc_error_hi_any;
assign sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0] = sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0];
assign double_ecc_error_lo_m = double_ecc_error_lo_any;
assign lsu_single_ecc_error_m = single_ecc_error_hi_any | single_ecc_error_lo_any;
assign lsu_double_ecc_error_m = double_ecc_error_hi_m | double_ecc_error_lo_m;
// Flops
rvdff #(1) lsu_single_ecc_err_r (.din(lsu_single_ecc_error_m), .dout(lsu_single_ecc_error_r), .clk(lsu_c2_r_clk), .*);
rvdff #(1) lsu_double_ecc_err_r (.din(lsu_double_ecc_error_m), .dout(lsu_double_ecc_error_r), .clk(lsu_c2_r_clk), .*);
rvdff #(.WIDTH(1)) ldst_sec_lo_rff (.din(single_ecc_error_lo_any), .dout(single_ecc_error_lo_r), .clk(lsu_c2_r_clk), .*);
rvdff #(.WIDTH(1)) ldst_sec_hi_rff (.din(single_ecc_error_hi_any), .dout(single_ecc_error_hi_r), .clk(lsu_c2_r_clk), .*);
rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_hi_rff (.din(sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_single_ecc_error_m | clk_override), .*);
rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_lo_rff (.din(sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_single_ecc_error_m | clk_override), .*);
end
// Logic for ECC generation during write
assign dccm_wdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? dma_dccm_wdata_lo[pt.DCCM_DATA_WIDTH-1:0] : stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]);
assign dccm_wdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? dma_dccm_wdata_hi[pt.DCCM_DATA_WIDTH-1:0] : 32'h0);
assign sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0];
assign sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0];
assign stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0];
assign dma_dccm_wdata_ecc_hi[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0];
assign dma_dccm_wdata_ecc_lo[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0];
// Instantiate ECC blocks
if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
//Detect/Repair for Hi
rvecc_decode lsu_ecc_decode_hi (
// Inputs
.en(is_ldst_hi_any),
.sed_ded (1'b0), // 1 : means only detection
.din(dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0]),
.ecc_in(dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]),
// Outputs
.dout(sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0]),
.ecc_out (ecc_out_hi_nc[6:0]),
.single_ecc_error(single_ecc_error_hi_any),
.double_ecc_error(double_ecc_error_hi_any),
.*
);
//Detect/Repair for Lo
rvecc_decode lsu_ecc_decode_lo (
// Inputs
.en(is_ldst_lo_any),
.sed_ded (1'b0), // 1 : means only detection
.din(dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] ),
.ecc_in(dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]),
// Outputs
.dout(sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0]),
.ecc_out (ecc_out_lo_nc[6:0]),
.single_ecc_error(single_ecc_error_lo_any),
.double_ecc_error(double_ecc_error_lo_any),
.*
);
rvecc_encode lsu_ecc_encode_hi (
//Inputs
.din(dccm_wdata_hi_any[pt.DCCM_DATA_WIDTH-1:0]),
//Outputs
.ecc_out(dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]),
.*
);
rvecc_encode lsu_ecc_encode_lo (
//Inputs
.din(dccm_wdata_lo_any[pt.DCCM_DATA_WIDTH-1:0]),
//Outputs
.ecc_out(dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]),
.*
);
end else begin: Gen_dccm_disable // block: Gen_dccm_enable
assign sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0] = '0;
assign sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0] = '0;
assign single_ecc_error_hi_any = '0;
assign double_ecc_error_hi_any = '0;
assign single_ecc_error_lo_any = '0;
assign double_ecc_error_lo_any = '0;
end
rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_hi_rplus1ff (.din(sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk), .*);
rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_lo_rplus1ff (.din(sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk), .*);
endmodule // el2_lsu_ecc

View File

@ -0,0 +1,341 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: LSU control
// Comments:
//
//
// DC1 -> DC2 -> DC3 -> DC4 (Commit)
//
//********************************************************************************
module el2_lsu_lsc_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
)(
input logic rst_l, // reset, active low
input logic clk_override, // Override non-functional clock gating
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
// clocks per pipe
input logic lsu_c1_m_clk,
input logic lsu_c1_r_clk,
input logic lsu_c2_m_clk,
input logic lsu_c2_r_clk,
input logic lsu_store_c1_m_clk,
input logic [31:0] lsu_ld_data_r, // Load data R-stage
input logic [31:0] lsu_ld_data_corr_r, // ECC corrected data R-stage
input logic lsu_single_ecc_error_r, // ECC single bit error R-stage
input logic lsu_double_ecc_error_r, // ECC double bit error R-stage
input logic [31:0] lsu_ld_data_m, // Load data M-stage
input logic lsu_single_ecc_error_m, // ECC single bit error M-stage
input logic lsu_double_ecc_error_m, // ECC double bit error M-stage
input logic flush_m_up, // Flush M and D stage
input logic flush_r, // Flush R-stage
input logic ldst_dual_d, // load/store is unaligned at 32 bit boundary D-stage
input logic ldst_dual_m, // load/store is unaligned at 32 bit boundary M-stage
input logic ldst_dual_r, // load/store is unaligned at 32 bit boundary R-stage
input logic [31:0] exu_lsu_rs1_d, // address
input logic [31:0] exu_lsu_rs2_d, // store data
input el2_lsu_pkt_t lsu_p, // lsu control packet
input logic dec_lsu_valid_raw_d, // Raw valid for address computation
input logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses
input logic [31:0] picm_mask_data_m, // PIC data M-stage
input logic [31:0] bus_read_data_m, // the bus return data
output logic [31:0] lsu_result_m, // lsu load data
output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF
// lsu address down the pipe
output logic [31:0] lsu_addr_d,
output logic [31:0] lsu_addr_m,
output logic [31:0] lsu_addr_r,
// lsu address down the pipe - needed to check unaligned
output logic [31:0] end_addr_d,
output logic [31:0] end_addr_m,
output logic [31:0] end_addr_r,
// store data down the pipe
output logic [31:0] store_data_m,
input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control
output logic lsu_exc_m, // Access or misaligned fault
output logic is_sideeffects_m, // is sideffects space
output logic lsu_commit_r, // lsu instruction in r commits
output logic lsu_single_ecc_error_incr,// LSU inc SB error counter
output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet
output logic [31:1] lsu_fir_addr, // fast interrupt address
output logic [1:0] lsu_fir_error, // Error during fast interrupt lookup
// address in dccm/pic/external per pipe stage
output logic addr_in_dccm_d,
output logic addr_in_dccm_m,
output logic addr_in_dccm_r,
output logic addr_in_pic_d,
output logic addr_in_pic_m,
output logic addr_in_pic_r,
output logic addr_external_m,
// DMA slave
input logic dma_dccm_req,
input logic [31:0] dma_mem_addr,
input logic [2:0] dma_mem_sz,
input logic dma_mem_write,
input logic [63:0] dma_mem_wdata,
// Store buffer related signals
output el2_lsu_pkt_t lsu_pkt_d,
output el2_lsu_pkt_t lsu_pkt_m,
output el2_lsu_pkt_t lsu_pkt_r,
input logic scan_mode // Scan mode
);
logic [31:3] end_addr_pre_m, end_addr_pre_r;
logic [31:0] full_addr_d;
logic [31:0] full_end_addr_d;
logic [31:0] lsu_rs1_d;
logic [11:0] lsu_offset_d;
logic [31:0] rs1_d;
logic [11:0] offset_d;
logic [12:0] end_addr_offset_d;
logic [2:0] addr_offset_d;
logic [63:0] dma_mem_wdata_shifted;
logic addr_external_d;
logic addr_external_r;
logic access_fault_d, misaligned_fault_d;
logic access_fault_m, misaligned_fault_m;
logic fir_dccm_access_error_d, fir_nondccm_access_error_d;
logic fir_dccm_access_error_m, fir_nondccm_access_error_m;
logic [3:0] exc_mscause_d, exc_mscause_m;
logic [31:0] rs1_d_raw;
logic [31:0] store_data_d, store_data_pre_m, store_data_m_in;
logic [31:0] bus_read_data_r;
el2_lsu_pkt_t dma_pkt_d;
el2_lsu_pkt_t lsu_pkt_m_in, lsu_pkt_r_in;
el2_lsu_error_pkt_t lsu_error_pkt_m;
// Premux the rs1/offset for dma
assign lsu_rs1_d[31:0] = dec_lsu_valid_raw_d ? exu_lsu_rs1_d[31:0] : dma_mem_addr[31:0];
assign lsu_offset_d[11:0] = dec_lsu_offset_d[11:0] & {12{dec_lsu_valid_raw_d}};
assign rs1_d_raw[31:0] = lsu_rs1_d[31:0];
assign offset_d[11:0] = lsu_offset_d[11:0];
assign rs1_d[31:0] = (lsu_pkt_d.load_ldst_bypass_d) ? lsu_result_m[31:0] : rs1_d_raw[31:0];
// generate the ls address
rvlsadder lsadder (.rs1(rs1_d[31:0]),
.offset(offset_d[11:0]),
.dout(full_addr_d[31:0])
);
// Module to generate the memory map of the address
el2_lsu_addrcheck addrcheck (
.start_addr_d(full_addr_d[31:0]),
.end_addr_d(full_end_addr_d[31:0]),
.rs1_region_d(rs1_d[31:28]),
.*
);
// Calculate start/end address for load/store
assign addr_offset_d[2:0] = ({3{lsu_pkt_d.half}} & 3'b01) | ({3{lsu_pkt_d.word}} & 3'b11) | ({3{lsu_pkt_d.dword}} & 3'b111);
assign end_addr_offset_d[12:0] = {offset_d[11],offset_d[11:0]} + {9'b0,addr_offset_d[2:0]};
assign full_end_addr_d[31:0] = rs1_d[31:0] + {{19{end_addr_offset_d[12]}},end_addr_offset_d[12:0]};
assign end_addr_d[31:0] = full_end_addr_d[31:0];
assign lsu_exc_m = access_fault_m | misaligned_fault_m;
// Goes to TLU to increment the ECC error counter
assign lsu_single_ecc_error_incr = (lsu_single_ecc_error_r & ~lsu_double_ecc_error_r) & (lsu_commit_r | lsu_pkt_r.dma) & lsu_pkt_r.valid;
if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1
logic access_fault_r, misaligned_fault_r;
logic [3:0] exc_mscause_r;
logic fir_dccm_access_error_r, fir_nondccm_access_error_r;
// Generate exception packet
assign lsu_error_pkt_r.exc_valid = (access_fault_r | misaligned_fault_r | lsu_double_ecc_error_r) & lsu_pkt_r.valid & ~lsu_pkt_r.dma & ~lsu_pkt_r.fast_int;
assign lsu_error_pkt_r.single_ecc_error = lsu_single_ecc_error_r & ~lsu_error_pkt_r.exc_valid & ~lsu_pkt_r.dma;
assign lsu_error_pkt_r.inst_type = lsu_pkt_r.store;
assign lsu_error_pkt_r.exc_type = ~misaligned_fault_r;
assign lsu_error_pkt_r.mscause[3:0] = (lsu_double_ecc_error_r & ~misaligned_fault_r & ~access_fault_r) ? 4'h1 : exc_mscause_r[3:0];
assign lsu_error_pkt_r.addr[31:0] = lsu_addr_r[31:0];
assign lsu_fir_error[1:0] = fir_nondccm_access_error_r ? 2'b11 : (fir_dccm_access_error_r ? 2'b10 : ((lsu_pkt_r.fast_int & lsu_double_ecc_error_r) ? 2'b01 : 2'b00));
rvdff #(1) access_fault_rff (.din(access_fault_m), .dout(access_fault_r), .clk(lsu_c1_r_clk), .*);
rvdff #(1) misaligned_fault_rff (.din(misaligned_fault_m), .dout(misaligned_fault_r), .clk(lsu_c1_r_clk), .*);
rvdff #(4) exc_mscause_rff (.din(exc_mscause_m[3:0]), .dout(exc_mscause_r[3:0]), .clk(lsu_c1_r_clk), .*);
rvdff #(1) fir_dccm_access_error_mff (.din(fir_dccm_access_error_m), .dout(fir_dccm_access_error_r), .clk(lsu_c1_r_clk), .*);
rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_m), .dout(fir_nondccm_access_error_r), .clk(lsu_c1_r_clk), .*);
end else begin: L2U_Plus1_0
logic [1:0] lsu_fir_error_m;
// Generate exception packet
assign lsu_error_pkt_m.exc_valid = (access_fault_m | misaligned_fault_m | lsu_double_ecc_error_m) & lsu_pkt_m.valid & ~lsu_pkt_m.dma & ~lsu_pkt_m.fast_int & ~flush_m_up;
assign lsu_error_pkt_m.single_ecc_error = lsu_single_ecc_error_m & ~lsu_error_pkt_m.exc_valid & ~lsu_pkt_m.dma;
assign lsu_error_pkt_m.inst_type = lsu_pkt_m.store;
assign lsu_error_pkt_m.exc_type = ~misaligned_fault_m;
assign lsu_error_pkt_m.mscause[3:0] = (lsu_double_ecc_error_m & ~misaligned_fault_m & ~access_fault_m) ? 4'h1 : exc_mscause_m[3:0];
assign lsu_error_pkt_m.addr[31:0] = lsu_addr_m[31:0];
assign lsu_fir_error_m[1:0] = fir_nondccm_access_error_m ? 2'b11 : (fir_dccm_access_error_m ? 2'b10 : ((lsu_pkt_m.fast_int & lsu_double_ecc_error_m) ? 2'b01 : 2'b00));
rvdff #(1) lsu_exc_valid_rff (.*, .din(lsu_error_pkt_m.exc_valid), .dout(lsu_error_pkt_r.exc_valid), .clk(lsu_c2_r_clk));
rvdff #(1) lsu_single_ecc_error_rff(.*, .din(lsu_error_pkt_m.single_ecc_error), .dout(lsu_error_pkt_r.single_ecc_error), .clk(lsu_c2_r_clk));
rvdffe #($bits(el2_lsu_error_pkt_t)-2) lsu_error_pkt_rff (.*, .din(lsu_error_pkt_m[$bits(el2_lsu_error_pkt_t)-1:2]), .dout(lsu_error_pkt_r[$bits(el2_lsu_error_pkt_t)-1:2]), .en(lsu_error_pkt_m.exc_valid | lsu_error_pkt_m.single_ecc_error | clk_override));
rvdff #(2) lsu_fir_error_rff (.*, .din(lsu_fir_error_m[1:0]), .dout(lsu_fir_error[1:0]), .clk(lsu_c2_r_clk));
end
//Create DMA packet
always_comb begin
dma_pkt_d = '0;
dma_pkt_d.valid = dma_dccm_req;
dma_pkt_d.dma = 1'b1;
dma_pkt_d.store = dma_mem_write;
dma_pkt_d.load = ~dma_mem_write;
dma_pkt_d.by = (dma_mem_sz[2:0] == 3'b0);
dma_pkt_d.half = (dma_mem_sz[2:0] == 3'b1);
dma_pkt_d.word = (dma_mem_sz[2:0] == 3'b10);
dma_pkt_d.dword = (dma_mem_sz[2:0] == 3'b11);
end
always_comb begin
lsu_pkt_d = dec_lsu_valid_raw_d ? lsu_p : dma_pkt_d;
lsu_pkt_m_in = lsu_pkt_d;
lsu_pkt_r_in = lsu_pkt_m;
lsu_pkt_d.valid = (lsu_p.valid & ~(flush_m_up & ~lsu_p.fast_int)) | dma_dccm_req;
lsu_pkt_m_in.valid = lsu_pkt_d.valid & ~(flush_m_up & ~lsu_pkt_d.dma);
lsu_pkt_r_in.valid = lsu_pkt_m.valid & ~(flush_m_up & ~lsu_pkt_m.dma) ;
end
// C2 clock for valid and C1 for other bits of packet
rvdff #(1) lsu_pkt_vldmff (.*, .din(lsu_pkt_m_in.valid), .dout(lsu_pkt_m.valid), .clk(lsu_c2_m_clk));
rvdff #(1) lsu_pkt_vldrff (.*, .din(lsu_pkt_r_in.valid), .dout(lsu_pkt_r.valid), .clk(lsu_c2_r_clk));
rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_mff (.*, .din(lsu_pkt_m_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_m[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_m_clk));
rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_rff (.*, .din(lsu_pkt_r_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_r[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_r_clk));
if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U1_Plus1_1
logic [31:0] lsu_ld_datafn_r, lsu_ld_datafn_corr_r;
assign lsu_ld_datafn_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_r[31:0];
assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
// this is really R stage signal
assign lsu_result_m[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by }} & {24'b0,lsu_ld_datafn_r[7:0]}) |
({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_r[15:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.by }} & {{24{ lsu_ld_datafn_r[7]}}, lsu_ld_datafn_r[7:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_r[15]}},lsu_ld_datafn_r[15:0]}) |
({32{lsu_pkt_r.word}} & lsu_ld_datafn_r[31:0]);
// this signal is used for gpr update
assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.by }} & {{24{ lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
({32{lsu_pkt_r.word}} & lsu_ld_datafn_corr_r[31:0]);
end else begin: L2U1_Plus1_0 // block: L2U1_Plus1_1
logic [31:0] lsu_ld_datafn_m, lsu_ld_datafn_corr_r;
assign lsu_ld_datafn_m[31:0] = addr_external_m ? bus_read_data_m[31:0] : lsu_ld_data_m[31:0];
assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
// this result must look at prior stores and merge them in
assign lsu_result_m[31:0] = ({32{ lsu_pkt_m.unsign & lsu_pkt_m.by }} & {24'b0,lsu_ld_datafn_m[7:0]}) |
({32{ lsu_pkt_m.unsign & lsu_pkt_m.half}} & {16'b0,lsu_ld_datafn_m[15:0]}) |
({32{~lsu_pkt_m.unsign & lsu_pkt_m.by }} & {{24{ lsu_ld_datafn_m[7]}}, lsu_ld_datafn_m[7:0]}) |
({32{~lsu_pkt_m.unsign & lsu_pkt_m.half}} & {{16{ lsu_ld_datafn_m[15]}},lsu_ld_datafn_m[15:0]}) |
({32{lsu_pkt_m.word}} & lsu_ld_datafn_m[31:0]);
// this signal is used for gpr update
assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.by }} & {{24{ lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
({32{lsu_pkt_r.word}} & lsu_ld_datafn_corr_r[31:0]);
end
// Fast interrupt address
assign lsu_fir_addr[31:1] = lsu_ld_data_corr_r[31:1];
// absence load/store all 0's
assign lsu_addr_d[31:0] = full_addr_d[31:0];
// Interrupt as a flush source allows the WB to occur
assign lsu_commit_r = lsu_pkt_r.valid & (lsu_pkt_r.store | lsu_pkt_r.load) & ~flush_r & ~lsu_pkt_r.dma;
assign dma_mem_wdata_shifted[63:0] = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores
assign store_data_d[31:0] = dma_dccm_req ? dma_mem_wdata_shifted[31:0] : exu_lsu_rs2_d[31:0]; // Write to PIC still happens in r stage
assign store_data_m_in[31:0] = (lsu_pkt_d.store_data_bypass_d) ? lsu_result_m[31:0] : store_data_d[31:0];
assign store_data_m[31:0] = (picm_mask_data_m[31:0] | {32{~addr_in_pic_m}}) & ((lsu_pkt_m.store_data_bypass_m) ? lsu_result_m[31:0] : store_data_pre_m[31:0]);
rvdff #(32) sdmff (.*, .din(store_data_m_in[31:0]), .dout(store_data_pre_m[31:0]), .clk(lsu_store_c1_m_clk));
rvdff #(32) samff (.*, .din(lsu_addr_d[31:0]), .dout(lsu_addr_m[31:0]), .clk(lsu_c1_m_clk));
rvdff #(32) sarff (.*, .din(lsu_addr_m[31:0]), .dout(lsu_addr_r[31:0]), .clk(lsu_c1_r_clk));
assign end_addr_m[31:3] = ldst_dual_m ? end_addr_pre_m[31:3] : lsu_addr_m[31:3]; // This is for power saving
assign end_addr_r[31:3] = ldst_dual_r ? end_addr_pre_r[31:3] : lsu_addr_r[31:3]; // This is for power saving
rvdffe #(29) end_addr_hi_mff (.*, .din(end_addr_d[31:3]), .dout(end_addr_pre_m[31:3]), .en((lsu_pkt_d.valid & ldst_dual_d) | clk_override));
rvdffe #(29) end_addr_hi_rff (.*, .din(end_addr_m[31:3]), .dout(end_addr_pre_r[31:3]), .en((lsu_pkt_m.valid & ldst_dual_m) | clk_override));
rvdff #(3) end_addr_lo_mff (.*, .din(end_addr_d[2:0]), .dout(end_addr_m[2:0]), .clk(lsu_c1_m_clk));
rvdff #(3) end_addr_lo_rff (.*, .din(end_addr_m[2:0]), .dout(end_addr_r[2:0]), .clk(lsu_c1_r_clk));
rvdff #(1) addr_in_dccm_mff(.din(addr_in_dccm_d), .dout(addr_in_dccm_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) addr_in_dccm_rff(.din(addr_in_dccm_m), .dout(addr_in_dccm_r), .clk(lsu_c1_r_clk), .*);
rvdff #(1) addr_in_pic_mff(.din(addr_in_pic_d), .dout(addr_in_pic_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) addr_in_pic_rff(.din(addr_in_pic_m), .dout(addr_in_pic_r), .clk(lsu_c1_r_clk), .*);
rvdff #(1) addr_external_mff(.din(addr_external_d), .dout(addr_external_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) addr_external_rff(.din(addr_external_m), .dout(addr_external_r), .clk(lsu_c1_r_clk), .*);
rvdff #(1) access_fault_mff (.din(access_fault_d), .dout(access_fault_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) misaligned_fault_mff (.din(misaligned_fault_d), .dout(misaligned_fault_m), .clk(lsu_c1_m_clk), .*);
rvdff #(4) exc_mscause_mff (.din(exc_mscause_d[3:0]), .dout(exc_mscause_m[3:0]), .clk(lsu_c1_m_clk), .*);
rvdff #(1) fir_dccm_access_error_mff (.din(fir_dccm_access_error_d), .dout(fir_dccm_access_error_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_d), .dout(fir_nondccm_access_error_m), .clk(lsu_c1_m_clk), .*);
rvdffe #(32) bus_read_data_r_ff (.*, .din(bus_read_data_m[31:0]), .dout(bus_read_data_r[31:0]), .en(addr_external_m | clk_override));
endmodule

View File

@ -0,0 +1,351 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: Store Buffer
// Comments: Dual writes and single drain
//
//
// DC1 -> DC2 -> DC3 -> DC4 (Commit)
//
// //********************************************************************************
module el2_lsu_stbuf
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk, // core clock
input logic rst_l, // reset
input logic lsu_stbuf_c1_clk, // stbuf clock
input logic lsu_free_c2_clk, // free clk
// Store Buffer input
input logic store_stbuf_reqvld_r, // core instruction goes to stbuf
input logic lsu_commit_r, // lsu commits
input logic dec_lsu_valid_raw_d, // Speculative decode valid
input logic [pt.DCCM_DATA_WIDTH-1:0] store_data_hi_r, // merged data from the dccm for stores. This is used for fwding
input logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r, // merged data from the dccm for stores. This is used for fwding
input logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_hi_r, // merged data from the dccm for stores
input logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_lo_r, // merged data from the dccm for stores
// Store Buffer output
output logic stbuf_reqvld_any, // stbuf is draining
output logic stbuf_reqvld_flushed_any, // Top entry is flushed
output logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any, // address
output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, // stbuf data
input logic lsu_stbuf_commit_any, // pop the stbuf as it commite
output logic lsu_stbuf_full_any, // stbuf is full
output logic lsu_stbuf_empty_any, // stbuf is empty
output logic ldst_stbuf_reqvld_r, // needed for clocking
input logic [pt.LSU_SB_BITS-1:0] lsu_addr_d, // lsu address D-stage
input logic [31:0] lsu_addr_m, // lsu address M-stage
input logic [31:0] lsu_addr_r, // lsu address R-stage
input logic [pt.LSU_SB_BITS-1:0] end_addr_d, // lsu end address D-stage - needed to check unaligned
input logic [31:0] end_addr_m, // lsu end address M-stage - needed to check unaligned
input logic [31:0] end_addr_r, // lsu end address R-stage - needed to check unaligned
input logic ldst_dual_d, ldst_dual_m, ldst_dual_r,
input logic addr_in_dccm_m, // address is in dccm
input logic addr_in_dccm_r, // address is in dccm
// Forwarding signals
input logic lsu_cmpen_m, // needed for forwarding stbuf - load
input el2_lsu_pkt_t lsu_pkt_m, // LSU packet M-stage
input el2_lsu_pkt_t lsu_pkt_r, // LSU packet R-stage
output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m, // stbuf data
output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m, // stbuf data
output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m, // stbuf data
output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m, // stbuf data
input logic scan_mode // Scan mode
);
localparam DEPTH = pt.LSU_STBUF_DEPTH;
localparam DATA_WIDTH = pt.DCCM_DATA_WIDTH;
localparam BYTE_WIDTH = pt.DCCM_BYTE_WIDTH;
localparam DEPTH_LOG2 = $clog2(DEPTH);
// These are the fields in the store queue
logic [DEPTH-1:0] stbuf_vld;
logic [DEPTH-1:0] stbuf_dma_kill;
logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addr;
logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_byteen;
logic [DEPTH-1:0][DATA_WIDTH-1:0] stbuf_data;
logic [DEPTH-1:0] sel_lo;
logic [DEPTH-1:0] stbuf_wr_en;
logic [DEPTH-1:0] stbuf_dma_kill_en;
logic [DEPTH-1:0] stbuf_reset;
logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addrin;
logic [DEPTH-1:0][DATA_WIDTH-1:0] stbuf_datain;
logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_byteenin;
logic [7:0] store_byteen_ext_r;
logic [BYTE_WIDTH-1:0] store_byteen_hi_r;
logic [BYTE_WIDTH-1:0] store_byteen_lo_r;
logic WrPtrEn, RdPtrEn;
logic [DEPTH_LOG2-1:0] WrPtr, RdPtr;
logic [DEPTH_LOG2-1:0] NxtWrPtr, NxtRdPtr;
logic [DEPTH_LOG2-1:0] WrPtrPlus1, WrPtrPlus2, RdPtrPlus1;
logic dual_stbuf_write_r;
logic isdccmst_m, isdccmst_r;
logic [3:0] stbuf_numvld_any, stbuf_specvld_any;
logic [1:0] stbuf_specvld_m, stbuf_specvld_r;
logic [pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] cmpaddr_hi_m, cmpaddr_lo_m;
// variables to detect matching from the store queue
logic [DEPTH-1:0] stbuf_match_hi, stbuf_match_lo;
logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_fwdbyteenvec_hi, stbuf_fwdbyteenvec_lo;
logic [DATA_WIDTH-1:0] stbuf_fwddata_hi_pre_m, stbuf_fwddata_lo_pre_m;
logic [BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_pre_m, stbuf_fwdbyteen_lo_pre_m;
// logic to detect matching from the pipe - needed for store - load forwarding
logic [BYTE_WIDTH-1:0] ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi;
logic ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi;
logic [BYTE_WIDTH-1:0] ld_byte_hit_lo, ld_byte_rhit_lo;
logic [BYTE_WIDTH-1:0] ld_byte_hit_hi, ld_byte_rhit_hi;
logic [BYTE_WIDTH-1:0] ldst_byteen_hi_r;
logic [BYTE_WIDTH-1:0] ldst_byteen_lo_r;
// byte_en flowing down
logic [7:0] ldst_byteen_r;
logic [7:0] ldst_byteen_ext_r;
// fwd data through the pipe
logic [31:0] ld_fwddata_rpipe_lo;
logic [31:0] ld_fwddata_rpipe_hi;
// coalescing signals
logic [DEPTH-1:0] store_matchvec_lo_r, store_matchvec_hi_r;
logic store_coalesce_lo_r, store_coalesce_hi_r;
//----------------------------------------
// Logic starts here
//----------------------------------------
// Create high/low byte enables
assign store_byteen_ext_r[7:0] = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
assign store_byteen_hi_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[7:4] & {4{lsu_pkt_r.store}};
assign store_byteen_lo_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[3:0] & {4{lsu_pkt_r.store}};
assign RdPtrPlus1[DEPTH_LOG2-1:0] = RdPtr[DEPTH_LOG2-1:0] + 1'b1;
assign WrPtrPlus1[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 1'b1;
assign WrPtrPlus2[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 2'b10;
// ecc error on both hi/lo
assign dual_stbuf_write_r = ldst_dual_r & store_stbuf_reqvld_r;
assign ldst_stbuf_reqvld_r = ((lsu_commit_r | lsu_pkt_r.dma) & store_stbuf_reqvld_r);
// Store Buffer coalescing
for (genvar i=0; i<DEPTH; i++) begin: FindMatchEntry
assign store_matchvec_lo_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == lsu_addr_r[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & ~stbuf_reset[i];
assign store_matchvec_hi_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == end_addr_r[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & dual_stbuf_write_r & ~stbuf_reset[i];
end: FindMatchEntry
assign store_coalesce_lo_r = |store_matchvec_lo_r[DEPTH-1:0];
assign store_coalesce_hi_r = |store_matchvec_hi_r[DEPTH-1:0];
if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
// Allocate new in this entry if :
// 1. wrptr, single allocate, lo did not coalesce
// 2. wrptr, double allocate, lo ^ hi coalesced
// 3. wrptr + 1, double alloacte, niether lo or hi coalesced
// Also update if there is a hi or a lo coalesce to this entry
// Store Buffer instantiation
for (genvar i=0; i<DEPTH; i++) begin: GenStBuf
assign stbuf_wr_en[i] = ldst_stbuf_reqvld_r & (
( (i == WrPtr[DEPTH_LOG2-1:0]) & ~store_coalesce_lo_r) | // Allocate : new Lo
( (i == WrPtr[DEPTH_LOG2-1:0]) & dual_stbuf_write_r & ~store_coalesce_hi_r) | // Allocate : only 1 new Write Either
( (i == WrPtrPlus1[DEPTH_LOG2-1:0]) & dual_stbuf_write_r & ~(store_coalesce_lo_r | store_coalesce_hi_r)) | // Allocate2 : 2 new so Write Hi
store_matchvec_lo_r[i] | store_matchvec_hi_r[i]); // Coalesced Write Lo or Hi
assign stbuf_reset[i] = (lsu_stbuf_commit_any | stbuf_reqvld_flushed_any) & (i == RdPtr[DEPTH_LOG2-1:0]);
// Mux select for start/end address
assign sel_lo[i] = ((~ldst_dual_r | store_stbuf_reqvld_r) & (i == WrPtr[DEPTH_LOG2-1:0]) & ~store_coalesce_lo_r) | // lo allocated new entry
store_matchvec_lo_r[i]; // lo coalesced in to this entry
assign stbuf_addrin[i][pt.LSU_SB_BITS-1:0] = sel_lo[i] ? lsu_addr_r[pt.LSU_SB_BITS-1:0] : end_addr_r[pt.LSU_SB_BITS-1:0];
assign stbuf_byteenin[i][BYTE_WIDTH-1:0] = sel_lo[i] ? (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_lo_r[BYTE_WIDTH-1:0]) : (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_hi_r[BYTE_WIDTH-1:0]);
assign stbuf_datain[i][7:0] = sel_lo[i] ? ((~stbuf_byteen[i][0] | store_byteen_lo_r[0]) ? store_datafn_lo_r[7:0] : stbuf_data[i][7:0]) :
((~stbuf_byteen[i][0] | store_byteen_hi_r[0]) ? store_datafn_hi_r[7:0] : stbuf_data[i][7:0]);
assign stbuf_datain[i][15:8] = sel_lo[i] ? ((~stbuf_byteen[i][1] | store_byteen_lo_r[1]) ? store_datafn_lo_r[15:8] : stbuf_data[i][15:8]) :
((~stbuf_byteen[i][1] | store_byteen_hi_r[1]) ? store_datafn_hi_r[15:8] : stbuf_data[i][15:8]);
assign stbuf_datain[i][23:16] = sel_lo[i] ? ((~stbuf_byteen[i][2] | store_byteen_lo_r[2]) ? store_datafn_lo_r[23:16] : stbuf_data[i][23:16]) :
((~stbuf_byteen[i][2] | store_byteen_hi_r[2]) ? store_datafn_hi_r[23:16] : stbuf_data[i][23:16]);
assign stbuf_datain[i][31:24] = sel_lo[i] ? ((~stbuf_byteen[i][3] | store_byteen_lo_r[3]) ? store_datafn_lo_r[31:24] : stbuf_data[i][31:24]) :
((~stbuf_byteen[i][3] | store_byteen_hi_r[3]) ? store_datafn_hi_r[31:24] : stbuf_data[i][31:24]);
rvdffsc #(.WIDTH(1)) stbuf_vldff (.din(1'b1), .dout(stbuf_vld[i]), .en(stbuf_wr_en[i]), .clear(stbuf_reset[i]), .clk(lsu_free_c2_clk), .*);
rvdffsc #(.WIDTH(1)) stbuf_killff (.din(1'b1), .dout(stbuf_dma_kill[i]), .en(stbuf_dma_kill_en[i]), .clear(stbuf_reset[i]), .clk(lsu_free_c2_clk), .*);
rvdffe #(.WIDTH(pt.LSU_SB_BITS)) stbuf_addrff (.din(stbuf_addrin[i][pt.LSU_SB_BITS-1:0]), .dout(stbuf_addr[i][pt.LSU_SB_BITS-1:0]), .en(stbuf_wr_en[i]), .*);
rvdffsc #(.WIDTH(BYTE_WIDTH)) stbuf_byteenff (.din(stbuf_byteenin[i][BYTE_WIDTH-1:0]), .dout(stbuf_byteen[i][BYTE_WIDTH-1:0]), .en(stbuf_wr_en[i]), .clear(stbuf_reset[i]), .clk(lsu_stbuf_c1_clk), .*);
rvdffe #(.WIDTH(DATA_WIDTH)) stbuf_dataff (.din(stbuf_datain[i][DATA_WIDTH-1:0]), .dout(stbuf_data[i][DATA_WIDTH-1:0]), .en(stbuf_wr_en[i]), .*);
end
end else begin: Gen_dccm_disable
assign stbuf_wr_en[DEPTH-1:0] = '0;
assign stbuf_reset[DEPTH-1:0] = '0;
assign stbuf_vld[DEPTH-1:0] = '0;
assign stbuf_dma_kill[DEPTH-1:0] = '0;
assign stbuf_addr[DEPTH-1:0] = '0;
assign stbuf_byteen[DEPTH-1:0] = '0;
assign stbuf_data[DEPTH-1:0] = '0;
end
// Store Buffer drain logic
assign stbuf_reqvld_flushed_any = stbuf_vld[RdPtr] & stbuf_dma_kill[RdPtr];
assign stbuf_reqvld_any = stbuf_vld[RdPtr] & ~stbuf_dma_kill[RdPtr] & ~(|stbuf_dma_kill_en[DEPTH-1:0]); // Don't drain if some kill bit is being set this cycle
assign stbuf_addr_any[pt.LSU_SB_BITS-1:0] = stbuf_addr[RdPtr][pt.LSU_SB_BITS-1:0];
assign stbuf_data_any[DATA_WIDTH-1:0] = stbuf_data[RdPtr][DATA_WIDTH-1:0];
// Update the RdPtr/WrPtr logic
// Need to revert the WrPtr for flush cases. Also revert the pipe WrPtrs
assign WrPtrEn = (ldst_stbuf_reqvld_r & ~dual_stbuf_write_r & ~(store_coalesce_hi_r | store_coalesce_lo_r)) | // writing 1 and did not coalesce
(ldst_stbuf_reqvld_r & dual_stbuf_write_r & ~(store_coalesce_hi_r & store_coalesce_lo_r)); // writing 2 and atleast 1 did not coalesce
assign NxtWrPtr[DEPTH_LOG2-1:0] = (ldst_stbuf_reqvld_r & dual_stbuf_write_r & ~(store_coalesce_hi_r | store_coalesce_lo_r)) ? WrPtrPlus2[DEPTH_LOG2-1:0] : WrPtrPlus1[DEPTH_LOG2-1:0];
assign RdPtrEn = lsu_stbuf_commit_any | stbuf_reqvld_flushed_any;
assign NxtRdPtr[DEPTH_LOG2-1:0] = RdPtrPlus1[DEPTH_LOG2-1:0];
always_comb begin
stbuf_numvld_any[3:0] = '0;
for (int i=0; i<DEPTH; i++) begin
stbuf_numvld_any[3:0] += {3'b0, stbuf_vld[i]};
end
end
// These go to store buffer to detect full
assign isdccmst_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_in_dccm_m & ~lsu_pkt_m.dma;
assign isdccmst_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~lsu_pkt_r.dma;
assign stbuf_specvld_m[1:0] = {1'b0,isdccmst_m} << (isdccmst_m & ldst_dual_m);
assign stbuf_specvld_r[1:0] = {1'b0,isdccmst_r} << (isdccmst_r & ldst_dual_r);
assign stbuf_specvld_any[3:0] = stbuf_numvld_any[3:0] + {2'b0, stbuf_specvld_m[1:0]} + {2'b0, stbuf_specvld_r[1:0]};
assign lsu_stbuf_full_any = (~ldst_dual_d & dec_lsu_valid_raw_d) ? (stbuf_specvld_any[3:0] >= DEPTH) : (stbuf_specvld_any[3:0] >= (DEPTH-1));
assign lsu_stbuf_empty_any = (stbuf_numvld_any[3:0] == 4'b0);
// Load forwarding logic from the store queue
assign cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = end_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)];
assign cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = lsu_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)];
always_comb begin: GenLdFwd
stbuf_fwdbyteen_hi_pre_m[BYTE_WIDTH-1:0] = '0;
stbuf_fwdbyteen_lo_pre_m[BYTE_WIDTH-1:0] = '0;
for (int i=0; i<DEPTH; i++) begin
stbuf_match_hi[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m;
stbuf_match_lo[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m;
// Kill the store buffer entry if there is a dma store since it already updated the dccm
stbuf_dma_kill_en[i] = (stbuf_match_hi[i] | stbuf_match_lo[i]) & lsu_pkt_m.valid & lsu_pkt_m.dma & lsu_pkt_m.store;
for (int j=0; j<BYTE_WIDTH; j++) begin
stbuf_fwdbyteenvec_hi[i][j] = stbuf_match_hi[i] & stbuf_byteen[i][j] & stbuf_vld[i];
stbuf_fwdbyteen_hi_pre_m[j] |= stbuf_fwdbyteenvec_hi[i][j];
stbuf_fwdbyteenvec_lo[i][j] = stbuf_match_lo[i] & stbuf_byteen[i][j] & stbuf_vld[i];
stbuf_fwdbyteen_lo_pre_m[j] |= stbuf_fwdbyteenvec_lo[i][j];
end
end
end // block: GenLdFwd
always_comb begin: GenLdData
stbuf_fwddata_hi_pre_m[31:0] = '0;
stbuf_fwddata_lo_pre_m[31:0] = '0;
for (int i=0; i<DEPTH; i++) begin
stbuf_fwddata_hi_pre_m[31:0] |= {32{stbuf_match_hi[i]}} & stbuf_data[i][31:0];
stbuf_fwddata_lo_pre_m[31:0] |= {32{stbuf_match_lo[i]}} & stbuf_data[i][31:0];
end
end // block: GenLdData
// Create Hi/Lo signals - needed for the pipe forwarding
assign ldst_byteen_r[7:0] = ({8{lsu_pkt_r.by}} & 8'b0000_0001) |
({8{lsu_pkt_r.half}} & 8'b0000_0011) |
({8{lsu_pkt_r.word}} & 8'b0000_1111) |
({8{lsu_pkt_r.dword}} & 8'b1111_1111);
assign ldst_byteen_ext_r[7:0] = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
assign ldst_byteen_hi_r[3:0] = ldst_byteen_ext_r[7:4];
assign ldst_byteen_lo_r[3:0] = ldst_byteen_ext_r[3:0];
assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma;
assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma;
assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma & dual_stbuf_write_r;
assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma & dual_stbuf_write_r;
for (genvar i=0; i<BYTE_WIDTH; i++) begin
assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo & ldst_byteen_lo_r[i];
assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i];
assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i];
assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i];
assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
({8{ld_byte_rhit_hi_lo[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
({8{ld_byte_rhit_hi_hi[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
assign ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
assign stbuf_fwdbyteen_hi_m[i] = ld_byte_hit_hi[i] | stbuf_fwdbyteen_hi_pre_m[i];
assign stbuf_fwdbyteen_lo_m[i] = ld_byte_hit_lo[i] | stbuf_fwdbyteen_lo_pre_m[i];
// // Pipe vs Store Queue priority
assign stbuf_fwddata_lo_m[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i] ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : stbuf_fwddata_lo_pre_m[(8*i)+7:(8*i)];
// // Pipe vs Store Queue priority
assign stbuf_fwddata_hi_m[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i] ? ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : stbuf_fwddata_hi_pre_m[(8*i)+7:(8*i)];
end
// Flops
rvdffs #(.WIDTH(DEPTH_LOG2)) WrPtrff (.din(NxtWrPtr[DEPTH_LOG2-1:0]), .dout(WrPtr[DEPTH_LOG2-1:0]), .en(WrPtrEn), .clk(lsu_stbuf_c1_clk), .*);
rvdffs #(.WIDTH(DEPTH_LOG2)) RdPtrff (.din(NxtRdPtr[DEPTH_LOG2-1:0]), .dout(RdPtr[DEPTH_LOG2-1:0]), .en(RdPtrEn), .clk(lsu_stbuf_c1_clk), .*);
`ifdef RV_ASSERT_ON
assert_stbuf_overflow: assert #0 (stbuf_specvld_any[2:0] <= DEPTH);
property stbuf_wren_store_dccm;
@(posedge clk) disable iff(~rst_l) (|stbuf_wr_en[DEPTH-1:0]) |-> (lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r);
endproperty
assert_stbuf_wren_store_dccm: assert property (stbuf_wren_store_dccm) else
$display("Illegal store buffer write");
`endif
endmodule

View File

@ -0,0 +1,68 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: LSU Trigger logic
// Comments:
//
//********************************************************************************
module el2_lsu_trigger
import el2_pkg::*;
#(
`include "el2_param.vh"
)(
input el2_trigger_pkt_t [3:0] trigger_pkt_any, // trigger packet from dec
input el2_lsu_pkt_t lsu_pkt_m, // lsu packet
input logic [31:0] lsu_addr_m, // address
input logic [31:0] store_data_m, // store data
output logic [3:0] lsu_trigger_match_m // match result
);
logic trigger_enable;
logic [3:0][31:0] lsu_match_data;
logic [3:0] lsu_trigger_data_match;
logic [31:0] store_data_trigger_m;
logic [31:0] ldst_addr_trigger_m;
// Generate the trigger enable (This is for power)
always_comb begin
trigger_enable = 1'b0;
for (int i=0; i<4; i++) begin
trigger_enable |= trigger_pkt_any[i].m;
end
end
assign store_data_trigger_m[31:0] = {({16{lsu_pkt_m.word}} & store_data_m[31:16]),({8{(lsu_pkt_m.half | lsu_pkt_m.word)}} & store_data_m[15:8]), store_data_m[7:0]} & {32{trigger_enable}};
assign ldst_addr_trigger_m[31:0] = lsu_addr_m[31:0] & {32{trigger_enable}};
for (genvar i=0; i<4; i++) begin
assign lsu_match_data[i][31:0] = ({32{~trigger_pkt_any[i].select}} & ldst_addr_trigger_m[31:0]) |
({32{trigger_pkt_any[i].select & trigger_pkt_any[i].store}} & store_data_trigger_m[31:0]);
rvmaskandmatch trigger_match (.mask(trigger_pkt_any[i].tdata2[31:0]), .data(lsu_match_data[i][31:0]), .masken(trigger_pkt_any[i].match), .match(lsu_trigger_data_match[i]));
assign lsu_trigger_match_m[i] = lsu_pkt_m.valid & ~lsu_pkt_m.dma & trigger_enable &
((trigger_pkt_any[i].store & lsu_pkt_m.store) | (trigger_pkt_any[i].load & lsu_pkt_m.load & ~trigger_pkt_any[i].select)) &
lsu_trigger_data_match[i];
end
endmodule // el2_lsu_trigger

2267
Flow/soc/JSON.pm Normal file

File diff suppressed because it is too large Load Diff

0
Flow/soc/Makefile Normal file
View File

238
Flow/soc/ahb_sif.sv Normal file
View File

@ -0,0 +1,238 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2019 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
module axi_slv #(
TAGW = 1
) (
input aclk,
input rst_l,
input arvalid,
output reg arready,
input [ 31:0] araddr,
input [TAGW-1:0] arid,
input [ 7:0] arlen,
input [ 1:0] arburst,
input [ 2:0] arsize,
output reg rvalid,
input rready,
output reg [ 63:0] rdata,
output reg [ 1:0] rresp,
output reg [TAGW-1:0] rid,
output rlast,
input awvalid,
output awready,
input [ 31:0] awaddr,
input [TAGW-1:0] awid,
input [ 7:0] awlen,
input [ 1:0] awburst,
input [ 2:0] awsize,
input [63:0] wdata,
input [ 7:0] wstrb,
input wvalid,
output wready,
output reg bvalid,
input bready,
output reg [ 1:0] bresp,
output reg [TAGW-1:0] bid
);
parameter MEM_DEPTH = 15; // memory size = 0x8000 = 32k WIDTH=15
parameter MEM_DEPTH_EACH = MEM_DEPTH - 3; // memory size = 0x8000 = 32k WIDTH=15
bit [7:0] mem0[(1<<MEM_DEPTH_EACH)-1:0];
bit [7:0] mem1[(1<<MEM_DEPTH_EACH)-1:0];
bit [7:0] mem2[(1<<MEM_DEPTH_EACH)-1:0];
bit [7:0] mem3[(1<<MEM_DEPTH_EACH)-1:0];
bit [7:0] mem4[(1<<MEM_DEPTH_EACH)-1:0];
bit [7:0] mem5[(1<<MEM_DEPTH_EACH)-1:0];
bit [7:0] mem6[(1<<MEM_DEPTH_EACH)-1:0];
bit [7:0] mem7[(1<<MEM_DEPTH_EACH)-1:0];
bit [63:0] memdata;
wire [MEM_DEPTH-4:0] saraddr0 = araddr[MEM_DEPTH - 1:3] + (araddr[2:0] > 3'h0 ? 1 : 0);
wire [MEM_DEPTH-4:0] saraddr1 = araddr[MEM_DEPTH - 1:3] + (araddr[2:0] > 3'h1 ? 1 : 0);
wire [MEM_DEPTH-4:0] saraddr2 = araddr[MEM_DEPTH - 1:3] + (araddr[2:0] > 3'h2 ? 1 : 0);
wire [MEM_DEPTH-4:0] saraddr3 = araddr[MEM_DEPTH - 1:3] + (araddr[2:0] > 3'h3 ? 1 : 0);
wire [MEM_DEPTH-4:0] saraddr4 = araddr[MEM_DEPTH - 1:3] + (araddr[2:0] > 3'h4 ? 1 : 0);
wire [MEM_DEPTH-4:0] saraddr5 = araddr[MEM_DEPTH - 1:3] + (araddr[2:0] > 3'h5 ? 1 : 0);
wire [MEM_DEPTH-4:0] saraddr6 = araddr[MEM_DEPTH - 1:3] + (araddr[2:0] > 3'h6 ? 1 : 0);
wire [MEM_DEPTH-4:0] saraddr7 = araddr[MEM_DEPTH - 1:3];
wire [7:0] rm0 = mem0[saraddr0];
wire [7:0] rm1 = mem1[saraddr1];
wire [7:0] rm2 = mem2[saraddr2];
wire [7:0] rm3 = mem3[saraddr3];
wire [7:0] rm4 = mem4[saraddr4];
wire [7:0] rm5 = mem5[saraddr5];
wire [7:0] rm6 = mem6[saraddr6];
wire [7:0] rm7 = mem7[saraddr7];
wire [MEM_DEPTH-4:0] sawaddr0 = awaddr[MEM_DEPTH - 1:3] + (awaddr[2:0] > 0 ? 1 : 0);
wire [MEM_DEPTH-4:0] sawaddr1 = awaddr[MEM_DEPTH - 1:3] + (awaddr[2:0] > 1 ? 1 : 0);
wire [MEM_DEPTH-4:0] sawaddr2 = awaddr[MEM_DEPTH - 1:3] + (awaddr[2:0] > 2 ? 1 : 0);
wire [MEM_DEPTH-4:0] sawaddr3 = awaddr[MEM_DEPTH - 1:3] + (awaddr[2:0] > 3 ? 1 : 0);
wire [MEM_DEPTH-4:0] sawaddr4 = awaddr[MEM_DEPTH - 1:3] + (awaddr[2:0] > 4 ? 1 : 0);
wire [MEM_DEPTH-4:0] sawaddr5 = awaddr[MEM_DEPTH - 1:3] + (awaddr[2:0] > 5 ? 1 : 0);
wire [MEM_DEPTH-4:0] sawaddr6 = awaddr[MEM_DEPTH - 1:3] + (awaddr[2:0] > 6 ? 1 : 0);
wire [MEM_DEPTH-4:0] sawaddr7 = awaddr[MEM_DEPTH - 1:3];
initial begin
mem0[0] = 8'h63;
mem1[0] = 8'h0;
mem2[0] = 8'h0;
mem3[0] = 8'h0;
end
always @(posedge aclk or negedge rst_l) begin
if (!rst_l) begin
rvalid <= 0;
bvalid <= 0;
end else begin
bid <= awid;
rid <= arid;
rvalid <= arvalid;
bvalid <= awvalid;
rdata <= memdata;
end
end
always @(negedge aclk) begin
if (arvalid)
case (araddr[2:0])
3'h0: begin
memdata <= {rm7, rm6, rm5, rm4, rm3, rm2, rm1, rm0};
end
3'h1: begin
memdata <= {rm0, rm7, rm6, rm5, rm4, rm3, rm2, rm1};
end
3'h2: begin
memdata <= {rm1, rm0, rm7, rm6, rm5, rm4, rm3, rm2};
end
3'h3: begin
memdata <= {rm2, rm1, rm0, rm7, rm6, rm5, rm4, rm3};
end
3'h4: begin
memdata <= {rm3, rm2, rm1, rm0, rm7, rm6, rm5, rm4};
end
3'h5: begin
memdata <= {rm4, rm3, rm2, rm1, rm0, rm7, rm6, rm5};
end
3'h6: begin
memdata <= {rm5, rm4, rm3, rm2, rm1, rm0, rm7, rm6};
end
3'h7: begin
memdata <= {rm7, rm6, rm5, rm4, rm3, rm2, rm1, rm0};
end
endcase
if (awvalid) begin
case (awaddr[2:0])
3'h0: begin
if (wstrb[7]) mem7[sawaddr7] = wdata[63:56];
if (wstrb[6]) mem6[sawaddr6] = wdata[55:48];
if (wstrb[5]) mem5[sawaddr5] = wdata[47:40];
if (wstrb[4]) mem4[sawaddr4] = wdata[39:32];
if (wstrb[3]) mem3[sawaddr3] = wdata[31:24];
if (wstrb[2]) mem2[sawaddr2] = wdata[23:16];
if (wstrb[1]) mem1[sawaddr1] = wdata[15:08];
if (wstrb[0]) mem0[sawaddr0] = wdata[07:00];
end
3'h1: begin
if (wstrb[7]) mem0[sawaddr0] = wdata[63:56];
if (wstrb[6]) mem7[sawaddr7] = wdata[55:48];
if (wstrb[5]) mem6[sawaddr6] = wdata[47:40];
if (wstrb[4]) mem5[sawaddr5] = wdata[39:32];
if (wstrb[3]) mem4[sawaddr4] = wdata[31:24];
if (wstrb[2]) mem3[sawaddr3] = wdata[23:16];
if (wstrb[1]) mem2[sawaddr2] = wdata[15:08];
if (wstrb[0]) mem1[sawaddr1] = wdata[07:00];
end
3'h2: begin
if (wstrb[7]) mem1[sawaddr1] = wdata[63:56];
if (wstrb[6]) mem0[sawaddr0] = wdata[55:48];
if (wstrb[5]) mem7[sawaddr7] = wdata[47:40];
if (wstrb[4]) mem6[sawaddr6] = wdata[39:32];
if (wstrb[3]) mem5[sawaddr5] = wdata[31:24];
if (wstrb[2]) mem4[sawaddr4] = wdata[23:16];
if (wstrb[1]) mem3[sawaddr3] = wdata[15:08];
if (wstrb[0]) mem2[sawaddr2] = wdata[07:00];
end
3'h3: begin
if (wstrb[7]) mem2[sawaddr2] = wdata[63:56];
if (wstrb[6]) mem1[sawaddr1] = wdata[55:48];
if (wstrb[5]) mem0[sawaddr0] = wdata[47:40];
if (wstrb[4]) mem7[sawaddr7] = wdata[39:32];
if (wstrb[3]) mem6[sawaddr6] = wdata[31:24];
if (wstrb[2]) mem5[sawaddr5] = wdata[23:16];
if (wstrb[1]) mem4[sawaddr4] = wdata[15:08];
if (wstrb[0]) mem3[sawaddr3] = wdata[07:00];
end
3'h4: begin
if (wstrb[7]) mem3[sawaddr3] = wdata[63:56];
if (wstrb[6]) mem2[sawaddr2] = wdata[55:48];
if (wstrb[5]) mem1[sawaddr1] = wdata[47:40];
if (wstrb[4]) mem0[sawaddr0] = wdata[39:32];
if (wstrb[3]) mem7[sawaddr7] = wdata[31:24];
if (wstrb[2]) mem6[sawaddr6] = wdata[23:16];
if (wstrb[1]) mem5[sawaddr5] = wdata[15:08];
if (wstrb[0]) mem4[sawaddr4] = wdata[07:00];
end
3'h5: begin
if (wstrb[7]) mem4[sawaddr4] = wdata[63:56];
if (wstrb[6]) mem3[sawaddr3] = wdata[55:48];
if (wstrb[5]) mem2[sawaddr2] = wdata[47:40];
if (wstrb[4]) mem1[sawaddr1] = wdata[39:32];
if (wstrb[3]) mem0[sawaddr0] = wdata[31:24];
if (wstrb[2]) mem7[sawaddr7] = wdata[23:16];
if (wstrb[1]) mem6[sawaddr6] = wdata[15:08];
if (wstrb[0]) mem5[sawaddr5] = wdata[07:00];
end
3'h6: begin
if (wstrb[7]) mem5[sawaddr5] = wdata[63:56];
if (wstrb[6]) mem4[sawaddr4] = wdata[55:48];
if (wstrb[5]) mem3[sawaddr3] = wdata[47:40];
if (wstrb[4]) mem2[sawaddr2] = wdata[39:32];
if (wstrb[3]) mem1[sawaddr1] = wdata[31:24];
if (wstrb[2]) mem0[sawaddr0] = wdata[23:16];
if (wstrb[1]) mem7[sawaddr7] = wdata[15:08];
if (wstrb[0]) mem6[sawaddr6] = wdata[07:00];
end
3'h7: begin
if (wstrb[7]) mem6[sawaddr6] = wdata[63:56];
if (wstrb[6]) mem5[sawaddr5] = wdata[55:48];
if (wstrb[5]) mem4[sawaddr4] = wdata[47:40];
if (wstrb[4]) mem3[sawaddr3] = wdata[39:32];
if (wstrb[3]) mem2[sawaddr2] = wdata[31:24];
if (wstrb[2]) mem1[sawaddr1] = wdata[23:16];
if (wstrb[1]) mem0[sawaddr0] = wdata[15:08];
if (wstrb[0]) mem7[sawaddr7] = wdata[07:00];
end
endcase
end
end
assign arready = 1'b1;
assign awready = 1'b1;
assign wready = 1'b1;
assign rresp = 2'b0;
assign bresp = 2'b0;
assign rlast = 1'b1;
endmodule

View File

@ -0,0 +1,196 @@
// connects LSI master to external AXI slave and DMA slave
module axi_lsu_dma_bridge #(
parameter M_ID_WIDTH = 8,
parameter S0_ID_WIDTH = 8
) (
input clk,
input reset_l,
// master read bus
input m_arvalid,
input [M_ID_WIDTH-1:0] m_arid,
input [ 31:0] m_araddr,
output m_arready,
output m_rvalid,
input m_rready,
output [ 63:0] m_rdata,
output [M_ID_WIDTH-1:0] m_rid,
output [ 1:0] m_rresp,
output m_rlast,
// master write bus
input m_awvalid,
input [M_ID_WIDTH-1:0] m_awid,
input [ 31:0] m_awaddr,
output m_awready,
input m_wvalid,
output m_wready,
output [ 1:0] m_bresp,
output m_bvalid,
output [M_ID_WIDTH-1:0] m_bid,
input m_bready,
// slave 0 if general ext memory
output s0_arvalid,
input s0_arready,
input s0_rvalid,
input [S0_ID_WIDTH-1:0] s0_rid,
input [ 1:0] s0_rresp,
input [ 63:0] s0_rdata,
input s0_rlast,
output s0_rready,
output s0_awvalid,
input s0_awready,
output s0_wvalid,
input s0_wready,
input [ 1:0] s0_bresp,
input s0_bvalid,
input [S0_ID_WIDTH-1:0] s0_bid,
output s0_bready,
// slave 1 if DMA port
output s1_arvalid,
input s1_arready,
input s1_rvalid,
input [ 1:0] s1_rresp,
input [63:0] s1_rdata,
input s1_rlast,
output s1_rready,
output s1_awvalid,
input s1_awready,
output s1_wvalid,
input s1_wready,
input [1:0] s1_bresp,
input s1_bvalid,
output s1_bready
);
parameter ICCM_BASE = `RV_ICCM_BITS; // in LSBs
localparam IDFIFOSZ = $clog2(`RV_DMA_BUF_DEPTH);
bit [31:0] iccm_real_base_addr = `RV_ICCM_SADR;
wire ar_slave_select;
wire aw_slave_select;
wire w_slave_select;
wire rresp_select;
wire bresp_select;
wire ar_iccm_select;
wire aw_iccm_select;
reg [1:0] wsel_iptr, wsel_optr;
reg [2:0] wsel_count;
reg [3:0] wsel;
reg [M_ID_WIDTH-1:0] arid[1<<IDFIFOSZ];
reg [M_ID_WIDTH-1:0] awid[1<<IDFIFOSZ];
reg [IDFIFOSZ-1:0] arid_cnt;
reg [IDFIFOSZ-1:0] awid_cnt;
reg [IDFIFOSZ-1:0] rid_cnt;
reg [IDFIFOSZ-1:0] bid_cnt;
// 1 select slave 1; 0 - slave 0
assign ar_slave_select = ar_iccm_select;
assign aw_slave_select = aw_iccm_select;
assign ar_iccm_select = m_araddr[31:ICCM_BASE] == iccm_real_base_addr[31:ICCM_BASE];
assign aw_iccm_select = m_awaddr[31:ICCM_BASE] == iccm_real_base_addr[31:ICCM_BASE];
assign s0_arvalid = m_arvalid & ~ar_slave_select;
assign s1_arvalid = m_arvalid & ar_slave_select;
assign m_arready = ar_slave_select ? s1_arready : s0_arready;
assign s0_awvalid = m_awvalid & ~aw_slave_select;
assign s1_awvalid = m_awvalid & aw_slave_select;
assign m_awready = aw_slave_select ? s1_awready : s0_awready;
assign s0_wvalid = m_wvalid & ~w_slave_select;
assign s1_wvalid = m_wvalid & w_slave_select;
assign m_wready = w_slave_select ? s1_wready : s0_wready;
assign w_slave_select = (wsel_count == 0 || wsel_count[2]) ? aw_slave_select : wsel[wsel_optr];
assign m_rvalid = s0_rvalid | s1_rvalid;
assign s0_rready = m_rready & ~rresp_select;
assign s1_rready = m_rready & rresp_select;
assign m_rdata = rresp_select ? s1_rdata : s0_rdata;
assign m_rresp = rresp_select ? s1_rresp : s0_rresp;
assign m_rid = rresp_select ? arid[rid_cnt] : s0_rid;
assign m_rlast = rresp_select ? s1_rlast : s0_rlast;
assign rresp_select = s1_rvalid & ~s0_rvalid;
assign m_bvalid = s0_bvalid | s1_bvalid;
assign s0_bready = m_bready & ~bresp_select;
assign s1_bready = m_bready & bresp_select;
assign m_bid = bresp_select ? awid[bid_cnt] : s0_bid;
assign m_bresp = bresp_select ? s1_bresp : s0_bresp;
assign bresp_select = s1_bvalid & ~s0_bvalid;
// W-channel select fifo
always @(posedge clk or negedge reset_l)
if (!reset_l) begin
wsel_count <= '0;
wsel_iptr <= '0;
wsel_optr <= '0;
end else begin
if (m_awvalid & m_awready) begin
wsel[wsel_iptr] <= aw_slave_select;
if (!(m_wready & m_wvalid)) wsel_count <= wsel_count + 1;
wsel_iptr <= wsel_iptr + 1;
end
if (m_wvalid & m_wready) begin
if (!(m_awready & m_awvalid)) wsel_count <= wsel_count - 1;
wsel_optr <= wsel_optr + 1;
end
end
// id replacement for narrow ID slave
always @(posedge clk or negedge reset_l)
if (!reset_l) begin
arid_cnt <= '0;
rid_cnt <= '0;
end else begin
if (ar_slave_select & m_arready & m_arvalid) begin
arid[arid_cnt] <= m_arid;
arid_cnt <= arid_cnt + 1;
end
if (rresp_select & m_rready & m_rvalid) begin
rid_cnt <= rid_cnt + 1;
end
end
always @(posedge clk or negedge reset_l)
if (!reset_l) begin
awid_cnt <= '0;
bid_cnt <= '0;
end else begin
if (aw_slave_select & m_awready & m_awvalid) begin
awid[awid_cnt] <= m_awid;
awid_cnt <= awid_cnt + 1;
end
if (bresp_select & m_bready & m_bvalid) begin
bid_cnt <= bid_cnt + 1;
end
end
endmodule

395
Flow/soc/dasm.svi Normal file
View File

@ -0,0 +1,395 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2019 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Run time disassembler functions
// supports RISCV extentions I, C, M, A
`ifndef RV_NUM_THREADS
`define RV_NUM_THREADS 1
`endif
bit[31:0] [31:0] gpr[`RV_NUM_THREADS];
// main DASM function
function string dasm(input[31:0] opcode, input[31:0] pc, input[4:0] regn, input[31:0] regv, input tid=0);
dasm = (opcode[1:0] == 2'b11) ? dasm32(opcode, pc, tid) : dasm16(opcode, pc, tid);
if(regn) gpr[tid][regn] = regv;
endfunction
///////////////// 16 bits instructions ///////////////////////
function string dasm16( input[31:0] opcode, input[31:0] pc, input tid=0);
case(opcode[1:0])
0: return dasm16_0(opcode, tid);
1: return dasm16_1(opcode, pc);
2: return dasm16_2(opcode);
endcase
return $sformatf(".short 0x%h", opcode[15:0]);
endfunction
function string dasm16_0( input[31:0] opcode, tid);
case(opcode[15:13])
3'b000: return dasm16_ciw(opcode);
3'b001: return {"c.fld ", dasm16_cl(opcode, tid)};
3'b010: return {"c.lw ", dasm16_cl(opcode, tid)};
3'b011: return {"c.flw ", dasm16_cl(opcode, tid)};
3'b101: return {"c.fsd ", dasm16_cl(opcode, tid)};
3'b110: return {"c.sw ", dasm16_cl(opcode, tid)};
3'b111: return {"c.fsw ", dasm16_cl(opcode, tid)};
endcase
return $sformatf(".short 0x%h", opcode[15:0]);
endfunction
function string dasm16_ciw( input[31:0] opcode);
int imm;
imm=0;
if(opcode[15:0] == 0) return ".short 0";
{imm[5:4],imm[9:6],imm[2],imm[3]} = opcode[12:5];
return $sformatf("c.addi4spn %s,0x%0h", abi_reg[opcode[4:2]+8], imm);
endfunction
function string dasm16_cl( input[31:0] opcode, input tid=0);
int imm;
imm=0;
imm[5:3] = opcode[12:10];
imm[7:6] = opcode[6:5];
return $sformatf(" %s,%0d(%s) [%h]", abi_reg[opcode[4:2]+8], imm, abi_reg[opcode[9:7]+8], gpr[tid][opcode[9:7]+8]+imm);
endfunction
function string dasm16_1( input[31:0] opcode, input[31:0] pc);
case(opcode[15:13])
3'b000: return opcode[11:7]==0 ? "c.nop" : {"c.addi ",dasm16_ci(opcode)};
3'b001: return {"c.jal ", dasm16_cj(opcode, pc)};
3'b010: return {"c.li ", dasm16_ci(opcode)};
3'b011: return dasm16_1_3(opcode);
3'b100: return dasm16_cr(opcode);
3'b101: return {"c.j ", dasm16_cj(opcode, pc)};
3'b110: return {"c.beqz ", dasm16_cb(opcode, pc)};
3'b111: return {"c.bnez ", dasm16_cb(opcode, pc)};
endcase
endfunction
function string dasm16_ci( input[31:0] opcode);
int imm;
imm=0;
imm[4:0] = opcode[6:2];
if(opcode[12]) imm [31:5] = '1;
return $sformatf("%s,%0d", abi_reg[opcode[11:7]], imm);
endfunction
function string dasm16_cj( input[31:0] opcode, input[31:0] pc);
bit[31:0] imm;
imm=0;
{imm[11],imm[4],imm[9:8],imm[10],imm[6], imm[7],imm[3:1], imm[5]} = opcode[12:2];
if(opcode[12]) imm [31:12] = '1;
return $sformatf("0x%0h", imm+pc);
endfunction
function string dasm16_cb( input[31:0] opcode, input[31:0] pc);
bit[31:0] imm;
imm=0;
{imm[8],imm[4:3]} = opcode[12:10];
{imm[7:6],imm[2:1], imm[5]} = opcode[6:2];
if(opcode[12]) imm [31:9] = '1;
return $sformatf("%s,0x%0h",abi_reg[opcode[9:7]+8], imm+pc);
endfunction
function string dasm16_cr( input[31:0] opcode);
bit[31:0] imm;
imm = 0;
imm[4:0] = opcode[6:2];
if(opcode[5]) imm [31:5] = '1;
case(opcode[11:10])
0: return $sformatf("c.srli %s,%0d", abi_reg[opcode[9:7]+8], imm[5:0]);
1: return $sformatf("c.srai %s,%0d", abi_reg[opcode[9:7]+8], imm[5:0]);
2: return $sformatf("c.andi %s,0x%0h", abi_reg[opcode[9:7]+8], imm);
endcase
case(opcode[6:5])
0: return $sformatf("c.sub %s,%s", abi_reg[opcode[9:7]+8], abi_reg[opcode[4:2]+8]);
1: return $sformatf("c.xor %s,%s", abi_reg[opcode[9:7]+8], abi_reg[opcode[4:2]+8]);
2: return $sformatf("c.or %s,%s", abi_reg[opcode[9:7]+8], abi_reg[opcode[4:2]+8]);
3: return $sformatf("c.and %s,%s", abi_reg[opcode[9:7]+8], abi_reg[opcode[4:2]+8]);
endcase
endfunction
function string dasm16_1_3( input[31:0] opcode);
int imm;
imm=0;
if(opcode[11:7] == 2) begin
{imm[4], imm[6],imm[8:7], imm[5]} = opcode[6:2];
if(opcode[12]) imm [31:9] = '1;
return $sformatf("c.addi16sp %0d", imm);
end
else begin
imm[16:12] = opcode[6:2];
if(opcode[12]) imm [31:17] = '1;
return $sformatf("c.lui %s,0x%0h", abi_reg[opcode[11:7]], imm);
end
endfunction
function string dasm16_2( input[31:0] opcode, input tid=0);
case(opcode[15:13])
3'b000: return {"c.slli ", dasm16_ci(opcode)};
3'b001: return {"c.fldsp ", dasm16_cls(opcode,1,tid)};
3'b010: return {"c.lwsp ", dasm16_cls(opcode,0,tid)};
3'b011: return {"c.flwsp ", dasm16_cls(opcode,0,tid)};
3'b101: return {"c.fsdsp ", dasm16_css(opcode,1,tid)};
3'b110: return {"c.swsp ", dasm16_css(opcode,0,tid)};
3'b111: return {"c.fswsp ", dasm16_css(opcode,0,tid)};
endcase
if(opcode[12]) begin
if(opcode[12:2] == 0) return "c.ebreak";
else if(opcode[6:2] == 0) return $sformatf("c.jalr %s", abi_reg[opcode[11:7]]);
else return $sformatf("c.add %s,%s", abi_reg[opcode[11:7]], abi_reg[opcode[6:2]]);
end
else begin
if(opcode[6:2] == 0) return $sformatf("c.jr %s", abi_reg[opcode[11:7]]);
else return $sformatf("c.mv %s,%s", abi_reg[opcode[11:7]], abi_reg[opcode[6:2]]);
end
endfunction
function string dasm16_cls( input[31:0] opcode, input sh1=0, tid=0);
bit[31:0] imm;
imm=0;
if(sh1) {imm[4:3],imm[8:6]} = opcode[6:2];
else {imm[4:2],imm[7:6]} = opcode[6:2];
imm[5] = opcode[12];
return $sformatf("%s,0x%0h [%h]", abi_reg[opcode[11:7]], imm, gpr[tid][2]+imm);
endfunction
function string dasm16_css( input[31:0] opcode, input sh1=0, tid=0);
bit[31:0] imm;
imm=0;
if(sh1) {imm[5:3],imm[8:6]} = opcode[12:7];
else {imm[5:2],imm[7:6]} = opcode[12:7];
return $sformatf("%s,0x%0h [%h]", abi_reg[opcode[6:2]], imm, gpr[tid][2]+imm);
endfunction
///////////////// 32 bit instructions ///////////////////////
function string dasm32( input[31:0] opcode, input[31:0] pc, input tid=0);
case(opcode[6:0])
7'b0110111: return {"lui ", dasm32_u(opcode)};
7'b0010111: return {"auipc ", dasm32_u(opcode)};
7'b1101111: return {"jal ", dasm32_j(opcode,pc)};
7'b1100111: return {"jalr ", dasm32_jr(opcode,pc)};
7'b1100011: return dasm32_b(opcode,pc);
7'b0000011: return dasm32_l(opcode,tid);
7'b0100011: return dasm32_s(opcode,tid);
7'b0010011: return dasm32_ai(opcode);
7'b0110011: return dasm32_ar(opcode);
7'b0001111: return {"fence", dasm32_fence(opcode)};
7'b1110011: return dasm32_e(opcode);
7'b0101111: return dasm32_a(opcode,tid);
endcase
return $sformatf(".long 0x%h", opcode);
endfunction
function string dasm32_u( input[31:0] opcode);
bit[31:0] imm;
imm=0;
imm[31:12] = opcode[31:12];
return $sformatf("%s,0x%0h", abi_reg[opcode[11:7]], imm);
endfunction
function string dasm32_j( input[31:0] opcode, input[31:0] pc);
int imm;
imm=0;
{imm[20], imm[10:1], imm[11], imm[19:12]} = opcode[31:12];
if(opcode[31]) imm[31:20] = '1;
return $sformatf("%s,0x%0h",abi_reg[opcode[11:7]], imm+pc);
endfunction
function string dasm32_jr( input[31:0] opcode, input[31:0] pc);
int imm;
imm=0;
imm[11:1] = opcode[31:19];
if(opcode[31]) imm[31:12] = '1;
return $sformatf("%s,%s,0x%0h",abi_reg[opcode[11:7]], abi_reg[opcode[19:15]], imm+pc);
endfunction
function string dasm32_b( input[31:0] opcode, input[31:0] pc);
int imm;
string mn;
imm=0;
{imm[12],imm[10:5]} = opcode[31:25];
{imm[4:1],imm[11]} = opcode[11:7];
if(opcode[31]) imm[31:12] = '1;
case(opcode[14:12])
0: mn = "beq ";
1: mn = "bne ";
2,3 : return $sformatf(".long 0x%h", opcode);
4: mn = "blt ";
5: mn = "bge ";
6: mn = "bltu ";
7: mn = "bgeu ";
endcase
return $sformatf("%s%s,%s,0x%0h", mn, abi_reg[opcode[19:15]], abi_reg[opcode[24:20]], imm+pc);
endfunction
function string dasm32_l( input[31:0] opcode, input tid=0);
int imm;
string mn;
imm=0;
imm[11:0] = opcode[31:20];
if(opcode[31]) imm[31:12] = '1;
case(opcode[14:12])
0: mn = "lb ";
1: mn = "lh ";
2: mn = "lw ";
4: mn = "lbu ";
5: mn = "lhu ";
default : return $sformatf(".long 0x%h", opcode);
endcase
return $sformatf("%s%s,%0d(%s) [%h]", mn, abi_reg[opcode[11:7]], imm, abi_reg[opcode[19:15]], imm+gpr[tid][opcode[19:15]]);
endfunction
function string dasm32_s( input[31:0] opcode, input tid=0);
int imm;
string mn;
imm=0;
imm[11:5] = opcode[31:25];
imm[4:0] = opcode[11:7];
if(opcode[31]) imm[31:12] = '1;
case(opcode[14:12])
0: mn = "sb ";
1: mn = "sh ";
2: mn = "sw ";
default : return $sformatf(".long 0x%h", opcode);
endcase
return $sformatf("%s%s,%0d(%s) [%h]", mn, abi_reg[opcode[24:20]], imm, abi_reg[opcode[19:15]], imm+gpr[tid][opcode[19:15]]);
endfunction
function string dasm32_ai( input[31:0] opcode);
int imm;
string mn;
imm=0;
imm[11:0] = opcode[31:20];
if(opcode[31]) imm[31:12] = '1;
case(opcode[14:12])
0: mn = "addi ";
2: mn = "slti ";
3: mn = "sltiu ";
4: mn = "xori ";
6: mn = "ori ";
7: mn = "andi ";
default: return dasm32_si(opcode);
endcase
return $sformatf("%s%s,%s,%0d", mn, abi_reg[opcode[11:7]], abi_reg[opcode[19:15]], imm);
endfunction
function string dasm32_si( input[31:0] opcode);
int imm;
string mn;
imm = opcode[24:20];
case(opcode[14:12])
1: mn = "slli";
5: mn = opcode[30] ? "srli": "srai";
endcase
return $sformatf("%s %s,%s,%0d", mn, abi_reg[opcode[11:7]], abi_reg[opcode[19:15]], imm);
endfunction
function string dasm32_ar( input[31:0] opcode);
string mn;
if(opcode[25])
case(opcode[14:12])
0: mn = "mul ";
1: mn = "mulh ";
2: mn = "mulhsu ";
3: mn = "mulhu ";
4: mn = "div ";
5: mn = "divu ";
6: mn = "rem ";
7: mn = "remu ";
endcase
else
case(opcode[14:12])
0: mn = opcode[30]? "sub ":"add ";
1: mn = "sll ";
2: mn = "slt ";
3: mn = "sltu ";
4: mn = "xor ";
5: mn = opcode[30]? "sra ":"srl ";
6: mn = "or ";
7: mn = "and ";
endcase
return $sformatf("%s%s,%s,%s", mn, abi_reg[opcode[11:7]], abi_reg[opcode[19:15]], abi_reg[opcode[24:20]]);
endfunction
function string dasm32_fence( input[31:0] opcode);
return opcode[12] ? ".i" : "";
endfunction
function string dasm32_e(input[31:0] opcode);
if(opcode[31:7] == 0) return "ecall";
else if({opcode[31:21],opcode [19:7]} == 0) return "ebreak";
else
case(opcode[14:12])
1: return {"csrrw ", dasm32_csr(opcode)};
2: return {"csrrs ", dasm32_csr(opcode)};
3: return {"csrrc ", dasm32_csr(opcode)};
5: return {"csrrwi ", dasm32_csr(opcode, 1)};
6: return {"csrrsi ", dasm32_csr(opcode, 1)};
7: return {"csrrci ", dasm32_csr(opcode, 1)};
endcase
endfunction
function string dasm32_csr(input[31:0] opcode, input im=0);
bit[11:0] csr;
csr = opcode[31:20];
if(im) begin
return $sformatf("%s,csr_%0h,0x%h", abi_reg[opcode[11:7]], csr, opcode[19:15]);
end
else begin
return $sformatf("%s,csr_%0h,%s", abi_reg[opcode[11:7]], csr, abi_reg[opcode[19:15]]);
end
endfunction
//atomics
function string dasm32_a(input[31:0] opcode, input tid=0);
case(opcode[31:27])
'b00010: return $sformatf("lr.w %s,(%s) [%h]", abi_reg[opcode[11:7]], abi_reg[opcode[19:15]], gpr[tid][opcode[19:15]]);
'b00011: return $sformatf("sc.w %s,%s,(%s) [%h]", abi_reg[opcode[11:7]], abi_reg[opcode[24:20]], abi_reg[opcode[19:15]], gpr[tid][opcode[19:15]]);
'b00001: return {"amoswap.w", dasm32_amo(opcode, tid)};
'b00000: return {"amoadd.w", dasm32_amo(opcode, tid)};
'b00100: return {"amoxor.w", dasm32_amo(opcode, tid)};
'b01100: return {"amoand.w", dasm32_amo(opcode, tid)};
'b01000: return {"amoor.w", dasm32_amo(opcode, tid)};
'b10000: return {"amomin.w", dasm32_amo(opcode, tid)};
'b10100: return {"amomax.w", dasm32_amo(opcode, tid)};
'b11000: return {"amominu.w", dasm32_amo(opcode, tid)};
'b11100: return {"amomaxu.w", dasm32_amo(opcode, tid)};
endcase
return $sformatf(".long 0x%h", opcode);
endfunction
function string dasm32_amo( input[31:0] opcode, input tid=0);
return $sformatf(" %s,%s,(%s) [%h]", abi_reg[opcode[11:7]], abi_reg[opcode[24:20]], abi_reg[opcode[19:15]], gpr[tid][opcode[19:15]]);
endfunction

View File

@ -0,0 +1,437 @@
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
#include "tcp_server.h"
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
/**
* Simple buffer for passing data between TCP sockets and DPI modules
*/
const int BUFSIZE_BYTE = 25600;
struct tcp_buf {
unsigned int rptr;
unsigned int wptr;
char buf[BUFSIZE_BYTE];
};
/**
* TCP Server thread context structure
*/
struct tcp_server_ctx {
// Writeable by the host thread
char *display_name;
uint16_t listen_port;
volatile bool socket_run;
// Writeable by the server thread
tcp_buf *buf_in;
tcp_buf *buf_out;
int sfd; // socket fd
int cfd; // client fd
pthread_t sock_thread;
};
static bool tcp_buffer_is_full(struct tcp_buf *buf) {
if (buf->wptr >= buf->rptr) {
return (buf->wptr - buf->rptr) == (BUFSIZE_BYTE - 1);
} else {
return (buf->rptr - buf->wptr) == 1;
}
}
static bool tcp_buffer_is_empty(struct tcp_buf *buf) {
return (buf->wptr == buf->rptr);
}
static void tcp_buffer_put_byte(struct tcp_buf *buf, char dat) {
bool done = false;
while (!done) {
if (!tcp_buffer_is_full(buf)) {
buf->buf[buf->wptr++] = dat;
buf->wptr %= BUFSIZE_BYTE;
done = true;
}
}
}
static bool tcp_buffer_get_byte(struct tcp_buf *buf, char *dat) {
if (tcp_buffer_is_empty(buf)) {
return false;
}
*dat = buf->buf[buf->rptr++];
buf->rptr %= BUFSIZE_BYTE;
return true;
}
static struct tcp_buf *tcp_buffer_new(void) {
struct tcp_buf *buf_new;
buf_new = (struct tcp_buf *)malloc(sizeof(struct tcp_buf));
buf_new->rptr = 0;
buf_new->wptr = 0;
return buf_new;
}
static void tcp_buffer_free(struct tcp_buf **buf) {
free(*buf);
*buf = NULL;
}
/**
* Start a TCP server
*
* This function creates attempts to create a new TCP socket instance. The
* socket is a non-blocking stream socket, with buffering disabled.
*
* @param ctx context object
* @return 0 on success, -1 in case of an error
*/
static int start(struct tcp_server_ctx *ctx) {
int rv;
assert(ctx->sfd == 0 && "Server already started.");
// create socket
int sfd = socket(AF_INET, SOCK_STREAM, 0);
if (sfd == -1) {
fprintf(stderr, "%s: Unable to create socket: %s (%d)\n", ctx->display_name,
strerror(errno), errno);
return -1;
}
rv = fcntl(sfd, F_SETFL, O_NONBLOCK);
if (rv != 0) {
fprintf(stderr, "%s: Unable to make socket non-blocking: %s (%d)\n",
ctx->display_name, strerror(errno), errno);
return -1;
}
// reuse existing socket (if existing)
int reuse_socket = 1;
rv = setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, &reuse_socket, sizeof(int));
if (rv != 0) {
fprintf(stderr, "%s: Unable to set socket options: %s (%d)\n",
ctx->display_name, strerror(errno), errno);
return -1;
}
// stop tcp socket from buffering (buffering prevents timely responses to
// OpenOCD which severly limits debugging performance)
int tcp_nodelay = 1;
rv = setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, &tcp_nodelay, sizeof(int));
if (rv != 0) {
fprintf(stderr, "%s: Unable to set socket nodelay: %s (%d)\n",
ctx->display_name, strerror(errno), errno);
return -1;
}
// bind server
struct sockaddr_in addr;
memset(&addr, 0, sizeof(addr));
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
addr.sin_port = htons(ctx->listen_port);
rv = bind(sfd, (struct sockaddr *)&addr, sizeof(addr));
if (rv != 0) {
fprintf(stderr, "%s: Failed to bind socket: %s (%d)\n", ctx->display_name,
strerror(errno), errno);
return -1;
}
// listen for incoming connections
rv = listen(sfd, 1);
if (rv != 0) {
fprintf(stderr, "%s: Failed to listen on socket: %s (%d)\n",
ctx->display_name, strerror(errno), errno);
return -1;
}
ctx->sfd = sfd;
assert(ctx->sfd > 0);
return 0;
}
/**
* Accept an incoming connection from a client (nonblocking)
*
* The resulting client fd is made non-blocking.
*
* @param ctx context object
* @return 0 on success, any other value indicates an error
*/
static int client_tryaccept(struct tcp_server_ctx *ctx) {
int rv;
assert(ctx->sfd > 0);
assert(ctx->cfd == 0);
int cfd = accept(ctx->sfd, NULL, NULL);
if (cfd == -1 && errno == EAGAIN) {
return -EAGAIN;
}
if (cfd == -1) {
fprintf(stderr, "%s: Unable to accept incoming connection: %s (%d)\n",
ctx->display_name, strerror(errno), errno);
return -1;
}
rv = fcntl(cfd, F_SETFL, O_NONBLOCK);
if (rv != 0) {
fprintf(stderr, "%s: Unable to make client socket non-blocking: %s (%d)\n",
ctx->display_name, strerror(errno), errno);
return -1;
}
ctx->cfd = cfd;
assert(ctx->cfd > 0);
printf("%s: Accepted client connection\n", ctx->display_name);
return 0;
}
/**
* Stop the TCP server
*
* @param ctx context object
*/
static void stop(struct tcp_server_ctx *ctx) {
assert(ctx);
if (!ctx->sfd) {
return;
}
close(ctx->sfd);
ctx->sfd = 0;
}
/**
* Receive a byte from a connected client
*
* @param ctx context object
* @param cmd byte received
* @return true if a byte was read
*/
static bool get_byte(struct tcp_server_ctx *ctx, char *cmd) {
assert(ctx);
ssize_t num_read = read(ctx->cfd, cmd, 1);
if (num_read == 0) {
return false;
}
if (num_read == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
return false;
} else if (errno == EBADF) {
// Possibly client went away? Accept a new connection.
fprintf(stderr, "%s: Client disappeared.\n", ctx->display_name);
tcp_server_client_close(ctx);
return false;
} else {
fprintf(stderr, "%s: Error while reading from client: %s (%d)\n",
ctx->display_name, strerror(errno), errno);
assert(0 && "Error reading from client");
}
}
assert(num_read == 1);
return true;
}
/**
* Send a byte to a connected client
*
* @param ctx context object
* @param cmd byte to send
*/
static void put_byte(struct tcp_server_ctx *ctx, char cmd) {
while (1) {
ssize_t num_written = send(ctx->cfd, &cmd, sizeof(cmd), MSG_NOSIGNAL);
if (num_written == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
continue;
} else if (errno == EPIPE) {
printf("%s: Remote disconnected.\n", ctx->display_name);
tcp_server_client_close(ctx);
break;
} else {
fprintf(stderr, "%s: Error while writing to client: %s (%d)\n",
ctx->display_name, strerror(errno), errno);
assert(0 && "Error writing to client.");
}
}
if (num_written >= 1) {
break;
}
}
}
/**
* Cleanup server context
*
* @param ctx context object
*/
static void ctx_free(struct tcp_server_ctx *ctx) {
// Free the buffers
tcp_buffer_free(&ctx->buf_in);
tcp_buffer_free(&ctx->buf_out);
// Free the display name
free(ctx->display_name);
// Free the ctx
free(ctx);
ctx = NULL;
}
/**
* Thread function to create a new server instance
*
* @param ctx_void context object
* @return Always returns NULL
*/
static void *server_create(void *ctx_void) {
// Cast to a server struct
struct tcp_server_ctx *ctx = (struct tcp_server_ctx *)ctx_void;
struct timeval timeout;
// Start the server
int rv = start(ctx);
if (rv != 0) {
fprintf(stderr, "%s: Unable to create TCP server on port %d\n",
ctx->display_name, ctx->listen_port);
goto err_cleanup_return;
}
// Initialise timeout
timeout.tv_sec = 0;
// Initialise fd_set
// Start waiting for connection / data
char xfer_data;
while (ctx->socket_run) {
// Initialise structure of fds
fd_set read_fds;
FD_ZERO(&read_fds);
if (ctx->sfd) {
FD_SET(ctx->sfd, &read_fds);
}
if (ctx->cfd) {
FD_SET(ctx->cfd, &read_fds);
}
// max fd num
int mfd = (ctx->cfd > ctx->sfd) ? ctx->cfd : ctx->sfd;
// Set timeout - 50us gives good performance
timeout.tv_usec = 50;
// Wait for socket activity or timeout
rv = select(mfd + 1, &read_fds, NULL, NULL, &timeout);
if (rv < 0) {
printf("%s: Socket read failed, port: %d\n", ctx->display_name,
ctx->listen_port);
tcp_server_client_close(ctx);
}
// New connection
if (FD_ISSET(ctx->sfd, &read_fds)) {
client_tryaccept(ctx);
}
// New client data
if (FD_ISSET(ctx->cfd, &read_fds)) {
while (get_byte(ctx, &xfer_data)) {
tcp_buffer_put_byte(ctx->buf_in, xfer_data);
}
}
if (ctx->cfd != 0) {
while (tcp_buffer_get_byte(ctx->buf_out, &xfer_data)) {
put_byte(ctx, xfer_data);
}
}
}
err_cleanup_return:
// Simulation done - clean up
tcp_server_client_close(ctx);
stop(ctx);
return NULL;
}
// Abstract interface functions
tcp_server_ctx *tcp_server_create(const char *display_name, int listen_port) {
struct tcp_server_ctx *ctx =
(struct tcp_server_ctx *)calloc(1, sizeof(struct tcp_server_ctx));
assert(ctx);
// Create the buffers
struct tcp_buf *buf_in = tcp_buffer_new();
struct tcp_buf *buf_out = tcp_buffer_new();
assert(buf_in);
assert(buf_out);
// Populate the struct with buffer pointers
ctx->buf_in = buf_in;
ctx->buf_out = buf_out;
// Set up socket details
ctx->socket_run = true;
ctx->listen_port = listen_port;
ctx->display_name = strdup(display_name);
assert(ctx->display_name);
if (pthread_create(&ctx->sock_thread, NULL, server_create, (void *)ctx) !=
0) {
fprintf(stderr, "%s: Unable to create TCP socket thread\n",
ctx->display_name);
ctx_free(ctx);
free(ctx);
return NULL;
}
return ctx;
}
bool tcp_server_read(struct tcp_server_ctx *ctx, char *dat) {
return tcp_buffer_get_byte(ctx->buf_in, dat);
}
void tcp_server_write(struct tcp_server_ctx *ctx, char dat) {
tcp_buffer_put_byte(ctx->buf_out, dat);
}
void tcp_server_close(struct tcp_server_ctx *ctx) {
// Shut down the socket thread
ctx->socket_run = false;
pthread_join(ctx->sock_thread, NULL);
ctx_free(ctx);
}
void tcp_server_client_close(struct tcp_server_ctx *ctx) {
assert(ctx);
if (!ctx->cfd) {
return;
}
close(ctx->cfd);
ctx->cfd = 0;
}

View File

@ -0,0 +1,69 @@
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
/**
* Functions to create and interact with a threaded TCP server
*
* This is intended to be used by simulation add-on DPI modules to provide
* basic TCP socket communication between a host and simulated peripherals.
*/
#ifndef TCP_SERVER_H_
#define TCP_SERVER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
struct tcp_server_ctx;
/**
* Non-blocking read of a byte from a connected client
*
* @param ctx tcp server context object
* @param dat byte received
* @return true if a byte was read
*/
bool tcp_server_read(struct tcp_server_ctx *ctx, char *dat);
/**
* Write a byte to a connected client
*
* The write is internally buffered and so does not block if the client is not
* ready to accept data, but does block if the buffer is full.
*
* @param ctx tcp server context object
* @param dat byte to send
*/
void tcp_server_write(struct tcp_server_ctx *ctx, char dat);
/**
* Create a new TCP server instance
*
* @param display_name C string description of server
* @param listen_port On which port the server should listen
* @return A pointer to the created context struct
*/
tcp_server_ctx *tcp_server_create(const char *display_name, int listen_port);
/**
* Shut down the server and free all reserved memory
*
* @param ctx tcp server context object
*/
void tcp_server_close(struct tcp_server_ctx *ctx);
/**
* Instruct the server to disconnect a client
*
* @param ctx tcp server context object
*/
void tcp_server_client_close(struct tcp_server_ctx *ctx);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TCP_SERVER_H_

View File

@ -0,0 +1,154 @@
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
#include "jtagdpi.h"
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../common/tcp_server.h"
struct jtagdpi_ctx {
// Server context
struct tcp_server_ctx *sock;
// Signals
uint8_t tck;
uint8_t tms;
uint8_t tdi;
uint8_t tdo;
uint8_t trst_n;
uint8_t srst_n;
};
/**
* Reset the JTAG signals to a "dongle unplugged" state
*/
static void reset_jtag_signals(struct jtagdpi_ctx *ctx) {
assert(ctx);
ctx->tck = 0;
ctx->tms = 0;
ctx->tdi = 0;
// trst_n is pulled down (reset active) by default
ctx->trst_n = 0;
// srst_n is pulled up (reset not active) by default
ctx->srst_n = 1;
}
/**
* Update the JTAG signals in the context structure
*/
static void update_jtag_signals(struct jtagdpi_ctx *ctx) {
assert(ctx);
/*
* Documentation pointer:
* The remote_bitbang protocol implemented below is documented in the OpenOCD
* source tree at doc/manual/jtag/drivers/remote_bitbang.txt, or online at
* https://repo.or.cz/openocd.git/blob/HEAD:/doc/manual/jtag/drivers/remote_bitbang.txt
*/
// read a command byte
char cmd;
if (!tcp_server_read(ctx->sock, &cmd)) {
return;
}
bool act_send_resp = false;
bool act_quit = false;
// parse received command byte
if (cmd >= '0' && cmd <= '7') {
// JTAG write
char cmd_bit = cmd - '0';
ctx->tdi = (cmd_bit >> 0) & 0x1;
ctx->tms = (cmd_bit >> 1) & 0x1;
ctx->tck = (cmd_bit >> 2) & 0x1;
} else if (cmd >= 'r' && cmd <= 'u') {
// JTAG reset (active high from OpenOCD)
char cmd_bit = cmd - 'r';
ctx->srst_n = !((cmd_bit >> 0) & 0x1);
ctx->trst_n = !((cmd_bit >> 1) & 0x1);
} else if (cmd == 'R') {
// JTAG read
act_send_resp = true;
} else if (cmd == 'B') {
// printf("%s: BLINK ON!\n", ctx->display_name);
} else if (cmd == 'b') {
// printf("%s: BLINK OFF!\n", ctx->display_name);
} else if (cmd == 'Q') {
// quit (client disconnect)
act_quit = true;
} else {
fprintf(stderr,
"JTAG DPI Protocol violation detected: unsupported command %c\n",
cmd);
exit(1);
}
// send tdo as response
if (act_send_resp) {
char tdo_ascii = ctx->tdo + '0';
tcp_server_write(ctx->sock, tdo_ascii);
}
if (act_quit) {
printf("JTAG DPI: Remote disconnected.\n");
tcp_server_client_close(ctx->sock);
}
}
void *jtagdpi_create(const char *display_name, int listen_port) {
struct jtagdpi_ctx *ctx =
(struct jtagdpi_ctx *)calloc(1, sizeof(struct jtagdpi_ctx));
assert(ctx);
// Create socket
ctx->sock = tcp_server_create(display_name, listen_port);
reset_jtag_signals(ctx);
printf(
"\n"
"JTAG: Virtual JTAG interface %s is listening on port %d. Use\n"
"OpenOCD and the following configuration to connect:\n"
" interface remote_bitbang\n"
" remote_bitbang_host localhost\n"
" remote_bitbang_port %d\n",
display_name, listen_port, listen_port);
return (void *)ctx;
}
void jtagdpi_close(void *ctx_void) {
struct jtagdpi_ctx *ctx = (struct jtagdpi_ctx *)ctx_void;
if (!ctx) {
return;
}
tcp_server_close(ctx->sock);
free(ctx);
}
void jtagdpi_tick(void *ctx_void, svBit *tck, svBit *tms, svBit *tdi,
svBit *trst_n, svBit *srst_n, const svBit tdo) {
struct jtagdpi_ctx *ctx = (struct jtagdpi_ctx *)ctx_void;
ctx->tdo = tdo;
// TODO: Evaluate moving this functionality into a separate thread
if (ctx) {
update_jtag_signals(ctx);
}
*tdi = ctx->tdi;
*tms = ctx->tms;
*tck = ctx->tck;
*srst_n = ctx->srst_n;
*trst_n = ctx->trst_n;
}

View File

@ -0,0 +1,56 @@
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
#ifndef JTAGDPI_H_
#define JTAGDPI_H_
#include <svdpi.h>
#ifdef __cplusplus
extern "C" {
#endif
struct jtagdpi_ctx;
/**
* Constructor: Create and initialize jtagdpi context object
*
* Call from a initial block.
*
* @param display_name Name of the JTAG interface (for display purposes only)
* @param listen_port Port to listen on
* @return an initialized struct jtagdpi_ctx context object
*/
void *jtagdpi_create(const char *display_name, int listen_port);
/**
* Destructor: Close all connections and free all resources
*
* Call from a finish block.
*
* @param ctx_void a struct jtagdpi_ctx context object
*/
void jtagdpi_close(void *ctx_void);
/**
* Drive JTAG signals
*
* Call this function from the simulation at every clock tick to read/write
* from/to the JTAG signals.
*
* @param ctx_void a struct jtagdpi_ctx context object
* @param tck JTAG test clock signal
* @param tms JTAG test mode select signal
* @param tdi JTAG test data input signal
* @param trst_n JTAG test reset signal (active low)
* @param srst_n JTAG system reset signal (active low)
* @param tdo JTAG test data out
*/
void jtagdpi_tick(void *ctx_void, svBit *tck, svBit *tms, svBit *tdi,
svBit *trst_n, svBit *srst_n, const svBit tdo);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // JTAGDPI_H_

View File

@ -0,0 +1,56 @@
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
module jtagdpi #(
parameter string Name = "jtag0", // name of the JTAG interface (display only)
parameter int ListenPort = 44853 // TCP port to listen on
) (
input logic clk_i,
input logic rst_ni,
output logic jtag_tck,
output logic jtag_tms,
output logic jtag_tdi,
input logic jtag_tdo,
output logic jtag_trst_n,
output logic jtag_srst_n
);
import "DPI-C" function chandle jtagdpi_create(
input string name,
input int listen_port
);
import "DPI-C" function void jtagdpi_tick(
input chandle ctx,
output bit tck,
output bit tms,
output bit tdi,
output bit trst_n,
output bit srst_n,
input bit tdo
);
import "DPI-C" function void jtagdpi_close(input chandle ctx);
chandle ctx;
initial begin
ctx = jtagdpi_create(Name, ListenPort);
end
final begin
jtagdpi_close(ctx);
ctx = 0;
end
reg [1:0] plit;
always_ff @(posedge clk_i) plit <= plit + 1'b1;
always_ff @(posedge plit[1], negedge rst_ni) begin
jtagdpi_tick(ctx, jtag_tck, jtag_tms, jtag_tdi, jtag_trst_n, jtag_srst_n,
jtag_tdo);
end
endmodule

9
Flow/soc/soc_sim.mk Normal file
View File

@ -0,0 +1,9 @@
-I../design/include
-I./
-CFLAGS -lpthread
-LDFLAGS -lpthread
./dpi/common/tcp_server.c
./dpi/jtagdpi/jtagdpi.sv
./dpi/jtagdpi/jtagdpi.c

238
Flow/soc/soc_sim.sv Normal file
View File

@ -0,0 +1,238 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
module soc_sim (
input bit core_clk
);
logic rst_l;
logic dbg_rst_l;
wire jtag_tdo;
wire jtag_tck;
wire jtag_tms;
wire jtag_tdi;
wire jtag_trst_n;
bit [31:0] cycleCnt;
logic mailbox_data_val;
int commit_count;
logic wb_valid;
logic [4:0] wb_dest;
logic [31:0] wb_data;
wire [63:0] WriteData;
string abi_reg[32]; // ABI register names
assign WriteData = rvsoc.lsu_axi_wdata;
assign mailbox_data_val = WriteData[7:0] > 8'h5 && WriteData[7:0] < 8'h7f;
parameter MAILBOX_ADDR = 32'hD0580000;
logic write;
logic [31:0] laddr;
wire mailbox_write = rvsoc.lmem_axi_awvalid && rvsoc.lsu_axi_awaddr == MAILBOX_ADDR && rst_l;
always @(posedge core_clk or negedge rst_l) begin
if (~rst_l) begin
laddr <= 32'b0;
write <= 1'b0;
end else begin
if (rvsoc.lsu_hready) begin
laddr <= rvsoc.lsu_haddr;
write <= rvsoc.lsu_hwrite & |rvsoc.lsu_htrans;
end
end
end
parameter MAX_CYCLES = 10_000_000_0;
integer fd, tp, el;
/* verilator lint_off WIDTH */
/* verilator lint_off CASEINCOMPLETE */
`include "dasm.svi"
/* verilator lint_on CASEINCOMPLETE */
/* verilator lint_on WIDTH */
always @(posedge core_clk) begin
cycleCnt <= cycleCnt + 1;
if (cycleCnt == MAX_CYCLES) begin
$display("Hit max cycle count (%0d) .. stopping", cycleCnt);
$finish;
end
if (mailbox_data_val & mailbox_write) begin
$fwrite(fd, "%c", WriteData[7:0]);
$write("%c", WriteData[7:0]);
end
if (mailbox_write && WriteData[7:0] == 8'hff) begin
$display("\nFinished : minstret = %0d, mcycle = %0d",
rvsoc.rvtop.swerv.dec.tlu.minstretl[31:0],
rvsoc.rvtop.swerv.dec.tlu.mcyclel[31:0]);
$display(
"See \"exec.log\" for execution trace with register updates..\n");
$display("TEST_PASSED");
$finish;
end else if (mailbox_write && WriteData[7:0] == 8'h1) begin
$display("TEST_FAILED");
$finish;
end
end
// trace monitor
always @(posedge core_clk) begin
wb_valid <= rvsoc.rvtop.swerv.dec.dec_i0_wen_r;
wb_dest <= rvsoc.rvtop.swerv.dec.dec_i0_waddr_r;
wb_data <= rvsoc.rvtop.swerv.dec.dec_i0_wdata_r;
if (rvsoc.trace_rv_i_valid_ip) begin
$fwrite(tp, "%b,%h,%h,%0h,%0h,3,%b,%h,%h,%b\n", rvsoc.trace_rv_i_valid_ip,
0, rvsoc.trace_rv_i_address_ip, 0, rvsoc.trace_rv_i_insn_ip,
rvsoc.trace_rv_i_exception_ip, rvsoc.trace_rv_i_ecause_ip,
rvsoc.trace_rv_i_tval_ip, rvsoc.trace_rv_i_interrupt_ip);
// Basic trace - no exception register updates
// #1 0 ee000000 b0201073 c 0b02 00000000
commit_count++;
$fwrite(el, "%10d : %8s 0 %h %h%13s ; %s\n", cycleCnt, $sformatf(
"#%0d", commit_count), rvsoc.trace_rv_i_address_ip,
rvsoc.trace_rv_i_insn_ip, (wb_dest != 0 && wb_valid) ? $sformatf(
"%s=%h", abi_reg[wb_dest], wb_data) : " ", dasm(
rvsoc.trace_rv_i_insn_ip,
rvsoc.trace_rv_i_address_ip,
wb_dest & {5{wb_valid}},
wb_data
));
end
if (rvsoc.rvtop.swerv.dec.dec_nonblock_load_wen) begin
$fwrite(el, "%10d : %32s=%h ; nbL\n", cycleCnt,
abi_reg[rvsoc.rvtop.swerv.dec.dec_nonblock_load_waddr],
rvsoc.rvtop.swerv.dec.lsu_nonblock_load_data);
soc_sim.gpr[0][rvsoc.rvtop.swerv.dec.dec_nonblock_load_waddr] = rvsoc.rvtop.swerv.dec.lsu_nonblock_load_data;
end
if (rvsoc.rvtop.swerv.dec.exu_div_wren) begin
$fwrite(el, "%10d : %32s=%h ; nbD\n", cycleCnt,
abi_reg[rvsoc.rvtop.swerv.dec.div_waddr_wb],
rvsoc.rvtop.swerv.dec.exu_div_result);
soc_sim.gpr[0][rvsoc.rvtop.swerv.dec.div_waddr_wb] = rvsoc.rvtop.swerv.dec.exu_div_result;
end
end
reg [7:0] hex[(1<<rvsoc.imem.MEM_DEPTH)-1:0];
initial begin
abi_reg[0] = "zero";
abi_reg[1] = "ra";
abi_reg[2] = "sp";
abi_reg[3] = "gp";
abi_reg[4] = "tp";
abi_reg[5] = "t0";
abi_reg[6] = "t1";
abi_reg[7] = "t2";
abi_reg[8] = "s0";
abi_reg[9] = "s1";
abi_reg[10] = "a0";
abi_reg[11] = "a1";
abi_reg[12] = "a2";
abi_reg[13] = "a3";
abi_reg[14] = "a4";
abi_reg[15] = "a5";
abi_reg[16] = "a6";
abi_reg[17] = "a7";
abi_reg[18] = "s2";
abi_reg[19] = "s3";
abi_reg[20] = "s4";
abi_reg[21] = "s5";
abi_reg[22] = "s6";
abi_reg[23] = "s7";
abi_reg[24] = "s8";
abi_reg[25] = "s9";
abi_reg[26] = "s10";
abi_reg[27] = "s11";
abi_reg[28] = "t3";
abi_reg[29] = "t4";
abi_reg[30] = "t5";
abi_reg[31] = "t6";
tp = $fopen("trace_port.csv", "w");
el = $fopen("exec.log", "w");
$fwrite(el, "//Cycle : #inst 0 pc opcode reg regnum value\n");
fd = $fopen("console.log", "w");
commit_count = 0;
$readmemh("program.hex", hex);
for (
reg [rvsoc.imem.MEM_DEPTH-1:0] i = 0; i < (1 << (rvsoc.imem.MEM_DEPTH - 3)); i++
) begin
rvsoc.imem.mem0[i] = hex[(i << 3) + 0];
rvsoc.imem.mem1[i] = hex[(i << 3) + 1];
rvsoc.imem.mem2[i] = hex[(i << 3) + 2];
rvsoc.imem.mem3[i] = hex[(i << 3) + 3];
rvsoc.imem.mem4[i] = hex[(i << 3) + 4];
rvsoc.imem.mem5[i] = hex[(i << 3) + 5];
rvsoc.imem.mem6[i] = hex[(i << 3) + 6];
rvsoc.imem.mem7[i] = hex[(i << 3) + 7];
end
for (
reg [rvsoc.lmem.MEM_DEPTH-1:0] i = 0; i < (1 << (rvsoc.lmem.MEM_DEPTH - 3)); i++
) begin
rvsoc.lmem.mem0[i] = hex[(i << 3) + 0];
rvsoc.lmem.mem1[i] = hex[(i << 3) + 1];
rvsoc.lmem.mem2[i] = hex[(i << 3) + 2];
rvsoc.lmem.mem3[i] = hex[(i << 3) + 3];
rvsoc.lmem.mem4[i] = hex[(i << 3) + 4];
rvsoc.lmem.mem5[i] = hex[(i << 3) + 5];
rvsoc.lmem.mem6[i] = hex[(i << 3) + 6];
rvsoc.lmem.mem7[i] = hex[(i << 3) + 7];
end
end
assign rst_l = cycleCnt > 20;
assign dbg_rst_l = cycleCnt > 10;
soc_top rvsoc (
.clk (core_clk),
.rst (rst_l),
.dbg_rst(dbg_rst_l),
.jtag_tdo (jtag_tdo),
.jtag_tck (jtag_tck),
.jtag_tms (jtag_tms),
.jtag_tdi (jtag_tdi),
.jtag_trst_n(jtag_trst_n)
);
jtagdpi jtagdpi (
.clk_i (core_clk),
.rst_ni(rst_l),
.jtag_tck (jtag_tck),
.jtag_tms (jtag_tms),
.jtag_tdi (jtag_tdi),
.jtag_tdo (jtag_tdo),
.jtag_trst_n(jtag_trst_n),
.jtag_srst_n()
);
`define DRAM(bank) \
rvsoc.rvtop.mem.Gen_dccm_enable.dccm.mem_bank[bank].dccm_bank.ram_core
`define ICCM_PATH `RV_TOP.mem.iccm
`define IRAM0(bk) `ICCM_PATH.mem_bank[bk].iccm_bank_lo0.ram_core
`define IRAM1(bk) `ICCM_PATH.mem_bank[bk].iccm_bank_lo1.ram_core
`define IRAM2(bk) `ICCM_PATH.mem_bank[bk].iccm_bank_hi0.ram_core
`define IRAM3(bk) `ICCM_PATH.mem_bank[bk].iccm_bank_hi1.ram_core
endmodule

51
Flow/soc/soc_top.mk Normal file
View File

@ -0,0 +1,51 @@
../design/include/el2_def.sv
../design/el2_swerv_wrapper.sv
../design/el2_mem.sv
../design/el2_pic_ctrl.sv
../design/el2_swerv.sv
../design/el2_dma_ctrl.sv
../design/ifu/el2_ifu_aln_ctl.sv
../design/ifu/el2_ifu_compress_ctl.sv
../design/ifu/el2_ifu_ifc_ctl.sv
../design/ifu/el2_ifu_bp_ctl.sv
../design/ifu/el2_ifu_ic_mem.sv
../design/ifu/el2_ifu_mem_ctl.sv
../design/ifu/el2_ifu_iccm_mem.sv
../design/ifu/el2_ifu.sv
../design/dec/el2_dec_decode_ctl.sv
../design/dec/el2_dec_gpr_ctl.sv
../design/dec/el2_dec_ib_ctl.sv
../design/dec/el2_dec_tlu_ctl.sv
../design/dec/el2_dec_trigger.sv
../design/dec/el2_dec.sv
../design/exu/el2_exu_alu_ctl.sv
../design/exu/el2_exu_mul_ctl.sv
../design/exu/el2_exu_div_ctl.sv
../design/exu/el2_exu.sv
../design/lsu/el2_lsu.sv
../design/lsu/el2_lsu_clkdomain.sv
../design/lsu/el2_lsu_addrcheck.sv
../design/lsu/el2_lsu_lsc_ctl.sv
../design/lsu/el2_lsu_stbuf.sv
../design/lsu/el2_lsu_bus_buffer.sv
../design/lsu/el2_lsu_bus_intf.sv
../design/lsu/el2_lsu_ecc.sv
../design/lsu/el2_lsu_dccm_mem.sv
../design/lsu/el2_lsu_dccm_ctl.sv
../design/lsu/el2_lsu_trigger.sv
../design/dbg/el2_dbg.sv
../design/dmi/dmi_wrapper.v
../design/dmi/dmi_jtag_to_core_sync.v
../design/dmi/rvjtag_tap.v
../design/lib/el2_lib.sv
../design/lib/beh_lib.sv
../design/lib/mem_lib.sv
../design/lib/ahb_to_axi4.sv
../design/lib/axi4_to_ahb.sv
./ahb_sif.sv
./axi_lsu_dma_bridge.sv
./soc_top.sv

695
Flow/soc/soc_top.sv Normal file
View File

@ -0,0 +1,695 @@
module soc_top (
input clk,
input dbg_rst,
input rst,
output jtag_tdo,
input jtag_tck,
input jtag_tms,
input jtag_tdi,
input jtag_trst_n
);
logic nmi_int;
logic [ 31:0] reset_vector;
logic [ 31:0] nmi_vector;
logic [ 31:1] jtag_id;
logic [ 31:0] ic_haddr;
logic [ 2:0] ic_hburst;
logic ic_hmastlock;
logic [ 3:0] ic_hprot;
logic [ 2:0] ic_hsize;
logic [ 1:0] ic_htrans;
logic ic_hwrite;
logic [ 63:0] ic_hrdata;
logic ic_hready;
logic ic_hresp;
logic [ 31:0] lsu_haddr;
logic [ 2:0] lsu_hburst;
logic lsu_hmastlock;
logic [ 3:0] lsu_hprot;
logic [ 2:0] lsu_hsize;
logic [ 1:0] lsu_htrans;
logic lsu_hwrite;
logic [ 63:0] lsu_hrdata;
logic [ 63:0] lsu_hwdata;
logic lsu_hready;
logic lsu_hresp;
logic [ 31:0] sb_haddr;
logic [ 2:0] sb_hburst;
logic sb_hmastlock;
logic [ 3:0] sb_hprot;
logic [ 2:0] sb_hsize;
logic [ 1:0] sb_htrans;
logic sb_hwrite;
logic [ 63:0] sb_hrdata;
logic [ 63:0] sb_hwdata;
logic sb_hready;
logic sb_hresp;
logic [ 63:0] trace_rv_i_insn_ip;
logic [ 63:0] trace_rv_i_address_ip;
logic [ 2:0] trace_rv_i_valid_ip;
logic [ 2:0] trace_rv_i_exception_ip;
logic [ 4:0] trace_rv_i_ecause_ip;
logic [ 2:0] trace_rv_i_interrupt_ip;
logic [ 31:0] trace_rv_i_tval_ip;
logic o_debug_mode_status;
logic [ 1:0] dec_tlu_perfcnt0;
logic [ 1:0] dec_tlu_perfcnt1;
logic [ 1:0] dec_tlu_perfcnt2;
logic [ 1:0] dec_tlu_perfcnt3;
logic o_cpu_halt_ack;
logic o_cpu_halt_status;
logic o_cpu_run_ack;
logic mailbox_write;
logic [ 63:0] dma_hrdata;
logic [ 63:0] dma_hwdata;
logic dma_hready;
logic dma_hresp;
logic mpc_debug_halt_req;
logic mpc_debug_run_req;
logic mpc_reset_run_req;
logic mpc_debug_halt_ack;
logic mpc_debug_run_ack;
logic debug_brkpt_status;
wire dma_hready_out;
//-------------------------- LSU AXI signals--------------------------
// AXI Write Channels
wire lsu_axi_awvalid;
wire lsu_axi_awready;
wire [`RV_LSU_BUS_TAG-1:0] lsu_axi_awid;
wire [ 31:0] lsu_axi_awaddr;
wire [ 3:0] lsu_axi_awregion;
wire [ 7:0] lsu_axi_awlen;
wire [ 2:0] lsu_axi_awsize;
wire [ 1:0] lsu_axi_awburst;
wire lsu_axi_awlock;
wire [ 3:0] lsu_axi_awcache;
wire [ 2:0] lsu_axi_awprot;
wire [ 3:0] lsu_axi_awqos;
wire lsu_axi_wvalid;
wire lsu_axi_wready;
wire [ 63:0] lsu_axi_wdata;
wire [ 7:0] lsu_axi_wstrb;
wire lsu_axi_wlast;
wire lsu_axi_bvalid;
wire lsu_axi_bready;
wire [ 1:0] lsu_axi_bresp;
wire [`RV_LSU_BUS_TAG-1:0] lsu_axi_bid;
// AXI Read Channels
wire lsu_axi_arvalid;
wire lsu_axi_arready;
wire [`RV_LSU_BUS_TAG-1:0] lsu_axi_arid;
wire [ 31:0] lsu_axi_araddr;
wire [ 3:0] lsu_axi_arregion;
wire [ 7:0] lsu_axi_arlen;
wire [ 2:0] lsu_axi_arsize;
wire [ 1:0] lsu_axi_arburst;
wire lsu_axi_arlock;
wire [ 3:0] lsu_axi_arcache;
wire [ 2:0] lsu_axi_arprot;
wire [ 3:0] lsu_axi_arqos;
wire lsu_axi_rvalid;
wire lsu_axi_rready;
wire [`RV_LSU_BUS_TAG-1:0] lsu_axi_rid;
wire [ 63:0] lsu_axi_rdata;
wire [ 1:0] lsu_axi_rresp;
wire lsu_axi_rlast;
//-------------------------- IFU AXI signals--------------------------
// AXI Write Channels
wire ifu_axi_awvalid;
wire ifu_axi_awready;
wire [`RV_IFU_BUS_TAG-1:0] ifu_axi_awid;
wire [ 31:0] ifu_axi_awaddr;
wire [ 3:0] ifu_axi_awregion;
wire [ 7:0] ifu_axi_awlen;
wire [ 2:0] ifu_axi_awsize;
wire [ 1:0] ifu_axi_awburst;
wire ifu_axi_awlock;
wire [ 3:0] ifu_axi_awcache;
wire [ 2:0] ifu_axi_awprot;
wire [ 3:0] ifu_axi_awqos;
wire ifu_axi_wvalid;
wire ifu_axi_wready;
wire [ 63:0] ifu_axi_wdata;
wire [ 7:0] ifu_axi_wstrb;
wire ifu_axi_wlast;
wire ifu_axi_bvalid;
wire ifu_axi_bready;
wire [ 1:0] ifu_axi_bresp;
wire [`RV_IFU_BUS_TAG-1:0] ifu_axi_bid;
// AXI Read Channels
wire ifu_axi_arvalid;
wire ifu_axi_arready;
wire [`RV_IFU_BUS_TAG-1:0] ifu_axi_arid;
wire [ 31:0] ifu_axi_araddr;
wire [ 3:0] ifu_axi_arregion;
wire [ 7:0] ifu_axi_arlen;
wire [ 2:0] ifu_axi_arsize;
wire [ 1:0] ifu_axi_arburst;
wire ifu_axi_arlock;
wire [ 3:0] ifu_axi_arcache;
wire [ 2:0] ifu_axi_arprot;
wire [ 3:0] ifu_axi_arqos;
wire ifu_axi_rvalid;
wire ifu_axi_rready;
wire [`RV_IFU_BUS_TAG-1:0] ifu_axi_rid;
wire [ 63:0] ifu_axi_rdata;
wire [ 1:0] ifu_axi_rresp;
wire ifu_axi_rlast;
//-------------------------- SB AXI signals--------------------------
// AXI Write Channels
wire sb_axi_awvalid;
wire sb_axi_awready;
wire [ `RV_SB_BUS_TAG-1:0] sb_axi_awid;
wire [ 31:0] sb_axi_awaddr;
wire [ 3:0] sb_axi_awregion;
wire [ 7:0] sb_axi_awlen;
wire [ 2:0] sb_axi_awsize;
wire [ 1:0] sb_axi_awburst;
wire sb_axi_awlock;
wire [ 3:0] sb_axi_awcache;
wire [ 2:0] sb_axi_awprot;
wire [ 3:0] sb_axi_awqos;
wire sb_axi_wvalid;
wire sb_axi_wready;
wire [ 63:0] sb_axi_wdata;
wire [ 7:0] sb_axi_wstrb;
wire sb_axi_wlast;
wire sb_axi_bvalid;
wire sb_axi_bready;
wire [ 1:0] sb_axi_bresp;
wire [ `RV_SB_BUS_TAG-1:0] sb_axi_bid;
// AXI Read Channels
wire sb_axi_arvalid;
wire sb_axi_arready;
wire [ `RV_SB_BUS_TAG-1:0] sb_axi_arid;
wire [ 31:0] sb_axi_araddr;
wire [ 3:0] sb_axi_arregion;
wire [ 7:0] sb_axi_arlen;
wire [ 2:0] sb_axi_arsize;
wire [ 1:0] sb_axi_arburst;
wire sb_axi_arlock;
wire [ 3:0] sb_axi_arcache;
wire [ 2:0] sb_axi_arprot;
wire [ 3:0] sb_axi_arqos;
wire sb_axi_rvalid;
wire sb_axi_rready;
wire [ `RV_SB_BUS_TAG-1:0] sb_axi_rid;
wire [ 63:0] sb_axi_rdata;
wire [ 1:0] sb_axi_rresp;
wire sb_axi_rlast;
//-------------------------- DMA AXI signals--------------------------
// AXI Write Channels
wire dma_axi_awvalid;
wire dma_axi_awready;
wire [`RV_DMA_BUS_TAG-1:0] dma_axi_awid;
wire [ 31:0] dma_axi_awaddr;
wire [ 2:0] dma_axi_awsize;
wire [ 2:0] dma_axi_awprot;
wire [ 7:0] dma_axi_awlen;
wire [ 1:0] dma_axi_awburst;
wire dma_axi_wvalid;
wire dma_axi_wready;
wire [ 63:0] dma_axi_wdata;
wire [ 7:0] dma_axi_wstrb;
wire dma_axi_wlast;
wire dma_axi_bvalid;
wire dma_axi_bready;
wire [ 1:0] dma_axi_bresp;
wire [`RV_DMA_BUS_TAG-1:0] dma_axi_bid;
// AXI Read Channels
wire dma_axi_arvalid;
wire dma_axi_arready;
wire [`RV_DMA_BUS_TAG-1:0] dma_axi_arid;
wire [ 31:0] dma_axi_araddr;
wire [ 2:0] dma_axi_arsize;
wire [ 2:0] dma_axi_arprot;
wire [ 7:0] dma_axi_arlen;
wire [ 1:0] dma_axi_arburst;
wire dma_axi_rvalid;
wire dma_axi_rready;
wire [`RV_DMA_BUS_TAG-1:0] dma_axi_rid;
wire [ 63:0] dma_axi_rdata;
wire [ 1:0] dma_axi_rresp;
wire dma_axi_rlast;
wire lmem_axi_arvalid;
wire lmem_axi_arready;
wire lmem_axi_rvalid;
wire [`RV_LSU_BUS_TAG-1:0] lmem_axi_rid;
wire [ 1:0] lmem_axi_rresp;
wire [ 63:0] lmem_axi_rdata;
wire lmem_axi_rlast;
wire lmem_axi_rready;
wire lmem_axi_awvalid;
wire lmem_axi_awready;
wire lmem_axi_wvalid;
wire lmem_axi_wready;
wire [ 1:0] lmem_axi_bresp;
wire lmem_axi_bvalid;
wire [`RV_LSU_BUS_TAG-1:0] lmem_axi_bid;
wire lmem_axi_bready;
initial begin
jtag_id[31:28] = 4'b1;
jtag_id[27:12] = '0;
jtag_id[11:1] = 11'h45;
reset_vector = `RV_RESET_VEC;
nmi_vector = 32'hee000000;
nmi_int = 0;
end
el2_swerv_wrapper rvtop (
.rst_l (rst),
.dbg_rst_l(dbg_rst),
.clk (clk),
.rst_vec (reset_vector[31:1]),
.nmi_int (nmi_int),
.nmi_vec (nmi_vector[31:1]),
.jtag_id (jtag_id[31:1]),
//-------------------------- LSU AXI signals--------------------------
// AXI Write Channels
.lsu_axi_awvalid (lsu_axi_awvalid),
.lsu_axi_awready (lsu_axi_awready),
.lsu_axi_awid (lsu_axi_awid),
.lsu_axi_awaddr (lsu_axi_awaddr),
.lsu_axi_awregion(lsu_axi_awregion),
.lsu_axi_awlen (lsu_axi_awlen),
.lsu_axi_awsize (lsu_axi_awsize),
.lsu_axi_awburst (lsu_axi_awburst),
.lsu_axi_awlock (lsu_axi_awlock),
.lsu_axi_awcache (lsu_axi_awcache),
.lsu_axi_awprot (lsu_axi_awprot),
.lsu_axi_awqos (lsu_axi_awqos),
.lsu_axi_wvalid(lsu_axi_wvalid),
.lsu_axi_wready(lsu_axi_wready),
.lsu_axi_wdata (lsu_axi_wdata),
.lsu_axi_wstrb (lsu_axi_wstrb),
.lsu_axi_wlast (lsu_axi_wlast),
.lsu_axi_bvalid(lsu_axi_bvalid),
.lsu_axi_bready(lsu_axi_bready),
.lsu_axi_bresp (lsu_axi_bresp),
.lsu_axi_bid (lsu_axi_bid),
.lsu_axi_arvalid (lsu_axi_arvalid),
.lsu_axi_arready (lsu_axi_arready),
.lsu_axi_arid (lsu_axi_arid),
.lsu_axi_araddr (lsu_axi_araddr),
.lsu_axi_arregion(lsu_axi_arregion),
.lsu_axi_arlen (lsu_axi_arlen),
.lsu_axi_arsize (lsu_axi_arsize),
.lsu_axi_arburst (lsu_axi_arburst),
.lsu_axi_arlock (lsu_axi_arlock),
.lsu_axi_arcache (lsu_axi_arcache),
.lsu_axi_arprot (lsu_axi_arprot),
.lsu_axi_arqos (lsu_axi_arqos),
.lsu_axi_rvalid(lsu_axi_rvalid),
.lsu_axi_rready(lsu_axi_rready),
.lsu_axi_rid (lsu_axi_rid),
.lsu_axi_rdata (lsu_axi_rdata),
.lsu_axi_rresp (lsu_axi_rresp),
.lsu_axi_rlast (lsu_axi_rlast),
//-------------------------- IFU AXI signals--------------------------
// AXI Write Channels
.ifu_axi_awvalid (ifu_axi_awvalid),
.ifu_axi_awready (ifu_axi_awready),
.ifu_axi_awid (ifu_axi_awid),
.ifu_axi_awaddr (ifu_axi_awaddr),
.ifu_axi_awregion(ifu_axi_awregion),
.ifu_axi_awlen (ifu_axi_awlen),
.ifu_axi_awsize (ifu_axi_awsize),
.ifu_axi_awburst (ifu_axi_awburst),
.ifu_axi_awlock (ifu_axi_awlock),
.ifu_axi_awcache (ifu_axi_awcache),
.ifu_axi_awprot (ifu_axi_awprot),
.ifu_axi_awqos (ifu_axi_awqos),
.ifu_axi_wvalid(ifu_axi_wvalid),
.ifu_axi_wready(ifu_axi_wready),
.ifu_axi_wdata (ifu_axi_wdata),
.ifu_axi_wstrb (ifu_axi_wstrb),
.ifu_axi_wlast (ifu_axi_wlast),
.ifu_axi_bvalid(ifu_axi_bvalid),
.ifu_axi_bready(ifu_axi_bready),
.ifu_axi_bresp (ifu_axi_bresp),
.ifu_axi_bid (ifu_axi_bid),
.ifu_axi_arvalid (ifu_axi_arvalid),
.ifu_axi_arready (ifu_axi_arready),
.ifu_axi_arid (ifu_axi_arid),
.ifu_axi_araddr (ifu_axi_araddr),
.ifu_axi_arregion(ifu_axi_arregion),
.ifu_axi_arlen (ifu_axi_arlen),
.ifu_axi_arsize (ifu_axi_arsize),
.ifu_axi_arburst (ifu_axi_arburst),
.ifu_axi_arlock (ifu_axi_arlock),
.ifu_axi_arcache (ifu_axi_arcache),
.ifu_axi_arprot (ifu_axi_arprot),
.ifu_axi_arqos (ifu_axi_arqos),
.ifu_axi_rvalid(ifu_axi_rvalid),
.ifu_axi_rready(ifu_axi_rready),
.ifu_axi_rid (ifu_axi_rid),
.ifu_axi_rdata (ifu_axi_rdata),
.ifu_axi_rresp (ifu_axi_rresp),
.ifu_axi_rlast (ifu_axi_rlast),
//-------------------------- SB AXI signals--------------------------
// AXI Write Channels
.sb_axi_awvalid (sb_axi_awvalid),
.sb_axi_awready (sb_axi_awready),
.sb_axi_awid (sb_axi_awid),
.sb_axi_awaddr (sb_axi_awaddr),
.sb_axi_awregion(sb_axi_awregion),
.sb_axi_awlen (sb_axi_awlen),
.sb_axi_awsize (sb_axi_awsize),
.sb_axi_awburst (sb_axi_awburst),
.sb_axi_awlock (sb_axi_awlock),
.sb_axi_awcache (sb_axi_awcache),
.sb_axi_awprot (sb_axi_awprot),
.sb_axi_awqos (sb_axi_awqos),
.sb_axi_wvalid(sb_axi_wvalid),
.sb_axi_wready(sb_axi_wready),
.sb_axi_wdata (sb_axi_wdata),
.sb_axi_wstrb (sb_axi_wstrb),
.sb_axi_wlast (sb_axi_wlast),
.sb_axi_bvalid(sb_axi_bvalid),
.sb_axi_bready(sb_axi_bready),
.sb_axi_bresp (sb_axi_bresp),
.sb_axi_bid (sb_axi_bid),
.sb_axi_arvalid (sb_axi_arvalid),
.sb_axi_arready (sb_axi_arready),
.sb_axi_arid (sb_axi_arid),
.sb_axi_araddr (sb_axi_araddr),
.sb_axi_arregion(sb_axi_arregion),
.sb_axi_arlen (sb_axi_arlen),
.sb_axi_arsize (sb_axi_arsize),
.sb_axi_arburst (sb_axi_arburst),
.sb_axi_arlock (sb_axi_arlock),
.sb_axi_arcache (sb_axi_arcache),
.sb_axi_arprot (sb_axi_arprot),
.sb_axi_arqos (sb_axi_arqos),
.sb_axi_rvalid(sb_axi_rvalid),
.sb_axi_rready(sb_axi_rready),
.sb_axi_rid (sb_axi_rid),
.sb_axi_rdata (sb_axi_rdata),
.sb_axi_rresp (sb_axi_rresp),
.sb_axi_rlast (sb_axi_rlast),
//-------------------------- DMA AXI signals--------------------------
// AXI Write Channels
.dma_axi_awvalid(dma_axi_awvalid),
.dma_axi_awready(dma_axi_awready),
.dma_axi_awid ('0), // ids are not used on DMA since it always responses in order
.dma_axi_awaddr(lsu_axi_awaddr),
.dma_axi_awsize(lsu_axi_awsize),
.dma_axi_awprot('0),
.dma_axi_awlen('0),
.dma_axi_awburst('0),
.dma_axi_wvalid(dma_axi_wvalid),
.dma_axi_wready(dma_axi_wready),
.dma_axi_wdata (lsu_axi_wdata),
.dma_axi_wstrb (lsu_axi_wstrb),
.dma_axi_wlast (1'b1),
.dma_axi_bvalid(dma_axi_bvalid),
.dma_axi_bready(dma_axi_bready),
.dma_axi_bresp (dma_axi_bresp),
.dma_axi_bid (),
.dma_axi_arvalid(dma_axi_arvalid),
.dma_axi_arready(dma_axi_arready),
.dma_axi_arid ('0),
.dma_axi_araddr (lsu_axi_araddr),
.dma_axi_arsize (lsu_axi_arsize),
.dma_axi_arprot ('0),
.dma_axi_arlen ('0),
.dma_axi_arburst('0),
.dma_axi_rvalid(dma_axi_rvalid),
.dma_axi_rready(dma_axi_rready),
.dma_axi_rid (),
.dma_axi_rdata (dma_axi_rdata),
.dma_axi_rresp (dma_axi_rresp),
.dma_axi_rlast (dma_axi_rlast),
.timer_int (1'b0),
.extintsrc_req('0),
.lsu_bus_clk_en ( 1'b1 ),// Clock ratio b/w cpu core clk & AHB master interface
.ifu_bus_clk_en ( 1'b1 ),// Clock ratio b/w cpu core clk & AHB master interface
.dbg_bus_clk_en ( 1'b1 ),// Clock ratio b/w cpu core clk & AHB Debug master interface
.dma_bus_clk_en ( 1'b1 ),// Clock ratio b/w cpu core clk & AHB slave interface
.trace_rv_i_insn_ip (trace_rv_i_insn_ip),
.trace_rv_i_address_ip (trace_rv_i_address_ip),
.trace_rv_i_valid_ip (trace_rv_i_valid_ip),
.trace_rv_i_exception_ip(trace_rv_i_exception_ip),
.trace_rv_i_ecause_ip (trace_rv_i_ecause_ip),
.trace_rv_i_interrupt_ip(trace_rv_i_interrupt_ip),
.trace_rv_i_tval_ip (trace_rv_i_tval_ip),
.jtag_tck (jtag_tck),
.jtag_tms (jtag_tms),
.jtag_tdi (jtag_tdi),
.jtag_trst_n(jtag_trst_n),
.jtag_tdo (jtag_tdo),
.mpc_debug_halt_ack(mpc_debug_halt_ack),
.mpc_debug_halt_req(1'b0),
.mpc_debug_run_ack (mpc_debug_run_ack),
.mpc_debug_run_req (1'b1),
.mpc_reset_run_req (1'b1), // Start running after reset
.debug_brkpt_status(debug_brkpt_status),
.i_cpu_halt_req(1'b0), // Async halt req to CPU
.o_cpu_halt_ack(o_cpu_halt_ack), // core response to halt
.o_cpu_halt_status(o_cpu_halt_status), // 1'b1 indicates core is halted
.i_cpu_run_req(1'b0), // Async restart req to CPU
.o_debug_mode_status(o_debug_mode_status),
.o_cpu_run_ack(o_cpu_run_ack), // Core response to run req
.dec_tlu_perfcnt0(),
.dec_tlu_perfcnt1(),
.dec_tlu_perfcnt2(),
.dec_tlu_perfcnt3(),
// remove mems DFT pins for opensource
.dccm_ext_in_pkt ('0),
.iccm_ext_in_pkt ('0),
.ic_data_ext_in_pkt('0),
.ic_tag_ext_in_pkt ('0),
.soft_int ('0),
.core_id ('0),
.scan_mode (1'b0), // To enable scan mode
.mbist_mode(1'b0) // to enable mbist
);
axi_slv #(
.TAGW(`RV_IFU_BUS_TAG)
) imem (
.aclk(clk),
.rst_l(rst),
.arvalid(ifu_axi_arvalid),
.arready(ifu_axi_arready),
.araddr(ifu_axi_araddr),
.arid(ifu_axi_arid),
.arlen(ifu_axi_arlen),
.arburst(ifu_axi_arburst),
.arsize(ifu_axi_arsize),
.rvalid(ifu_axi_rvalid),
.rready(ifu_axi_rready),
.rdata(ifu_axi_rdata),
.rresp(ifu_axi_rresp),
.rid(ifu_axi_rid),
.rlast(ifu_axi_rlast),
.awvalid(1'b0),
.awready(),
.awaddr('0),
.awid('0),
.awlen('0),
.awburst('0),
.awsize('0),
.wdata ('0),
.wstrb ('0),
.wvalid(1'b0),
.wready(),
.bvalid(),
.bready(1'b0),
.bresp(),
.bid()
);
defparam lmem.TAGW = `RV_LSU_BUS_TAG;
//axi_slv #(.TAGW(`RV_LSU_BUS_TAG)) lmem(
axi_slv lmem (
.aclk(clk),
.rst_l(rst),
.arvalid(lmem_axi_arvalid),
.arready(lmem_axi_arready),
.araddr(lsu_axi_araddr),
.arid(lsu_axi_arid),
.arlen(lsu_axi_arlen),
.arburst(lsu_axi_arburst),
.arsize(lsu_axi_arsize),
.rvalid(lmem_axi_rvalid),
.rready(lmem_axi_rready),
.rdata(lmem_axi_rdata),
.rresp(lmem_axi_rresp),
.rid(lmem_axi_rid),
.rlast(lmem_axi_rlast),
.awvalid(lmem_axi_awvalid),
.awready(lmem_axi_awready),
.awaddr(lsu_axi_awaddr),
.awid(lsu_axi_awid),
.awlen(lsu_axi_awlen),
.awburst(lsu_axi_awburst),
.awsize(lsu_axi_awsize),
.wdata (lsu_axi_wdata),
.wstrb (lsu_axi_wstrb),
.wvalid(lmem_axi_wvalid),
.wready(lmem_axi_wready),
.bvalid(lmem_axi_bvalid),
.bready(lmem_axi_bready),
.bresp(lmem_axi_bresp),
.bid(lmem_axi_bid)
);
axi_lsu_dma_bridge #(`RV_LSU_BUS_TAG, `RV_LSU_BUS_TAG) bridge (
.clk(clk),
.reset_l(rst),
.m_arvalid(lsu_axi_arvalid),
.m_arid(lsu_axi_arid),
.m_araddr(lsu_axi_araddr),
.m_arready(lsu_axi_arready),
.m_rvalid(lsu_axi_rvalid),
.m_rready(lsu_axi_rready),
.m_rdata(lsu_axi_rdata),
.m_rid(lsu_axi_rid),
.m_rresp(lsu_axi_rresp),
.m_rlast(lsu_axi_rlast),
.m_awvalid(lsu_axi_awvalid),
.m_awid(lsu_axi_awid),
.m_awaddr(lsu_axi_awaddr),
.m_awready(lsu_axi_awready),
.m_wvalid(lsu_axi_wvalid),
.m_wready(lsu_axi_wready),
.m_bresp(lsu_axi_bresp),
.m_bvalid(lsu_axi_bvalid),
.m_bid(lsu_axi_bid),
.m_bready(lsu_axi_bready),
.s0_arvalid(lmem_axi_arvalid),
.s0_arready(lmem_axi_arready),
.s0_rvalid(lmem_axi_rvalid),
.s0_rid(lmem_axi_rid),
.s0_rresp(lmem_axi_rresp),
.s0_rdata(lmem_axi_rdata),
.s0_rlast(lmem_axi_rlast),
.s0_rready(lmem_axi_rready),
.s0_awvalid(lmem_axi_awvalid),
.s0_awready(lmem_axi_awready),
.s0_wvalid(lmem_axi_wvalid),
.s0_wready(lmem_axi_wready),
.s0_bresp(lmem_axi_bresp),
.s0_bvalid(lmem_axi_bvalid),
.s0_bid(lmem_axi_bid),
.s0_bready(lmem_axi_bready),
.s1_arvalid(dma_axi_arvalid),
.s1_arready(dma_axi_arready),
.s1_rvalid(dma_axi_rvalid),
.s1_rresp (dma_axi_rresp),
.s1_rdata (dma_axi_rdata),
.s1_rlast (dma_axi_rlast),
.s1_rready(dma_axi_rready),
.s1_awvalid(dma_axi_awvalid),
.s1_awready(dma_axi_awready),
.s1_wvalid(dma_axi_wvalid),
.s1_wready(dma_axi_wready),
.s1_bresp (dma_axi_bresp),
.s1_bvalid(dma_axi_bvalid),
.s1_bready(dma_axi_bready)
);
endmodule

2609
Flow/soc/swerv.config Executable file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,41 @@
#!/usr/bin/env python3
from fusesoc.capi2.generator import Generator
import os
import subprocess
import sys
class SwervConfigGenerator(Generator):
def run(self):
script_root = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '..'))
files = [
{"common_defines.vh" : {
"copyto" : "config/common_defines.vh",
"file_type" : "systemVerilogSource"}},
{"el2_pdef.vh" : {
"copyto" : "config/el2_pdef.vh",
"file_type" : "systemVerilogSource"}},
{"el2_param.vh" : {
"is_include_file" : True,
"file_type" : "systemVerilogSource"}},
{"pic_map_auto.h" : {
"is_include_file" : True,
"file_type" : "systemVerilogSource"}}]
env = os.environ.copy()
env['RV_ROOT'] = script_root
env['BUILD_PATH'] = os.getcwd()
args = ['configs/swerv.config'] + self.config.get('args', [])
rc = subprocess.call(args, cwd=script_root, env=env, stdout=subprocess.DEVNULL)
if rc:
exit(1)
filenames = []
for f in files:
for k in f:
filenames.append(k)
self.add_files(files)
g = SwervConfigGenerator()
g.run()
g.write()

97
Flow/swerv_el2.core Normal file
View File

@ -0,0 +1,97 @@
CAPI=2:
name : chipsalliance.org:cores:SweRV_EL2:1.3
filesets:
rtl:
files:
- design/include/el2_def.sv
- design/lib/el2_lib.sv
- design/lib/beh_lib.sv
- design/el2_mem.sv
- design/el2_pic_ctrl.sv
- design/el2_dma_ctrl.sv
- design/ifu/el2_ifu_aln_ctl.sv
- design/ifu/el2_ifu_compress_ctl.sv
- design/ifu/el2_ifu_ifc_ctl.sv
- design/ifu/el2_ifu_bp_ctl.sv
- design/ifu/el2_ifu_ic_mem.sv
- design/ifu/el2_ifu_mem_ctl.sv
- design/ifu/el2_ifu_iccm_mem.sv
- design/ifu/el2_ifu.sv
- design/dec/el2_dec_decode_ctl.sv
- design/dec/el2_dec_gpr_ctl.sv
- design/dec/el2_dec_ib_ctl.sv
- design/dec/el2_dec_tlu_ctl.sv
- design/dec/el2_dec_trigger.sv
- design/dec/el2_dec.sv
- design/exu/el2_exu_alu_ctl.sv
- design/exu/el2_exu_mul_ctl.sv
- design/exu/el2_exu_div_ctl.sv
- design/exu/el2_exu.sv
- design/lsu/el2_lsu.sv
- design/lsu/el2_lsu_bus_buffer.sv
- design/lsu/el2_lsu_clkdomain.sv
- design/lsu/el2_lsu_addrcheck.sv
- design/lsu/el2_lsu_lsc_ctl.sv
- design/lsu/el2_lsu_stbuf.sv
- design/lsu/el2_lsu_bus_intf.sv
- design/lsu/el2_lsu_ecc.sv
- design/lsu/el2_lsu_dccm_mem.sv
- design/lsu/el2_lsu_dccm_ctl.sv
- design/lsu/el2_lsu_trigger.sv
- design/dbg/el2_dbg.sv
- design/dmi/dmi_wrapper.v
- design/dmi/dmi_jtag_to_core_sync.v
- design/dmi/rvjtag_tap.v
- design/lib/mem_lib.sv
- design/el2_swerv.sv
- design/el2_swerv_wrapper.sv
file_type : systemVerilogSource
vivado_tcl: {files: [tools/vivado.tcl : {file_type : tclSource}]}
targets:
default:
filesets :
- rtl
- "tool_vivado ? (vivado_tcl)"
lint:
default_tool: verilator
filesets : [rtl]
generate : [swerv_default_config]
tools:
verilator :
mode : lint-only
toplevel : el2_swerv_wrapper
synth:
default_tool : vivado
filesets : [rtl, vivado_tcl]
generate : [swerv_default_config]
parameters : [RV_FPGA_OPTIMIZE]
tools:
vivado:
part : xc7a100tcsg324-1
pnr : none
toplevel : el2_swerv_wrapper
generate:
swerv_default_config:
generator: swerv_el2_config
position : first
parameters:
args : [-unset=assert_on]
generators:
swerv_el2_config:
interpreter: python3
command: configs/swerv_config_gen.py
description : Create a SweRV EL2 configuration. Note! Only supports the default config
parameters:
RV_FPGA_OPTIMIZE:
datatype : bool
default : true
description : Minimize clock gating to map better to FPGAs
paramtype : vlogdefine