diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4b24f09 --- /dev/null +++ b/LICENSE @@ -0,0 +1,69 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
+ +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
+ +Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of this License; and + +You must cause any modified files to carry prominent notices stating that You changed the files; and + +You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + +If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of 
the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + +You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
+ +Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
diff --git a/README.md b/README.md index 2310d7a..a3c34ed 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,192 @@ -# Cores-SweRV-EL2 -Coming soon. +# EL2 SweRV RISC-V CoreTM 1.0 from Western Digital + +This repository contains the SweRV EL2 CoreTM design RTL + +## License + +By contributing to this project, you agree that your contribution is governed by [Apache-2.0](LICENSE). +Files under the [tools](tools/) directory may be available under a different license. Please review individual file for details. + +## Directory Structure + + ├── configs # Configurations Dir + │   └── snapshots # Where generated configuration files are created + ├── design # Design root dir + │   ├── dbg # Debugger + │   ├── dec # Decode, Registers and Exceptions + │   ├── dmi # DMI block + │   ├── exu # EXU (ALU/MUL/DIV) + │   ├── ifu # Fetch & Branch Prediction + │   ├── include + │   ├── lib + │   └── lsu # Load/Store + ├── docs + ├── tools # Scripts/Makefiles + └── testbench # (Very) simple testbench +    ├── asm # Example assembly files +    └── hex # Canned demo hex files + +## Dependencies + +- Verilator **(4.020 or later)** must be installed on the system if running with verilator +- If adding/removing instructions, espresso must be installed (used by *tools/coredecode*) +- RISCV tool chain (based on gcc version 7.3 or higher) must be +installed so that it can be used to prepare RISCV binaries to run. + +## Quickstart guide +1. Clone the repository +1. Setup RV_ROOT to point to the path in your local filesystem +1. Determine your configuration {optional} +1. 
Run make with tools/Makefile + +## Release Notes for this version +Please see [release notes](release-notes.md) for changes and bug fixes in this version of SweRV + +### Configurations + +SweRV can be configured by running the `$RV_ROOT/configs/swerv.config` script: + +`% $RV_ROOT/configs/swerv.config -h` for detailed help options + +For example to build with a DCCM of size 64 Kb: + +`% $RV_ROOT/configs/swerv.config -dccm_size=64` + +This will update the **default** snapshot in $RV_ROOT/configs/snapshots/default/ with parameters for a 64K DCCM. + +Add `-snapshot=dccm64`, for example, if you wish to name your build snapshot *dccm64* and refer to it during the build. + +There are 4 predefined target configurations: `default`, `default_ahb`, `typical_pd` and `high_perf` that can be selected via +the `-target=name` option to swerv.config. + +This script derives the following consistent set of include files : + + $RV_ROOT/configs/snapshots/default + ├── common_defines.vh # `defines for testbench or design + ├── defines.h # #defines for C/assembly headers + ├── el2_param.vh # Design parameters + ├── el2_pdef.vh # Parameter structure + ├── pd_defines.vh # `defines for physical design + ├── perl_configs.pl # Perl %configs hash for scripting + ├── pic_map_auto.h # PIC memory map based on configure size + └── whisper.json # JSON file for swerv-iss + + + +### Building a model + +while in a work directory: + +1. Set the RV_ROOT environment variable to the root of the SweRV directory structure. +Example for bash shell: + `export RV_ROOT=/path/to/swerv` +Example for csh or its derivatives: + `setenv RV_ROOT /path/to/swerv` + +1. Create your specific configuration + + *(Skip if default is sufficient)* + *(Name your snapshot to distinguish it from the default. 
Without an explicit name, it will update/override the __default__ snapshot)* + For example if `mybuild` is the name for the snapshot: + + set BUILD_PATH environment variable: + + `setenv BUILD_PATH snapshots/mybuild` + + `$RV_ROOT/configs/swerv.config [configuration options..] -snapshot=mybuild` + + Snapshots are placed in `$BUILD_PATH` directory + + +1. Running a simple Hello World program (verilator) + + `make -f $RV_ROOT/tools/Makefile` + +This command will build a verilator model of SweRV EL2 with AXI bus, and +execute a short sequence of instructions that writes out "HELLO WORLD" +to the bus. + + +The simulation produces output on the screen like: +``` + +VerilatorTB: Start of sim + +---------------------------------- +Hello World from SweRV EL2 @WDC !! +---------------------------------- +TEST_PASSED + +Finished : minstret = 437, mcycle = 922 +See "exec.log" for execution trace with register updates.. + +``` +The simulation generates the following files: + + `console.log` contains what the cpu writes to the console address of 0xd0580000. + `exec.log` shows instruction trace with GPR updates. + `trace_port.csv` contains a log of the trace port. + When `debug=1` is provided, a vcd file `sim.vcd` is created and can be browsed by + gtkwave or similar waveform viewers. + +You can re-execute simulation using: + ` ./obj_dir/Vtb_top ` +or + `make -f $RV_ROOT/tools/Makefile verilator` + + + +The simulation run/build command has the following generic form: + + make -f $RV_ROOT/tools/Makefile [<simulator>] [debug=1] [snapshot=mybuild] [target=<target>] [TEST=<test>] [TEST_DIR=<path_to_test_dir>] + +where: +``` +<simulator> - can be 'verilator' (by default) 'irun' - Cadence xrun, 'vcs' - Synopsys VCS + if not provided, 'make' cleans work directory, builds verilator executable and runs a test. +debug=1 - allows VCD generation for verilator and VCS and SHM waves for irun option. 
+<target> - predefined CPU configurations 'default' (by default), 'default_ahb', 'typical_pd', 'high_perf' +TEST - allows to run a C (<test>.c) or assembly (<test>.s) test, hello_world2 is run by default +TEST_DIR - alternative to test source directory testbench/asm +<snapshot> - run and build executable model of custom CPU configuration, remember to provide 'snapshot' argument + for runs on custom configurations. + +``` +Example: + + make -f $RV_ROOT/tools/Makefile verilator TEST=cmark + +will simulate testbench/asm/cmark.c program with verilator + + +If you want to compile a test only, you can run: + + make -f $RV_ROOT/tools/Makefile program.hex TEST=<test> [TEST_DIR=/path/to/dir] + + +For the cmark test, the script in `$RV_ROOT/tools/calc_cmarks.pl` can be used +to extract the core-marks score by invoking that script in the run +directory. + +The Makefile uses `$RV_ROOT/testbench/linker.ld` file by default to build test executable. +User can provide test specific linker file in form `<test_name>.ld` to build the test executable, + in the same directory with the test source. + +User also can create a test specific makefile in form `<test_name>.makefile`, containing building instructions +how to create `program.hex` and `data.hex` files used by simulation. The private makefile should be in the same directory +as the test source. +*(`program.hex` file is loaded to instruction bus memory slave and 'data.hex' file is loaded to LSU bus memory slave and +optionally to DCCM at the beginning of simulation)*. + +The `$RV_ROOT/testbench/asm` directory contains the following tests ready to simulate: +``` +hello_world2 - default test to run, prints Hello World message to screen and console.log +hello_world_dccm - the same as above, but takes the string from preloaded DCCM. 
+cmark - coremark benchmark running with code and data in external memories +cmark_dccm - the same as above, running data and stack from DCCM (faster) +``` + +---- +Western Digital, the Western Digital logo, G-Technology, SanDisk, Tegile, Upthere, WD, SweRV Core, SweRV ISS, +and OmniXtend are registered trademarks or trademarks of Western Digital Corporation or its affiliates in the US +and/or other countries. All other marks are the property of their respective owners. diff --git a/configs/README.md b/configs/README.md new file mode 100644 index 0000000..dd2e59a --- /dev/null +++ b/configs/README.md @@ -0,0 +1,41 @@ +# SweRV RISC-V EL2 core from Western Digital + +## Configuration + +### Contents +Name | Description +---------------------- | ------------------------------ +swerv.config | Configuration script for SweRV-EL2 + + +This script will generate a consistent set of `defines/#defines/parameters` needed for the design and testbench. +A perl hash (*perl_configs.pl*) and a JSON format for SweRV-iss are also generated. +This set of include files : + + $RV_ROOT/configs/snapshots/ + ├── common_defines.vh # `defines for testbench + ├── defines.h # #defines for C/assembly headers + ├── el2_param.vh # Actual Design parameters + ├── el2_pdef.vh # Parameter structure definition + ├── pd_defines.vh # `defines for physical design + ├── perl_configs.pl # Perl %configs hash for scripting + ├── el2_pic_ctrl_verilator_unroll.sv # Unrolled verilog based on PIC size (for verilator only) + ├── pic_map_auto.h # PIC memory map based on configure size + └── whisper.json # JSON file for swerv-iss + + + +While the defines may be modified by hand, it is recommended that this script be used to generate a consistent set. + +### Targets +There are 4 predefined target configurations: `default`, `default_ahb`, `typical_pd` and `high_perf` that can be selected via the `-target=name` option to swerv.config. 
+ +Target | Description +---------------------- | ------------------------------ +default | Default configuration. AXI4 bus interface. +default_ahb | Default configuration, AHB-Lite bus interface +typical_pd | No ICCM, AXI4 bus interface +high_perf | Large BTB/BHT, AXI4 interface + + +`swerv.config` may be edited to add additional target configurations, or new configurations may be created via the command line `-set` or `-unset` options. diff --git a/configs/swerv.config b/configs/swerv.config new file mode 100755 index 0000000..ab3dda3 --- /dev/null +++ b/configs/swerv.config @@ -0,0 +1,2234 @@ +#! /usr/bin/env perl + +use strict; # Do not turn this off or else +use Data::Dumper; +use Getopt::Long; +##use Bit::Vector; +use lib "$ENV{RV_ROOT}/tools"; +use JSON; + +my ($self) = $0 =~ m/.*\/(\w+)/o; +my @argv_orig = @ARGV; + + +# Master configuration file +# +# Configuration is perl hash +# Output are define files for various flows +# Verilog (`defines common to RTL/TB) +# Software (#defines) +# Whisper (JSON/#defines) +# +# Default values and valid ranges should be specified +# Can be overridden via the cmd line (-set=name=value-string) +# +# Format of the hash is +# name => VALUE | LIST | HASH +# +# Special name "inside" followed by list .. 
values must be one of provided list +# Special name "derive" followed by equation to derive +# + +# Dump verilog/assembly macros in upper case +my $defines_case = "U"; + +# Include these macros in verilog (pattern matched) +my @verilog_vars = qw (xlen config_key reset_vec tec_rv_icg numiregs nmi_vec target protection.* testbench.* dccm.* retstack core.* iccm.* btb.* bht.* icache.* pic.* regwidth memmap bus.*); + +# Include these macros in assembly (pattern matched) +my @asm_vars = qw (xlen reset_vec nmi_vec target dccm.* iccm.* pic.* memmap bus.* testbench.* protection.* core.*); +my @asm_overridable = qw (reset_vec nmi_vec) ; + +# Include these macros in PD (pattern matched) +my @pd_vars = qw (physical retstack target btb.* bht.* dccm.* iccm.* icache.* pic.* reset_vec nmi_vec build_ahb_lite datawidth bus.*); + +# Dump non-derived/settable vars/values for these vars in stdout : +my @dvars = qw(retstack btb bht core dccm iccm icache pic bus protection memmap); + +# Prefix all macros with +my $prefix = "RV_"; +# No prefix if keyword has +my $no_prefix = 'RV|TOP|^tec_|regwidth|clock_period|assert_on|^datawidth|^physical|verilator|SDVT_AHB'; + +my $vlog_use__wh = 1; + +my %regions_used = (); + +# Cmd Line options#{{{ +our %sets; +our %unsets; +my $help; +my @sets = (); +my @unsets = (); + +#Configurations may be changed via the -set option +# +# -set=name=value : Change the default config parameter value (lowercase)\n"; +# -unset=name : Remove the default config parameter (lowercase)\n"; +# : Do not prepend RV_ prefix to -set/-unset variables\n"; +# : multiple -set/-unset options accepted\n\n"; +# + +my $helpusage = " + +Main configuration database for SWERV + +This script documents, and generates the {`#} define/include files for verilog/assembly/backend flows + +It is run by vsim (with defaults) every time the file changes, or when -config_set=VAR=value options are passed to vsim + +This script can be run stand-alone by processes not running vsim + +User options: + + 
-target = {default, typical_pd, high_perf, default_ahb} + use default settings for one of the targets + + -set=var=value + set arbitrary variable(parameter) to a value + -unset=var + unset any definitions for var + -snapshot=name + name the configuration (only if no -target specified) + +Parameters that can be set by the end user: + + -set=ret_stack_size = {2, 3, 4, ... 8} + size of return stack + -set=btb_size = { 32, 64, 128, 256, 512 } + size of branch target buffer + -set=bht_size = {32, 64, 128, 256, 512, 1024, 2048} + size of branch history buffer + -set=dccm_enable = {0,1} + DCCM enabled + -set=dccm_num_banks = {2, 4} + DCCM number of banks + -set=dccm_region = { 0x0, 0x1, ... 0xf } + number of 256MB memory region containing DCCM + -set=dccm_offset = hexadecimal + offset (in bytes) of DCCM within dccm_region + dccm address will be: \"256M * dccm_region + dccm_offset\", and that must be aligned + to the dccm size or the next larger power of 2 if size is not a power of 2 + -set=dccm_size = { 4, 8, 16, 32, 48, 64, 128, 256, 512 } kB + size of DCCM + -set=dma_buf_depth = {2,4,5} + DMA buffer depth + -set=fast_interrupt_redirect = {0, 1} + Fast interrupt redirect mechanism + -set=iccm_enable = { 0, 1 } + whether or not ICCM is enabled + -set=icache_enable = { 0, 1 } + whether or not icache is enabled + -set=icache_waypack = { 0, 1 } + whether or not icache packing is enabled + -set=icache_ecc = { 0, 1 } + whether or not icache has ecc - EXPENSIVE 30% sram growth + default: icache_ecc==0 (parity) + -set=icache_size = { 8, 16, 32, 64, 128, 256 } kB + size of icache + -set=icache_num_ways = { 2, 4 } + Number of ways in icache + -set=iccm_region = { 0x0, 0x1, ... 
0xf } + number of 256MB memory region containing ICCM + -set=iccm_offset = hexadecimal + offset (in bytes) of ICCM within iccm_region + iccm address will be: \"256M * iccm_region + iccm_offset\", and that must be aligned + to the iccm size or the next larger power of 2 if size is not a power of 2 + -set=iccm_size = { 4 , 8 , 16 , 32, 64, 128, 256, 512 } kB + size of ICCM + -set=iccm_num_banks = {2,4,8,16} + Number of ICCM banks + -set=lsu_stbuf_depth = {2,4,8 } + LSU stbuf depth + -set=lsu_num_nbload = {2,4,8 } + LSU number of outstanding Non Blocking loads + -set=load_to_use_plus1 = {0, 1} + Load to use latency (fast or +1cycle) + -set=pic_2cycle = { 0, 1 } + whether or not 2-cycle PIC is enabled (2 cycle pic may result + in an overall smaller cycle time) + -set=pic_region = { 0x0, 0x1, ... 0xf } + number of 256MB memory region containing PIC memory-mapped registers + -set=pic_offset = hexadecimal + offset (in bytes) of PIC within pic_region + pic address will be: \"256M * pic_region + pic_offset\", and that must be aligned + to the pic size or the next larger power of 2 if size is not a power of 2 + -set=pic_size = { 32, 64, 128, 256 } kB + size of PIC + -set=pic_total_int = { 1, 2, 3, ..., 255 } + number of interrupt sources in PIC + + + {inst|data}_access_enable[0-7] : default 0 + {inst|data}_access_addr[0-7] : default 0x00000000 + {inst|data}_access_mask[0-7] : default 0xffffffff +"; + + +my $ret_stack_size; +my $btb_size; +my $bht_size; +my $dccm_region; +my $dccm_offset; +my $dccm_size; +my $iccm_enable; +my $icache_enable; +my $icache_waypack; +my $icache_num_ways; +my $icache_banks_way; +my $icache_ln_sz; +my $icache_bank_width; +my $icache_ecc; +my $iccm_region; +my $iccm_offset; +my $iccm_size; +my $icache_size; +my $pic_2cycle; +my $pic_region; +my $pic_offset; +my $pic_size; +my $pic_total_int; + +my $top_align_iccm = 0; + +my $target = "default"; +my $snapshot ; +my $build_path ; +my $verbose; +my $load_to_use_plus1; +my $dccm_enable; +my 
$icache_2banks; +my $lsu_stbuf_depth; +my $dma_buf_depth; +my $lsu_num_nbload; +my $dccm_num_banks; +my $iccm_num_banks; +my $verilator; + +my $fast_interrupt_redirect = 1; # ON by default +$ret_stack_size=8; +$btb_size=512; +$bht_size=512; +$dccm_enable=1; +$dccm_region="0xf"; +$dccm_offset="0x40000"; #1*256*1024 +$dccm_size=64; +$dccm_num_banks=4; +$iccm_enable=1; +$iccm_region="0xe"; +$top_align_iccm = 1; +$iccm_offset="0xe000000"; #0x380*256*1024 +$iccm_size=64; +$iccm_num_banks=4; +$icache_enable=1; +$icache_waypack=0; +$icache_num_ways=2; +$icache_banks_way=2; +$icache_2banks=1; +$icache_bank_width=8; +$icache_ln_sz=64; +$icache_ecc=1; +$icache_size=16; +$pic_2cycle=0; +$pic_region="0xf"; +$pic_offset="0xc0000"; # 3*256*1024 +$pic_size=32; +$pic_total_int=31; +$load_to_use_plus1=0; +$lsu_stbuf_depth=4; +$dma_buf_depth=5; +$lsu_num_nbload=4; + +GetOptions( + "help" => \$help, + "target=s" => \$target, + "snapshot=s" => \$snapshot, + "verbose" => \$verbose, + "load_to_use_plus1:1" => \$load_to_use_plus1, # bare flag still stores 1; an explicit =0 / =1 is also accepted + "ret_stack_size=s" => \$ret_stack_size, + "btb_size=s" => \$btb_size, + "bht_size=s" => \$bht_size, + "dccm_enable=s" => \$dccm_enable, + "dccm_region=s" => \$dccm_region, + "dccm_offset=s" => \$dccm_offset, + "dccm_size=s" => \$dccm_size, + "dma_buf_depth=s" => \$dma_buf_depth, # takes a value like the other size options (was a bare flag that stored 1) + "iccm_enable=s" => \$iccm_enable, + "icache_enable=s" => \$icache_enable, + "icache_waypack=s" => \$icache_waypack, + "icache_num_ways=s" => \$icache_num_ways, + "icache_ln_sz=s" => \$icache_ln_sz, + "icache_ecc=s" => \$icache_ecc, + "icache_2banks=s" => \$icache_2banks, + "iccm_region=s" => \$iccm_region, + "iccm_offset=s" => \$iccm_offset, + "iccm_size=s" => \$iccm_size, + "lsu_stbuf_depth=s" => \$lsu_stbuf_depth, # takes a value (was a bare flag that stored 1) + "lsu_num_nbload=s" => \$lsu_num_nbload, # takes a value (was a bare flag that stored 1) + "pic_2cycle=s" => \$pic_2cycle, + "pic_region=s" => \$pic_region, + "pic_offset=s" => \$pic_offset, + "pic_size=s" => \$pic_size, + "pic_total_int=s" => \$pic_total_int, + "icache_size=s" => \$icache_size, + "set=s@" => \@sets, + 
"unset=s@" => \@unsets, +) || die("$helpusage"); + +if ($help) { + print "$helpusage\n"; + exit; +} + +if (!defined $snapshot ) { + $snapshot = $target; +} + +if (!defined $ENV{BUILD_PATH}) { + $build_path = "$ENV{RV_ROOT}/configs/snapshots/$snapshot" ; +} else { + $build_path = $ENV{BUILD_PATH}; +} + +if (! -d "$build_path") { + system ("mkdir", "-p", $build_path) == 0 or die "$self: ERROR! Unable to create directory \"$build_path\"\n"; # list form: no shell, failure is reported +} + +# Parameter file +my $tdfile = "$build_path/el2_pdef.vh"; +my $paramfile = "$build_path/el2_param.vh"; + +# Verilog defines file path +my $vlogfile = "$build_path/common_defines.vh"; + +# Assembly defines file path +my $asmfile = "$build_path/defines.h"; + +# PD defines file path +my $pdfile = "$build_path/pd_defines.vh"; + +# Whisper config file path +my $whisperfile = "$build_path/whisper.json"; + +# Perl defines file path +my $perlfile = "$build_path/perl_configs.pl"; + +my $opensource=0; + + + +# IDEA: is ghr at 5b the right size for el2 core + +if ($target eq "default") { # NOTE(review): the presets below run after GetOptions, so they overwrite the same values given on the command line +} +elsif ($target eq "typical_pd") { + print "$self: Using target \"typical_pd\"\n"; + $ret_stack_size=2; + $btb_size=32; + $bht_size=128; + $dccm_size=16; + $dccm_num_banks=2; + $iccm_enable=0; +} +elsif ($target eq "high_perf") { + print "$self: Using target \"high_perf\"\n"; + $btb_size=512; + $bht_size=2048; +} +elsif ($target eq "default_ahb") { + print "$self: Using target \"default_ahb\"\n"; +} +else { + die "$self: ERROR! Unsupported target \"$target\". 
Supported are 'default', 'default_ahb', 'typical_pd', 'high_perf'\n" ; +} + + + +# Configure triggers: four identical entries, each a reset/mask/poke_mask triple +our @triggers = (#{{{ + { + "reset" => ["0x23e00000", "0x00000000", "0x00000000"], + "mask" => ["0x081818c7", "0xffffffff", "0x00000000"], + "poke_mask" => ["0x081818c7", "0xffffffff", "0x00000000"] + }, + { + "reset" => ["0x23e00000", "0x00000000", "0x00000000"], + "mask" => ["0x081818c7", "0xffffffff", "0x00000000"], + "poke_mask" => ["0x081818c7", "0xffffffff", "0x00000000"] + }, + { + "reset" => ["0x23e00000", "0x00000000", "0x00000000"], + "mask" => ["0x081818c7", "0xffffffff", "0x00000000"], + "poke_mask" => ["0x081818c7", "0xffffffff", "0x00000000"] + }, + { + "reset" => ["0x23e00000", "0x00000000", "0x00000000"], + "mask" => ["0x081818c7", "0xffffffff", "0x00000000"], + "poke_mask" => ["0x081818c7", "0xffffffff", "0x00000000"] + }, + );#}}} + + +# Configure CSRs: one hash per register, keyed by CSR name +our %csr = (#{{{ + "mstatus" => { + "reset" => "0x1800", # MPP bits hard wired to binary 11. + "mask" => "0x88", # Only mpie(7) & mie(3) bits writeable + "exists" => "true", + }, + "mie" => { + "reset" => "0x0", + # Only external, timer, local, and software writeable + "mask" => "0x40000888", + "exists" => "true", + }, + "mip" => { + "reset" => "0x0", + # None of the bits are writeable using CSR instructions + "mask" => "0x0", + # Bits corresponding to error overflow, external, timer and software + # interrupts are modifiable + "poke_mask" => "0x40000888", + "exists" => "true", + }, + "mcountinhibit" => { # NOTE(review): the key below is spelled "commnet"; other entries use "comment" - confirm before renaming + "commnet" => "Performance counter inhibit. 
One bit per counter.", + "reset" => "0x0", + "mask" => "0x7d", + "poke_mask" => "0x7d", + "exists" => "true", + }, + "mvendorid" => { + "reset" => "0x45", + "mask" => "0x0", + "exists" => "true", + }, + "marchid" => { + "reset" => "0x00000010", + "mask" => "0x0", + "exists" => "true", + }, + "mimpid" => { + "reset" => "0x1", + "mask" => "0x0", + "exists" => "true", + }, + "misa" => { + "reset" => "0x40001104", + "mask" => "0x0", + "exists" => "true", + }, + "tselect" => { + "reset" => "0x0", + "mask" => "0x3", # Four triggers + "exists" => "true", + }, + "mhartid" => { + "reset" => "0x0", + "mask" => "0x0", + "poke_mask" => "0xfffffff0", + "exists" => "true", + }, + "dcsr" => { + "reset" => "0x40000003", + "mask" => "0x00008c04", + "poke_mask" => "0x00008dcc", # cause field modifiable, nmip modifiable + "exists" => "true", + "debug" => "true", + }, + "cycle" => { + "exists" => "false", + }, + "time" => { + "exists" => "false", + }, + "instret" => { + "exists" => "false", + }, + "mhpmcounter3" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter4" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter5" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter6" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter3h" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter4h" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter5h" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmcounter6h" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmevent3" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmevent4" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mhpmevent5" => { + "reset" => "0x0", + "mask" => 
"0xffffffff", + "exists" => "true", + }, + "mhpmevent6" => { + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, +# Remaining CSRs are non-standard. These are specific to SWERV + "dicawics" => { # NOTE(review): duplicate key - "dicawics" is listed again further down, and in a hash list assignment the later pair wins + "number" => "0x7c8", + "reset" => "0x0", + "mask" => "0x0130fffc", + "exists" => "true", + }, + "dicad0" => { # NOTE(review): duplicate key - replaced by the later "dicad0" entry + "number" => "0x7c9", + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "dicad1" => { # NOTE(review): duplicate key - replaced by the later "dicad1" entry + "number" => "0x7ca", + "reset" => "0x0", + "mask" => "0x3", + "exists" => "true", + }, + "dicago" => { # NOTE(review): duplicate key - replaced by the later "dicago" entry + "number" => "0x7cb", + "reset" => "0x0", + "mask" => "0x0", + "exists" => "true", + }, + "mcpc" => { + "comment" => "Core pause", + "number" => "0x7c2", + "reset" => "0x0", + "mask" => "0x0", + "exists" => "true", + }, + "mpmc" => { + "number" => "0x7c6", + "reset" => "0x2", + "mask" => "0x2", + "exists" => "true", + }, + "micect" => { + "number" => "0x7f0", + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "miccmect" => { + "number" => "0x7f1", + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mdccmect" => { + "number" => "0x7f2", + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + }, + "mcgc" => { + "number" => "0x7f8", + "reset" => "0x0", + "mask" => "0x000001ff", + "poke_mask" => "0x000001ff", + "exists" => "true", + }, + "mfdc" => { + "number" => "0x7f9", + "reset" => "0x00070000", + "mask" => "0x00070fff", + "exists" => "true", + }, + "mrac" => { + "comment" => "Memory region io and cache control.", + "number" => "0x7c0", + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + "shared" => "true", + }, + "dmst" => { + 
"comment" => "Memory synch trigger: Flush caches in debug mode.", + "number" => "0x7c4", + "reset" => "0x0", + "mask" => "0x0", + "exists" => "true", + "debug" => "true", + }, + "dicawics" => { # second "dicawics" entry: same number/reset/mask as the first, plus "comment" and "debug" + "comment" => "Cache diagnostics.", + "number" => "0x7c8", + "reset" => "0x0", + "mask" => "0x0130fffc", + "exists" => "true", + 
"debug" => "true", + }, + "dicad0" => { + "comment" => "Cache diagnostics.", + "number" => "0x7c9", + "reset" => "0x0", + "mask" => "0xffffffff", + "exists" => "true", + "debug" => "true", + }, + "dicad1" => { + "comment" => "Cache diagnostics.", + "number" => "0x7ca", + "reset" => "0x0", + "mask" => "0x3", + "exists" => "true", + "debug" => "true", + }, + "dicago" => { + "comment" => "Cache diagnostics.", + "number" => "0x7cb", + "reset" => "0x0", + "mask" => "0x0", + "exists" => "true", + "debug" => "true", + }, + "meipt" => { + "comment" => "External interrupt priority threshold.", + "number" => "0xbc9", + "reset" => "0x0", + "mask" => "0xf", + "exists" => "true", + }, + "meicpct" => { + "comment" => "External claim id/priority capture.", + "number" => "0xbca", + "reset" => "0x0", + "mask" => "0x0", + "exists" => "true", + }, + "meicidpl" => { + "comment" => "External interrupt claim id priority level.", + "number" => "0xbcb", + "reset" => "0x0", + "mask" => "0xf", + "exists" => "true", + }, + "meicurpl" => { + "comment" => "External interrupt current priority level.", + "number" => "0xbcc", + "reset" => "0x0", + "mask" => "0xf", + "exists" => "true", + }, + "mscause" => { + "number" => "0x7ff", + "reset" => "0x0", + "mask" => "0x00000007", + "exists" => "true", + }, +);#}}} + + +foreach my $i (0 .. 3) { # adds pmpcfg0..pmpcfg3 to %csr with "exists" => "false" + $csr{"pmpcfg$i"} = { "exists" => "false" }; +} + +foreach my $i (0 .. 
15) { + $csr{"pmpaddr$i"} = { "exists" => "false" }; +} + + + +# }}} +# Main config hash, with default values +# +# Hash can be hierarchical with arbitrary levels +# Hexadecimal values are prefixed with 0x +# +# For verilog, if bit width is expected, add to %width hash below +# +# NOTE: params/keys marked 'derived' are not settable via cmd line, unless they ALSO have the 'overridable' tag +# +our %config = (#{{{ + "harts" => "1", + "xlen" => "32", # Testbench, Do Not Override + "numiregs" => "32", # Testbench, Do Not Override + "regwidth" => "32", # Testbench, Do Not Override + "reset_vec" => "0x80000000", # Testbench, Overridable + "nmi_vec" => "0x11110000", # Testbench, Overridable + "physical" => "1", + "num_mmode_perf_regs" => "4", # Whisper only + "max_mmode_perf_event" => "516", # Whisper only: performance counters event ids will be clamped to this + "target" => $target, # Flow Infrastructure + "config_key" => "derived", + "tec_rv_icg" => "clockhdr", + + "retstack" => { + "ret_stack_size" => "$ret_stack_size", # Design Parm, Overridable + }, + + "btb" => { + "btb_size" => "$btb_size", # Design Parm, Overridable + "btb_index1_hi" => "derived", + "btb_index1_lo" => "2", # Constant, Do Not Override + "btb_index2_hi" => "derived", + "btb_index2_lo" => "derived", + "btb_index3_hi" => "derived", + "btb_index3_lo" => "derived", + "btb_addr_hi" => "derived", + "btb_array_depth" => "derived", + "btb_addr_lo" => "2", # Constant, Do Not Override + "btb_btag_size" => "derived", + "btb_btag_fold" => "derived", + "btb_fold2_index_hash" => "derived", + }, + "bht" => { + "bht_size" => "$bht_size", # Design Parm, Overridable + "bht_addr_hi" => "derived", + "bht_addr_lo" => "2", # Constant, Do Not Override + "bht_array_depth" => "derived", + "bht_ghr_size" => "derived", + "bht_ghr_range" => "derived", + "bht_hash_string" => "derived", + "bht_ghr_hash_1" => "derived", + }, + + "core" => { + "lsu_stbuf_depth" => "$lsu_stbuf_depth", # Design Parm, Overridable + "dma_buf_depth" => 
"$dma_buf_depth", # Design Parm, Overridable + "lsu_num_nbload" => "$lsu_num_nbload", # Design Parm, Overridable + "opensource" => "$opensource", # Flow Infrastructure + "verilator" => "$verilator", # Flow Infrastructure + "load_to_use_plus1" => "$load_to_use_plus1", # Design Parm, Overridable + "iccm_icache" => 'derived', # Used by design + "iccm_only" => 'derived', # Used by design + "icache_only" => 'derived', # Used by design + "no_iccm_no_icache" => 'derived', # Used by design + "fast_interrupt_redirect" => "$fast_interrupt_redirect" # Design Parm, Overridable + }, + + "dccm" => { + "dccm_enable" => "$dccm_enable", # Design Parm, Overridable + "dccm_region" => "$dccm_region", # Design Parm, Overridable + "dccm_offset" => "$dccm_offset", # Design Parm, Overridable + "dccm_size" => "$dccm_size", # Design Parm, Overridable + "dccm_num_banks" => "$dccm_num_banks", # Design Parm, Overridable + "dccm_sadr" => 'derived', + "dccm_eadr" => 'derived', + "dccm_bits" => 'derived', + "dccm_bank_bits" => 'derived', + "dccm_data_width" => 'derived', + "dccm_fdata_width" => 'derived', + "dccm_byte_width" => 'derived', + "dccm_width_bits" => 'derived', + "dccm_index_bits" => 'derived', + "dccm_ecc_width" => 'derived', + "lsu_sb_bits" => 'derived', + "dccm_data_cell" => 'derived', + "dccm_rows" => 'derived', + "dccm_reserved" => 'derived', # Testbench use only : reserve dccm space for SW/stack - no random r/w + }, + + + "iccm" => { + "iccm_enable" => "$iccm_enable", # Design Parm, Overridable + "iccm_region" => "$iccm_region", # Design Parm, Overridable + "iccm_offset" => "$iccm_offset", # Design Parm, Overridable + "iccm_size" => "$iccm_size", # Design Parm, Overridable + "iccm_num_banks" => "$iccm_num_banks", # Design Parm, Overridable + "iccm_bank_bits" => 'derived', + "iccm_index_bits" => 'derived', + "iccm_rows" => 'derived', + "iccm_data_cell" => 'derived', + "iccm_sadr" => 'derived', + "iccm_eadr" => 'derived', + "iccm_reserved" => 'derived', # Testbench use only : 
reserve iccm space for SW/handlers - no random r/w + "iccm_bank_hi" => 'derived', + "iccm_bank_index_lo" => 'derived', + }, + "icache" => { + "icache_enable" => "$icache_enable", # Design Parm, Overridable + "icache_waypack" => "$icache_waypack", # Design Parm, Overridable + "icache_num_ways" => "$icache_num_ways", # Design Parm, Overridable + "icache_banks_way" => "2", # Design Parm, Constant + "icache_bank_width" => "8", # Design Parm, Constant + "icache_ln_sz" => "$icache_ln_sz", # Design Parm, Overridable + "icache_size" => "$icache_size", # Design Parm, Overridable + "icache_bank_hi" => 'derived', + "icache_bank_lo" => 'derived', + "icache_data_cell" => 'derived', + "icache_tag_cell" => 'derived', + "icache_tag_depth" => 'derived', + "icache_data_depth" => 'derived', + "icache_num_lines" => 'derived', + "icache_num_lines_bank" => 'derived', + "icache_num_lines_way" => 'derived', + "icache_data_depth" => 'derived', + "icache_tag_lo" => 'derived', + "icache_index_hi" => 'derived', + "icache_data_index_lo" => 'derived', + "icache_data_width" => 'derived', + "icache_fdata_width" => 'derived', + "icache_tag_index_lo" => 'derived', + "icache_ecc" => "$icache_ecc", # Design Parm, Overridable + "icache_2banks" => "$icache_2banks", # Design Parm, Overridable + "icache_bank_bits" => "derived", + "icache_status_bits" => "derived", + "icache_num_beats" => "derived", + "icache_beat_bits" => "derived", + "icache_scnd_last" => "derived", + "icache_beat_addr_hi" => "derived", + }, + "pic" => { + "pic_2cycle" => "$pic_2cycle", # Design Parm, Overridable + "pic_region" => "$pic_region", # Design Parm, Overridable + "pic_offset" => "$pic_offset", # Design Parm, Overridable + "pic_size" => "$pic_size", # Design Parm, Overridable + "pic_base_addr" => 'derived', + "pic_total_int_plus1" => 'derived', # pic_total_int + 1 + "pic_total_int" => "$pic_total_int", # Design Parm, Overridable + "pic_int_words" => 'derived', # number of 32 bit words for packed registers (Xmax) + "pic_bits" 
=> 'derived', # of bits needs to address the PICM + "pic_meipl_offset" => '0x0000', # Testbench only: Offset of meipl relative to pic_base_addr + "pic_meip_offset" => '0x1000', # Testbench only: Offset of meip relative to pic_base_addr + "pic_meie_offset" => '0x2000', # Testbench only: Offset of meie relative to pic_base_addr + "pic_mpiccfg_offset" => '0x3000', # Testbench only: Offset of mpiccfg relative to pic_base_addr + "pic_meipt_offset" => '0x3004', # Testbench only: Offset of meipt relative to pic_base_addr -- deprecated + "pic_meigwctrl_offset" => '0x4000', # Testbench only: gateway control regs relative to pic_base_addr + "pic_meigwclr_offset" => '0x5000' # Testbench only: gateway clear regs relative to pic_base_addr + + }, + "testbench" => { + "TOP" => "tb_top", + "RV_TOP" => "`TOP.rvtop", + "CPU_TOP" => "`RV_TOP.swerv", + "clock_period" => "100", + "build_ahb_lite" => "0", + "build_axi4" => "1", + "build_axi_native" => "1", + "assert_on" => "", + "ext_datawidth" => "64", + "ext_addrwidth" => "32", + "sterr_rollback" => "0", + "lderr_rollback" => "1", + "SDVT_AHB" => "1", + }, + "protection" => { # Design parms, Overridable + "inst_access_enable0" => "0x0", + "inst_access_addr0" => "0x00000000", + "inst_access_mask0" => "0xffffffff", + "inst_access_enable1" => "0x0", + "inst_access_addr1" => "0x00000000", + "inst_access_mask1" => "0xffffffff", + "inst_access_enable2" => "0x0", + "inst_access_addr2" => "0x00000000", + "inst_access_mask2" => "0xffffffff", + "inst_access_enable3" => "0x0", + "inst_access_addr3" => "0x00000000", + "inst_access_mask3" => "0xffffffff", + "inst_access_enable4" => "0x0", + "inst_access_addr4" => "0x00000000", + "inst_access_mask4" => "0xffffffff", + "inst_access_enable5" => "0x0", + "inst_access_addr5" => "0x00000000", + "inst_access_mask5" => "0xffffffff", + "inst_access_enable6" => "0x0", + "inst_access_addr6" => "0x00000000", + "inst_access_mask6" => "0xffffffff", + "inst_access_enable7" => "0x0", + "inst_access_addr7" => 
"0x00000000", + "inst_access_mask7" => "0xffffffff", + "data_access_enable0" => "0x0", + "data_access_addr0" => "0x00000000", + "data_access_mask0" => "0xffffffff", + "data_access_enable1" => "0x0", + "data_access_addr1" => "0x00000000", + "data_access_mask1" => "0xffffffff", + "data_access_enable2" => "0x0", + "data_access_addr2" => "0x00000000", + "data_access_mask2" => "0xffffffff", + "data_access_enable3" => "0x0", + "data_access_addr3" => "0x00000000", + "data_access_mask3" => "0xffffffff", + "data_access_enable4" => "0x0", + "data_access_addr4" => "0x00000000", + "data_access_mask4" => "0xffffffff", + "data_access_enable5" => "0x0", + "data_access_addr5" => "0x00000000", + "data_access_mask5" => "0xffffffff", + "data_access_enable6" => "0x0", + "data_access_addr6" => "0x00000000", + "data_access_mask6" => "0xffffffff", + "data_access_enable7" => "0x0", + "data_access_addr7" => "0x00000000", + "data_access_mask7" => "0xffffffff", + }, + "memmap" => { # Testbench only + "serialio" => 'derived, overridable', # Testbench only + "external_data" => 'derived, overridable', # Testbench only + "external_prog" => 'derived, overridable', # Testbench only + "debug_sb_mem" => 'derived, overridable', # Testbench only + "external_data_1" => 'derived, overridable', # Testbench only + "external_mem_hole" => 'derived, overridable', # Testbench only +# "consoleio" => 'derived', # Part of serial io. 
+ }, + "bus" => { + "lsu_bus_tag" => 'derived', + "lsu_bus_id" => '1', # Design parm, Overridable, + "lsu_bus_prty" => '2', # Design parm, Overridable, + "dma_bus_tag" => '1', # Design parm, Overridable + "dma_bus_id" => '1', # Design parm, Overridable + "dma_bus_prty" => '2', # Design parm, Overridable + "sb_bus_tag" => '1', # Design parm, Overridable + "sb_bus_id" => '1', # Design parm, Overridable + "sb_bus_prty" => '2', # Design parm, Overridable + "ifu_bus_tag" => 'derived', + "ifu_bus_id" => '1', # Design parm, Overridable + "ifu_bus_prty" => '2', # Design parm, Overridable + "bus_prty_default" => '3', # Design parm, Overridable + }, + "triggers" => \@triggers, # Whisper only + "csr" => \%csr, # Whisper only + "even_odd_trigger_chains" => "true", # Whisper only +); + + +# These parms are used in the verilog and will be part of global parm structure +# need to have this be width in binary +# for now autosize to the data +our %verilog_parms = ( + "fast_interrupt_redirect" => '1', + "inst_access_enable0" => '1', + "inst_access_addr0" => '32', + "inst_access_mask0" => '32', + "inst_access_enable1" => '1', + "inst_access_addr1" => '32', + "inst_access_mask1" => '32', + "inst_access_enable2" => '1', + "inst_access_addr2" => '32', + "inst_access_mask2" => '32', + "inst_access_enable3" => '1', + "inst_access_addr3" => '32', + "inst_access_mask3" => '32', + "inst_access_enable4" => '1', + "inst_access_addr4" => '32', + "inst_access_mask4" => '32', + "inst_access_enable5" => '1', + "inst_access_addr5" => '32', + "inst_access_mask5" => '32', + "inst_access_enable6" => '1', + "inst_access_addr6" => '32', + "inst_access_mask6" => '32', + "inst_access_enable7" => '1', + "inst_access_addr7" => '32', + "inst_access_mask7" => '32', + "data_access_enable0" => '1', + "data_access_addr0" => '32', + "data_access_mask0" => '32', + "data_access_enable1" => '1', + "data_access_addr1" => '32', + "data_access_mask1" => '32', + "data_access_enable2" => '1', + "data_access_addr2" => 
'32', + "data_access_mask2" => '32', + "data_access_enable3" => '1', + "data_access_addr3" => '32', + "data_access_mask3" => '32', + "data_access_enable4" => '1', + "data_access_addr4" => '32', + "data_access_mask4" => '32', + "data_access_enable5" => '1', + "data_access_addr5" => '32', + "data_access_mask5" => '32', + "data_access_enable6" => '1', + "data_access_addr6" => '32', + "data_access_mask6" => '32', + "data_access_enable7" => '1', + "data_access_addr7" => '32', + "data_access_mask7" => '32', + "iccm_bits" => '5', + "iccm_bank_hi" => '5', + "iccm_bank_index_lo" => '5', + "icache_bank_bits" => '3', + "icache_status_bits" => '3', + "icache_num_beats" => '4', + "icache_beat_bits" => '4', + "icache_scnd_last" => '4', + "icache_beat_addr_hi" => '4', + "iccm_icache" => '1', + "iccm_only" => '1', + "icache_only" => '1', + "no_iccm_no_icache" => '1', + "build_axi4" => '1', + "build_ahb_lite" => '1', + "build_axi_native" => '1', + "lsu_num_nbload_width" => '3', + "lsu_num_nbload" => '5', + "ret_stack_size" => '4', + "btb_size" => '10', + "btb_index1_hi" => '5', + "btb_index1_lo" => '5', + "btb_index2_hi" => '5', + "btb_index2_lo" => '5', + "btb_index3_hi" => '5', + "btb_index3_lo" => '5', + "btb_addr_hi" => '5', + "btb_array_depth" => '9', + "btb_addr_lo" => '2', + "btb_btag_size" => '4', + "btb_btag_fold" => '1', + "btb_fold2_index_hash" => '1', + "bht_size" => '12', + "bht_addr_hi" => '4', + "bht_addr_lo" => '2', + "bht_array_depth" => '11', + "bht_ghr_size" => '4', + "bht_ghr_hash_1" => '1', + "lsu_stbuf_depth" => '4', + "dma_buf_depth" => '3', + "load_to_use_plus1" => '1', + "dccm_enable" => '1', + "dccm_region" => '4', + "dccm_size" => '10', + "dccm_num_banks" => '5', + "dccm_sadr" => '32', + "dccm_bits" => '5', + "dccm_bank_bits" => '3', + "dccm_data_width" => '6', + "dccm_fdata_width" => '6', + "dccm_byte_width" => '3', + "dccm_width_bits" => '2', + "dccm_index_bits" => '4', + "dccm_ecc_width" => '3', + "lsu_sb_bits" => '5', + "iccm_enable" => '1', + 
"iccm_region" => '4', + "iccm_size" => '10', + "iccm_num_banks" => '5', + "iccm_bank_bits" => '3', + "iccm_index_bits" => '4', + "iccm_sadr" => '32', + "icache_enable" => '1', + "icache_waypack" => '1', + "icache_num_ways" => '3', + "icache_banks_way" => '3', + "icache_bank_width" => '4', + "icache_ln_sz" => '7', + "icache_size" => '9', + "icache_bank_hi" => '3', + "icache_bank_lo" => '2', + "icache_tag_depth" => '13', + "icache_data_depth" => '14', + "icache_tag_lo" => '5', + "icache_index_hi" => '5', + "icache_data_index_lo" => '3', + "icache_data_width" => '7', + "icache_fdata_width" => '7', + "icache_tag_index_lo" => '3', + "icache_ecc" => '1', + "icache_2banks" => '1', + "pic_2cycle" => '1', + "pic_region" => '4', + "pic_size" => '9', + "pic_base_addr" => '32', + "pic_total_int_plus1" => '9', + "pic_total_int" => '8', + "pic_int_words" => '4', + "pic_bits" => '5', + "lsu_bus_tag" => '4', + "lsu_bus_id" => '1', + "lsu_bus_prty" => '2', + "dma_bus_tag" => '4', + "dma_bus_id" => '1', + "dma_bus_prty" => '2', + "sb_bus_tag" => '4', + "sb_bus_id" => '1', + "sb_bus_prty" => '2', + "ifu_bus_tag" => '4', + "ifu_bus_id" => '1', + "ifu_bus_prty" => '2', + "bus_prty_default" => '2', +); + + +# need to figure out what to do here +# for now none of these can be parameters + + +# move deletes lower + +# Perform any overrides first before derived values +map_set_unset(); + +gen_define("","", \%config,"",[]); + + +# perform final checks +my $c; +$c=$config{retstack}{ret_stack_size}; if (!($c >=2 && $c <=8)) { die("$helpusage\n\nFAIL: ret_stack_size == $c; ILLEGAL !!!\n\n"); } +$c=$config{btb}{btb_size}; if (!($c==32||$c==64||$c==128||$c==256||$c==512)) { die("$helpusage\n\nFAIL: btb_size == $c; ILLEGAL !!!\n\n"); } +$c=$config{iccm}{iccm_region}; if (!($c>=0 && $c<16)) { die("$helpusage\n\nFAIL: iccm_region == $c ILLEGAL !!!\n\n"); } +$c=$config{iccm}{iccm_offset}; if (!($c>=0 && $c<256*1024*1024 && ($c&0xfff)==0)) { die("$helpusage\n\nFAIL: iccm_offset == $c ILLEGAL 
!!!\n\n"); } +$c=$config{iccm}{iccm_size}; if (!($c==2||$c==4||$c==8||$c==16||$c==32||$c==64||$c==128||$c==256||$c==512)) { die("$helpusage\n\nFAIL: iccm_size == $c ILLEGAL !!!\n\n"); } +$c=$config{iccm}{iccm_num_banks}; if (!($c==2 || $c==4 || ($c==8 && $config{iccm}{iccm_size} != 2) || ($c==16 && $config{iccm}{iccm_size} > 4))) { die("$helpusage\n\nFAIL: iccm_num_banks == $c ILLEGAL !!!\n\n"); } +$c=$config{iccm}{iccm_enable}; if (!($c==0 || $c==1)) { die("$helpusage\n\nFAIL: iccm_enable == $c ILLEGAL !!!\n\n"); } +$c=$config{dccm}{dccm_region}; if (!($c>=0 && $c<16)) { die("$helpusage\n\nFAIL: dccm_region == $c ILLEGAL !!!\n\n"); } +$c=$config{dccm}{dccm_num_banks}; if (!(($c==2 && $config{dccm}{dccm_size} != 48) || $c==4 || ($c==8 && $config{dccm}{dccm_size} != 48) || ($c==16 && $config{dccm}{dccm_size} != 4 && $config{dccm}{dccm_size} != 48))) + { die("$helpusage\n\nFAIL: dccm_num_banks == $c ILLEGAL !!!\n\n"); } +$c=$config{dccm}{dccm_offset}; if (!($c>=0 && $c<256*1024*1024 && ($c&0xfff)==0)) { die("$helpusage\n\nFAIL: dccm_offset == $c ILLEGAL !!!\n\n"); } +$c=$config{dccm}{dccm_size}; if (!($c==4||$c==8||$c==16||$c==32||$c==48||$c==64||$c==128||$c==256||$c==512)) { die("$helpusage\n\nFAIL: dccm_size == $c ILLEGAL !!!\n\n"); } +$c=$config{pic}{pic_2cycle}; if (!($c==0 || $c==1)) { die("$helpusage\n\nFAIL: pic_2cycle == $c ILLEGAL !!!\n\n"); } +$c=$config{pic}{pic_region}; if (!($c>=0 && $c<16)) { die("$helpusage\n\nFAIL: pic_region == $c ILLEGAL !!!\n\n"); } +$c=$config{pic}{pic_offset}; if (!($c>=0 && $c<256*1024*1024 && ($c&0xfff)==0)) { die("$helpusage\n\nFAIL: pic_offset == $c ILLEGAL !!!\n\n"); } +$c=$config{pic}{pic_size}; if (!($c==32 || $c==64 || $c==128 || $c==256)) { die("$helpusage\n\nFAIL: pic_size == $c ILLEGAL !!!\n\n"); } +$c=$config{pic}{pic_total_int}; if ( $c<1 || $c>255) { die("$helpusage\n\nFAIL: pic_total_int == $c ILLEGAL !!!\n\n"); } +$c=$config{icache}{icache_enable}; if (!($c==0 || $c==1)) { die("$helpusage\n\nFAIL: icache_enable == 
$c ILLEGAL !!!\n\n"); } +$c=$config{icache}{icache_waypack}; if (!($c==0 || $c==1)) { die("$helpusage\n\nFAIL: icache_waypack == $c ILLEGAL !!!\n\n"); } +$c=$config{icache}{icache_num_ways}; if (!($c==2 || $c==4)) { die("$helpusage\n\nFAIL: icache_num_ways == $c ILLEGAL !!!\n\n"); } +$c=$config{icache}{icache_ln_sz}; if (!($c==32 || $c==64)) { die("$helpusage\n\nFAIL: icache_ln_sz == $c ILLEGAL !!!\n\n"); } +$c=$config{icache}{icache_size}; if (!($c==8 || $c==16 || $c==32 || $c==64 || $c==128 || $c==256)) { die("$helpusage\n\nFAIL: icache_size == $c ILLEGAL !!!\n\n"); } +$c=$config{core}{lsu_stbuf_depth}; if (!($c==2 || $c==4 || $c==8)) { die("$helpusage\n\nFAIL: lsu_stbuf_depth == $c ILLEGAL !!!\n\n"); } +$c=$config{core}{dma_buf_depth}; if (!($c==2 || $c==4 || $c==5)) { die("$helpusage\n\nFAIL: dma_buf_depth == $c ILLEGAL !!!\n\n"); } +$c=$config{core}{lsu_num_nbload}; if (!($c==2 || $c==4 || $c==8)) { die("$helpusage\n\nFAIL: lsu_num_nbload == $c ILLEGAL !!!\n\n"); } + +$c=$config{protection}{inst_access_addr0}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: inst_access_addr0 lower 6b must be 0s $c !!!\n\n"); } +$c=$config{protection}{inst_access_addr1}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: inst_access_addr1 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{inst_access_addr2}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: inst_access_addr2 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{inst_access_addr3}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: inst_access_addr3 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{inst_access_addr4}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: inst_access_addr4 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{inst_access_addr5}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: inst_access_addr5 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{inst_access_addr6}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: inst_access_addr6 lower 6b must 
be 0s !!!\n\n"); } +$c=$config{protection}{inst_access_addr7}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: inst_access_addr7 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{inst_access_mask0}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: inst_access_mask0 invalid !!!\n\n"); } +$c=$config{protection}{inst_access_mask1}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: inst_access_mask1 invalid !!!\n\n"); } +$c=$config{protection}{inst_access_mask2}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: inst_access_mask2 invalid !!!\n\n"); } +$c=$config{protection}{inst_access_mask3}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: inst_access_mask3 invalid !!!\n\n"); } +$c=$config{protection}{inst_access_mask4}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: inst_access_mask4 invalid !!!\n\n"); } +$c=$config{protection}{inst_access_mask5}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: inst_access_mask5 invalid !!!\n\n"); } +$c=$config{protection}{inst_access_mask6}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: inst_access_mask6 invalid !!!\n\n"); } +$c=$config{protection}{inst_access_mask7}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: inst_access_mask7 invalid !!!\n\n"); } +$c=$config{protection}{data_access_addr0}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: data_access_addr0 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{data_access_addr1}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: data_access_addr1 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{data_access_addr2}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: data_access_addr2 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{data_access_addr3}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: data_access_addr3 lower 6b must be 0s !!!\n\n"); 
} +$c=$config{protection}{data_access_addr4}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: data_access_addr4 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{data_access_addr5}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: data_access_addr5 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{data_access_addr6}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: data_access_addr6 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{data_access_addr7}; if ((hex($c)&0x3f) != 0) { die("$helpusage\n\nFAIL: data_access_addr7 lower 6b must be 0s !!!\n\n"); } +$c=$config{protection}{data_access_mask0}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: data_access_mask0 invalid !!!\n\n"); } +$c=$config{protection}{data_access_mask1}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: data_access_mask1 invalid !!!\n\n"); } +$c=$config{protection}{data_access_mask2}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: data_access_mask2 invalid !!!\n\n"); } +$c=$config{protection}{data_access_mask3}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: data_access_mask3 invalid !!!\n\n"); } +$c=$config{protection}{data_access_mask4}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: data_access_mask4 invalid !!!\n\n"); } +$c=$config{protection}{data_access_mask5}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: data_access_mask5 invalid !!!\n\n"); } +$c=$config{protection}{data_access_mask6}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: data_access_mask6 invalid !!!\n\n"); } +$c=$config{protection}{data_access_mask7}; if ((hex($c)&0x3f) != 63 || invalid_mask($c)) { die("$helpusage\n\nFAIL: data_access_mask7 invalid !!!\n\n"); } + +if ((hex($config{protection}{inst_access_addr0}) & hex($config{protection}{inst_access_mask0}))!=0) { die("$helpusage\n\nFAIL: inst_access_addr0 and 
inst_access_mask0 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{inst_access_addr1}) & hex($config{protection}{inst_access_mask1}))!=0) { die("$helpusage\n\nFAIL: inst_access_addr1 and inst_access_mask1 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{inst_access_addr2}) & hex($config{protection}{inst_access_mask2}))!=0) { die("$helpusage\n\nFAIL: inst_access_addr2 and inst_access_mask2 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{inst_access_addr3}) & hex($config{protection}{inst_access_mask3}))!=0) { die("$helpusage\n\nFAIL: inst_access_addr3 and inst_access_mask3 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{inst_access_addr4}) & hex($config{protection}{inst_access_mask4}))!=0) { die("$helpusage\n\nFAIL: inst_access_addr4 and inst_access_mask4 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{inst_access_addr5}) & hex($config{protection}{inst_access_mask5}))!=0) { die("$helpusage\n\nFAIL: inst_access_addr5 and inst_access_mask5 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{inst_access_addr6}) & hex($config{protection}{inst_access_mask6}))!=0) { die("$helpusage\n\nFAIL: inst_access_addr6 and inst_access_mask6 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{inst_access_addr7}) & hex($config{protection}{inst_access_mask7}))!=0) { die("$helpusage\n\nFAIL: inst_access_addr7 and inst_access_mask7 must be orthogonal!!!\n\n"); } + +if ((hex($config{protection}{data_access_addr0}) & hex($config{protection}{data_access_mask0}))!=0) { die("$helpusage\n\nFAIL: data_access_addr0 and data_access_mask0 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{data_access_addr1}) & hex($config{protection}{data_access_mask1}))!=0) { die("$helpusage\n\nFAIL: data_access_addr1 and data_access_mask1 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{data_access_addr2}) & hex($config{protection}{data_access_mask2}))!=0) { die("$helpusage\n\nFAIL: data_access_addr2 and 
data_access_mask2 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{data_access_addr3}) & hex($config{protection}{data_access_mask3}))!=0) { die("$helpusage\n\nFAIL: data_access_addr3 and data_access_mask3 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{data_access_addr4}) & hex($config{protection}{data_access_mask4}))!=0) { die("$helpusage\n\nFAIL: data_access_addr4 and data_access_mask4 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{data_access_addr5}) & hex($config{protection}{data_access_mask5}))!=0) { die("$helpusage\n\nFAIL: data_access_addr5 and data_access_mask5 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{data_access_addr6}) & hex($config{protection}{data_access_mask6}))!=0) { die("$helpusage\n\nFAIL: data_access_addr6 and data_access_mask6 must be orthogonal!!!\n\n"); } +if ((hex($config{protection}{data_access_addr7}) & hex($config{protection}{data_access_mask7}))!=0) { die("$helpusage\n\nFAIL: data_access_addr7 and data_access_mask7 must be orthogonal!!!\n\n"); } + +if ($config{bus}{dma_bus_tag} < 1) {die "$self : ERROR! dma_bus_tag cannot be less than 1\n"} +if ($config{bus}{sb_bus_tag} < 1) {die "$self : ERROR! sb_bus_tag cannot be less than 1\n"} + +# deletes + +# Fill in derived configuration entries. 
# Instruction-memory flavour: set the one flag in $config{core} that matches
# the icache/iccm enable combination.  The other three flags are left at the
# value they already have.
if ($config{icache}{icache_enable}==0 && $config{iccm}{iccm_enable}==0) {
    $config{core}{no_iccm_no_icache}=1;
}
elsif ($config{icache}{icache_enable}==0 && $config{iccm}{iccm_enable}==1) {
    $config{core}{iccm_only}=1;
}
elsif ($config{icache}{icache_enable}==1 && $config{iccm}{iccm_enable}==0) {
    $config{core}{icache_only}=1;
}
elsif ($config{icache}{icache_enable}==1 && $config{iccm}{iccm_enable}==1) {
    $config{core}{iccm_icache}=1;
}

# BTB geometry, looked up by btb_size.  Sizes outside the table leave the
# index/depth/tag entries untouched; btb_size is range-checked in the final
# checks before this point is reached.
{
    my @btb_fields = qw(btb_index1_hi btb_index2_hi btb_index3_hi btb_array_depth btb_btag_size);
    my %btb_geometry_of = (
        #  size => [ index1_hi, index2_hi, index3_hi, array_depth, btag_size ]
        512 => [ 9, 17, 25, 256, 5 ],
        256 => [ 8, 15, 22, 128, 6 ],
        128 => [ 7, 13, 19,  64, 7 ],
         64 => [ 6, 11, 16,  32, 8 ],
         32 => [ 5,  9, 13,  16, 9 ],
    );

    # "+ 0" keeps the original numeric (==) matching of the configured size.
    if (my $geometry = $btb_geometry_of{ $config{btb}{btb_size} + 0 }) {
        @{ $config{btb} }{@btb_fields} = @$geometry;
    }

    # Tag folding is only enabled for the smallest BTB.
    $config{btb}{btb_btag_fold}        = ($config{btb}{btb_size} == 32) ? 1 : 0;
    $config{btb}{btb_fold2_index_hash} = 0;
}

$config{btb}{btb_index2_lo} = $config{btb}{btb_index1_hi}+1;
$config{btb}{btb_index3_lo} = $config{btb}{btb_index2_hi}+1;
$config{btb}{btb_addr_hi}   = $config{btb}{btb_index1_hi};

# BHT geometry, looked up by bht_size.  An unsupported size used to fall
# through silently, leaving 'derived' placeholders to be used as numbers
# below; it is now reported like the other illegal-parameter failures.
{
    my @bht_fields = qw(bht_ghr_size bht_ghr_range bht_array_depth bht_addr_hi);
    my %bht_geometry_of = (
        #   size => [ ghr_size, ghr_range, array_depth, addr_hi ]
        2048 => [ 10, "9:0", 1024, 11 ],
        1024 => [  9, "8:0",  512, 10 ],
         512 => [  8, "7:0",  256,  9 ],
         256 => [  7, "6:0",  128,  8 ],
         128 => [  6, "5:0",   64,  7 ],
          64 => [  5, "4:0",   32,  6 ],
          32 => [  4, "3:0",   16,  5 ],
    );

    my $geometry = $bht_geometry_of{ $config{bht}{bht_size} + 0 };
    if (!$geometry) {
        die("$helpusage\n\nFAIL: bht_size == $config{bht}{bht_size} ILLEGAL !!!\n\n");
    }
    @{ $config{bht} }{@bht_fields} = @$geometry;
}
$config{bht}{bht_ghr_hash_1} = ($config{bht}{bht_ghr_size} > ($config{btb}{btb_index1_hi}-1));

$config{bht}{bht_hash_string} = ghrhash($config{btb}{btb_index1_hi}, $config{bht}{bht_ghr_size});

# PIC base address: region number in the top nibble plus the offset.
$config{pic}{pic_base_addr} = sprintf("0x%x",
    (hex($config{pic}{pic_region})<<28) + hex($config{pic}{pic_offset}));

# NOTE(review): "+0.9" only rounds up when the fractional part is >= 0.1, so
# e.g. pic_total_int == 33 gives 1 word rather than 2 -- confirm whether a
# true ceiling is intended.
$config{pic}{pic_int_words} = int($config{pic}{pic_total_int}/32 +0.9);
$config{pic}{pic_bits} = 10 + log2($config{pic}{pic_size});

$config{core}{lsu_num_nbload_width} = log2($config{core}{lsu_num_nbload});

$config{bus}{lsu_bus_tag} = log2($config{core}{lsu_num_nbload}) + 1;

$config{bus}{ifu_bus_tag} = log2($config{icache}{icache_ln_sz}/8);

# DCCM start/end addresses: same region/offset layout as the PIC; the end
# address is the last byte of the dccm_size window.
{
    my $dccm_base = (hex($config{dccm}{dccm_region})<<28) + hex($config{dccm}{dccm_offset});
    $config{dccm}{dccm_sadr} = sprintf("0x%x", $dccm_base);
    $config{dccm}{dccm_eadr} = sprintf("0x%x", $dccm_base + size($config{dccm}{dccm_size})-1);
}

# Reserved DCCM space: 5120 bytes for sizes of 16 and up, otherwise a
# quarter of the memory (dccm_size is in units of 1024 bytes here).
$config{dccm}{dccm_reserved} = sprintf("0x%x", ($config{dccm}{dccm_size}>=16)? 5120 : ($config{dccm}{dccm_size}*1024)/4);

# The 48 KB size is not a power of two and gets 16 address bits explicitly.
$config{dccm}{dccm_bits} = ($config{dccm}{dccm_size}==48 ) ? 16 : 10 + log2($config{dccm}{dccm_size});

$config{dccm}{dccm_bank_bits} = log2($config{dccm}{dccm_num_banks});
$config{dccm}{dccm_data_width} = 32;
$config{dccm}{dccm_fdata_width} = $config{dccm}{dccm_data_width} + log2($config{dccm}{dccm_data_width}) + 2;
$config{dccm}{dccm_byte_width} = $config{dccm}{dccm_data_width}/8;

$config{dccm}{dccm_width_bits} = log2($config{dccm}{dccm_byte_width});
$config{dccm}{dccm_index_bits} = $config{dccm}{dccm_bits} - $config{dccm}{dccm_bank_bits} - $config{dccm}{dccm_width_bits};

$config{dccm}{dccm_ecc_width} = log2($config{dccm}{dccm_data_width}) + 2;
$config{dccm}{lsu_sb_bits} = $config{dccm}{dccm_bits};
$config{dccm}{dccm_rows} = ($config{dccm}{dccm_size}==48 ) ?
(2**($config{dccm}{dccm_index_bits}-1) + 2**$config{dccm}{dccm_index_bits})/2 : 2**$config{dccm}{dccm_index_bits}; +$config{dccm}{dccm_data_cell} = "ram_$config{dccm}{dccm_rows}x39"; + + +$config{icache}{icache_num_lines} = $config{icache}{icache_size}*1024/$config{icache}{icache_ln_sz}; +$config{icache}{icache_num_lines_way} = $config{icache}{icache_num_lines}/$config{icache}{icache_num_ways}; +$config{icache}{icache_num_lines_bank} = $config{icache}{icache_num_lines}/($config{icache}{icache_num_ways} * $config{icache}{icache_banks_way}); +$config{icache}{icache_data_depth} = $config{icache}{icache_num_lines_bank} * $config{icache}{icache_ln_sz} /$config{icache}{icache_bank_width}; +$config{icache}{icache_data_index_lo} = log2($config{icache}{icache_bank_width}) + log2($config{icache}{icache_banks_way}); +$config{icache}{icache_index_hi} = $config{icache}{icache_data_index_lo} + log2($config{icache}{icache_data_depth}) -1; +$config{icache}{icache_bank_hi} = $config{icache}{icache_data_index_lo} - 1; +$config{icache}{icache_bank_lo} = log2($config{icache}{icache_bank_width}); +$config{icache}{icache_tag_index_lo} = log2($config{icache}{icache_ln_sz}); +$config{icache}{icache_tag_lo} = log2($config{icache}{icache_num_lines_way}) + $config{icache}{icache_tag_index_lo}; +$config{icache}{icache_tag_depth} = $config{icache}{icache_num_lines}/$config{icache}{icache_num_ways}; +$config{icache}{icache_data_width} = 8*$config{icache}{icache_bank_width}; + +$config{icache}{icache_bank_bits} = 1+$config{icache}{icache_bank_hi}-$config{icache}{icache_bank_lo}; +$config{icache}{icache_status_bits} = $config{icache}{icache_num_ways}-1; +$config{icache}{icache_num_beats} = ($config{icache}{icache_ln_sz}==64) ? 8 : 4; +$config{icache}{icache_beat_bits} = ($config{icache}{icache_ln_sz}==64) ? 3 : 2; +$config{icache}{icache_scnd_last} = ($config{icache}{icache_ln_sz}==64) ? 6 : 2; +$config{icache}{icache_beat_addr_hi} = ($config{icache}{icache_ln_sz}==64) ? 
5 : 4; + + +if (($config{icache}{icache_ecc})) { +$config{icache}{icache_fdata_width} = $config{icache}{icache_data_width} + 7; +$config{icache}{icache_data_cell} = "ram_$config{icache}{icache_data_depth}x$config{icache}{icache_fdata_width}"; +$config{icache}{icache_tag_cell} = ($config{icache}{icache_tag_depth} == 32) ? "ram_$config{icache}{icache_tag_depth}x26" : "ram_$config{icache}{icache_tag_depth}x25"; + +} +else { +$config{icache}{icache_fdata_width} = $config{icache}{icache_data_width} + 4; +$config{icache}{icache_data_cell} = "ram_$config{icache}{icache_data_depth}x$config{icache}{icache_fdata_width}"; +$config{icache}{icache_tag_cell} = "ram_$config{icache}{icache_tag_depth}x21"; +} +$config{pic}{pic_total_int_plus1} = $config{pic}{pic_total_int} + 1; +# Defines with explicit values in the macro name +$config{dccm}{"dccm_num_banks_$config{dccm}{dccm_num_banks}"} = ""; +$config{dccm}{"dccm_size_$config{dccm}{dccm_size}"} = ""; + +# If ICCM offset not explicitly provided, align to TOP of the region +if ($top_align_iccm && ($config{iccm}{iccm_offset} eq $iccm_offset) && ($config{iccm}{iccm_size} < 32)) { + $config{iccm}{iccm_region} = "0xa"; + print "$self: Setting default iccm region to region $config{iccm}{iccm_region}\n"; + $config{iccm}{iccm_offset} = sprintf("0x%08x",256*1024*1024-size($config{iccm}{iccm_size})); + print "$self: Aligning default iccm offset to top of region @ $config{iccm}{iccm_offset}\n"; +} +$config{iccm}{iccm_sadr} = (hex($config{iccm}{iccm_region})<<28) + + (hex($config{iccm}{iccm_offset})); +$config{iccm}{iccm_sadr} = sprintf("0x%08x", $config{iccm}{iccm_sadr}); + +$config{iccm}{iccm_eadr} = (hex($config{iccm}{iccm_region})<<28) + + (hex($config{iccm}{iccm_offset})) + size($config{iccm}{iccm_size})-1; +$config{iccm}{iccm_eadr} = sprintf("0x%08x", $config{iccm}{iccm_eadr}); + +$config{iccm}{iccm_reserved} = sprintf("0x%x", ($config{iccm}{iccm_size}>30)? 
4096 : ($config{iccm}{iccm_size}*1024)/4); + +$config{iccm}{iccm_bits} = 10 + log2($config{iccm}{iccm_size}); +$config{iccm}{iccm_bank_bits} = log2($config{iccm}{iccm_num_banks}); //-1; +$config{iccm}{iccm_index_bits} = $config{iccm}{iccm_bits} - $config{iccm}{iccm_bank_bits} - 2; # always 4 bytes +$config{iccm}{iccm_rows} = 2**$config{iccm}{iccm_index_bits}; +$config{iccm}{iccm_data_cell} = "ram_$config{iccm}{iccm_rows}x39"; + +$config{iccm}{iccm_bank_hi} = 2+$config{iccm}{iccm_bank_bits}-1; +$config{iccm}{iccm_bank_index_lo} = 1+$config{iccm}{iccm_bank_hi}; + +# Defines with explicit values in the macro name +$config{iccm}{"iccm_num_banks_$config{iccm}{iccm_num_banks}"} = ""; +$config{iccm}{"iccm_size_$config{iccm}{iccm_size}"} = ""; + +# Track used regions + +$regions_used{hex($config{iccm}{iccm_region})} = 1; +$regions_used{hex($config{dccm}{dccm_region})} = 1; +$regions_used{hex($config{pic}{pic_region})} = 1; +$regions_used{hex($config{reset_vec})>>28} = 1; + +# Find an unused region for serial IO +for (my $rgn = 15;$rgn >= 0; $rgn--) { + if (($rgn != hex($config{iccm}{iccm_region})) && + ($rgn != hex($config{dccm}{dccm_region})) && + ($rgn != (hex($config{pic}{pic_region})))) { + $config{memmap}{serialio} = ($rgn << 28) + (22<<18); + $regions_used{$rgn} = 1; + last; + } +} + +$config{memmap}{serialio} = sprintf("0x%08x", $config{memmap}{serialio}); + +# Find an unused region for external data +for (my $rgn = 15;$rgn >= 0; $rgn--) { + if (($rgn != hex($config{iccm}{iccm_region})) && + ($rgn != hex($config{dccm}{dccm_region})) && + ($rgn != (hex($config{memmap}{serialio})>>28)) && + ($rgn != (hex($config{pic}{pic_region})))) { + $config{memmap}{external_data} = ($rgn << 28) + (22<<18); + $regions_used{$rgn} = 1; + last; + } +} +$config{memmap}{external_data} = sprintf("0x%08x", $config{memmap}{external_data}); +# +# Find an unused region for external prog +for (my $rgn = 15;$rgn >= 0; $rgn--) { + if (($rgn != hex($config{iccm}{iccm_region})) && + ($rgn != 
hex($config{dccm}{dccm_region})) &&
+        ($rgn != (hex($config{memmap}{serialio})>>28)) &&
+        ($rgn != (hex($config{memmap}{external_data})>>28)) &&
+        ($rgn != (hex($config{pic}{pic_region})))) {
+        $config{memmap}{external_prog} = ($rgn << 28);
+        $regions_used{$rgn} = 1;
+        last;
+    }
+}
+$config{memmap}{external_prog} = sprintf("0x%08x", $config{memmap}{external_prog});
+
+# Unused region for second data
+# Scans regions 15..0 and takes the first one not already claimed by
+# iccm, dccm, serialio, external_data, external_prog or pic.
+for (my $rgn = 15;$rgn >= 0; $rgn--) {
+    if (($rgn != hex($config{iccm}{iccm_region})) &&
+        ($rgn != hex($config{dccm}{dccm_region})) &&
+        ($rgn != (hex($config{memmap}{serialio})>>28)) &&
+        ($rgn != (hex($config{memmap}{external_data})>>28)) &&
+        ($rgn != (hex($config{memmap}{external_prog})>>28) &&
+        ($rgn != (hex($config{pic}{pic_region})))
+        )) {
+        $config{memmap}{external_data_1} = ($rgn << 28);
+        $regions_used{$rgn} = 1;
+        last;
+    }
+}
+# Format the value stored by the loop above. The key must be external_data_1:
+# reading the never-written key "data_1" always produced 0x00000000 and made
+# the debug_sb_mem search below exclude region 0 instead of the real region.
+$config{memmap}{external_data_1} = sprintf("0x%08x", $config{memmap}{external_data_1});
+
+
+#$config{memmap}{consoleio} = hex($config{memmap}{serialio}) + 0x100;
+#$config{memmap}{consoleio} = sprintf("0x%x", $config{memmap}{consoleio});
+
+# Find an unused region for debug_sb_memory data
+# NOTE(review): external_prog is not in this exclusion list, so debug_sb_mem
+# may land in the same region as external_prog - confirm this is intended.
+for (my $rgn = 15;$rgn >= 0; $rgn--) {
+    if (($rgn != hex($config{iccm}{iccm_region})) &&
+        ($rgn != hex($config{dccm}{dccm_region})) &&
+        ($rgn != (hex($config{memmap}{serialio})>>28)) &&
+        ($rgn != (hex($config{memmap}{external_data})>>28)) &&
+        ($rgn != (hex($config{memmap}{external_data_1})>>28)) &&
+        ($rgn != (hex($config{pic}{pic_region})))) {
+        $config{memmap}{debug_sb_mem} = ($rgn << 28) + (22<<18);
+        $regions_used{$rgn} = 1;
+        last;
+    }
+}
+$config{memmap}{debug_sb_mem} = sprintf("0x%08x", $config{memmap}{debug_sb_mem});
+
+
+# Create the memory map hole for random testing
+# Only do this if masks are not enabled already
+if (hex($config{protection}{data_access_enable0}) > 0 ||
+    hex($config{protection}{data_access_enable1}) > 0 ||
+    hex($config{protection}{data_access_enable2}) > 0 ||
+    hex($config{protection}{data_access_enable3}) > 0 ||
+    
hex($config{protection}{data_access_enable4}) > 0 ||
+    hex($config{protection}{data_access_enable5}) > 0 ||
+    hex($config{protection}{data_access_enable6}) > 0 ||
+    hex($config{protection}{data_access_enable7}) > 0 ||
+    hex($config{protection}{inst_access_enable0}) > 0 ||
+    hex($config{protection}{inst_access_enable1}) > 0 ||
+    hex($config{protection}{inst_access_enable2}) > 0 ||
+    hex($config{protection}{inst_access_enable3}) > 0 ||
+    hex($config{protection}{inst_access_enable4}) > 0 ||
+    hex($config{protection}{inst_access_enable5}) > 0 ||
+    hex($config{protection}{inst_access_enable6}) > 0 ||
+    hex($config{protection}{inst_access_enable7}) > 0) {
+    # At least one protection window is already enabled: do not create a hole.
+    delete($config{memmap}{external_mem_hole}) ;
+} else {
+    # Unused region to create a memory map hole
+    for (my $rgn = 15;$rgn >= 0; $rgn--) {
+        if (!defined($regions_used{$rgn})) {
+            $config{memmap}{external_mem_hole} = ($rgn << 28);
+            $regions_used{$rgn} = 1;
+            last;
+        }
+    }
+    # NOTE(review): this comparison is also true when no free region was found
+    # (the key is then undefined and compares equal to 0) - confirm intent.
+    if ($config{memmap}{external_mem_hole} == 0) {
+        $config{protection}{data_access_addr0} = "0x10000000";
+        $config{protection}{data_access_mask0} = "0xffffffff";
+        $config{protection}{data_access_enable0} = "1";
+    # NOTE(review): $rgn only ranges over 0..15, so this branch cannot be taken;
+    # region 15 is handled by the general branch below.
+    } elsif (($config{memmap}{external_mem_hole}>>28) == 16) {
+        $config{protection}{data_access_addr0} = "0x00000000";
+        $config{protection}{data_access_mask0} = "0xefffffff";
+        $config{protection}{data_access_enable0} = "1";
+    } else {
+        # Region number (0..15) of the hole. The stored value is a plain number,
+        # so it must NOT go through hex(): hex(10) would be read as 0x10 == 16
+        # and every region above 9 would yield wrong windows.
+        my $hreg = $config{memmap}{external_mem_hole}>>28;
+        # Four windows of 8, 4, 2 and 1 regions. Window k keeps the hole's
+        # higher region bits and inverts bit (3-k), so together they cover
+        # every region except the hole itself.
+        $config{protection}{data_access_addr0} = sprintf("0x%x", (($hreg^8)&8)<<28);
+        $config{protection}{data_access_mask0} = "0x7fffffff";
+        $config{protection}{data_access_addr1} = sprintf("0x%x", ($hreg&8) << 28 |(($hreg^4)&4)<<28);
+        $config{protection}{data_access_mask1} = "0x3fffffff";
+        $config{protection}{data_access_addr2} = sprintf("0x%x", ($hreg&12) <<28 | (($hreg^2)&2) <<28);
+        $config{protection}{data_access_mask2} = "0x1fffffff";
+        $config{protection}{data_access_addr3} = sprintf("0x%x", ($hreg&14) << 28 |(($hreg^1)&1)<<28);
+        $config{protection}{data_access_mask3} = "0x0fffffff";
+        $config{protection}{data_access_enable0} = "1";
+        $config{protection}{data_access_enable1} = "1";
+        $config{protection}{data_access_enable2} = "1";
+        $config{protection}{data_access_enable3} = "1";
+        # Same four windows for instruction accesses.
+        $config{protection}{inst_access_addr0} = sprintf("0x%x", (($hreg^8)&8)<<28);
+        $config{protection}{inst_access_mask0} = "0x7fffffff";
+        $config{protection}{inst_access_addr1} = sprintf("0x%x", ($hreg&8) << 28 |(($hreg^4)&4)<<28);
+        $config{protection}{inst_access_mask1} = "0x3fffffff";
+        $config{protection}{inst_access_addr2} = sprintf("0x%x", ($hreg&12) <<28 | (($hreg^2)&2) <<28);
+        $config{protection}{inst_access_mask2} = "0x1fffffff";
+        $config{protection}{inst_access_addr3} = sprintf("0x%x", ($hreg&14) << 28 |(($hreg^1)&1)<<28);
+        $config{protection}{inst_access_mask3} = "0x0fffffff";
+        $config{protection}{inst_access_enable0} = "1";
+        $config{protection}{inst_access_enable1} = "1";
+        $config{protection}{inst_access_enable2} = "1";
+        $config{protection}{inst_access_enable3} = "1";
+    }
+    $config{memmap}{external_mem_hole} = sprintf("0x%08x", $config{memmap}{external_mem_hole});
+}
+
+#Define 5 unused regions for used in TG
+
+foreach my $unr (reverse(0 .. 
15)) { + if (!defined($regions_used{$unr})) { + $config{memmap}{"unused_region$unr"} = sprintf("0x%08x",($unr << 28)); + $regions_used{$unr} = 1; + } +} + +if ($target eq "baseline") { + $config{reset_vec} = $config{iccm}{iccm_sadr}; + $config{testbench}{magellan} = 1; + print "$self: Setting reset_vec = ICCM start address for Baseline\n"; +} + + +# Output bit-width specifiers for these variables +our %widths = ( + "dccm_region" => "4", + "dccm_offset" => "28", + "dccm_sadr" => "32", + "dccm_eadr" => "32", + "pic_region" => "4", + "pic_offset" => "10", + "pic_base_addr" => "32", + "iccm_region" => "4", + "iccm_offset" => "10", + "iccm_sadr" => "32", + "iccm_eadr" => "32", + "bus_prty_default" => "2", + "inst_access_enable0" => "1", + "inst_access_enable1" => "1", + "inst_access_enable2" => "1", + "inst_access_enable3" => "1", + "inst_access_enable4" => "1", + "inst_access_enable5" => "1", + "inst_access_enable6" => "1", + "inst_access_enable7" => "1", + "data_access_enable0" => "1", + "data_access_enable1" => "1", + "data_access_enable2" => "1", + "data_access_enable3" => "1", + "data_access_enable4" => "1", + "data_access_enable5" => "1", + "data_access_enable6" => "1", + "data_access_enable7" => "1", +); +#}}} + +print "\nSweRV configuration for target=$target\n\n"; +dump_define("","", \%config,[]); + + +#print Dumper(\%config); +#print Dumper(\%width); + +#print Dumper(\%sets); +#print Dumper(\%unsets); + +# Sanity checks +check_addr_align("dccm", hex($config{dccm}{dccm_sadr}), $config{dccm}{dccm_size}*1024); +check_addr_align("iccm", hex($config{iccm}{iccm_sadr}), $config{iccm}{iccm_size}*1024); +check_addr_align("pic", hex($config{pic}{pic_base_addr}), $config{pic}{pic_size}*1024); + +# Prevent overlap of internal memories +if ((hex($config{pic}{pic_region}) == hex($config{iccm}{iccm_region})) && (hex($config{pic}{pic_offset}) == hex($config{iccm}{iccm_offset}))) { + die "$self: ERROR! 
PIC and ICCM blocks collide (region $config{iccm}{iccm_region}, offset $config{pic}{pic_offset})!\n"; +} +if ((hex($config{pic}{pic_region}) == hex($config{dccm}{dccm_region})) && (hex($config{pic}{pic_offset}) == hex($config{dccm}{dccm_offset}))) { + die "$self: ERROR! PIC and DCCM blocks collide (region $config{dccm}{dccm_region}, offset $config{pic}{pic_offset})!\n"; +} +if ((hex($config{iccm}{iccm_region}) == hex($config{dccm}{dccm_region})) && (hex($config{iccm}{iccm_offset}) == hex($config{dccm}{dccm_offset}))) { + die "$self: ERROR! ICCM and DCCM blocks collide (region $config{iccm}{iccm_region}, offset $config{dccm}{dccm_offset})!\n"; +} + + + +# all targets default to axi +if (($target eq "default_ahb") || ($config{testbench}{build_ahb_lite} == 1)) { + delete $config{testbench}{build_axi4}; + $config{testbench}{build_axi_native}=1; + $verilog_parms{build_axi4} = 0; + $config{testbench}{build_ahb_lite}=1; +} else { + $config{testbench}{build_axi_native}=1; + $config{testbench}{build_axi4} = 1; + delete $config{testbench}{build_ahb_lite}; + $verilog_parms{build_ahb_lite} = 0; +} + + +# Over-ride MFDC reset value for AXI. +# Disable Bus barrier and 64b for AXI +if (defined($config{"testbench"}{"build_axi_native"}) && ($config{"testbench"}{"build_axi_native"} ne "0")) { + if (! 
(defined($config{testbench}{build_ahb_lite}) && $config{testbench}{build_ahb_lite} ne "0")) { + $config{csr}{mfdc}{reset} = "0x00070040" if exists $config{csr}{mfdc}; + } +} + +# AHB overrides +if (defined($config{"testbench"}{"build_ahb_lite"}) && ($config{"testbench"}{"build_ahb_lite"} ne "0")) { +} + + +# parm processing before any values are deleted from the hash + + + +print "$self: Writing $tdfile\n"; +print "$self: Writing $paramfile\n"; +open (FILE1, ">$tdfile") || die "Cannot open $tdfile for writing $!\n"; +open (FILE2, ">$paramfile") || die "Cannot open $paramfile for writing $!\n"; +print_header("//"); +gen_define("","`", \%config, \%verilog_parms, \@verilog_vars); +dump_parms(\%verilog_parms); +close FILE1; +close FILE2; + +$config{config_key}="32'hdeadbeef"; + +# end parms + +# deletes +if (($load_to_use_plus1==0) && !grep(/load_to_use_plus1/, @sets)) { delete $config{"core"}{"load_to_use_plus1"}; } +if (($iccm_enable==0) && !grep(/iccm_enable/, @sets)) { delete $config{"iccm"}{"iccm_enable"}; } +if (($dccm_enable==0) && !grep(/dccm_enable/, @sets)) { delete $config{"dccm"}{"dccm_enable"}; } +if (($icache_enable==0) && !grep(/icache_enable/, @sets)) { delete $config{"icache"}{"icache_enable"}; } +if (($icache_waypack==0) && !grep(/icache_waypack/, @sets)) { delete $config{"icache"}{"icache_waypack"}; } +if (($opensource==0) && !grep(/opensource/, @sets)) { delete $config{"core"}{"opensource"}; } +if (($verilator==0) && !grep(/verilator/, @sets)) { delete $config{"core"}{"verilator"}; } +if (($pic_2cycle==0) && !grep(/pic_2cycle/, @sets)) { delete $config{"pic"}{"pic_2cycle"}; } +if (($icache_ecc==0) && !grep(/icache_ecc/, @sets)) { delete $config{"icache"}{"icache_ecc"}; } +if (($icache_2banks==0) && !grep(/icache_2banks/, @sets)) { delete $config{"icache"}{"icache_2banks"}; } + + +# new +if ($config{"testbench"}{"build_axi4"} == 1) { + delete $config{"testbench"}{"build_ahb_lite"}; + delete $config{"testbench"}{"build_axi_native_ahb"}; +} +elsif 
(($target eq "default_ahb") || ($config{"testbench"}{"build_ahb_lite"} == 1)) { + $config{"testbench"}{"build_ahb_lite"} = 1; + delete $config{"testbench"}{"build_axi4"}; + $config{"testbench"}{"build_axi_native_ahb"} = 1; +} + + + + +##################### Add dumper routines here ########################## + + +# +# Dump Verilog $RV_ROOT/configs/common_defines.vh +print "$self: Writing $vlogfile\n"; +open (FILE, ">$vlogfile") || die "Cannot open $vlogfile for writing $!\n"; +print_header("//"); +print FILE "`define RV_ROOT \"".$ENV{RV_ROOT}."\"\n"; +gen_define("","`", \%config, "", \@verilog_vars); +close FILE; + +print "$self: Writing $asmfile\n"; +open (FILE, ">$asmfile") || die "Cannot open $asmfile for writing $!\n"; +# Dump ASM/C $RV_ROOT/diags/env/defines.h +print_header("//"); +gen_define("","#", \%config, "", \@asm_vars, \@asm_overridable); +close FILE; + +# add `define PHYSICAL 1 +# remove `undef RV_ICCM_ENABLE + +my $pddata=' +`include "common_defines.vh" +`undef ASSERT_ON +`undef TEC_RV_ICG +`define TEC_RV_ICG HDBLVT16_CKGTPLT_V5_12 +`define PHYSICAL 1 +'; + + +print "$self: Writing $pdfile\n"; +open (FILE, ">$pdfile") || die "Cannot open $pdfile for writing $!\n"; +# Dump PD $RV_ROOT/$RV_ROOT/configs/pd_defines.vh +print_header("//"); +printf (FILE "$pddata"); +close FILE; + +print "$self: Writing $whisperfile\n"; +dump_whisper_config(\%config, $whisperfile); + + +# change this to use config version +`$ENV{RV_ROOT}/tools/picmap -t $config{pic}{pic_total_int} > $build_path/pic_map_auto.h`; +#`$ENV{RV_ROOT}/tools/unrollforverilator $config{pic}{pic_total_int_plus1} > $build_path/el2_pic_ctrl_verilator_unroll.sv`; + +# Perl vars for use by scripts +print "$self: Writing $perlfile\n"; +open (FILE, ">$perlfile") || die "Cannot open $perlfile for writing $!\n"; +print_header("# "); +print FILE "# To use this in a perf script, use 'require \$RV_ROOT/configs/config.pl'\n"; +print FILE "# Reference the hash via \$config{name}..\n\n\n"; +print FILE 
Data::Dumper->Dump([\%config], [ qw(*config) ]);
+print FILE "1;\n";
+close FILE;
+
+
+
+# Done ##################################################################
+#
+exit(0);
+
+# ###################### Helper subroutines ##########################{{{
+# Convert size in kilobytes to real value
+
+# size($ksize): returns $ksize*1024 formatted as a decimal integer string.
+sub size {#{{{
+    my $ksize = shift;
+    my $size = sprintf("%d",$ksize*1024);
+    return $size;
+}#}}}
+
+# Print the defines with prefix
+#   $sym      - directive character written before "define" ("`" or "#")
+#   $key      - macro name (file-level $prefix is prepended unless the key
+#               matches $no_prefix)
+#   $value    - macro value; for "`" a leading 0x is rewritten to 'h, with
+#               the bit width from %widths in front when one is listed
+#   $override - when true the define is wrapped in ifndef/endif
+# Output goes to the bareword handle FILE.
+sub print_define {#{{{
+    my ($sym, $key,$value, $override) = @_;
+    # Do not write "my $x = ... if COND": perlsyn documents that form as
+    # undefined behaviour, and $x can keep the value of a previous call.
+    my $lprefix = ($key !~ /$no_prefix/) ? $prefix : "";
+    if ($sym eq "`") {
+        if (defined($widths{$key})) {
+            $value =~ s/^(0x)*/$widths{$key}'h/;
+        } else {
+            $value =~ s/^0x/'h/;
+        }
+    }
+    if ($defines_case eq "U") {
+        print FILE "${sym}ifndef \U$lprefix$key\E\n" if ($override);
+        print FILE "${sym}define \U$lprefix$key\E $value\n";
+        print FILE "${sym}endif\n" if ($override);
+    } else {
+        print FILE "${sym}ifndef $lprefix$key\n" if ($override);
+        print FILE "${sym}define $lprefix$key $value\n";
+        print FILE "${sym}endif\n" if ($override);
+    }
+}#}}}
+
+# print header
+# Writes a "generated file" banner to FILE; $cs is the comment leader of the
+# target language ("//" or "# ").
+sub print_header {#{{{
+    my $cs = shift;
+    print FILE "$cs NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE\n";
+    print FILE "$cs This is an automatically generated file by $ENV{USER} on ",`date`;
+    print FILE "$cs\n$cs cmd: $self @argv_orig \n";
+    print FILE "$cs\n";
+}#}}}
+
+# evaluate derivations
+# NOTE: string-evals $eqn; only ever feed it text from trusted config files.
+sub derive {#{{{
+    my $eqn = shift;
+    return sprintf("0x%x", eval($eqn));
+}#}}}
+
+# traverse the database and extract the key/value pair
+#   $matched - true when an enclosing hash key already matched the print list
+#   $prefix  - directive character handed on to print_define
+#   $_[0]    - hash ref to walk (recursively)
+#   $_[1]    - hash ref of verilog parameters to fill in, or a false value
+#   $_[2]    - array ref of key patterns to print
+#   $_[3]    - optional array ref of keys emitted as overridable defines
+# Applies command-line %sets / %unsets on the way and dies when a derived,
+# non-overridable key is overridden.
+sub gen_define {#{{{
+    my $matched = shift;
+    my $prefix = shift;
+    my $hash = $_[0];
+    my $parms = $_[1];
+    my @printvars = @{$_[2]};
+    # Explicit empty default instead of "my @x = ... if defined ...", which
+    # could leak the list of an earlier call.
+    my @overridable = defined($_[3]) ? @{$_[3]} : ();
+    my $re = join("|",@printvars);
+    $re = qr/($re)/;
+    #print Dumper($hash);
+    foreach my $key (keys %$hash) {
+        next if $key eq "csr";
+        #print "looking at $key:$matched ($re)\n";
+        if (defined($unsets{$key})) {
+            print "$self:unsetting $key\n";
+            delete($config{$key});
+            if ($parms and defined($parms->{$key})) {
+                $parms->{$key} = 0;
+            }
+            next
+        }
+        if (defined($sets{$key}) && $sets{$key} ne $$hash{$key}) {
+            if (($$hash{$key} =~ /derived/i) && ($$hash{$key} !~ /overridable/i)) {
+                die ("$self: ERROR! $key is a derived and non-overridable parameter!\n");
+            } else {
+                print "$self: Overriding $key value $$hash{$key} with $sets{$key}\n";
+                $$hash{$key} = $sets{$key};
+            }
+        }
+        my $value = $$hash{$key};
+        if (ref($value) eq "HASH") {
+            if ($key =~ /$re/) {
+                $matched = 1;
+            }
+            gen_define($matched,$prefix, $value, $parms, \@printvars, \@overridable);
+            $matched = 0;
+        } elsif (ref($value) eq "ARRAY") {
+            # print "$key : @{$value}\n";
+            $matched = 0;
+        } else {
+            if ($matched eq "1" || $key =~ /$re/) {
+                # Values of the form derive(...) are evaluated as Perl code.
+                if($value =~ /derive\(.*\)/o) {
+                    $value = eval($value);
+                }
+                my $override = grep(/^$key$/, @overridable);
+                print_define($prefix, $key, $value, $override);
+                #printf("$key = $value\n");
+                if ($parms and defined($parms->{$key})) {
+                    # $parms->{$key} holds the bit length on entry and the
+                    # zero-padded binary string afterwards.
+                    $value=decimal($value);
+                    #printf("verilog parm $key = $value %s\n",$parms->{$key});
+                    $value=d2b($key,$value,$parms->{$key});
+                    #printf("verilog parm $key = $value\n");
+                    $parms->{$key}=$value;
+                }
+            }
+        }
+    }
+}#}}}
+
+# Same traversal as gen_define, but prints "swerv: key = value" lines to
+# STDOUT instead of writing defines. Top-level keys are only visited when
+# listed in @dvars.
+sub dump_define {#{{{
+    my $matched = shift;
+    my $prefix = shift;
+    my $hash = $_[0];
+    my @printvars = @{$_[1]};
+    my @overridable = defined($_[2]) ? @{$_[2]} : ();
+    my $re = join("|",@printvars);
+    $re = qr/($re)/;
+    #print Dumper($hash);
+    foreach my $key (keys %$hash) {
+        next if $key eq "csr";
+        next unless $matched || grep(/^$key$/,@dvars);
+        #print "looking at $key:$matched ($re)\n";
+        if (defined($unsets{$key})) {
+            print "$self:unsetting $key\n";
+            delete($config{$key});
+            next
+        }
+        if (defined($sets{$key}) && $sets{$key} ne $$hash{$key}) {
+            if (($$hash{$key} =~ /derived/i) && ($$hash{$key} !~ /overridable/i)) {
+                die ("$self: ERROR! 
$key is a derived and non-overridable parameter!\n");
+            } else {
+                print "$self: Overriding $key value $$hash{$key} with $sets{$key}\n";
+                $$hash{$key} = $sets{$key};
+            }
+        }
+        my $value = $$hash{$key};
+        if (ref($value) eq "HASH") {
+            if ($key =~ /$re/) {
+                $matched = 1;
+            }
+            dump_define($matched,$prefix, $value, \@printvars, \@overridable);
+            $matched = 0;
+        } elsif (ref($value) eq "ARRAY") {
+            # print "$key : @{$value}\n";
+            $matched = 0;
+        } else {
+            if ($matched eq "1" || $key =~ /$re/) {
+                if($value =~ /derive\(.*\)/o) {
+                    $value = eval($value);
+                }
+                printf ("swerv: %-30s = $value\n",$key) if ($value !~ /derived/);
+            }
+        }
+    }
+}#}}}
+
+# Perform cmd line set/unset ############################################{{{
+# Fills %sets (key => value, value defaults to 1) from @sets entries of the
+# form key or key=value, and %unsets (key => 1) from @unsets.
+sub map_set_unset {
+    if (scalar(@sets)) {
+        print "$self: Set(s) requested : @sets\n";
+        foreach (@sets) {
+            my ($key,$value) = m/(\w+)=*(\w+)*/o;
+            $value = 1 if (!defined($value));
+            $sets{$key} = $value;
+        }
+    }
+    if (scalar(@unsets)) {
+        # Report the unset list itself (this line used to print @sets).
+        print "$self: Unset(s) requested : @unsets\n";
+        foreach (@unsets) {
+            $unsets{$_} = 1;
+        }
+    }
+} #}}}
+#}}}
+
+
+# If arg looks like a hexadecimal string, then convert it to decimal.#{{{
+# Otherwise, return arg.
+sub decimal {
+    my ($x) = @_;
+    return hex($x) if $x =~ /^0x/o;
+    return $x;
+}#}}}
+
+
+# Collect memory protection specs (array of address pairs) in the given
+# results array. Tag is either "data" or "inst".
+#   $config  - hash ref of the configuration; its {protection} entry is read
+#   $results - array ref; one [ start, end ] pair of "0x%08x" strings is
+#              pushed for every enabled and well-formed window
+# Malformed entries are reported with warn() and skipped.
+sub collect_mem_protection {
+    my ($tag, $config, $results) = @_;
+    # Read through the hash ref that was passed in rather than the global %config.
+    return unless exists $config->{protection};
+
+    my $prot = $config->{protection};
+
+    my $enable_tag = $tag . "_access_enable";
+    my $addr_tag = $tag . "_access_addr";
+    my $mask_tag = $tag . "_access_mask";
+
+    foreach my $key (keys %{$prot}) {
+        next unless $key =~ /^$enable_tag(\d+)$/;
+        my $ix = $1;
+
+        my $enable = $prot->{$key};
+        if ($enable !~ /[01]$/) {
+            warn("Invalid value for protection entry $key: $enable\n");
+            next;
+        }
+
+        next unless ($enable eq "1" or $enable eq "1'b1");
+
+        if (! exists $prot->{"$addr_tag$ix"}) {
+            warn("Missing $addr_tag$ix\n");
+            next;
+        }
+
+        if (! exists $prot->{"$mask_tag$ix"}) {
+            warn("Missing $mask_tag$ix\n");
+            next;
+        }
+
+        my $addr = $prot->{"$addr_tag$ix"};
+        my $mask = $prot->{"$mask_tag$ix"};
+
+        if ($addr !~ /^0x[0-9a-fA-F]+$/) {
+            warn("Invalid $addr_tag$ix: $addr\n");
+            next;
+        }
+
+        if ($mask !~ /^0x[0-9a-fA-F]+$/) {
+            warn("Invalid $mask_tag$ix: $mask\n");
+            next;
+        }
+
+        # Overlap is only warned about; the entry is still emitted.
+        if ((hex($addr) & hex($mask)) != 0) {
+            warn("Protection mask bits overlap address bits in $tag mask $mask and addr $addr\n");
+        }
+
+        if ($mask !~ /^0x0*[137]?f*$/) {
+            warn("Protection $tag mask ($mask) must have all its one bits to the right of its zero bits\n");
+            next;
+        }
+
+        my $start = hex($addr) & ~hex($mask) & 0xffffffff;
+        my $end = (hex($addr) | hex($mask)) & 0xffffffff;
+
+        $start = sprintf("0x%08x", $start);
+        $end = sprintf("0x%08x", $end);
+
+        push(@{$results}, [ $start, $end ]);
+    }
+}
+
+
+# Write the whisper (instruction set simulator) JSON configuration to $path.
+# NOTE(review): apart from the calls to collect_mem_protection, the body
+# reads the file-level %config rather than the $config argument.
+sub dump_whisper_config{#{{{
+    my ($config, $path) = @_;
+
+    open(my $fh, ">", "$path") or die ("Failed to open $path for writing: $!\n");
+
+    # Put the configuration parameters relevant to whisper into a hash
+    # in preparation for a JSON dump.
+    my %jh;   # Json hash
+
+    # Collect top-level integer entries.
+    foreach my $tag (qw( harts xlen )) {
+        $jh{$tag} = $config{$tag} + 0 if exists $config{$tag};
+    }
+
+    # Collect top-level string/hex entries.
+    foreach my $tag (qw ( reset_vec nmi_vec num_mmode_perf_regs max_mmode_perf_event
+                          even_odd_trigger_chains)) {
+        $jh{$tag} = $config{$tag} if exists $config{$tag};
+    }
+
+    # Collect memory map configs.
+    my (@inst_mem_prot, @data_mem_prot);
+    collect_mem_protection("inst", $config, \@inst_mem_prot);
+    collect_mem_protection("data", $config, \@data_mem_prot);
+    $jh{memmap}{inst} = [@inst_mem_prot] if @inst_mem_prot;
+    $jh{memmap}{data} = [@data_mem_prot] if @data_mem_prot;
+    # Copy each listed memmap entry under its own name. The loop variable
+    # must be interpolated: the literal keys "tag"/"ta" never matched, so
+    # size, page_size and serialio were silently dropped.
+    foreach my $tag (qw ( size page_size serialio )) {
+        $jh{memmap}{$tag} = $config{memmap}{$tag} if exists $config{memmap}{$tag};
+    }
+
+    # Collect load/store-error rollback parameter.
+    if (exists $config{testbench} and exists $config{testbench}{sterr_rollback}) {
+        $jh{store_error_rollback} = $config{testbench}{sterr_rollback};
+    }
+    if (exists $config{testbench} and exists $config{testbench}{lderr_rollback}) {
+        $jh{load_error_rollback} = $config{testbench}{lderr_rollback};
+    }
+
+    # Collect dccm configs
+    if (exists $config{dccm} and exists $config{dccm}{dccm_enable}) {
+        $jh{dccm}{region} = $config{dccm}{dccm_region};
+        $jh{dccm}{size} = 1024*decimal($config{dccm}{dccm_size});  # From 1k to bytes
+        $jh{dccm}{offset} = $config{dccm}{dccm_offset};
+
+        $jh{dccm}{size} = sprintf("0x%x", $jh{dccm}{size});
+    }
+
+    # Collect iccm configs.
+    if (exists $config{iccm} and exists $config{iccm}{iccm_enable}) {
+        $jh{iccm}{region} = $config{iccm}{iccm_region};
+        $jh{iccm}{size} = 1024*decimal($config{iccm}{iccm_size});  # From 1k to bytes
+        $jh{iccm}{offset} = $config{iccm}{iccm_offset};
+
+        $jh{iccm}{size} = sprintf("0x%x", $jh{iccm}{size});
+    }
+
+    # Collect CSRs
+    # NOTE(review): this stores the same hash ref, so the delete below also
+    # removes the entries from $config{csr} - confirm that is intended.
+    $jh{csr} = $config{csr} if exists $config{csr};
+
+    # Collect CSRs not included in verilog.
+    my @removed_csrs;
+
+    # Collect fast interrupt enable.
+    if (exists $config{core}{fast_interrupt_redirect}) {
+        $jh{fast_interrupt_redirect} = $config{core}{fast_interrupt_redirect};
+        # meicpct CSR is not built if fast interrupt.
+        push(@removed_csrs, 'meicpct') if $jh{fast_interrupt_redirect};
+    }
+
+    # Remove CSRs not configured into verilog.
+    delete $jh{csr}{$_} foreach @removed_csrs;
+
+
+
+    # Collect pic configs.
+    if (exists $config{pic}) {
+        while (my ($k, $v) = each %{$config{pic}}) {
+            next if $k eq 'pic_base_addr';  # derived from region and offset
+            if ($k eq 'pic_size') {
+                $v *= 1024;  # from kbytes to bytes
+                $v = sprintf("0x%x", $v);
+            }
+            $k =~ s/^pic_//o;
+            $v += 0 if $v =~ /^\d+$/o;
+            $jh{pic}{$k} = $v;
+        }
+    }
+
+    # Make atomic instructions illegal outside of DCCM.
+    $jh{amo_illegal_outside_dccm} = "true";
+
+    # Make ld/st instructions trigger misaligned exceptions if base
+    # address (value in rs1) and effective address refer to regions of
+    # different types.
+    $jh{effective_address_compatible_with_base} = "true";
+
+    # Collect triggers.
+    $jh{triggers} = $config{triggers} if exists $config{triggers};
+
+    # Dump JSON config file.
+    my $json = JSON->new->allow_nonref;
+    my $text = $json->pretty->encode(\%jh);
+    print($fh $text);
+
+    close $fh;
+}#}}}
+
+
+# Checker for iccm/dccm/pic sub-region address alignment. Address must be a multiple
+# of size or next higher power of 2 if size is not a power of 2.
+# Dies on a non-positive size; prints a message and exits with status 1 when
+# the address is misaligned.
+sub check_addr_align {
+    my ($section, $addr, $size) = @_;
+
+    die("Invalid $section size: $size\n") if $size <= 0;
+
+    # The shift truncates log2 to an integer, so $p2 is the largest power
+    # of two that does not exceed $size.
+    my $log_size = log2($size);
+    my $p2 = 1 << $log_size;
+    $size = 2*$p2 if $size != $p2;
+
+    if (($addr % $size) != 0) {
+        printf("Address of $section area(0x%x) is not a multiple of its size (0x%x)\n",
+               $addr, $size);
+        exit(1);
+    }
+}
+
+
+# Base-2 logarithm of $n. Results that are an integer up to floating point
+# noise are returned as that exact integer, so callers that truncate the
+# result (e.g. "1 << log2($size)") cannot be thrown off by k - epsilon.
+# All other inputs return the unrounded value, as before.
+sub log2 {
+    my ($n) = @_;
+    my $exact = log($n)/log(2);
+    my $nearest = sprintf("%.0f", $exact) + 0;
+    return $nearest if abs($exact - $nearest) < 1e-9;
+    return $exact;
+}
+
+# Binary digit string to number.
+sub b2d {
+    my ($v) = @_;
+
+    $v = oct("0b" . $v);
+
+    return($v);
+}
+
+# Number $v to a binary digit string, left-padded with zeros to $LEN digits.
+# Dies when the value needs more than $LEN digits; $key is only used in
+# that message.
+sub d2b {
+    my ($key,$v,$LEN) = @_;
+
+    my $repeat;
+
+    $v = sprintf "%b",$v;
+    if (length($v)<$LEN) {
+        $repeat=$LEN-length($v);
+        $v="0"x$repeat.$v;
+    }
+    elsif (length($v)>$LEN) {
+        die("d2b: parm $key value $v > len $LEN");
+    }
+
+    return($v);
+}
+
+
+# Returns 0 when $m is "0x", optional zeros, an optional 1/3/7 digit and at
+# least one f; returns 1 otherwise.
+sub invalid_mask {
+    my ($m) = @_;
+
+    if ($m =~ /^0x(0)*([137]?f+)$/) { return(0); }
+
+    return(1);
+}
+
+
+# Binary digit string to an upper-case hexadecimal digit string (no prefix).
+sub b2h {
+    my ($bin) = @_;
+
+    # Make input bit string a multiple of 4
+    $bin = substr("0000",length($bin)%4) . $bin if length($bin)%4;
+
+    my ($hex, $nybble) = ("");
+    while (length($bin)) {
+        ($nybble,$bin) = (substr($bin,0,4), substr($bin,4));
+        # oct() parses the "0b" literal directly; no string eval needed.
+        $nybble = oct("0b$nybble");
+        $hex .= substr("0123456789ABCDEF", $nybble, 1);
+    }
+    return $hex;
+}
+
+# BHT index is a hash of the GHR and PC_HASH
+# Returns the Verilog concatenation text for the hash, picking the "cf1"
+# form when $ghr_size exceeds $btb_index_hi - 1 and the "cf2" form otherwise.
+sub ghrhash{
+    my($btb_index_hi,$ghr_size) = @_;
+
+    # Lexical on purpose: this used to overwrite the file-level $btb_size.
+    my $btb_size = $btb_index_hi - 1;
+
+    my $ghr_hi = $ghr_size - 1;
+    my $ghr_lo = $btb_size;
+
+    if($ghr_size > $btb_size){
+        return "{ghr[$ghr_hi:$ghr_lo], hashin[$btb_index_hi:2]^ghr[$ghr_lo-1:0]} // cf1";
+    }
+    else {
+        return "{hashin[$ghr_size+1:2]^ghr[$ghr_size-1:0]}// cf2";
+    }
+}
+
+# Write the el2_param_t struct typedef to FILE1 and the matching parameter
+# initialiser to FILE2. $hash maps parameter names to binary digit strings;
+# each field is as wide as its string is long.
+sub dump_parms {
+    my ($hash) = @_;
+
+    my ($bvalue, $blen, $upper);
+    printf(FILE1 "typedef struct packed {\n");
+    foreach my $key (sort keys %$hash) {
+        $bvalue=$hash->{$key};
+        $blen=length($bvalue);
+        $upper=$key;
+        $upper=~ tr/a-z/A-Z/;
+        if ($blen==1) {
+            # Single-bit field: no range. %-10s still needs an argument,
+            # so pass an empty string to keep the column padding.
+            printf(FILE1 "\tbit %-10s $upper;\n", "");
+        }
+        else {
+            printf(FILE1 "\tbit %-10s $upper;\n",sprintf("[%d:0]",$blen-1));
+        }
+    }
+    printf(FILE1 "} el2_param_t;\n\n");
+
+    # Concatenation of every field, used for the commented one-literal form.
+    my $bcat="";
+    my $parmcnt=0;
+    foreach my $key (sort keys %$hash) {
+        #printf("// $key = %s\n",$verilog_parms{$key});
+        $bcat.=$hash->{$key};
+        $parmcnt++;
+    }
+
+    my $pcnt=0;
+    my $delim=",";
+    printf(FILE2 "parameter el2_param_t pt = '{\n");
+    foreach my $key (sort keys %$hash) {
+        $upper=$key;
+        $upper=~ tr/a-z/A-Z/;
+        $pcnt++;
+        # No trailing comma after the last field (empty string, not undef,
+        # so the format below interpolates a defined value).
+        if ($pcnt==$parmcnt) { $delim = ""; }
+        printf(FILE2 "\t%-22s : %d\'h%-10s $delim\n",$upper,length($hash->{$key}),b2h($hash->{$key}));
+    }
+    printf(FILE2 "}\n");
+
+    printf(FILE2 "// parameter el2_param_t pt = %d'h%s\n",length($bcat),b2h($bcat));
+
+}
+
+
diff --git a/design/dbg/el2_dbg.sv b/design/dbg/el2_dbg.sv
new file mode 100644
index 0000000..5cfa548
--- /dev/null
+++ b/design/dbg/el2_dbg.sv
@@ -0,0 +1,607 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//******************************************************************************** +// $Id$ +// +// Function: Top level SWERV core file to control the debug mode +// Comments: Responsible to put the rest of the core in quiesce mode, +// Send the commands/address. sends WrData and Recieve read Data. +// And then Resume the core to do the normal mode +// Author : +//******************************************************************************** +module el2_dbg +import el2_pkg::*; +#( +`include "el2_param.vh" + )( + // outputs to the core for command and data interface + output logic [31:0] dbg_cmd_addr, + output logic [31:0] dbg_cmd_wrdata, + output logic dbg_cmd_valid, + output logic dbg_cmd_write, // 1: write command, 0: read_command + output logic [1:0] dbg_cmd_type, // 0:gpr 1:csr 2: memory + output logic [1:0] dbg_cmd_size, // size of the abstract mem access debug command + output logic dbg_core_rst_l, // core reset from dm + + // inputs back from the core/dec + input logic [31:0] core_dbg_rddata, + input logic core_dbg_cmd_done, // This will be treated like a valid signal + input logic core_dbg_cmd_fail, // Exception during command run + + // Signals to dma to get a bubble + output logic dbg_dma_bubble, // Debug needs a bubble to send a valid + input logic dma_dbg_ready, // DMA is ready to accept debug request + + // interface with the rest of the core to halt/resume handshaking + output logic 
dbg_halt_req, // This is a pulse + output logic dbg_resume_req, // Debug sends a resume requests. Pulse + input logic dec_tlu_debug_mode, // Core is in debug mode + input logic dec_tlu_dbg_halted, // The core has finished the queiscing sequence. Core is halted now + input logic dec_tlu_mpc_halted_only, // Only halted due to MPC + input logic dec_tlu_resume_ack, // core sends back an ack for the resume (pulse) + + // inputs from the JTAG + input logic dmi_reg_en, // read or write + input logic [6:0] dmi_reg_addr, // address of DM register + input logic dmi_reg_wr_en, // write instruction + input logic [31:0] dmi_reg_wdata, // write data + + // output + output logic [31:0] dmi_reg_rdata, // read data + + // AXI Write Channels + output logic sb_axi_awvalid, + input logic sb_axi_awready, + output logic [pt.SB_BUS_TAG-1:0] sb_axi_awid, + output logic [31:0] sb_axi_awaddr, + output logic [3:0] sb_axi_awregion, + output logic [7:0] sb_axi_awlen, + output logic [2:0] sb_axi_awsize, + output logic [1:0] sb_axi_awburst, + output logic sb_axi_awlock, + output logic [3:0] sb_axi_awcache, + output logic [2:0] sb_axi_awprot, + output logic [3:0] sb_axi_awqos, + + output logic sb_axi_wvalid, + input logic sb_axi_wready, + output logic [63:0] sb_axi_wdata, + output logic [7:0] sb_axi_wstrb, + output logic sb_axi_wlast, + + input logic sb_axi_bvalid, + output logic sb_axi_bready, + input logic [1:0] sb_axi_bresp, + + // AXI Read Channels + output logic sb_axi_arvalid, + input logic sb_axi_arready, + output logic [pt.SB_BUS_TAG-1:0] sb_axi_arid, + output logic [31:0] sb_axi_araddr, + output logic [3:0] sb_axi_arregion, + output logic [7:0] sb_axi_arlen, + output logic [2:0] sb_axi_arsize, + output logic [1:0] sb_axi_arburst, + output logic sb_axi_arlock, + output logic [3:0] sb_axi_arcache, + output logic [2:0] sb_axi_arprot, + output logic [3:0] sb_axi_arqos, + + input logic sb_axi_rvalid, + output logic sb_axi_rready, + input logic [63:0] sb_axi_rdata, + input logic [1:0] 
sb_axi_rresp, + + input logic dbg_bus_clk_en, + + // general inputs + input logic clk, + input logic rst_l, + input logic clk_override, + input logic scan_mode +); + + + typedef enum logic [2:0] {IDLE=3'b000, HALTING=3'b001, HALTED=3'b010, CMD_START=3'b011, CMD_WAIT=3'b100, CMD_DONE=3'b101, RESUMING=3'b110} state_t; + typedef enum logic [3:0] {SBIDLE=4'h0, WAIT_RD=4'h1, WAIT_WR=4'h2, CMD_RD=4'h3, CMD_WR=4'h4, CMD_WR_ADDR=4'h5, CMD_WR_DATA=4'h6, RSP_RD=4'h7, RSP_WR=4'h8, DONE=4'h9} sb_state_t; + + state_t dbg_state; + state_t dbg_nxtstate; + logic dbg_state_en; + // these are the registers that the debug module implements + logic [31:0] dmstatus_reg; // [26:24]-dmerr, [17:16]-resume ack, [9:8]-halted, [3:0]-version + logic [31:0] dmcontrol_reg; // dmcontrol register has only 6 bits implemented. 31: haltreq, 30: resumereq, 29: haltreset, 28: ackhavereset, 1: ndmreset, 0: dmactive. + logic [31:0] command_reg; + logic [31:0] abstractcs_reg; // bits implemted are [12] - busy and [10:8]= command error + logic [31:0] haltsum0_reg; + logic [31:0] data0_reg; + logic [31:0] data1_reg; + + // data 0 + logic [31:0] data0_din; + logic data0_reg_wren, data0_reg_wren0, data0_reg_wren1; + // data 1 + logic [31:0] data1_din; + logic data1_reg_wren, data1_reg_wren0; + // abstractcs + logic abstractcs_busy_wren; + logic abstractcs_busy_din; + logic [2:0] abstractcs_error_din; + logic abstractcs_error_sel0, abstractcs_error_sel1, abstractcs_error_sel2, abstractcs_error_sel3, abstractcs_error_sel4, abstractcs_error_sel5; + logic abstractcs_error_selor; + // dmstatus + logic dmstatus_resumeack_wren; + logic dmstatus_resumeack_din; + logic dmstatus_havereset_wren; + logic dmstatus_havereset_rst; + logic dmstatus_resumeack; + logic dmstatus_halted; + logic dmstatus_havereset; + + // dmcontrol + logic dmcontrol_wren, dmcontrol_wren_Q; + // command + logic command_wren; + logic [31:0] command_din; + // needed to send the read data back for dmi reads + logic [31:0] dmi_reg_rdata_din; + + 
sb_state_t sb_state; + sb_state_t sb_nxtstate; + logic sb_state_en; + + //System bus section + logic sbcs_wren; + logic sbcs_sbbusy_wren; + logic sbcs_sbbusy_din; + logic sbcs_sbbusyerror_wren; + logic sbcs_sbbusyerror_din; + + logic sbcs_sberror_wren; + logic [2:0] sbcs_sberror_din; + logic sbcs_unaligned; + logic sbcs_illegal_size; + + // data + logic sbdata0_reg_wren0; + logic sbdata0_reg_wren1; + logic sbdata0_reg_wren; + logic [31:0] sbdata0_din; + + logic sbdata1_reg_wren0; + logic sbdata1_reg_wren1; + logic sbdata1_reg_wren; + logic [31:0] sbdata1_din; + + logic sbaddress0_reg_wren0; + logic sbaddress0_reg_wren1; + logic sbaddress0_reg_wren; + logic [31:0] sbaddress0_reg_din; + logic [3:0] sbaddress0_incr; + logic sbreadonaddr_access; + logic sbreadondata_access; + logic sbdata0wr_access; + + logic sb_bus_cmd_read, sb_bus_cmd_write_addr, sb_bus_cmd_write_data; + logic sb_bus_rsp_read, sb_bus_rsp_write; + logic sb_bus_rsp_error; + logic [63:0] sb_bus_rdata; + + //registers + logic [31:0] sbcs_reg; + logic [31:0] sbaddress0_reg; + logic [31:0] sbdata0_reg; + logic [31:0] sbdata1_reg; + + logic dbg_dm_rst_l; + + //clken + logic dbg_free_clken; + logic dbg_free_clk; + + logic sb_free_clken; + logic sb_free_clk; + + // clocking + // used for the abstract commands. 
+ assign dbg_free_clken = dmi_reg_en | (dbg_state != IDLE) | dbg_state_en | dec_tlu_dbg_halted | clk_override; + + // used for the system bus + assign sb_free_clken = dmi_reg_en | sb_state_en | (sb_state != SBIDLE) | clk_override; + + rvoclkhdr dbg_free_cgc (.en(dbg_free_clken), .l1clk(dbg_free_clk), .*); + rvoclkhdr sb_free_cgc (.en(sb_free_clken), .l1clk(sb_free_clk), .*); + + // end clocking section + + // Reset logic + assign dbg_dm_rst_l = rst_l & (dmcontrol_reg[0] | scan_mode); + assign dbg_core_rst_l = ~dmcontrol_reg[1]; + + // system bus register + // sbcs[31:29], sbcs - [22]:sbbusyerror, [21]: sbbusy, [20]:sbreadonaddr, [19:17]:sbaccess, [16]:sbautoincrement, [15]:sbreadondata, [14:12]:sberror, sbsize=32, 128=0, 64/32/16/8 are legal + assign sbcs_reg[31:29] = 3'b1; + assign sbcs_reg[28:23] = '0; + assign sbcs_reg[11:5] = 7'h20; + assign sbcs_reg[4:0] = 5'b01111; + assign sbcs_wren = (dmi_reg_addr == 7'h38) & dmi_reg_en & dmi_reg_wr_en & (sb_state == SBIDLE); // & (sbcs_reg[14:12] == 3'b000); + assign sbcs_sbbusyerror_wren = (sbcs_wren & dmi_reg_wdata[22]) | + ((sb_state != SBIDLE) & dmi_reg_en & ((dmi_reg_addr == 7'h39) | (dmi_reg_addr == 7'h3c) | (dmi_reg_addr == 7'h3d))); + assign sbcs_sbbusyerror_din = ~(sbcs_wren & dmi_reg_wdata[22]); // Clear when writing one + + rvdffs #(1) sbcs_sbbusyerror_reg (.din(sbcs_sbbusyerror_din), .dout(sbcs_reg[22]), .en(sbcs_sbbusyerror_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + rvdffs #(1) sbcs_sbbusy_reg (.din(sbcs_sbbusy_din), .dout(sbcs_reg[21]), .en(sbcs_sbbusy_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + rvdffs #(1) sbcs_sbreadonaddr_reg (.din(dmi_reg_wdata[20]), .dout(sbcs_reg[20]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + rvdffs #(5) sbcs_misc_reg (.din(dmi_reg_wdata[19:15]), .dout(sbcs_reg[19:15]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + rvdffs #(3) sbcs_error_reg (.din(sbcs_sberror_din[2:0]), .dout(sbcs_reg[14:12]), .en(sbcs_sberror_wren), .rst_l(dbg_dm_rst_l), 
.clk(sb_free_clk));
+
+   // Access is unaligned when the low address bits are not zero for the selected size.
+   // sbcs_reg[19:17] (sbaccess): 3'b001 = 16 bit, 3'b010 = 32 bit, 3'b011 = 64 bit.
+   assign sbcs_unaligned = ((sbcs_reg[19:17] == 3'b001) & sbaddress0_reg[0]) |
+                           ((sbcs_reg[19:17] == 3'b010) & (|sbaddress0_reg[1:0])) |
+                           ((sbcs_reg[19:17] == 3'b011) & (|sbaddress0_reg[2:0]));
+
+   assign sbcs_illegal_size = sbcs_reg[19]; // Anything bigger than 64 bits is illegal
+
+   // Auto-increment step in bytes for the selected access size: 1, 2, 4 or 8.
+   // The 8-byte step must decode 3'b011 (64 bit), the same encoding used by
+   // sbcs_unaligned above and by sb_axi_wdata/sb_axi_wstrb. 3'b100 is rejected
+   // by sbcs_illegal_size, so decoding it here left 64-bit accesses with a
+   // zero increment.
+   assign sbaddress0_incr[3:0] = ({4{(sbcs_reg[19:17] == 3'b000)}} & 4'b0001) |
+                                 ({4{(sbcs_reg[19:17] == 3'b001)}} & 4'b0010) |
+                                 ({4{(sbcs_reg[19:17] == 3'b010)}} & 4'b0100) |
+                                 ({4{(sbcs_reg[19:17] == 3'b011)}} & 4'b1000);
+
+   // sbdata
+   // wren0: written over DMI; wren1: loaded with read data when the FSM leaves RSP_RD without an error
+   assign sbdata0_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3c); // write data only when single read is 0
+   assign sbdata0_reg_wren1 = (sb_state == RSP_RD) & sb_state_en & ~sbcs_sberror_wren;
+   assign sbdata0_reg_wren = sbdata0_reg_wren0 | sbdata0_reg_wren1;
+
+   assign sbdata1_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3d); // write data only when single read is 0;
+   assign sbdata1_reg_wren1 = (sb_state == RSP_RD) & sb_state_en & ~sbcs_sberror_wren;
+   assign sbdata1_reg_wren = sbdata1_reg_wren0 | sbdata1_reg_wren1;
+
+   // sbdata0 takes the low word of the read data, sbdata1 the high word
+   assign sbdata0_din[31:0] = ({32{sbdata0_reg_wren0}} & dmi_reg_wdata[31:0]) |
+                              ({32{sbdata0_reg_wren1}} & sb_bus_rdata[31:0]);
+   assign sbdata1_din[31:0] = ({32{sbdata1_reg_wren0}} & dmi_reg_wdata[31:0]) |
+                              ({32{sbdata1_reg_wren1}} & sb_bus_rdata[63:32]);
+
+   rvdffe #(32) dbg_sbdata0_reg (.*, .din(sbdata0_din[31:0]), .dout(sbdata0_reg[31:0]), .en(sbdata0_reg_wren), .rst_l(dbg_dm_rst_l));
+   rvdffe #(32) dbg_sbdata1_reg (.*, .din(sbdata1_din[31:0]), .dout(sbdata1_reg[31:0]), .en(sbdata1_reg_wren), .rst_l(dbg_dm_rst_l));
+
+   // sbaddress
+   // wren0: written over DMI; wren1: driven by the system bus FSM to apply the auto-increment
+   assign sbaddress0_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h39);
+   assign sbaddress0_reg_wren = sbaddress0_reg_wren0 | sbaddress0_reg_wren1;
+   assign sbaddress0_reg_din[31:0]= ({32{sbaddress0_reg_wren0}} & dmi_reg_wdata[31:0]) |
+                                    ({32{sbaddress0_reg_wren1}} & (sbaddress0_reg[31:0] + {28'b0,sbaddress0_incr[3:0]}));
+   rvdffe
#(32) dbg_sbaddress0_reg (.*, .din(sbaddress0_reg_din[31:0]), .dout(sbaddress0_reg[31:0]), .en(sbaddress0_reg_wren), .rst_l(dbg_dm_rst_l)); + + assign sbreadonaddr_access = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h39) & sbcs_reg[20]; // if readonaddr is set the next command will start upon writing of addr0 + assign sbreadondata_access = dmi_reg_en & ~dmi_reg_wr_en & (dmi_reg_addr == 7'h3c) & sbcs_reg[15]; // if readondata is set the next command will start upon reading of data0 + assign sbdata0wr_access = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3c); // write to sbdata0 will start write command to system bus + + // memory mapped registers + // dmcontrol register has only 6 bits implemented. 31: haltreq, 30: resumereq, 29: haltreset, 28: ackhavereset, 1: ndmreset, 0: dmactive. + // rest all the bits are zeroed out + // dmactive flop is reset based on core rst_l, all other flops use dm_rst_l + assign dmcontrol_wren = (dmi_reg_addr == 7'h10) & dmi_reg_en & dmi_reg_wr_en; + assign dmcontrol_reg[27:2] = '0; + rvdffs #(5) dmcontrolff (.din({dmi_reg_wdata[31:28],dmi_reg_wdata[1]}), .dout({dmcontrol_reg[31:28], dmcontrol_reg[1]}), .en(dmcontrol_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + rvdffs #(1) dmcontrol_dmactive_ff (.din(dmi_reg_wdata[0]), .dout(dmcontrol_reg[0]), .en(dmcontrol_wren), .rst_l(rst_l), .clk(dbg_free_clk)); + rvdff #(1) dmcontrol_wrenff(.din(dmcontrol_wren), .dout(dmcontrol_wren_Q), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + + // dmstatus register bits that are implemented + // [19:18]-havereset,[17:16]-resume ack, [9:8]-halted, [3:0]-version + // rest all the bits are zeroed out + assign dmstatus_reg[31:20] = '0; + assign dmstatus_reg[19:18] = {2{dmstatus_havereset}}; + assign dmstatus_reg[15:10] = '0; + assign dmstatus_reg[7] = '1; + assign dmstatus_reg[6:4] = '0; + assign dmstatus_reg[17:16] = {2{dmstatus_resumeack}}; + assign dmstatus_reg[9:8] = {2{dmstatus_halted}}; + assign dmstatus_reg[3:0] = 4'h2; + + assign 
dmstatus_resumeack_wren = ((dbg_state == RESUMING) & dec_tlu_resume_ack) | (dmstatus_resumeack & ~dmcontrol_reg[30]); + assign dmstatus_resumeack_din = (dbg_state == RESUMING) & dec_tlu_resume_ack; + + assign dmstatus_havereset_wren = (dmi_reg_addr == 7'h10) & dmi_reg_wdata[1] & dmi_reg_en & dmi_reg_wr_en; + assign dmstatus_havereset_rst = (dmi_reg_addr == 7'h10) & dmi_reg_wdata[28] & dmi_reg_en & dmi_reg_wr_en; + + rvdffs #(1) dmstatus_resumeack_reg (.din(dmstatus_resumeack_din), .dout(dmstatus_resumeack), .en(dmstatus_resumeack_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + rvdff #(1) dmstatus_halted_reg (.din(dec_tlu_dbg_halted & ~dec_tlu_mpc_halted_only), .dout(dmstatus_halted), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + rvdffsc #(1) dmstatus_havereset_reg (.din(1'b1), .dout(dmstatus_havereset), .en(dmstatus_havereset_wren), .clear(dmstatus_havereset_rst), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + + // haltsum0 register + assign haltsum0_reg[31:1] = '0; + assign haltsum0_reg[0] = dmstatus_halted; + + // abstractcs register + // bits implemted are [12] - busy and [10:8]= command error + assign abstractcs_reg[31:13] = '0; + assign abstractcs_reg[11] = '0; + assign abstractcs_reg[7:4] = '0; + assign abstractcs_reg[3:0] = 4'h2; // One data register + assign abstractcs_error_sel0 = abstractcs_reg[12] & dmi_reg_en & ((dmi_reg_wr_en & ( (dmi_reg_addr == 7'h16) | (dmi_reg_addr == 7'h17))) | (dmi_reg_addr == 7'h4)); + assign abstractcs_error_sel1 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h17) & ~((dmi_reg_wdata[31:24] == 8'b0) | (dmi_reg_wdata[31:24] == 8'h2)); + assign abstractcs_error_sel2 = core_dbg_cmd_done & core_dbg_cmd_fail; + assign abstractcs_error_sel3 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h17) & ~dmstatus_reg[9]; //(dbg_state != HALTED); + assign abstractcs_error_sel4 = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en & (dmi_reg_wdata[31:24] == 8'h2) & + ( ((dmi_reg_wdata[22:20] == 3'b001) & data1_reg[0]) | + 
((dmi_reg_wdata[22:20] == 3'b010) & (|data1_reg[1:0])) | + dmi_reg_wdata[22] | (dmi_reg_wdata[22:20] == 3'b011) + ); + + assign abstractcs_error_sel5 = (dmi_reg_addr == 7'h16) & dmi_reg_en & dmi_reg_wr_en; + + assign abstractcs_error_selor = abstractcs_error_sel0 | abstractcs_error_sel1 | abstractcs_error_sel2 | abstractcs_error_sel3 | abstractcs_error_sel4 | abstractcs_error_sel5; + + assign abstractcs_error_din[2:0] = ({3{abstractcs_error_sel0}} & 3'b001) | // writing command or abstractcs while a command was executing. Or accessing data0 + ({3{abstractcs_error_sel1}} & 3'b010) | // writing a non-zero command to cmd field of command + ({3{abstractcs_error_sel2}} & 3'b011) | // exception while running command + ({3{abstractcs_error_sel3}} & 3'b100) | // writing a comnand when not in the halted state + ({3{abstractcs_error_sel4}} & 3'b111) | // unaligned abstract memory command + ({3{abstractcs_error_sel5}} & ~dmi_reg_wdata[10:8] & abstractcs_reg[10:8]) | // W1C + ({3{~abstractcs_error_selor}} & abstractcs_reg[10:8]); // hold + + rvdffs #(1) dmabstractcs_busy_reg (.din(abstractcs_busy_din), .dout(abstractcs_reg[12]), .en(abstractcs_busy_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + rvdff #(3) dmabstractcs_error_reg (.din(abstractcs_error_din[2:0]), .dout(abstractcs_reg[10:8]), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + + + // command register - implemented all the bits in this register + // command[16] = 1: write, 0: read + // Size - 2, Bits Not implemented: 23 (aamvirtual), 19-autoincrement, 18-postexec, 17-transfer + assign command_wren = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en & (dbg_state == HALTED); + assign command_din[31:0] = {dmi_reg_wdata[31:24],1'b0,3'b010,3'b0,dmi_reg_wdata[16:0]}; + rvdffe #(32) dmcommand_reg (.*, .din(command_din[31:0]), .dout(command_reg[31:0]), .en(command_wren), .rst_l(dbg_dm_rst_l)); + + // data0 reg + assign data0_reg_wren0 = (dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h4) & (dbg_state == HALTED)); + 
assign data0_reg_wren1 = core_dbg_cmd_done & (dbg_state == CMD_WAIT) & ~command_reg[16]; + assign data0_reg_wren = data0_reg_wren0 | data0_reg_wren1; + + assign data0_din[31:0] = ({32{data0_reg_wren0}} & dmi_reg_wdata[31:0]) | + ({32{data0_reg_wren1}} & core_dbg_rddata[31:0]); + + rvdffe #(32) dbg_data0_reg (.*, .din(data0_din[31:0]), .dout(data0_reg[31:0]), .en(data0_reg_wren), .rst_l(dbg_dm_rst_l)); + + // data 1 + assign data1_reg_wren0 = (dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h5) & (dbg_state == HALTED)); + assign data1_reg_wren = data1_reg_wren0; + + assign data1_din[31:0] = ({32{data1_reg_wren0}} & dmi_reg_wdata[31:0]); + + rvdffe #(32) dbg_data1_reg (.*, .din(data1_din[31:0]), .dout(data1_reg[31:0]), .en(data1_reg_wren), .rst_l(dbg_dm_rst_l)); + + + // FSM to control the debug mode entry, command send/recieve, and Resume flow. + always_comb begin + dbg_nxtstate = IDLE; + dbg_state_en = 1'b0; + abstractcs_busy_wren = 1'b0; + abstractcs_busy_din = 1'b0; + dbg_halt_req = dmcontrol_wren_Q & dmcontrol_reg[31]; // single pulse output to the core. Need to drive every time this register is written since core might be halted due to MPC + dbg_resume_req = 1'b0; // single pulse output to the core + + case (dbg_state) + IDLE: begin + dbg_nxtstate = (dmstatus_reg[9] | dec_tlu_mpc_halted_only) ? HALTED : HALTING; // initiate the halt command to the core + dbg_state_en = ((dmcontrol_reg[31] & ~dec_tlu_debug_mode) | dmstatus_reg[9] | dec_tlu_mpc_halted_only) & ~dmcontrol_reg[1]; // when the jtag writes the halt bit in the DM register, OR when the status indicates H + dbg_halt_req = dmcontrol_reg[31]; // only when jtag has written the halt_req bit in the control. Removed debug mode qualification during MPC changes + end + HALTING : begin + dbg_nxtstate = HALTED; // Goto HALTED once the core sends an ACK + dbg_state_en = dmstatus_reg[9]; // core indicates halted + end + HALTED: begin + // wait for halted to go away before send to resume. 
Else start of new command + dbg_nxtstate = (dmstatus_reg[9] & ~dmcontrol_reg[1]) ? ((dmcontrol_reg[30] & ~dmcontrol_reg[31]) ? RESUMING : CMD_START) : + (dmcontrol_reg[31] ? HALTING : IDLE); // This is MPC halted case + dbg_state_en = (dmstatus_reg[9] & dmcontrol_reg[30] & ~dmcontrol_reg[31] & dmcontrol_wren_Q) | command_wren | dmcontrol_reg[1] | ~(dmstatus_reg[9] | dec_tlu_mpc_halted_only); // need to be exclusive ??? + abstractcs_busy_wren = dbg_state_en & (dbg_nxtstate == CMD_START); // write busy when a new command was written by jtag + abstractcs_busy_din = 1'b1; + dbg_resume_req = dbg_state_en & (dbg_nxtstate == RESUMING); // single cycle pulse to core if resuming + end + CMD_START: begin + dbg_nxtstate = (|abstractcs_reg[10:8]) ? CMD_DONE : CMD_WAIT; // new command sent to the core + dbg_state_en = dbg_cmd_valid | (|abstractcs_reg[10:8]); + end + CMD_WAIT: begin + dbg_nxtstate = CMD_DONE; + dbg_state_en = core_dbg_cmd_done; // go to done state for one cycle after completing current command + end + CMD_DONE: begin + dbg_nxtstate = HALTED; + dbg_state_en = 1'b1; + abstractcs_busy_wren = dbg_state_en; // remove the busy bit from the abstracts ( bit 12 ) + abstractcs_busy_din = 1'b0; + end + RESUMING : begin + dbg_nxtstate = IDLE; + dbg_state_en = dmstatus_reg[17]; // resume ack has been updated in the dmstatus register + end + default : begin + dbg_nxtstate = IDLE; + dbg_state_en = 1'b0; + abstractcs_busy_wren = 1'b0; + abstractcs_busy_din = 1'b0; + dbg_halt_req = 1'b0; // single pulse output to the core + dbg_resume_req = 1'b0; // single pulse output to the core + end + endcase + end // always_comb begin + + assign dmi_reg_rdata_din[31:0] = ({32{dmi_reg_addr == 7'h4}} & data0_reg[31:0]) | + ({32{dmi_reg_addr == 7'h5}} & data1_reg[31:0]) | + ({32{dmi_reg_addr == 7'h10}} & dmcontrol_reg[31:0]) | + ({32{dmi_reg_addr == 7'h11}} & dmstatus_reg[31:0]) | + ({32{dmi_reg_addr == 7'h16}} & abstractcs_reg[31:0]) | + ({32{dmi_reg_addr == 7'h17}} & command_reg[31:0]) | + 
({32{dmi_reg_addr == 7'h40}} & haltsum0_reg[31:0]) | + ({32{dmi_reg_addr == 7'h38}} & sbcs_reg[31:0]) | + ({32{dmi_reg_addr == 7'h39}} & sbaddress0_reg[31:0]) | + ({32{dmi_reg_addr == 7'h3c}} & sbdata0_reg[31:0]) | + ({32{dmi_reg_addr == 7'h3d}} & sbdata1_reg[31:0]); + + + rvdffs #($bits(state_t)) dbg_state_reg (.din(dbg_nxtstate), .dout({dbg_state}), .en(dbg_state_en), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + // Ack will use the power on reset only otherwise there won't be any ack until dmactive is 1 + rvdffs #(32) dmi_rddata_reg (.din(dmi_reg_rdata_din[31:0]), .dout(dmi_reg_rdata[31:0]), .en(dmi_reg_en), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + + // interface for the core + assign dbg_cmd_addr[31:0] = (command_reg[31:24] == 8'h2) ? {data1_reg[31:2],2'b0} : {20'b0, command_reg[11:0]}; // Only word addresses for abstract memory + assign dbg_cmd_wrdata[31:0] = data0_reg[31:0]; + assign dbg_cmd_valid = (dbg_state == CMD_START) & ~(|abstractcs_reg[10:8]) & dma_dbg_ready; + assign dbg_cmd_write = command_reg[16]; + assign dbg_cmd_type[1:0] = (command_reg[31:24] == 8'h2) ? 2'b10 : {1'b0, (command_reg[15:12] == 4'b0)}; + assign dbg_cmd_size[1:0] = command_reg[21:20]; + + // Ask DMA to stop taking bus trxns since debug request is done + assign dbg_dma_bubble = ((dbg_state == CMD_START) & ~(|abstractcs_reg[10:8])) | (dbg_state == CMD_WAIT); + + // system bus FSM + always_comb begin + sb_nxtstate = SBIDLE; + sb_state_en = 1'b0; + sbcs_sbbusy_wren = 1'b0; + sbcs_sbbusy_din = 1'b0; + sbcs_sberror_wren = 1'b0; + sbcs_sberror_din[2:0] = 3'b0; + sbaddress0_reg_wren1 = 1'b0; + case (sb_state) + SBIDLE: begin + sb_nxtstate = sbdata0wr_access ? 
WAIT_WR : WAIT_RD; + sb_state_en = sbdata0wr_access | sbreadondata_access | sbreadonaddr_access; + sbcs_sbbusy_wren = sb_state_en; // set the single read bit if it is a singlread command + sbcs_sbbusy_din = 1'b1; + sbcs_sberror_wren = sbcs_wren & (|dmi_reg_wdata[14:12]); // write to clear the error bits + sbcs_sberror_din[2:0] = ~dmi_reg_wdata[14:12] & sbcs_reg[14:12]; + end + WAIT_RD: begin + sb_nxtstate = (sbcs_unaligned | sbcs_illegal_size) ? DONE : CMD_RD; + sb_state_en = dbg_bus_clk_en | sbcs_unaligned | sbcs_illegal_size; + sbcs_sberror_wren = sbcs_unaligned | sbcs_illegal_size; + sbcs_sberror_din[2:0] = sbcs_unaligned ? 3'b011 : 3'b100; + end + WAIT_WR: begin + sb_nxtstate = (sbcs_unaligned | sbcs_illegal_size) ? DONE : CMD_WR; + sb_state_en = dbg_bus_clk_en | sbcs_unaligned | sbcs_illegal_size; + sbcs_sberror_wren = sbcs_unaligned | sbcs_illegal_size; + sbcs_sberror_din[2:0] = sbcs_unaligned ? 3'b011 : 3'b100; + end + CMD_RD : begin + sb_nxtstate = RSP_RD; + sb_state_en = sb_bus_cmd_read & dbg_bus_clk_en; + end + CMD_WR : begin + sb_nxtstate = (sb_bus_cmd_write_addr & sb_bus_cmd_write_data) ? RSP_WR : (sb_bus_cmd_write_data ? 
CMD_WR_ADDR : CMD_WR_DATA); + sb_state_en = (sb_bus_cmd_write_addr | sb_bus_cmd_write_data) & dbg_bus_clk_en; + end + CMD_WR_ADDR : begin + sb_nxtstate = RSP_WR; + sb_state_en = sb_bus_cmd_write_addr & dbg_bus_clk_en; + end + CMD_WR_DATA : begin + sb_nxtstate = RSP_WR; + sb_state_en = sb_bus_cmd_write_data & dbg_bus_clk_en; + end + RSP_RD: begin + sb_nxtstate = DONE; + sb_state_en = sb_bus_rsp_read & dbg_bus_clk_en; + sbcs_sberror_wren = sb_state_en & sb_bus_rsp_error; + sbcs_sberror_din[2:0] = 3'b010; + end + RSP_WR: begin + sb_nxtstate = DONE; + sb_state_en = sb_bus_rsp_write & dbg_bus_clk_en; + sbcs_sberror_wren = sb_state_en & sb_bus_rsp_error; + sbcs_sberror_din[2:0] = 3'b010; + end + DONE: begin + sb_nxtstate = SBIDLE; + sb_state_en = 1'b1; + sbcs_sbbusy_wren = 1'b1; // reset the single read + sbcs_sbbusy_din = 1'b0; + sbaddress0_reg_wren1 = sbcs_reg[16]; // auto increment was set. Update to new address after completing the current command + + end + default : begin + sb_nxtstate = SBIDLE; + sb_state_en = 1'b0; + sbcs_sbbusy_wren = 1'b0; + sbcs_sbbusy_din = 1'b0; + sbcs_sberror_wren = 1'b0; + sbcs_sberror_din[2:0] = 3'b0; + sbaddress0_reg_wren1 = 1'b0; + end + endcase + end // always_comb begin + + rvdffs #($bits(sb_state_t)) sb_state_reg (.din(sb_nxtstate), .dout({sb_state}), .en(sb_state_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + + // Generic bus response signals + assign sb_bus_cmd_read = sb_axi_arvalid & sb_axi_arready; + assign sb_bus_cmd_write_addr = sb_axi_awvalid & sb_axi_awready; + assign sb_bus_cmd_write_data = sb_axi_wvalid & sb_axi_wready; + + assign sb_bus_rsp_read = sb_axi_rvalid & sb_axi_rready; + assign sb_bus_rsp_write = sb_axi_bvalid & sb_axi_bready; + assign sb_bus_rsp_error = (sb_bus_rsp_read & (|(sb_axi_rresp[1:0]))) | (sb_bus_rsp_write & (|(sb_axi_bresp[1:0]))); + + // AXI Request signals + assign sb_axi_awvalid = (sb_state == CMD_WR) | (sb_state == CMD_WR_ADDR); + assign sb_axi_awaddr[31:0] = sbaddress0_reg[31:0]; + assign 
sb_axi_awid[pt.SB_BUS_TAG-1:0] = '0; + assign sb_axi_awsize[2:0] = sbcs_reg[19:17]; + assign sb_axi_awprot[2:0] = '0; + assign sb_axi_awcache[3:0] = 4'b1111; + assign sb_axi_awregion[3:0] = sbaddress0_reg[31:28]; + assign sb_axi_awlen[7:0] = '0; + assign sb_axi_awburst[1:0] = 2'b01; + assign sb_axi_awqos[3:0] = '0; + assign sb_axi_awlock = '0; + + assign sb_axi_wvalid = (sb_state == CMD_WR) | (sb_state == CMD_WR_DATA); + assign sb_axi_wdata[63:0] = ({64{(sbcs_reg[19:17] == 3'h0)}} & {8{sbdata0_reg[7:0]}}) | + ({64{(sbcs_reg[19:17] == 3'h1)}} & {4{sbdata0_reg[15:0]}}) | + ({64{(sbcs_reg[19:17] == 3'h2)}} & {2{sbdata0_reg[31:0]}}) | + ({64{(sbcs_reg[19:17] == 3'h3)}} & {sbdata1_reg[31:0],sbdata0_reg[31:0]}); + assign sb_axi_wstrb[7:0] = ({8{(sbcs_reg[19:17] == 3'h0)}} & (8'h1 << sbaddress0_reg[2:0])) | + ({8{(sbcs_reg[19:17] == 3'h1)}} & (8'h3 << {sbaddress0_reg[2:1],1'b0})) | + ({8{(sbcs_reg[19:17] == 3'h2)}} & (8'hf << {sbaddress0_reg[2],2'b0})) | + ({8{(sbcs_reg[19:17] == 3'h3)}} & 8'hff); + assign sb_axi_wlast = '1; + + assign sb_axi_arvalid = (sb_state == CMD_RD); + assign sb_axi_araddr[31:0] = sbaddress0_reg[31:0]; + assign sb_axi_arid[pt.SB_BUS_TAG-1:0] = '0; + assign sb_axi_arsize[2:0] = sbcs_reg[19:17]; + assign sb_axi_arprot[2:0] = '0; + assign sb_axi_arcache[3:0] = 4'b0; + assign sb_axi_arregion[3:0] = sbaddress0_reg[31:28]; + assign sb_axi_arlen[7:0] = '0; + assign sb_axi_arburst[1:0] = 2'b01; + assign sb_axi_arqos[3:0] = '0; + assign sb_axi_arlock = '0; + + // AXI Response signals + assign sb_axi_bready = 1'b1; + + assign sb_axi_rready = 1'b1; + assign sb_bus_rdata[63:0] = ({64{sbcs_reg[19:17] == 3'h0}} & ((sb_axi_rdata[63:0] >> 8*sbaddress0_reg[2:0]) & 64'hff)) | + ({64{sbcs_reg[19:17] == 3'h1}} & ((sb_axi_rdata[63:0] >> 16*sbaddress0_reg[2:1]) & 64'hffff)) | + ({64{sbcs_reg[19:17] == 3'h2}} & ((sb_axi_rdata[63:0] >> 32*sbaddress0_reg[2]) & 64'hffff_ffff)) | + ({64{sbcs_reg[19:17] == 3'h3}} & sb_axi_rdata[63:0]); + +`ifdef ASSERT_ON +// assertion. 
+// when the resume_ack is asserted then the dec_tlu_dbg_halted should be 0 + dm_check_resume_and_halted: assert property (@(posedge clk) disable iff(~rst_l) (~dec_tlu_resume_ack | ~dec_tlu_dbg_halted)); +`endif +endmodule diff --git a/design/dec/cdecode b/design/dec/cdecode new file mode 100644 index 0000000..d98b523 --- /dev/null +++ b/design/dec/cdecode @@ -0,0 +1,254 @@ + +.definition + + + +# invalid rs2=0 +c.add0 = [1001.....1....10] +c.add1 = [1001......1...10] +c.add2 = [1001.......1..10] +c.add3 = [1001........1.10] +c.add4 = [1001.........110] + +# invalid rs2=0 +c.mv0 = [1000.....1....10] +c.mv1 = [1000......1...10] +c.mv2 = [1000.......1..10] +c.mv3 = [1000........1.10] +c.mv4 = [1000.........110] + + +# invalid if rs1=0 +c.jalr0 = [10011....0000010] +c.jalr1 = [1001.1...0000010] +c.jalr2 = [1001..1..0000010] +c.jalr3 = [1001...1.0000010] +c.jalr4 = [1001....10000010] + +c.addi = [000...........01] + +# invalid imm=0 +c.addi16sp0 = [011100010.....01] +c.addi16sp1 = [011.000101....01] +c.addi16sp2 = [011.00010.1...01] +c.addi16sp3 = [011.00010..1..01] +c.addi16sp4 = [011.00010...1.01] +c.addi16sp5 = [011.00010....101] + +# invalid uimm=0 +c.addi4spn0 = [0001..........00] +c.addi4spn1 = [000.1.........00] +c.addi4spn2 = [000..1........00] +c.addi4spn3 = [000...1.......00] +c.addi4spn4 = [000....1......00] +c.addi4spn5 = [000.....1.....00] +c.addi4spn6 = [000......1....00] +c.addi4spn7 = [000.......1...00] + + +c.and = [100011...11...01] +c.andi = [100.10........01] +c.beqz = [110...........01] +c.bnez = [111...........01] +c.ebreak = [1001000000000010] +c.j = [101...........01] +c.jal = [001...........01] + + +c.jr0 = [10001....0000010] +c.jr1 = [1000.1...0000010] +c.jr2 = [1000..1..0000010] +c.jr3 = [1000...1.0000010] +c.jr4 = [1000....10000010] + +c.li = [010...........01] + +# invalid rd=x2 or imm=0 +c.lui0 = [01111.........01] +c.lui1 = [0111.1........01] +c.lui2 = [0111..1.......01] +c.lui3 = [0111...0......01] +c.lui4 = [0111....1.....01] +c.lui5 = 
[011.1....1....01] +c.lui6 = [011..1...1....01] +c.lui7 = [011...1..1....01] +c.lui8 = [011....0.1....01] +c.lui9 = [011.....11....01] +c.lui10= [011.1.....1...01] +c.lui11= [011..1....1...01] +c.lui12 = [011...1...1...01] +c.lui13 = [011....0..1...01] +c.lui14 = [011.....1.1...01] +c.lui15 = [011.1......1..01] +c.lui16 = [011..1.....1..01] +c.lui17 = [011...1....1..01] +c.lui18 = [011....0...1..01] +c.lui19 = [011.....1..1..01] +c.lui20 = [011.1.......1.01] +c.lui21 = [011..1......1.01] +c.lui22 = [011...1.....1.01] +c.lui23 = [011....0....1.01] +c.lui24 = [011.....1...1.01] +c.lui25 = [011.1........101] +c.lui26 = [011..1.......101] +c.lui27 = [011...1......101] +c.lui28 = [011....0.....101] +c.lui29 = [011.....1....101] + + +c.lw = [010...........00] + + +c.lwsp = [010...........10] + +c.or = [100011...10...01] + +# bit 5 of the shift must be 0 to be legal +c.slli = [0000..........10] + +c.srai = [100001........01] + +c.srli = [100000........01] + +c.sub = [100011...00...01] +c.sw = [110...........00] +c.swsp = [110...........10] +c.xor = [100011...01...01] + + +.input +rv32c = { + i[15] + i[14] + i[13] + i[12] + i[11] + i[10] + i[9] + i[8] + i[7] + i[6] + i[5] + i[4] + i[3] + i[2] + i[1] + i[0] +} + +.output +rv32c = { + rdrd + rdrs1 + rs2rs2 + rdprd + rdprs1 + rs2prs2 + rs2prd + uimm9_2 + ulwimm6_2 + ulwspimm7_2 + rdeq2 + rdeq1 + rs1eq2 + sbroffset8_1 + simm9_4 + simm5_0 + sjaloffset11_1 + sluimm17_12 + uimm5_0 + uswimm6_2 + uswspimm7_2 + o[31] + o[30] + o[29] + o[28] + o[27] + o[26] + o[25] + o[24] + o[23] + o[22] + o[21] + o[20] + o[19] + o[18] + o[17] + o[16] + o[15] + o[14] + o[13] + o[12] + o[11] + o[10] + o[9] + o[8] + o[7] + o[6] + o[5] + o[4] + o[3] + o[2] + o[1] + o[0] + } + +# assign rs2d[4:0] = i[6:2]; +# +# assign rdd[4:0] = i[11:7]; +# +# assign rdpd[4:0] = {2'b01, i[9:7]}; +# +# assign rs2pd[4:0] = {2'b01, i[4:2]}; + +.decode + + + + +rv32c[c.add{0-4}] = { rdrd rdrs1 rs2rs2 o[5] o[4] o[1] o[0] } + +rv32c[c.mv{0-4}] = { rdrd rs2rs2 o[5] o[4] o[1] 
o[0] } + +rv32c[c.addi] = { rdrd rdrs1 simm5_0 o[4] o[1] o[0] } + +rv32c[c.addi16sp{0-5}] = { rdeq2 rs1eq2 simm9_4 o[4] o[1] o[0] } +rv32c[c.addi4spn{0-7}] = { rs2prd rs1eq2 uimm9_2 o[4] o[1] o[0] } + + +rv32c[c.and] = { rdprd rdprs1 rs2prs2 o[14] o[13] o[12] o[5] o[4] o[1] o[0] } +rv32c[c.andi] = { rdprd rdprs1 simm5_0 o[14] o[13] o[12] o[4] o[1] o[0] } +rv32c[c.beqz] = { rdprs1 sbroffset8_1 o[6] o[5] o[1] o[0] } +rv32c[c.bnez] = { rdprs1 sbroffset8_1 o[12] o[6] o[5] o[1] o[0] } + + +rv32c[c.ebreak] = { o[20] o[6] o[5] o[4] o[1] o[0] } + +rv32c[c.j] = { sjaloffset11_1 o[6] o[5] o[3] o[2] o[1] o[0] } +rv32c[c.jal] = { sjaloffset11_1 rdeq1 o[6] o[5] o[3] o[2] o[1] o[0] } + + +rv32c[c.jalr{0-4}] = { rdeq1 rdrs1 o[6] o[5] o[2] o[1] o[0] } +rv32c[c.jr{0-4}] = { rdrs1 o[6] o[5] o[2] o[1] o[0] } +rv32c[c.li] = { rdrd simm5_0 o[4] o[1] o[0] } + +rv32c[c.lui{0-29}] = { rdrd sluimm17_12 o[5] o[4] o[2] o[1] o[0] } +rv32c[c.lw] = { rs2prd rdprs1 ulwimm6_2 o[13] o[1] o[0] } +rv32c[c.lwsp] = { rdrd rs1eq2 ulwspimm7_2 o[13] o[1] o[0] } + + +rv32c[c.or] = { rdprd rdprs1 rs2prs2 o[14] o[13] o[5] o[4] o[1] o[0] } + +rv32c[c.slli] = { rdrd rdrs1 uimm5_0 o[12] o[4] o[1] o[0] } +rv32c[c.srai] = { rdprd rdprs1 uimm5_0 o[30] o[14] o[12] o[4] o[1] o[0] } +rv32c[c.srli] = { rdprd rdprs1 uimm5_0 o[14] o[12] o[4] o[1] o[0] } + + +rv32c[c.sub] = { rdprd rdprs1 rs2prs2 o[30] o[5] o[4] o[1] o[0] } +rv32c[c.sw] = { rdprs1 rs2prs2 uswimm6_2 o[13] o[5] o[1] o[0] } +rv32c[c.swsp] = { rs2rs2 rs1eq2 uswspimm7_2 o[13] o[5] o[1] o[0] } +rv32c[c.xor] = { rdprd rdprs1 rs2prs2 o[14] o[5] o[4] o[1] o[0] } + + + +.end \ No newline at end of file diff --git a/design/dec/csrdecode b/design/dec/csrdecode new file mode 100644 index 0000000..5fa2ba2 --- /dev/null +++ b/design/dec/csrdecode @@ -0,0 +1,240 @@ +.definition + +csr_misa = [001100000001] +csr_mvendorid = [111100010001] +csr_marchid = [111100010010] +csr_mimpid = [111100010011] +csr_mhartid = [111100010100] +csr_mstatus = [001100000000] +csr_mtvec = 
[001100000101] +csr_mip = [001101000100] +csr_mie = [001100000100] +csr_mcyclel = [101100000000] +csr_mcycleh = [101110000000] +csr_minstretl = [101100000010] +csr_minstreth = [101110000010] +csr_mscratch = [001101000000] +csr_mepc = [001101000001] +csr_mcause = [001101000010] +csr_mscause = [011111111111] +csr_mtval = [001101000011] +csr_mrac = [011111000000] +csr_dmst = [011111000100] +csr_mdeau = [101111000000] +csr_mdseac = [111111000000] +csr_meivt = [101111001000] +csr_meihap = [111111001000] +csr_meipt = [101111001001] +csr_meicpct = [101111001010] +csr_meicurpl = [101111001100] +csr_meicidpl = [101111001011] +csr_dcsr = [011110110000] +csr_dpc = [011110110001] +csr_dicawics = [011111001000] +csr_dicad0h = [011111001100] +csr_dicad0 = [011111001001] +csr_dicad1 = [011111001010] +csr_dicago = [011111001011] +csr_mtsel = [011110100000] +csr_mtdata1 = [011110100001] +csr_mtdata2 = [011110100010] +csr_mhpmc3 = [101100000011] +csr_mhpmc4 = [101100000100] +csr_mhpmc5 = [101100000101] +csr_mhpmc6 = [101100000110] +csr_mhpmc3h = [101110000011] +csr_mhpmc4h = [101110000100] +csr_mhpmc5h = [101110000101] +csr_mhpmc6h = [101110000110] +csr_mhpme3 = [001100100011] +csr_mhpme4 = [001100100100] +csr_mhpme5 = [001100100101] +csr_mhpme6 = [001100100110] +csr_micect = [011111110000] +csr_miccmect = [011111110001] +csr_mdccmect = [011111110010] +csr_mpmc = [011111000110] +csr_mcgc = [011111111000] +csr_mcpc = [011111000010] +csr_mfdc = [011111111001] +csr_perfva = [101100000111] +csr_perfvb = [101100001...] +csr_perfvc = [10110001....] +csr_perfvd = [101110000111] +csr_perfve = [101110001...] +csr_perfvf = [10111001....] +csr_perfvg = [001100100111] +csr_perfvh = [001100101...] +csr_perfvi = [00110011....] 
+csr_mcountinhibit = [001100100000] +csr_mfdht = [011111001110] +csr_mfdhs = [011111001111] + +.input + +csr = { + dec_csr_rdaddr_d[11] + dec_csr_rdaddr_d[10] + dec_csr_rdaddr_d[9] + dec_csr_rdaddr_d[8] + dec_csr_rdaddr_d[7] + dec_csr_rdaddr_d[6] + dec_csr_rdaddr_d[5] + dec_csr_rdaddr_d[4] + dec_csr_rdaddr_d[3] + dec_csr_rdaddr_d[2] + dec_csr_rdaddr_d[1] + dec_csr_rdaddr_d[0] +} + +.output + +csr = { + csr_misa + csr_mvendorid + csr_marchid + csr_mimpid + csr_mhartid + csr_mstatus + csr_mtvec + csr_mip + csr_mie + csr_mcyclel + csr_mcycleh + csr_minstretl + csr_minstreth + csr_mscratch + csr_mepc + csr_mcause + csr_mscause + csr_mtval + csr_mrac + csr_dmst + csr_mdseac + csr_meihap + csr_meivt + csr_meipt + csr_meicurpl + csr_meicidpl + csr_dcsr + csr_mcgc + csr_mfdc + csr_dpc + csr_mtsel + csr_mtdata1 + csr_mtdata2 + csr_mhpmc3 + csr_mhpmc4 + csr_mhpmc5 + csr_mhpmc6 + csr_mhpmc3h + csr_mhpmc4h + csr_mhpmc5h + csr_mhpmc6h + csr_mhpme3 + csr_mhpme4 + csr_mhpme5 + csr_mhpme6 + csr_mcountinhibit +csr_perfva +csr_perfvb +csr_perfvc +csr_perfvd +csr_perfve +csr_perfvf +csr_perfvg +csr_perfvh +csr_perfvi + csr_mpmc + csr_mcpc + csr_meicpct + csr_mdeau + csr_micect + csr_miccmect + csr_mdccmect +csr_mfdht +csr_mfdhs +csr_dicawics +csr_dicad0h +csr_dicad0 +csr_dicad1 +csr_dicago + valid_only + presync + postsync +} + +.decode + +csr[ csr_misa ] = { csr_misa } +csr[ csr_mvendorid ] = { csr_mvendorid } +csr[ csr_marchid ] = { csr_marchid } +csr[ csr_mimpid ] = { csr_mimpid } +csr[ csr_mhartid ] = { csr_mhartid } +csr[ csr_mstatus ] = { csr_mstatus postsync } +csr[ csr_mtvec ] = { csr_mtvec postsync} +csr[ csr_mip ] = { csr_mip } +csr[ csr_mie ] = { csr_mie } +csr[ csr_mcyclel ] = { csr_mcyclel } +csr[ csr_mcycleh ] = { csr_mcycleh } +csr[ csr_minstretl ] = { csr_minstretl presync } +csr[ csr_minstreth ] = { csr_minstreth presync } +csr[ csr_mscratch ] = { csr_mscratch } +csr[ csr_mepc ] = { csr_mepc postsync} +csr[ csr_mcause ] = { csr_mcause } +csr[ csr_mscause ] = { 
csr_mscause } +csr[ csr_mtval ] = { csr_mtval } +csr[ csr_mrac ] = { csr_mrac postsync } +csr[ csr_dmst ] = { csr_dmst postsync} +csr[ csr_mdseac ] = { csr_mdseac } +csr[ csr_meipt ] = { csr_meipt } +csr[ csr_meihap ] = { csr_meihap } +csr[ csr_meivt ] = { csr_meivt } +csr[ csr_meicurpl ] = { csr_meicurpl } +csr[ csr_mdeau ] = { csr_mdeau } +csr[ csr_meicpct ] = { csr_meicpct } +csr[ csr_mpmc ] = { csr_mpmc } +csr[ csr_mcpc ] = { csr_mcpc presync postsync } +csr[ csr_meicidpl ] = { csr_meicidpl } +csr[ csr_mcgc ] = { csr_mcgc } +csr[ csr_mfdc ] = { csr_mfdc presync postsync } +csr[ csr_dcsr ] = { csr_dcsr } +csr[ csr_dpc ] = { csr_dpc } +csr[ csr_mtsel ] = { csr_mtsel } +csr[ csr_mtdata1 ] = { csr_mtdata1 postsync } +csr[ csr_mtdata2 ] = { csr_mtdata2 postsync } +csr[ csr_mhpmc3 ] = { csr_mhpmc3 presync } +csr[ csr_mhpmc4 ] = { csr_mhpmc4 presync } +csr[ csr_mhpmc5 ] = { csr_mhpmc5 presync } +csr[ csr_mhpmc6 ] = { csr_mhpmc6 presync } +csr[ csr_mhpmc3h ] = { csr_mhpmc3h presync } +csr[ csr_mhpmc4h ] = { csr_mhpmc4h presync } +csr[ csr_mhpmc5h ] = { csr_mhpmc5h presync } +csr[ csr_mhpmc6h ] = { csr_mhpmc6h presync } +csr[ csr_mhpme3 ] = { csr_mhpme3 } +csr[ csr_mhpme4 ] = { csr_mhpme4 } +csr[ csr_mhpme5 ] = { csr_mhpme5 } +csr[ csr_mhpme6 ] = { csr_mhpme6 } +csr[ csr_micect ] = { csr_micect } +csr[ csr_miccmect ] = { csr_miccmect } +csr[ csr_mdccmect ] = { csr_mdccmect } +csr[ csr_dicawics ] = { csr_dicawics } +csr[ csr_dicad0h ] = { csr_dicad0h } +csr[ csr_dicad0 ] = { csr_dicad0 } +csr[ csr_dicad1 ] = { csr_dicad1 } +csr[ csr_dicago ] = { csr_dicago } +csr[ csr_mfdht ] = { csr_mfdht } +csr[ csr_mfdhs ] = { csr_mfdhs } +csr[ csr_mcountinhibit] = { csr_mcountinhibit presync postsync } + +csr[ csr_perfva ] = { valid_only } +csr[ csr_perfvb ] = { valid_only } +csr[ csr_perfvc ] = { valid_only } +csr[ csr_perfvd ] = { valid_only } +csr[ csr_perfve ] = { valid_only } +csr[ csr_perfvf ] = { valid_only } +csr[ csr_perfvg ] = { valid_only } +csr[ csr_perfvh ] = { 
valid_only } +csr[ csr_perfvi ] = { valid_only } + +.end diff --git a/design/dec/decode b/design/dec/decode new file mode 100644 index 0000000..d4435e0 --- /dev/null +++ b/design/dec/decode @@ -0,0 +1,323 @@ + +.definition + +add = [0000000..........000.....0110011] +addi = [.................000.....0010011] + +sub = [0100000..........000.....0110011] + +and = [0000000..........111.....0110011] +andi = [.................111.....0010011] + +or = [0000000..........110.....0110011] +ori = [.................110.....0010011] + +xor = [0000000..........100.....0110011] +xori = [.................100.....0010011] + +sll = [0000000..........001.....0110011] +slli = [0000000..........001.....0010011] + +sra = [0100000..........101.....0110011] +srai = [0100000..........101.....0010011] + +srl = [0000000..........101.....0110011] +srli = [0000000..........101.....0010011] + +lui = [.........................0110111] +auipc = [.........................0010111] + +slt = [0000000..........010.....0110011] +sltu = [0000000..........011.....0110011] +slti = [.................010.....0010011] +sltiu = [.................011.....0010011] + +beq = [.................000.....1100011] +bne = [.................001.....1100011] +bge = [.................101.....1100011] +blt = [.................100.....1100011] +bgeu = [.................111.....1100011] +bltu = [.................110.....1100011] + +jal = [.........................1101111] +jalr = [.................000.....1100111] + +lb = [.................000.....0000011] +lh = [.................001.....0000011] +lw = [.................010.....0000011] + +sb = [.................000.....0100011] +sh = [.................001.....0100011] +sw = [.................010.....0100011] + +lbu = [.................100.....0000011] +lhu = [.................101.....0000011] + +fence = [0000........00000000000000001111] +fence.i = [00000000000000000001000000001111] + +ebreak = [00000000000100000000000001110011] +ecall = [00000000000000000000000001110011] + 
+mret = [00110000001000000000000001110011] + +wfi = [00010000010100000000000001110011] + +csrrc_ro = [............00000011.....1110011] +csrrc_rw0 = [............1....011.....1110011] +csrrc_rw1 = [.............1...011.....1110011] +csrrc_rw2 = [..............1..011.....1110011] +csrrc_rw3 = [...............1.011.....1110011] +csrrc_rw4 = [................1011.....1110011] + +csrrci_ro = [............00000111.....1110011] +csrrci_rw0 = [............1....111.....1110011] +csrrci_rw1 = [.............1...111.....1110011] +csrrci_rw2 = [..............1..111.....1110011] +csrrci_rw3 = [...............1.111.....1110011] +csrrci_rw4 = [................1111.....1110011] + +csrrs_ro = [............00000010.....1110011] +csrrs_rw0 = [............1....010.....1110011] +csrrs_rw1 = [.............1...010.....1110011] +csrrs_rw2 = [..............1..010.....1110011] +csrrs_rw3 = [...............1.010.....1110011] +csrrs_rw4 = [................1010.....1110011] + +csrrsi_ro = [............00000110.....1110011] +csrrsi_rw0 = [............1....110.....1110011] +csrrsi_rw1 = [.............1...110.....1110011] +csrrsi_rw2 = [..............1..110.....1110011] +csrrsi_rw3 = [...............1.110.....1110011] +csrrsi_rw4 = [................1110.....1110011] + + +csrw = [.................001000001110011] +csrrw0 = [.................001....11110011] +csrrw1 = [.................001...1.1110011] +csrrw2 = [.................001..1..1110011] +csrrw3 = [.................001.1...1110011] +csrrw4 = [.................0011....1110011] + +csrwi = [.................101000001110011] +csrrwi0 = [.................101....11110011] +csrrwi1 = [.................101...1.1110011] +csrrwi2 = [.................101..1..1110011] +csrrwi3 = [.................101.1...1110011] +csrrwi4 = [.................1011....1110011] + +mul = [0000001..........000.....0110011] +mulh = [0000001..........001.....0110011] +mulhsu = [0000001..........010.....0110011] +mulhu = [0000001..........011.....0110011] + +div = 
[0000001..........100.....0110011] +divu = [0000001..........101.....0110011] +rem = [0000001..........110.....0110011] +remu = [0000001..........111.....0110011] + + +.input + +rv32i = { + i[31] + i[30] + i[29] + i[28] + i[27] + i[26] + i[25] + i[24] + i[23] + i[22] + i[21] + i[20] + i[19] + i[18] + i[17] + i[16] + i[15] + i[14] + i[13] + i[12] + i[11] + i[10] + i[9] + i[8] + i[7] + i[6] + i[5] + i[4] + i[3] + i[2] + i[1] + i[0] +} + + +.output + +rv32i = { + alu + rs1 + rs2 + imm12 + rd + shimm5 + imm20 + pc + load + store + lsu + add + sub + land + lor + lxor + sll + sra + srl + slt + unsign + condbr + beq + bne + bge + blt + jal + by + half + word + csr_read + csr_clr + csr_set + csr_write + csr_imm + presync + postsync + ebreak + ecall + mret + mul + rs1_sign + rs2_sign + low + div + rem + fence + fence_i + pm_alu +} + +.decode + +rv32i[mul] = { mul rs1 rs2 rd low } +rv32i[mulh] = { mul rs1 rs2 rd rs1_sign rs2_sign } +rv32i[mulhu] = { mul rs1 rs2 rd } +rv32i[mulhsu] = { mul rs1 rs2 rd rs1_sign } + +rv32i[div] = { div rs1 rs2 rd } +rv32i[divu] = { div rs1 rs2 rd unsign } +rv32i[rem] = { div rs1 rs2 rd rem} +rv32i[remu] = { div rs1 rs2 rd unsign rem} + +rv32i[add] = { alu rs1 rs2 rd add pm_alu } +rv32i[addi] = { alu rs1 imm12 rd add pm_alu } + +rv32i[sub] = { alu rs1 rs2 rd sub pm_alu } + +rv32i[and] = { alu rs1 rs2 rd land pm_alu } +rv32i[andi] = { alu rs1 imm12 rd land pm_alu } + +rv32i[or] = { alu rs1 rs2 rd lor pm_alu } +rv32i[ori] = { alu rs1 imm12 rd lor pm_alu } + +rv32i[xor] = { alu rs1 rs2 rd lxor pm_alu } +rv32i[xori] = { alu rs1 imm12 rd lxor pm_alu } + +rv32i[sll] = { alu rs1 rs2 rd sll pm_alu } +rv32i[slli] = { alu rs1 shimm5 rd sll pm_alu } + +rv32i[sra] = { alu rs1 rs2 rd sra pm_alu } +rv32i[srai] = { alu rs1 shimm5 rd sra pm_alu } + +rv32i[srl] = { alu rs1 rs2 rd srl pm_alu } +rv32i[srli] = { alu rs1 shimm5 rd srl pm_alu } + +rv32i[lui] = { alu imm20 rd lor pm_alu } +rv32i[auipc] = { alu imm20 pc rd add pm_alu } + + +rv32i[slt] = { alu rs1 rs2 rd 
sub slt pm_alu } +rv32i[sltu] = { alu rs1 rs2 rd sub slt unsign pm_alu } +rv32i[slti] = { alu rs1 imm12 rd sub slt pm_alu } +rv32i[sltiu] = { alu rs1 imm12 rd sub slt unsign pm_alu } + +rv32i[beq] = { alu rs1 rs2 sub condbr beq } +rv32i[bne] = { alu rs1 rs2 sub condbr bne } +rv32i[bge] = { alu rs1 rs2 sub condbr bge } +rv32i[blt] = { alu rs1 rs2 sub condbr blt } +rv32i[bgeu] = { alu rs1 rs2 sub condbr bge unsign } +rv32i[bltu] = { alu rs1 rs2 sub condbr blt unsign } + +rv32i[jal] = { alu imm20 rd pc jal } +rv32i[jalr] = { alu rs1 rd imm12 jal } + + + +rv32i[lb] = { lsu load rs1 rd by } +rv32i[lh] = { lsu load rs1 rd half } +rv32i[lw] = { lsu load rs1 rd word } +rv32i[lbu] = { lsu load rs1 rd by unsign } +rv32i[lhu] = { lsu load rs1 rd half unsign } + +rv32i[sb] = { lsu store rs1 rs2 by } +rv32i[sh] = { lsu store rs1 rs2 half } +rv32i[sw] = { lsu store rs1 rs2 word } + + +rv32i[fence] = { alu lor fence presync} + +# fence.i has fence effect in addition to flush I$ and redirect +rv32i[fence.i] = { alu lor fence fence_i presync postsync} + +# nops for now + +rv32i[ebreak] = { alu rs1 imm12 rd lor ebreak postsync} +rv32i[ecall] = { alu rs1 imm12 rd lor ecall postsync} +rv32i[mret] = { alu rs1 imm12 rd lor mret postsync} + +rv32i[wfi] = { alu rs1 imm12 rd lor pm_alu } + +# csr means read + +# csr_read - put csr on rs2 and rs1 0's +rv32i[csrrc_ro] = { alu rd csr_read } + +# put csr on rs2 and make rs1 0's into alu.
Save rs1 for csr_clr later +rv32i[csrrc_rw{0-4}] = { alu rd csr_read rs1 csr_clr presync postsync } + +rv32i[csrrci_ro] = { alu rd csr_read } + +rv32i[csrrci_rw{0-4}] = { alu rd csr_read rs1 csr_clr csr_imm presync postsync } + +rv32i[csrrs_ro] = { alu rd csr_read } + +rv32i[csrrs_rw{0-4}] = { alu rd csr_read rs1 csr_set presync postsync } + +rv32i[csrrsi_ro] = { alu rd csr_read } + +rv32i[csrrsi_rw{0-4}] = { alu rd csr_read rs1 csr_set csr_imm presync postsync } + +rv32i[csrrw{0-4}] = { alu rd csr_read rs1 csr_write presync postsync } + + +rv32i[csrrwi{0-4}] = { alu rd csr_read rs1 csr_write csr_imm presync postsync } + +# optimize csr write only - pipelined +rv32i[csrw] = { alu rd rs1 csr_write } + +rv32i[csrwi] = { alu rd csr_write csr_imm } + + +.end diff --git a/design/dec/el2_dec.sv b/design/dec/el2_dec.sv new file mode 100644 index 0000000..2b50c55 --- /dev/null +++ b/design/dec/el2_dec.sv @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// dec: decode unit - decode, bypassing, ARF, interrupts +// +//******************************************************************************** +// $Id$ +// +// +// Function: Decode +// Comments: Decode, dependency scoreboard, ARF +// +// +// A -> D -> EX1 ... 
WB +// +//******************************************************************************** + +module el2_dec +import el2_pkg::*; +#( +`include "el2_param.vh" + ) + ( + input logic clk, + input logic free_clk, + input logic active_clk, + + input logic lsu_fastint_stall_any, // needed by lsu for 2nd pass of dma with ecc correction, stall next cycle + +// fast interrupt + output logic dec_extint_stall, + + output logic dec_i0_decode_d, + output logic dec_pause_state_cg, // to top for active state clock gating + + input logic rst_l, // reset, active low + input logic [31:1] rst_vec, // reset vector, from core pins + + input logic nmi_int, // NMI pin + input logic [31:1] nmi_vec, // NMI vector, from pins + + input logic i_cpu_halt_req, // Asynchronous Halt request to CPU + input logic i_cpu_run_req, // Asynchronous Restart request to CPU + + output logic o_cpu_halt_status, // Halt status of core (pmu/fw) + output logic o_cpu_halt_ack, // Halt request ack + output logic o_cpu_run_ack, // Run request ack + output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. 
When core is in debug mode, the PMU should refrain from sending a halt or run request + + input logic [31:4] core_id, // CORE ID + // + // external MPC halt/run interface + input logic mpc_debug_halt_req, // Async halt request + input logic mpc_debug_run_req, // Async run request + input logic mpc_reset_run_req, // Run/halt after reset + output logic mpc_debug_halt_ack, // Halt ack + output logic mpc_debug_run_ack, // Run ack + output logic debug_brkpt_status, // debug breakpoint + + input logic exu_pmu_i0_br_misp, // slot 0 branch misp + input logic exu_pmu_i0_br_ataken, // slot 0 branch actual taken + input logic exu_pmu_i0_pc4, // slot 0 4 byte branch + + + input logic lsu_nonblock_load_valid_m, // valid nonblock load at m + input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // -> corresponding tag + input logic lsu_nonblock_load_inv_r, // invalidate request for nonblock load r + input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // -> corresponding tag + input logic lsu_nonblock_load_data_valid, // valid nonblock load data back + input logic lsu_nonblock_load_data_error, // nonblock load bus error + input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // -> corresponding tag + input logic [31:0] lsu_nonblock_load_data, // nonblock load data + + input logic lsu_pmu_bus_trxn, // D side bus transaction + input logic lsu_pmu_bus_misaligned, // D side bus misaligned + input logic lsu_pmu_bus_error, // D side bus error + input logic lsu_pmu_bus_busy, // D side bus busy + input logic lsu_pmu_misaligned_m, // D side load or store misaligned + input logic lsu_pmu_load_external_m, // D side bus load + input logic lsu_pmu_store_external_m, // D side bus store + input logic dma_pmu_dccm_read, // DMA DCCM read + input logic dma_pmu_dccm_write, // DMA DCCM write + input logic dma_pmu_any_read, // DMA read + input logic dma_pmu_any_write, // DMA write + + input logic [31:1] lsu_fir_addr, // Fast int address + input logic
[1:0] lsu_fir_error, // Fast int lookup error + + input logic ifu_pmu_instr_aligned, // aligned instructions + input logic ifu_pmu_fetch_stall, // fetch unit stalled + input logic ifu_pmu_ic_miss, // icache miss + input logic ifu_pmu_ic_hit, // icache hit + input logic ifu_pmu_bus_error, // Instruction side bus error + input logic ifu_pmu_bus_busy, // Instruction side bus busy + input logic ifu_pmu_bus_trxn, // Instruction side bus transaction + + input logic ifu_ic_error_start, // IC single bit error + input logic ifu_iccm_rd_ecc_single_err, // ICCM single bit error + + input logic [3:0] lsu_trigger_match_m, + input logic dbg_cmd_valid, // debugger abstract command valid + input logic dbg_cmd_write, // command is a write + input logic [1:0] dbg_cmd_type, // command type + input logic [31:0] dbg_cmd_addr, // command address + input logic [1:0] dbg_cmd_wrdata, // command write data, for fence/fence_i + + + input logic ifu_i0_icaf, // icache access fault + input logic [1:0] ifu_i0_icaf_type, + + input logic ifu_i0_icaf_f1, // i0 has access fault on second fetch group + input logic ifu_i0_dbecc, // icache/iccm double-bit error + + input logic lsu_idle_any, // lsu idle for halting + + input el2_br_pkt_t i0_brp, // branch packet + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index + input logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR + input logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag + + input el2_lsu_error_pkt_t lsu_error_pkt_r, // LSU exception/error packet + input logic lsu_single_ecc_error_incr, // LSU inc SB error counter + + input logic lsu_imprecise_error_load_any, // LSU imprecise load bus error + input logic lsu_imprecise_error_store_any, // LSU imprecise store bus error + input logic [31:0] lsu_imprecise_error_addr_any, // LSU imprecise bus error address + + input logic [31:0] exu_div_result, // final div result + input logic exu_div_wren, // Divide write enable to GPR + + input logic [31:0] exu_csr_rs1_x, // rs1 for 
csr instruction + + input logic [31:0] lsu_result_m, // load result + input logic [31:0] lsu_result_corr_r, // load result - corrected load data + + input logic lsu_load_stall_any, // This is for blocking loads + input logic lsu_store_stall_any, // This is for blocking stores + input logic dma_dccm_stall_any, // stall any load/store at decode, pmu event + input logic dma_iccm_stall_any, // iccm stalled, pmu event + + input logic iccm_dma_sb_error, // ICCM DMA single bit error + + input logic exu_flush_final, // slot0 flush + + input logic [31:1] exu_npc_r, // next PC + + input logic [31:0] exu_i0_result_x, // alu result x + + + input logic ifu_i0_valid, // fetch valids to instruction buffer + input logic [31:0] ifu_i0_instr, // fetch inst's to instruction buffer + input logic [31:1] ifu_i0_pc, // pc's for instruction buffer + input logic ifu_i0_pc4, // indication of 4B or 2B for corresponding inst + input logic [31:1] exu_i0_pc_x, // pc's for e1 from the alu's + + input logic mexintpend, // External interrupt pending + input logic timer_int, // Timer interrupt pending (from pin) + input logic soft_int, // Software interrupt pending (from pin) + + input logic [7:0] pic_claimid, // PIC claimid + input logic [3:0] pic_pl, // PIC priv level + input logic mhwakeup, // High priority wakeup + + output logic [3:0] dec_tlu_meicurpl, // to PIC, Current priv level + output logic [3:0] dec_tlu_meipt, // to PIC + + input logic [70:0] ifu_ic_debug_rd_data, // diagnostic icache read data + input logic ifu_ic_debug_rd_data_valid, // diagnostic icache read data valid + output el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics + + +// Debug start + input logic dbg_halt_req, // DM requests a halt + input logic dbg_resume_req, // DM requests a resume + input logic ifu_miss_state_idle, // I-side miss buffer empty + + output logic dec_tlu_dbg_halted, // Core is halted and ready for debug command + output logic 
dec_tlu_debug_mode, // Core is in debug mode + output logic dec_tlu_resume_ack, // Resume acknowledge + output logic dec_tlu_flush_noredir_r, // Tell fetch to idle on this flush + output logic dec_tlu_mpc_halted_only, // Core is halted only due to MPC + output logic dec_tlu_flush_leak_one_r, // single step + output logic dec_tlu_flush_err_r, // iside perr/ecc rfpc + output logic [31:2] dec_tlu_meihap, // Fast ext int base + + output logic dec_debug_wdata_rs1_d, // insert debug write data into rs1 at decode + + output logic [31:0] dec_dbg_rddata, // debug command read data + + output logic dec_dbg_cmd_done, // abstract command is done + output logic dec_dbg_cmd_fail, // abstract command failed (illegal reg address) + + output el2_trigger_pkt_t [3:0] trigger_pkt_any, // info needed by debug trigger blocks + + output logic dec_tlu_force_halt, // halt has been forced +// Debug end + // branch info from pipe0 for errors or counter updates + input logic [1:0] exu_i0_br_hist_r, // history + input logic exu_i0_br_error_r, // error + input logic exu_i0_br_start_error_r, // start error + input logic exu_i0_br_valid_r, // valid + input logic exu_i0_br_mp_r, // mispredict + input logic exu_i0_br_middle_r, // middle of bank + + input logic exu_i0_br_way_r, // way hit or repl + + output logic dec_i0_rs1_en_d, // Qualify GPR RS1 data + output logic dec_i0_rs2_en_d, // Qualify GPR RS2 data + output logic [31:0] gpr_i0_rs1_d, // gpr rs1 data + output logic [31:0] gpr_i0_rs2_d, // gpr rs2 data + + output logic [31:0] dec_i0_immed_d, // immediate data + output logic [12:1] dec_i0_br_immed_d, // br immediate data + + output el2_alu_pkt_t i0_ap, // alu packet + + output logic dec_i0_alu_decode_d, // schedule on D-stage alu + + output logic dec_i0_select_pc_d, // select pc onto rs1 for jal's + + output logic [31:1] dec_i0_pc_d, // pc's at decode + output logic [1:0] dec_i0_rs1_bypass_en_d, // rs1 bypass enable + output logic [1:0] dec_i0_rs2_bypass_en_d, // rs2 bypass enable + + output 
logic [31:0] dec_i0_rs1_bypass_data_d, // rs1 bypass data + output logic [31:0] dec_i0_rs2_bypass_data_d, // rs2 bypass data + + output el2_lsu_pkt_t lsu_p, // lsu packet + output el2_mul_pkt_t mul_p, // mul packet + output el2_div_pkt_t div_p, // div packet + output logic dec_div_cancel, // cancel divide operation + + output logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses + + output logic dec_csr_ren_d, // csr read enable + + + output logic dec_tlu_flush_lower_r, // tlu flush due to late mp, exception, rfpc, or int + output logic [31:1] dec_tlu_flush_path_r, // tlu flush target + output logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state + output logic dec_tlu_fence_i_r, // flush is a fence_i rfnpc, flush icache + + output logic [31:1] pred_correct_npc_x, // npc if prediction is correct at e2 stage + + output el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot 0 branch predictor update packet + + output logic dec_tlu_perfcnt0, // toggles when slot0 perf counter 0 has an event inc + output logic dec_tlu_perfcnt1, // toggles when slot0 perf counter 1 has an event inc + output logic dec_tlu_perfcnt2, // toggles when slot0 perf counter 2 has an event inc + output logic dec_tlu_perfcnt3, // toggles when slot0 perf counter 3 has an event inc + + output el2_predict_pkt_t dec_i0_predict_p_d, // prediction packet to alus + output logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // DEC predict fghr + output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // DEC predict index + output logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // DEC predict branch tag + + output logic dec_lsu_valid_raw_d, + + output logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control + + output logic [1:0] dec_data_en, // clock-gate control logic + output logic [1:0] dec_ctl_en, + + input logic [15:0] ifu_i0_cinst, // 16b compressed instruction + + output el2_trace_pkt_t rv_trace_pkt, // trace packet + + // feature disable from 
mfdc + output logic dec_tlu_external_ldfwd_disable, // disable external load forwarding + output logic dec_tlu_sideeffect_posted_disable, // disable posted stores to side-effect address + output logic dec_tlu_core_ecc_disable, // disable core ECC + output logic dec_tlu_bpred_disable, // disable branch prediction + output logic dec_tlu_wb_coalescing_disable, // disable writebuffer coalescing + output logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:16] + + // clock gating overrides from mcgc + output logic dec_tlu_misc_clk_override, // override misc clock domain gating + output logic dec_tlu_ifu_clk_override, // override fetch clock domain gating + output logic dec_tlu_lsu_clk_override, // override load/store clock domain gating + output logic dec_tlu_bus_clk_override, // override bus clock domain gating + output logic dec_tlu_pic_clk_override, // override PIC clock domain gating + output logic dec_tlu_dccm_clk_override, // override DCCM clock domain gating + output logic dec_tlu_icm_clk_override, // override ICCM clock domain gating + + output logic dec_tlu_i0_commit_cmt, // committed i0 instruction + input logic scan_mode + + ); + + + logic dec_tlu_dec_clk_override; // to and from dec blocks + logic clk_override; + + logic dec_ib0_valid_d; + + logic dec_pmu_instr_decoded; + logic dec_pmu_decode_stall; + logic dec_pmu_presync_stall; + logic dec_pmu_postsync_stall; + + logic dec_tlu_wr_pause_r; // CSR write to pause reg is at R. 
+ + logic [4:0] dec_i0_rs1_d; + logic [4:0] dec_i0_rs2_d; + + logic [31:0] dec_i0_instr_d; + + logic dec_tlu_pipelining_disable; + + + logic [4:0] dec_i0_waddr_r; + logic dec_i0_wen_r; + logic [31:0] dec_i0_wdata_r; + logic dec_csr_wen_r; // csr write enable at wb + logic [11:0] dec_csr_wraddr_r; // write address for csr + logic [31:0] dec_csr_wrdata_r; // csr write data at wb + + logic [11:0] dec_csr_rdaddr_d; // read address for csr + logic [31:0] dec_csr_rddata_d; // csr read data at wb + logic dec_csr_legal_d; // csr indicates legal operation + + logic dec_csr_wen_unq_d; // valid csr with write - for csr legal + logic dec_csr_any_unq_d; // valid csr - for csr legal + logic dec_csr_stall_int_ff; // csr is mie/mstatus + + el2_trap_pkt_t dec_tlu_packet_r; + + logic dec_i0_pc4_d; + logic dec_tlu_presync_d; + logic dec_tlu_postsync_d; + logic dec_tlu_debug_stall; + + logic [31:0] dec_illegal_inst; + + logic dec_i0_icaf_d; + + logic dec_i0_dbecc_d; + logic dec_i0_icaf_f1_d; + logic [3:0] dec_i0_trigger_match_d; + logic dec_debug_fence_d; + logic dec_nonblock_load_wen; + logic [4:0] dec_nonblock_load_waddr; + logic dec_tlu_flush_pause_r; + el2_br_pkt_t dec_i0_brp; + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index; + logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr; + logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag; + + logic [31:1] dec_tlu_i0_pc_r; + logic dec_tlu_i0_kill_writeb_wb; + logic dec_tlu_flush_lower_wb; + logic dec_tlu_i0_valid_r; + + logic dec_pause_state; + + logic [1:0] dec_i0_icaf_type_d; // i0 instruction access fault type + + logic dec_tlu_flush_extint; // Fast ext int started + + logic [31:0] dec_i0_inst_wb1; + logic [31:1] dec_i0_pc_wb1; + logic dec_tlu_i0_valid_wb1, dec_tlu_int_valid_wb1; + logic [4:0] dec_tlu_exc_cause_wb1; + logic [31:0] dec_tlu_mtval_wb1; + logic dec_tlu_i0_exc_valid_wb1; + + logic [4:0] div_waddr_wb; + + logic dec_div_active; // non-block divide is active + + + + assign clk_override = dec_tlu_dec_clk_override; + + + assign
dec_dbg_rddata[31:0] = dec_i0_wdata_r[31:0]; + + + el2_dec_ib_ctl #(.pt(pt)) instbuff (.*); + + + el2_dec_decode_ctl #(.pt(pt)) decode (.*); + + + el2_dec_tlu_ctl #(.pt(pt)) tlu (.*); + + + el2_dec_gpr_ctl #(.pt(pt)) arf (.*, + // inputs + .raddr0(dec_i0_rs1_d[4:0]), + .raddr1(dec_i0_rs2_d[4:0]), + + .wen0(dec_i0_wen_r), .waddr0(dec_i0_waddr_r[4:0]), .wd0(dec_i0_wdata_r[31:0]), + .wen1(dec_nonblock_load_wen), .waddr1(dec_nonblock_load_waddr[4:0]), .wd1(lsu_nonblock_load_data[31:0]), + .wen2(exu_div_wren), .waddr2(div_waddr_wb), .wd2(exu_div_result[31:0]), + + // outputs + .rd0(gpr_i0_rs1_d[31:0]), .rd1(gpr_i0_rs2_d[31:0]) + ); + + +// Trigger + + el2_dec_trigger #(.pt(pt)) dec_trigger (.*); + + + + +// trace + assign rv_trace_pkt.rv_i_insn_ip = dec_i0_inst_wb1[31:0]; + assign rv_trace_pkt.rv_i_address_ip = { dec_i0_pc_wb1[31:1], 1'b0}; + assign rv_trace_pkt.rv_i_valid_ip = {dec_tlu_int_valid_wb1, // always int + dec_tlu_i0_valid_wb1 | dec_tlu_i0_exc_valid_wb1}; + assign rv_trace_pkt.rv_i_exception_ip = {dec_tlu_int_valid_wb1, dec_tlu_i0_exc_valid_wb1}; + assign rv_trace_pkt.rv_i_ecause_ip = dec_tlu_exc_cause_wb1[4:0]; // replicate across ports + assign rv_trace_pkt.rv_i_interrupt_ip = {dec_tlu_int_valid_wb1,2'b0}; + assign rv_trace_pkt.rv_i_tval_ip = dec_tlu_mtval_wb1[31:0]; // replicate across ports +// end trace + + +endmodule // el2_dec + diff --git a/design/dec/el2_dec_decode_ctl.sv b/design/dec/el2_dec_decode_ctl.sv new file mode 100644 index 0000000..2a30241 --- /dev/null +++ b/design/dec/el2_dec_decode_ctl.sv @@ -0,0 +1,1544 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +module el2_dec_decode_ctl +import el2_pkg::*; +#( +`include "el2_param.vh" + ) + ( + + input logic dec_tlu_flush_extint, + + input logic dec_tlu_force_halt, // invalidate nonblock load cam on a force halt event + + output logic dec_extint_stall, + + input logic [15:0] ifu_i0_cinst, // 16b compressed instruction + output logic [31:0] dec_i0_inst_wb1, // 32b instruction at wb+1 for trace encoder + output logic [31:1] dec_i0_pc_wb1, // 31b pc at wb+1 for trace encoder + + + input logic lsu_nonblock_load_valid_m, // valid nonblock load at m + input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // -> corresponding tag + input logic lsu_nonblock_load_inv_r, // invalidate request for nonblock load r + input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // -> corresponding tag + input logic lsu_nonblock_load_data_valid, // valid nonblock load data back + input logic lsu_nonblock_load_data_error, // nonblock load bus error + input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // -> corresponding tag + + input logic [31:0] lsu_nonblock_load_data, // nonblock load data + + + input logic [3:0] dec_i0_trigger_match_d, // i0 decode trigger matches + + input logic dec_tlu_wr_pause_r, // pause instruction at r + input logic dec_tlu_pipelining_disable, // pipeline disable - presync, i0 decode only + + input logic [3:0] lsu_trigger_match_m, // lsu trigger matches + + input logic lsu_pmu_misaligned_m, // perf mon: load/store misalign + input logic dec_tlu_debug_stall, // debug stall decode + input logic 
dec_tlu_flush_leak_one_r, // leak1 instruction + + input logic dec_debug_fence_d, // debug fence instruction + + input logic [1:0] dbg_cmd_wrdata, // disambiguate fence, fence_i + + input logic dec_i0_icaf_d, // icache access fault + input logic dec_i0_icaf_f1_d, // i0 instruction access fault at decode for f1 fetch group + input logic [1:0] dec_i0_icaf_type_d, // i0 instruction access fault type + + input logic dec_i0_dbecc_d, // icache/iccm double-bit error + + input el2_br_pkt_t dec_i0_brp, // branch packet + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index, // i0 branch index + input logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr, // BP FGHR + input logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag, // BP tag + + input logic [31:1] dec_i0_pc_d, // pc + + input logic lsu_idle_any, // lsu idle: if fence instr & ~lsu_idle then stall decode + + input logic lsu_load_stall_any, // stall any load at decode + input logic lsu_store_stall_any, // stall any store at decode + input logic dma_dccm_stall_any, // stall any load/store at decode + + input logic exu_div_wren, // nonblocking divide write enable to GPR. 
+ + input logic dec_tlu_i0_kill_writeb_wb, // I0 is flushed, don't writeback any results to arch state + input logic dec_tlu_flush_lower_wb, // trap lower flush + input logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state + input logic dec_tlu_flush_lower_r, // trap lower flush + input logic dec_tlu_flush_pause_r, // don't clear pause state on initial lower flush + input logic dec_tlu_presync_d, // CSR read needs to be presync'd + input logic dec_tlu_postsync_d, // CSR ops that need to be postsync'd + + input logic dec_i0_pc4_d, // inst is 4B inst else 2B + + input logic [31:0] dec_csr_rddata_d, // csr read data at wb + input logic dec_csr_legal_d, // csr indicates legal operation + + input logic [31:0] exu_csr_rs1_x, // rs1 for csr instr + + input logic [31:0] lsu_result_m, // load result + input logic [31:0] lsu_result_corr_r, // load result - corrected data for writing gpr's, not for bypassing + + input logic exu_flush_final, // lower flush or i0 flush at X or D + + input logic [31:1] exu_i0_pc_x, // pcs at e1 + + input logic [31:0] dec_i0_instr_d, // inst at decode + + input logic dec_ib0_valid_d, // inst valid at decode + + input logic [31:0] exu_i0_result_x, // from primary alu's + + input logic clk, // for rvdffe's + input logic free_clk, + input logic active_clk, // clk except for halt / pause + + input logic clk_override, // test stuff + input logic rst_l, + + + + output logic dec_i0_rs1_en_d, // rs1 enable at decode + output logic dec_i0_rs2_en_d, + + output logic [4:0] dec_i0_rs1_d, // rs1 logical source + output logic [4:0] dec_i0_rs2_d, + + output logic [31:0] dec_i0_immed_d, // 32b immediate data decode + + + output logic [12:1] dec_i0_br_immed_d, // 12b branch immediate + + output el2_alu_pkt_t i0_ap, // alu packets + + output logic dec_i0_decode_d, // i0 decode + + output logic dec_i0_alu_decode_d, // decode to D-stage alu + + output logic [31:0] dec_i0_rs1_bypass_data_d, // i0 rs1 bypass data + output logic 
[31:0] dec_i0_rs2_bypass_data_d, // i0 rs2 bypass data + + + output logic [4:0] dec_i0_waddr_r, // i0 logical source to write to gpr's + output logic dec_i0_wen_r, // i0 write enable + output logic [31:0] dec_i0_wdata_r, // i0 write data + + output logic dec_i0_select_pc_d, // i0 select pc for rs1 - branches + + output logic [1:0] dec_i0_rs1_bypass_en_d, // i0 rs1 bypass enable + output logic [1:0] dec_i0_rs2_bypass_en_d, // i0 rs2 bypass enable + + output el2_lsu_pkt_t lsu_p, // load/store packet + + output el2_mul_pkt_t mul_p, // multiply packet + + output el2_div_pkt_t div_p, // divide packet + output logic [4:0] div_waddr_wb, // DIV write address to GPR + output logic dec_div_cancel, // cancel the divide operation + + output logic dec_lsu_valid_raw_d, + output logic [11:0] dec_lsu_offset_d, + + output logic dec_csr_ren_d, // valid csr decode + output logic dec_csr_wen_unq_d, // valid csr with write - for csr legal + output logic dec_csr_any_unq_d, // valid csr - for csr legal + output logic [11:0] dec_csr_rdaddr_d, // read address for csr + output logic dec_csr_wen_r, // csr write enable at r + output logic [11:0] dec_csr_wraddr_r, // write address for csr + output logic [31:0] dec_csr_wrdata_r, // csr write data at r + output logic dec_csr_stall_int_ff, // csr is mie/mstatus + + output dec_tlu_i0_valid_r, // i0 valid inst at c + + output el2_trap_pkt_t dec_tlu_packet_r, // trap packet + + output logic [31:1] dec_tlu_i0_pc_r, // i0 trap pc + + output logic [31:0] dec_illegal_inst, // illegal inst + output logic [31:1] pred_correct_npc_x, // npc e2 if the prediction is correct + + output el2_predict_pkt_t dec_i0_predict_p_d, // i0 predict packet decode + output logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // i0 predict fghr + output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // i0 predict index + output logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // i0_predict branch tag + + output logic [1:0] dec_data_en, // clock-gating logic + output 
logic [1:0] dec_ctl_en, + + output logic dec_pmu_instr_decoded, // number of instructions decode this cycle encoded + output logic dec_pmu_decode_stall, // decode is stalled + output logic dec_pmu_presync_stall, // decode has presync stall + output logic dec_pmu_postsync_stall, // decode has postsync stall + + output logic dec_nonblock_load_wen, // write enable for nonblock load + output logic [4:0] dec_nonblock_load_waddr, // logical write addr for nonblock load + output logic dec_pause_state, // core in pause state + output logic dec_pause_state_cg, // pause state for clock-gating + + output logic dec_div_active, // non-block divide is active + + input logic scan_mode + ); + + + + + el2_dec_pkt_t i0_dp_raw, i0_dp; + + logic [31:0] i0; + logic i0_valid_d; + + logic [31:0] i0_result_r; + + logic [2:0] i0_rs1bypass, i0_rs2bypass; + + logic i0_jalimm20; + logic i0_uiimm20; + + logic lsu_decode_d; + logic [31:0] i0_immed_d; + logic i0_presync; + logic i0_postsync; + + logic postsync_stall; + logic ps_stall; + + logic prior_inflight, prior_inflight_wb; + + logic csr_clr_d, csr_set_d, csr_write_d; + + logic csr_clr_x,csr_set_x,csr_write_x,csr_imm_x; + logic [31:0] csr_mask_x; + logic [31:0] write_csr_data_x; + logic [31:0] write_csr_data_in; + logic [31:0] write_csr_data; + logic csr_data_wen; + + logic [4:0] csrimm_x; + + logic [31:0] csr_rddata_x; + + logic mul_decode_d; + logic div_decode_d; + logic div_e1_to_r; + logic div_flush; + logic div_active_in; + logic div_active; + logic i0_nonblock_div_stall; + logic i0_div_prior_div_stall; + logic nonblock_div_cancel; + + logic i0_legal; + logic shift_illegal; + logic illegal_inst_en; + logic illegal_lockout_in, illegal_lockout; + logic i0_legal_decode_d; + logic i0_exulegal_decode_d, i0_exudecode_d, i0_exublock_d; + + logic [12:1] last_br_immed_d; + logic i0_rs1_depend_i0_x, i0_rs1_depend_i0_r; + logic i0_rs2_depend_i0_x, i0_rs2_depend_i0_r; + + logic i0_div_decode_d; + logic i0_load_block_d; + logic [1:0] 
i0_rs1_depth_d, i0_rs2_depth_d; + + logic i0_load_stall_d; + logic i0_store_stall_d; + + logic i0_predict_nt, i0_predict_t; + + logic i0_notbr_error, i0_br_toffset_error; + logic i0_ret_error; + logic i0_br_error; + logic i0_br_error_all; + logic [11:0] i0_br_offset; + + logic [20:1] i0_pcall_imm; // predicted jal's + logic i0_pcall_12b_offset; + logic i0_pcall_raw; + logic i0_pcall_case; + logic i0_pcall; + + logic i0_pja_raw; + logic i0_pja_case; + logic i0_pja; + + logic i0_pret_case; + logic i0_pret_raw, i0_pret; + + logic i0_jal; // jal's that are not predicted + + + logic i0_predict_br; + + logic store_data_bypass_d, store_data_bypass_m; + + el2_class_pkt_t i0_rs1_class_d, i0_rs2_class_d; + + el2_class_pkt_t i0_d_c, i0_x_c, i0_r_c; + + + logic i0_ap_pc2, i0_ap_pc4; + + logic i0_rd_en_d; + + logic load_ldst_bypass_d; + + logic leak1_i0_stall_in, leak1_i0_stall; + logic leak1_i1_stall_in, leak1_i1_stall; + logic leak1_mode; + + logic i0_csr_write_only_d; + + logic prior_inflight_x, prior_inflight_eff; + logic any_csr_d; + + logic prior_csr_write; + + logic [3:0] i0_pipe_en; + logic i0_r_ctl_en, i0_x_ctl_en, i0_wb_ctl_en; + logic i0_x_data_en, i0_r_data_en, i0_wb_data_en, i0_wb1_data_en; + + logic debug_fence_i; + logic debug_fence; + + logic i0_csr_write; + logic presync_stall; + + logic i0_instr_error; + logic i0_icaf_d; + + logic clear_pause; + logic pause_state_in, pause_state; + logic pause_stall; + + logic i0_brp_valid; + logic nonblock_load_cancel; + logic lsu_idle; + logic lsu_pmu_misaligned_r; + logic csr_ren_qual_d; + logic csr_read_x; + logic i0_block_d; + logic i0_block_raw_d; // This is used to create the raw valid + logic ps_stall_in; + logic [31:0] i0_result_x; + + el2_dest_pkt_t d_d, x_d, r_d, wbd; + el2_dest_pkt_t x_d_in, r_d_in; + + el2_trap_pkt_t d_t, x_t, x_t_in, r_t_in, r_t; + + logic [3:0] lsu_trigger_match_r; + + logic [31:1] dec_i0_pc_r; + + logic csr_read, csr_write; + logic i0_br_unpred; + + logic nonblock_load_valid_m_delay; + logic
i0_wen_r; + + logic tlu_wr_pause_r1; + logic tlu_wr_pause_r2; + + logic flush_final_r; + + logic data_gate_en; + logic data_gate_clk; + + + localparam NBLOAD_SIZE = pt.LSU_NUM_NBLOAD; + localparam NBLOAD_SIZE_MSB = int'(pt.LSU_NUM_NBLOAD)-1; + localparam NBLOAD_TAG_MSB = pt.LSU_NUM_NBLOAD_WIDTH-1; + + + logic cam_write, cam_inv_reset, cam_data_reset; + logic [NBLOAD_TAG_MSB:0] cam_write_tag, cam_inv_reset_tag, cam_data_reset_tag; + logic [NBLOAD_SIZE_MSB:0] cam_wen; + + logic [NBLOAD_TAG_MSB:0] load_data_tag; + logic [NBLOAD_SIZE_MSB:0] nonblock_load_write; + + el2_load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam; + el2_load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam_in; + el2_load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam_raw; + + logic [4:0] nonblock_load_rd; + logic i0_nonblock_load_stall; + logic i0_nonblock_boundary_stall; + + logic i0_rs1_nonblock_load_bypass_en_d, i0_rs2_nonblock_load_bypass_en_d; + + logic i0_load_kill_wen_r; + + logic found; + + logic [NBLOAD_SIZE_MSB:0] cam_inv_reset_val, cam_data_reset_val; + + logic debug_fence_raw; + + logic [31:0] i0_result_r_raw; + logic [31:0] i0_result_corr_r; + + logic [12:1] last_br_immed_x; + + logic [24:7] div_inst; + logic [31:0] i0_inst_d; + logic [31:0] i0_inst_x; + logic [31:0] i0_inst_r; + logic [31:0] i0_inst_wb_in; + logic [31:0] i0_inst_wb; + + logic [31:1] i0_pc_wb; + + logic i0_wb_en; + logic i0_wb1_en; + + el2_inst_pkt_t i0_itype; + el2_reg_pkt_t i0r; + + + + + // Start - Data gating {{ + + + assign data_gate_en = (dec_tlu_wr_pause_r ^ tlu_wr_pause_r1 ) | // replaces free_clk + (tlu_wr_pause_r1 ^ tlu_wr_pause_r2 ) | // replaces free_clk + (dec_tlu_flush_extint ^ dec_extint_stall ) | + (leak1_i1_stall_in ^ leak1_i1_stall ) | // replaces free_clk + (leak1_i0_stall_in ^ leak1_i0_stall ) | // replaces free_clk + (pause_state_in ^ pause_state ) | // replaces free_clk + (ps_stall_in ^ ps_stall ) | // replaces free_clk + (exu_flush_final ^ flush_final_r ) | // replaces free_clk + (illegal_lockout_in ^ illegal_lockout ); // replaces 
active_clk + + rvclkhdr data_gated_cgc (.*, .en(data_gate_en), .l1clk(data_gate_clk)); + + // End - Data gating }} + + + +// branch prediction + + + // in leak1_mode, ignore any predictions for i0, treat branch as if we haven't seen it before + // in leak1 mode, also ignore branch errors for i0 + assign i0_brp_valid = dec_i0_brp.valid & ~leak1_mode; + + assign dec_i0_predict_p_d.misp = '0; + assign dec_i0_predict_p_d.ataken = '0; + assign dec_i0_predict_p_d.boffset = '0; + + assign dec_i0_predict_p_d.pcall = i0_pcall; // don't mark as pcall if branch error + assign dec_i0_predict_p_d.pja = i0_pja; + assign dec_i0_predict_p_d.pret = i0_pret; + assign dec_i0_predict_p_d.prett[31:1] = dec_i0_brp.prett[31:1]; + assign dec_i0_predict_p_d.pc4 = dec_i0_pc4_d; + assign dec_i0_predict_p_d.hist[1:0] = dec_i0_brp.hist[1:0]; + assign dec_i0_predict_p_d.valid = i0_brp_valid & i0_legal_decode_d; + assign i0_notbr_error = i0_brp_valid & ~(i0_dp_raw.condbr | i0_pcall_raw | i0_pja_raw | i0_pret_raw); + + // no toffset error for a pret + assign i0_br_toffset_error = i0_brp_valid & dec_i0_brp.hist[1] & (dec_i0_brp.toffset[11:0] != i0_br_offset[11:0]) & ~i0_pret_raw; + assign i0_ret_error = i0_brp_valid & dec_i0_brp.ret & ~i0_pret_raw; + assign i0_br_error = dec_i0_brp.br_error | i0_notbr_error | i0_br_toffset_error | i0_ret_error; + assign dec_i0_predict_p_d.br_error = i0_br_error & i0_legal_decode_d & ~leak1_mode; + assign dec_i0_predict_p_d.br_start_error = dec_i0_brp.br_start_error & i0_legal_decode_d & ~leak1_mode; + assign i0_predict_index_d[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_i0_bp_index; + + assign i0_predict_btag_d[pt.BTB_BTAG_SIZE-1:0] = dec_i0_bp_btag[pt.BTB_BTAG_SIZE-1:0]; + assign i0_br_error_all = (i0_br_error | dec_i0_brp.br_start_error) & ~leak1_mode; + assign dec_i0_predict_p_d.toffset[11:0] = i0_br_offset[11:0]; + assign i0_predict_fghr_d[pt.BHT_GHR_SIZE-1:0] = dec_i0_bp_fghr[pt.BHT_GHR_SIZE-1:0]; + assign dec_i0_predict_p_d.way = dec_i0_brp.way; + + // end + + // 
on br error turn anything into a nop + // on i0 instruction fetch access fault turn anything into a nop + // nop => alu rs1 imm12 rd lor + + assign i0_icaf_d = dec_i0_icaf_d | dec_i0_dbecc_d; + + assign i0_instr_error = i0_icaf_d; + + always_comb begin + i0_dp = i0_dp_raw; + if (i0_br_error_all | i0_instr_error) begin + i0_dp = '0; + i0_dp.alu = 1'b1; + i0_dp.rs1 = 1'b1; + i0_dp.rs2 = 1'b1; + i0_dp.lor = 1'b1; + i0_dp.legal = 1'b1; + i0_dp.postsync = 1'b1; + end + end + + assign i0[31:0] = dec_i0_instr_d[31:0]; + + assign dec_i0_select_pc_d = i0_dp.pc; + + // branches that can be predicted + + assign i0_predict_br = i0_dp.condbr | i0_pcall | i0_pja | i0_pret; + + assign i0_predict_nt = ~(dec_i0_brp.hist[1] & i0_brp_valid) & i0_predict_br; + assign i0_predict_t = (dec_i0_brp.hist[1] & i0_brp_valid) & i0_predict_br; + + assign i0_ap.add = i0_dp.add; + assign i0_ap.sub = i0_dp.sub; + assign i0_ap.land = i0_dp.land; + assign i0_ap.lor = i0_dp.lor; + assign i0_ap.lxor = i0_dp.lxor; + assign i0_ap.sll = i0_dp.sll; + assign i0_ap.srl = i0_dp.srl; + assign i0_ap.sra = i0_dp.sra; + assign i0_ap.slt = i0_dp.slt; + assign i0_ap.unsign = i0_dp.unsign; + assign i0_ap.beq = i0_dp.beq; + assign i0_ap.bne = i0_dp.bne; + assign i0_ap.blt = i0_dp.blt; + assign i0_ap.bge = i0_dp.bge; + + assign i0_ap.csr_write = i0_csr_write_only_d; + assign i0_ap.csr_imm = i0_dp.csr_imm; + assign i0_ap.jal = i0_jal; + + assign i0_ap_pc2 = ~dec_i0_pc4_d; + assign i0_ap_pc4 = dec_i0_pc4_d; + + assign i0_ap.predict_nt = i0_predict_nt; + assign i0_ap.predict_t = i0_predict_t; + + +// non block load cam logic + + always_comb begin + found = 0; + cam_wen[NBLOAD_SIZE_MSB:0] = '0; + for (int i=0; i<32'(NBLOAD_SIZE); i++) begin + if (~found) begin + if (~cam[i].valid) begin + cam_wen[i] = cam_write; + found = 1'b1; + end + end + end + end + + + assign cam_write = lsu_nonblock_load_valid_m; + assign cam_write_tag[NBLOAD_TAG_MSB:0] = lsu_nonblock_load_tag_m[NBLOAD_TAG_MSB:0]; + + assign cam_inv_reset = 
lsu_nonblock_load_inv_r; + assign cam_inv_reset_tag[NBLOAD_TAG_MSB:0] = lsu_nonblock_load_inv_tag_r[NBLOAD_TAG_MSB:0]; + + assign cam_data_reset = lsu_nonblock_load_data_valid | lsu_nonblock_load_data_error; + assign cam_data_reset_tag[NBLOAD_TAG_MSB:0] = lsu_nonblock_load_data_tag[NBLOAD_TAG_MSB:0]; + + assign nonblock_load_rd[4:0] = (x_d.i0load) ? x_d.i0rd[4:0] : 5'b0; // rd data + + + // checks + +`ifdef ASSERT_ON + assert_dec_data_valid_data_error_onehot: assert #0 ($onehot0({lsu_nonblock_load_data_valid,lsu_nonblock_load_data_error})); + assert_dec_cam_inv_reset_onehot: assert #0 ($onehot0(cam_inv_reset_val[NBLOAD_SIZE_MSB:0])); + assert_dec_cam_data_reset_onehot: assert #0 ($onehot0(cam_data_reset_val[NBLOAD_SIZE_MSB:0])); +`endif + + + + // case of multiple loads to same dest ie. x1 ... you have to invalidate the older one + + for (genvar i=0; i coredecode.e + +// 2) espresso -Dso -oeqntott coredecode.e | addassign -pre out. > equations + +// to generate the legal (32b instruction is legal) equation below: + +// 1) coredecode -in decode -legal > legal.e + +// 2) espresso -Dso -oeqntott legal.e | addassign -pre out. 
> legal_equation
+
+// el2_dec_dec_ctl: purely combinational instruction decoder. Every field of the decode
+// packet `out` is a sum-of-products of bits of the 32-bit instruction `inst`; the module
+// contains no clock, reset or state.
+//
+// The equations are machine-generated (see the coredecode/espresso recipe in the comment
+// above this module), so change the decode table and regenerate rather than editing the
+// product terms by hand.
+//
+// Every product term of out.legal requires i[1:0] == 2'b11, while no other equation looks
+// at i[1:0] at all: the per-field equations are minimized over a subset of the instruction bits.
+// NOTE(review): presumably the other fields are therefore only meaningful when out.legal is
+// set - confirm against the consumer of `out`.
+module el2_dec_dec_ctl
+import el2_pkg::*;
+  (
+   input logic [31:0] inst,      // instruction word to decode
+
+   output el2_dec_pkt_t out      // decode packet, driven field by field below
+   );
+
+   // short alias for inst; all equations below are written against i[...]
+   logic [31:0] i;
+
+
+   assign i[31:0] = inst[31:0];
+
+
+assign out.alu = (i[2]) | (i[6]) | (!i[25]&i[4]) | (!i[5]&i[4]);
+
+assign out.rs1 = (!i[14]&!i[13]&!i[2]) | (!i[13]&i[11]&!i[2]) | (i[19]&i[13]&!i[2]) | (
+    !i[13]&i[10]&!i[2]) | (i[18]&i[13]&!i[2]) | (!i[13]&i[9]&!i[2]) | (
+    i[17]&i[13]&!i[2]) | (!i[13]&i[8]&!i[2]) | (i[16]&i[13]&!i[2]) | (
+    !i[13]&i[7]&!i[2]) | (i[15]&i[13]&!i[2]) | (!i[4]&!i[3]) | (!i[6]
+    &!i[2]);
+
+assign out.rs2 = (i[5]&!i[4]&!i[2]) | (!i[6]&i[5]&!i[2]);
+
+assign out.imm12 = (!i[4]&!i[3]&i[2]) | (i[13]&!i[5]&i[4]&!i[2]) | (!i[13]&!i[12]
+    &i[6]&i[4]) | (!i[12]&!i[5]&i[4]&!i[2]);
+
+assign out.rd = (!i[5]&!i[2]) | (i[5]&i[2]) | (i[4]);
+
+assign out.shimm5 = (!i[13]&i[12]&!i[5]&i[4]&!i[2]);
+
+assign out.imm20 = (i[5]&i[3]) | (i[4]&i[2]);
+
+assign out.pc = (!i[5]&!i[3]&i[2]) | (i[5]&i[3]);
+
+assign out.load = (!i[5]&!i[4]&!i[2]);
+
+assign out.store = (!i[6]&i[5]&!i[4]);
+
+assign out.lsu = (!i[6]&!i[4]&!i[2]);
+
+assign out.add = (!i[14]&!i[13]&!i[12]&!i[5]&i[4]) | (!i[5]&!i[3]&i[2]) | (!i[30]
+    &!i[25]&!i[14]&!i[13]&!i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.sub = (i[30]&!i[12]&!i[6]&i[5]&i[4]&!i[2]) | (!i[25]&!i[14]&i[13]&!i[6]
+    &i[4]&!i[2]) | (!i[14]&i[13]&!i[5]&i[4]&!i[2]) | (i[6]&!i[4]&!i[2]);
+
+assign out.land = (i[14]&i[13]&i[12]&!i[5]&!i[2]) | (!i[25]&i[14]&i[13]&i[12]&!i[6]
+    &!i[2]);
+
+assign out.lor = (!i[6]&i[3]) | (!i[25]&i[14]&i[13]&!i[12]&!i[6]&!i[2]) | (i[5]&i[4]
+    &i[2]) | (!i[13]&!i[12]&i[6]&i[4]) | (i[14]&i[13]&!i[12]&!i[5]&!i[2]);
+
+assign out.lxor = (!i[25]&i[14]&!i[13]&!i[12]&i[4]&!i[2]) | (i[14]&!i[13]&!i[12]
+    &!i[5]&i[4]&!i[2]);
+
+assign out.sll = (!i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.sra = (i[30]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.srl = (!i[30]&!i[25]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.slt = (!i[25]&!i[14]&i[13]&!i[6]&i[4]&!i[2]) | (!i[14]&i[13]&!i[5]&i[4]
+    &!i[2]);
+
+assign out.unsign = (!i[14]&i[13]&i[12]&!i[5]&!i[2]) | (i[13]&i[6]&!i[4]&!i[2]) | (
+    i[14]&!i[5]&!i[4]) | (!i[25]&!i[14]&i[13]&i[12]&!i[6]&!i[2]) | (
+    i[25]&i[14]&i[12]&!i[6]&i[5]&!i[2]);
+
+assign out.condbr = (i[6]&!i[4]&!i[2]);
+
+assign out.beq = (!i[14]&!i[12]&i[6]&!i[4]&!i[2]);
+
+assign out.bne = (!i[14]&i[12]&i[6]&!i[4]&!i[2]);
+
+assign out.bge = (i[14]&i[12]&i[5]&!i[4]&!i[2]);
+
+assign out.blt = (i[14]&!i[12]&i[5]&!i[4]&!i[2]);
+
+assign out.jal = (i[6]&i[2]);
+
+assign out.by = (!i[13]&!i[12]&!i[6]&!i[4]&!i[2]);
+
+assign out.half = (i[12]&!i[6]&!i[4]&!i[2]);
+
+assign out.word = (i[13]&!i[6]&!i[4]);
+
+assign out.csr_read = (i[13]&i[6]&i[4]) | (i[7]&i[6]&i[4]) | (i[8]&i[6]&i[4]) | (
+    i[9]&i[6]&i[4]) | (i[10]&i[6]&i[4]) | (i[11]&i[6]&i[4]);
+
+assign out.csr_clr = (i[15]&i[13]&i[12]&i[6]&i[4]) | (i[16]&i[13]&i[12]&i[6]&i[4]) | (
+    i[17]&i[13]&i[12]&i[6]&i[4]) | (i[18]&i[13]&i[12]&i[6]&i[4]) | (
+    i[19]&i[13]&i[12]&i[6]&i[4]);
+
+assign out.csr_set = (i[15]&!i[12]&i[6]&i[4]) | (i[16]&!i[12]&i[6]&i[4]) | (i[17]
+    &!i[12]&i[6]&i[4]) | (i[18]&!i[12]&i[6]&i[4]) | (i[19]&!i[12]&i[6]
+    &i[4]);
+
+assign out.csr_write = (!i[13]&i[12]&i[6]&i[4]);
+
+assign out.csr_imm = (i[14]&!i[13]&i[6]&i[4]) | (i[15]&i[14]&i[6]&i[4]) | (i[16]
+    &i[14]&i[6]&i[4]) | (i[17]&i[14]&i[6]&i[4]) | (i[18]&i[14]&i[6]&i[4]) | (
+    i[19]&i[14]&i[6]&i[4]);
+
+assign out.presync = (!i[5]&i[3]) | (!i[13]&i[7]&i[6]&i[4]) | (!i[13]&i[8]&i[6]&i[4]) | (
+    !i[13]&i[9]&i[6]&i[4]) | (!i[13]&i[10]&i[6]&i[4]) | (!i[13]&i[11]
+    &i[6]&i[4]) | (i[15]&i[13]&i[6]&i[4]) | (i[16]&i[13]&i[6]&i[4]) | (
+    i[17]&i[13]&i[6]&i[4]) | (i[18]&i[13]&i[6]&i[4]) | (i[19]&i[13]&i[6]
+    &i[4]);
+
+assign out.postsync = (i[12]&!i[5]&i[3]) | (!i[22]&!i[13]&!i[12]&i[6]&i[4]) | (
+    !i[13]&i[7]&i[6]&i[4]) | (!i[13]&i[8]&i[6]&i[4]) | (!i[13]&i[9]&i[6]
+    &i[4]) | (!i[13]&i[10]&i[6]&i[4]) | (!i[13]&i[11]&i[6]&i[4]) | (
+    i[15]&i[13]&i[6]&i[4]) | (i[16]&i[13]&i[6]&i[4]) | (i[17]&i[13]&i[6]
+    &i[4]) | (i[18]&i[13]&i[6]&i[4]) | (i[19]&i[13]&i[6]&i[4]);
+
+assign out.ebreak = (!i[22]&i[20]&!i[13]&!i[12]&i[6]&i[4]);
+
+assign out.ecall = (!i[21]&!i[20]&!i[13]&!i[12]&i[6]&i[4]);
+
+assign out.mret = (i[29]&!i[13]&!i[12]&i[6]&i[4]);
+
+assign out.mul = (i[25]&!i[14]&!i[6]&i[5]&i[4]&!i[2]);
+
+assign out.rs1_sign = (i[25]&!i[14]&i[13]&!i[12]&!i[6]&i[5]&i[4]&!i[2]) | (i[25]
+    &!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.rs2_sign = (i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]);
+
+assign out.low = (i[25]&!i[14]&!i[13]&!i[12]&i[5]&i[4]&!i[2]);
+
+assign out.div = (i[25]&i[14]&!i[6]&i[5]&!i[2]);
+
+assign out.rem = (i[25]&i[14]&i[13]&!i[6]&i[5]&!i[2]);
+
+assign out.fence = (!i[5]&i[3]);
+
+assign out.fence_i = (i[12]&!i[5]&i[3]);
+
+assign out.pm_alu = (i[28]&i[22]&!i[13]&!i[12]&i[4]) | (i[4]&i[2]) | (!i[25]&!i[6]
+    &i[4]) | (!i[5]&i[4]);
+
+
+// Legality check: unlike the fields above, each product term here pins i[1:0] to 2'b11
+// together with the opcode/funct bits of one accepted instruction group.
+
+assign out.legal = (!i[31]&!i[30]&i[29]&i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23]
+    &!i[22]&i[21]&!i[20]&!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[11]
+    &!i[10]&!i[9]&!i[8]&!i[7]&i[6]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | (
+    !i[31]&!i[30]&!i[29]&i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23]&i[22]
+    &!i[21]&i[20]&!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[11]&!i[10]
+    &!i[9]&!i[8]&!i[7]&i[6]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | (!i[31]
+    &!i[30]&!i[29]&!i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23]&!i[22]&!i[21]
+    &!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[11]&!i[10]&!i[9]&!i[8]
+    &!i[7]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | (!i[31]&!i[30]&!i[29]&!i[28]
+    &!i[27]&!i[26]&!i[25]&!i[6]&i[4]&!i[3]&i[1]&i[0]) | (!i[31]&!i[29]
+    &!i[28]&!i[27]&!i[26]&!i[25]&!i[14]&!i[13]&!i[12]&!i[6]&!i[3]&!i[2]
+    &i[1]&i[0]) | (!i[31]&!i[29]&!i[28]&!i[27]&!i[26]&!i[25]&i[14]&!i[13]
+    &i[12]&!i[6]&i[4]&!i[3]&i[1]&i[0]) | (!i[31]&!i[30]&!i[29]&!i[28]
+    &!i[27]&!i[26]&!i[6]&i[5]&i[4]&!i[3]&i[1]&i[0]) | (!i[14]&!i[13]
+    &!i[12]&i[6]&i[5]&!i[4]&!i[3]&i[1]&i[0]) | (i[14]&i[6]&i[5]&!i[4]
+    &!i[3]&!i[2]&i[1]&i[0]) | (!i[12]&!i[6]&!i[5]&i[4]&!i[3]&i[1]&i[0]) | (
+    !i[14]&!i[13]&i[5]&!i[4]&!i[3]&!i[2]&i[1]&i[0]) | (i[12]&i[6]&i[5]
+    &i[4]&!i[3]&!i[2]&i[1]&i[0]) | (!i[31]&!i[30]&!i[29]&!i[28]&!i[27]
+    &!i[26]&!i[25]&!i[24]&!i[23]&!i[22]&!i[21]&!i[20]&!i[19]&!i[18]&!i[17]
+    &!i[16]&!i[15]&!i[14]&!i[13]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[6]
+    &!i[5]&!i[4]&i[3]&i[2]&i[1]&i[0]) | (!i[31]&!i[30]&!i[29]&!i[28]
+    &!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[13]&!i[12]&!i[11]&!i[10]
+    &!i[9]&!i[8]&!i[7]&!i[6]&!i[5]&!i[4]&i[3]&i[2]&i[1]&i[0]) | (i[13]
+    &i[6]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | (!i[13]&!i[6]&!i[5]&!i[4]
+    &!i[3]&!i[2]&i[1]&i[0]) | (i[6]&i[5]&!i[4]&i[3]&i[2]&i[1]&i[0]) | (
+    i[13]&!i[6]&!i[5]&i[4]&!i[3]&i[1]&i[0]) | (!i[14]&!i[12]&!i[6]&!i[4]
+    &!i[3]&!i[2]&i[1]&i[0]) | (!i[6]&i[4]&!i[3]&i[2]&i[1]&i[0]);
+
+
+
+endmodule // el2_dec_dec_ctl
diff --git a/design/dec/el2_dec_gpr_ctl.sv b/design/dec/el2_dec_gpr_ctl.sv
new file mode 100644
index 0000000..a9ac52f
--- /dev/null
+++ b/design/dec/el2_dec_gpr_ctl.sv
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// el2_dec_gpr_ctl: integer register file with 2 read ports and 3 write ports.
+//
+//   Storage : x1..x31, one 32-bit enabled flop bank (rvdffe) per register. x0 has no storage.
+//   Reads   : combinational AND-OR mux over the flop outputs. Address 0 matches no register,
+//             so rd0/rd1 return 32'b0 for x0. Write data is not forwarded to the read ports
+//             inside this module.
+//   Writes  : each port decodes its address into a per-register select (w0v/w1v/w2v). A write
+//             to address 0 selects nothing and is dropped. If several ports select the same
+//             register in one cycle the register is loaded with the OR of their data; the
+//             assertion at the bottom (ASSERT_ON builds only) flags that situation.
+module el2_dec_gpr_ctl
+import el2_pkg::*;
+#(
+   `include "el2_param.vh"
+ ) (
+    input logic [4:0] raddr0, // logical read addresses
+    input logic [4:0] raddr1,
+
+    input logic wen0, // write enable, port 0
+    input logic [4:0] waddr0, // write address
+    input logic [31:0] wd0, // write data
+
+    input logic wen1, // write enable, port 1
+    input logic [4:0] waddr1, // write address
+    input logic [31:0] wd1, // write data
+
+    input logic wen2, // write enable, port 2
+    input logic [4:0] waddr2, // write address
+    input logic [31:0] wd2, // write data
+
+    // NOTE(review): clk, rst_l and scan_mode are not referenced by name in this module;
+    // presumably they reach the rvdffe instances through the .* connection - confirm
+    // against rvdffe's port list.
+    input logic clk,
+    input logic rst_l,
+
+    output logic [31:0] rd0, // read data for raddr0
+    output logic [31:0] rd1, // read data for raddr1
+
+    input logic scan_mode
+);
+
+   logic [31:1] [31:0] gpr_out; // 31 x 32 bit GPRs
+   logic [31:1] [31:0] gpr_in; // next value per register
+   logic [31:1] w0v,w1v,w2v; // per-port, per-register write selects (already qualified by wenN)
+   logic [31:1] gpr_wr_en;
+
+   // GPR Write Enables for power savings: a register's flops are enabled only when some port
+   // selects it
+   assign gpr_wr_en[31:1] = (w0v[31:1] | w1v[31:1] | w2v[31:1]);
+   for ( genvar j=1; j<32; j++ ) begin : gpr
+      rvdffe #(32) gprff (.*, .en(gpr_wr_en[j]), .din(gpr_in[j][31:0]), .dout(gpr_out[j][31:0]));
+   end : gpr
+
+// the read out
+   always_comb begin
+      // defaults; the loops below start at 1, so these are also the results for address 0
+      rd0[31:0] = 32'b0;
+      rd1[31:0] = 32'b0;
+      w0v[31:1] = 31'b0;
+      w1v[31:1] = 31'b0;
+      w2v[31:1] = 31'b0;
+      gpr_in[31:1] = '0;
+
+      // GPR Read logic: at most one j matches each read address
+      for (int j=1; j<32; j++ ) begin
+         rd0[31:0] |= ({32{(raddr0[4:0]== 5'(j))}} & gpr_out[j][31:0]);
+         rd1[31:0] |= ({32{(raddr1[4:0]== 5'(j))}} & gpr_out[j][31:0]);
+      end
+
+      // GPR Write logic: decode each port's address, then AND-OR the selected data
+      for (int j=1; j<32; j++ ) begin
+         w0v[j] = wen0 & (waddr0[4:0]== 5'(j) );
+         w1v[j] = wen1 & (waddr1[4:0]== 5'(j) );
+         w2v[j] = wen2 & (waddr2[4:0]== 5'(j) );
+         gpr_in[j] = ({32{w0v[j]}} & wd0[31:0]) |
+                     ({32{w1v[j]}} & wd1[31:0]) |
+                     ({32{w2v[j]}} & wd2[31:0]);
+      end
+   end // always_comb begin
+
+`ifdef ASSERT_ON
+
+   // Two ports collide when their select vectors share a set bit. The selects are already
+   // qualified by wenN and have no bit for x0, so the bitwise overlap is exactly "two ports
+   // write the same stored register". Comparing the vectors for equality (the previous form)
+   // also fired when two enabled ports both addressed x0, where both vectors are all-zero and
+   // no register is written.
+   logic write_collision_unused;
+   assign write_collision_unused = ( |(w0v[31:1] & w1v[31:1]) ) |
+                                   ( |(w0v[31:1] & w2v[31:1]) ) |
+                                   ( |(w1v[31:1] & w2v[31:1]) );
+
+
+   // asserting that no 2 ports will write to the same gpr simultaneously
+   assert_multiple_wen_to_same_gpr: assert #0 (~( write_collision_unused ) );
+
+`endif
+
+endmodule
diff --git a/design/dec/el2_dec_ib_ctl.sv b/design/dec/el2_dec_ib_ctl.sv
new file mode 100644
index 0000000..9ad8051
--- /dev/null
+++ b/design/dec/el2_dec_ib_ctl.sv
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// el2_dec_ib_ctl: chooses what is presented to decode as instruction i0. Purely
+// combinational.
+//
+//   * Normal operation: the aligner's instruction, pc, pc4 flag, fault flags and branch
+//     prediction data are passed straight through.
+//   * Debug GPR/CSR command (dbg_cmd_type 0 or 1): a synthesized OR / CSRRS / CSRRW instruction
+//     replaces the aligner instruction. dbg_cmd_type 2 is not handled here; dbg_cmd_type 3
+//     still overrides the aligner instruction but selects no template, so an all-zero
+//     instruction word is presented.
+module el2_dec_ib_ctl
+import el2_pkg::*;
+#(
+`include "el2_param.vh"
+ )
+  (
+   input logic dbg_cmd_valid, // debug command valid
+
+   input logic dbg_cmd_write, // debug command is a write (else read)
+   input logic [1:0] dbg_cmd_type, // debug command type
+   input logic [31:0] dbg_cmd_addr, // expand to 31:0
+
+   input el2_br_pkt_t i0_brp, // i0 branch packet from aligner
+   input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
+   input logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
+   input logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
+   input logic ifu_i0_pc4, // i0 is 4B inst else 2B
+   input logic ifu_i0_valid, // i0 valid from ifu
+   input logic ifu_i0_icaf, // i0 instruction access fault
+   input logic [1:0] ifu_i0_icaf_type, // i0 instruction access fault type
+
+   input logic ifu_i0_icaf_f1, // i0 has access fault on second fetch group
+   input logic ifu_i0_dbecc, // i0 double-bit error
+   input logic [31:0] ifu_i0_instr, // i0 instruction from the aligner
+   input logic [31:1] ifu_i0_pc, // i0 pc from the aligner
+
+
+   output logic dec_ib0_valid_d, // ib0 valid
+
+
+   output logic [31:0] dec_i0_instr_d, // i0 inst at decode
+
+   output logic [31:1] dec_i0_pc_d, // i0 pc at decode
+
+   output logic dec_i0_pc4_d, // i0 is 4B inst else 2B
+
+   output el2_br_pkt_t dec_i0_brp, // i0 branch packet at decode
+   output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index, // i0 branch index
+   output logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr, // BP FGHR
+   output logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag, // BP tag
+   output logic dec_i0_icaf_d, // i0 instruction access fault at decode
+   output logic dec_i0_icaf_f1_d, // i0 instruction access fault at decode for f1 fetch group
+   output logic [1:0] dec_i0_icaf_type_d, // i0 instruction access fault type
+   output logic dec_i0_dbecc_d, // i0 double-bit error at decode
+   output logic dec_debug_wdata_rs1_d, // put debug write data onto rs1 source: machine is halted
+
+   output logic dec_debug_fence_d // debug fence inst
+
+   );
+
+   // ---- debug command classification ----
+   logic dbg_active; // debug command handled by this module
+   logic dbg_gpr_sel, dbg_csr_sel; // target is a GPR / a CSR
+   logic dbg_gpr_rd, dbg_gpr_wr;
+   logic dbg_csr_rd, dbg_csr_wr;
+   logic [4:0] dbg_gpr_num;
+   logic [11:0] dbg_csr_num;
+   logic [31:0] dbg_instr; // synthesized instruction for the debug access
+
+   // ---- aligner side-band data: straight pass-through to decode ----
+   assign dec_i0_icaf_f1_d = ifu_i0_icaf_f1; // icaf's can only decode as i0
+   assign dec_i0_dbecc_d = ifu_i0_dbecc;
+   assign dec_i0_icaf_d = ifu_i0_icaf;
+   assign dec_i0_icaf_type_d[1:0] = ifu_i0_icaf_type[1:0];
+   assign dec_i0_pc_d[31:1] = ifu_i0_pc[31:1];
+   assign dec_i0_pc4_d = ifu_i0_pc4;
+
+   assign dec_i0_brp = i0_brp;
+   assign dec_i0_bp_index = ifu_i0_bp_index;
+   assign dec_i0_bp_fghr = ifu_i0_bp_fghr;
+   assign dec_i0_bp_btag = ifu_i0_bp_btag;
+
+   // ---- debug command decode ----
+   // type 2'h2 (abstract memory command) is not done here
+   assign dbg_active = dbg_cmd_valid & (dbg_cmd_type[1:0] != 2'h2);
+
+   assign dbg_gpr_sel = dbg_active & (dbg_cmd_type[1:0] == 2'h0);
+   assign dbg_csr_sel = dbg_active & (dbg_cmd_type[1:0] == 2'h1);
+
+   assign dbg_gpr_rd = dbg_gpr_sel & ~dbg_cmd_write;
+   assign dbg_gpr_wr = dbg_gpr_sel &  dbg_cmd_write;
+   assign dbg_csr_rd = dbg_csr_sel & ~dbg_cmd_write;
+   assign dbg_csr_wr = dbg_csr_sel &  dbg_cmd_write;
+
+   assign dbg_gpr_num[4:0] = dbg_cmd_addr[4:0];
+   assign dbg_csr_num[11:0] = dbg_cmd_addr[11:0];
+
+   // ---- instruction templates (literals grouped by instruction field) ----
+   //   GPR read  : or    x0,  reg, x0   - reg travels on rs1
+   //   GPR write : or    reg, x0,  x0   - write data is put on rs1
+   //   CSR read  : csrrs x0,  csr, x0
+   //   CSR write : csrrw x0,  csr, x0   - write data is put on rs1
+   // The four selects are mutually exclusive, so the AND-OR below is a plain mux.
+   assign dbg_instr[31:0] =
+        ({32{dbg_gpr_rd}} & {12'b0000000_00000, dbg_gpr_num[4:0], 15'b110_00000_0110011}) |
+        ({32{dbg_gpr_wr}} & {20'b0000000_00000_00000_110, dbg_gpr_num[4:0], 7'b0110011}) |
+        ({32{dbg_csr_rd}} & {dbg_csr_num[11:0], 20'b00000_010_00000_1110011}) |
+        ({32{dbg_csr_wr}} & {dbg_csr_num[11:0], 20'b00000_001_00000_1110011});
+
+   // machine is in halted state, pipe empty, write will always happen next cycle
+   assign dec_debug_wdata_rs1_d = dbg_gpr_wr | dbg_csr_wr;
+
+   // special fence csr for use only in debug mode
+   assign dec_debug_fence_d = dbg_csr_wr & (dbg_csr_num[11:0] == 12'h7c4);
+
+   // ---- what decode sees ----
+   // a handled debug command takes precedence over the aligner instruction
+   assign dec_i0_instr_d[31:0] = (dbg_active) ? dbg_instr[31:0] : ifu_i0_instr[31:0];
+
+   assign dec_ib0_valid_d = ifu_i0_valid | dbg_active;
+
+endmodule
diff --git a/design/dec/el2_dec_tlu_ctl.sv b/design/dec/el2_dec_tlu_ctl.sv
new file mode 100644
index 0000000..a24caeb
--- /dev/null
+++ b/design/dec/el2_dec_tlu_ctl.sv
@@ -0,0 +1,2622 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ + +//******************************************************************************** +// el2_dec_tlu_ctl.sv +// +// +// Function: CSRs, Commit/WB, flushing, exceptions, interrupts +// Comments: +// +//******************************************************************************** + +module el2_dec_tlu_ctl +import el2_pkg::*; +#( +`include "el2_param.vh" + ) + ( + input logic clk, + input logic active_clk, + input logic free_clk, + input logic rst_l, + input logic scan_mode, + + input logic [31:1] rst_vec, // reset vector, from core pins + input logic nmi_int, // nmi pin + input logic [31:1] nmi_vec, // nmi vector + input logic i_cpu_halt_req, // Asynchronous Halt request to CPU + input logic i_cpu_run_req, // Asynchronous Restart request to CPU + + input logic lsu_fastint_stall_any, // needed by lsu for 2nd pass of dma with ecc correction, stall next cycle + + + // perf counter inputs + input logic ifu_pmu_instr_aligned, // aligned instructions + input logic ifu_pmu_fetch_stall, // fetch unit stalled + input logic ifu_pmu_ic_miss, // icache miss + input logic ifu_pmu_ic_hit, // icache hit + input logic ifu_pmu_bus_error, // Instruction side bus error + input logic ifu_pmu_bus_busy, // Instruction side bus busy + input logic ifu_pmu_bus_trxn, // Instruction side bus transaction + input logic dec_pmu_instr_decoded, // decoded instructions + input logic dec_pmu_decode_stall, // decode stall + input logic dec_pmu_presync_stall, // decode stall due to presync'd inst + input logic dec_pmu_postsync_stall,// decode stall due to postsync'd inst + input logic lsu_store_stall_any, // SB or WB is full, stall decode + input logic dma_dccm_stall_any, // DMA stall of lsu + input logic dma_iccm_stall_any, // DMA stall of ifu + input logic exu_pmu_i0_br_misp, // pipe 0 branch misp + input logic exu_pmu_i0_br_ataken, // pipe 0 branch actual taken + input logic exu_pmu_i0_pc4, // pipe 0 4 byte branch + input logic lsu_pmu_bus_trxn, // D side bus transaction + input logic 
lsu_pmu_bus_misaligned, // D side bus misaligned + input logic lsu_pmu_bus_error, // D side bus error + input logic lsu_pmu_bus_busy, // D side bus busy + input logic lsu_pmu_load_external_m, // D side bus load + input logic lsu_pmu_store_external_m, // D side bus store + input logic dma_pmu_dccm_read, // DMA DCCM read + input logic dma_pmu_dccm_write, // DMA DCCM write + input logic dma_pmu_any_read, // DMA read + input logic dma_pmu_any_write, // DMA write + + input logic [31:1] lsu_fir_addr, // Fast int address + input logic [1:0] lsu_fir_error, // Fast int lookup error + + input logic iccm_dma_sb_error, // I side dma single bit error + + input el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu precise exception/error packet + input logic lsu_single_ecc_error_incr, // LSU inc SB error counter + + input logic dec_pause_state, // Pause counter not zero + input logic lsu_imprecise_error_store_any, // store bus error + input logic lsu_imprecise_error_load_any, // store bus error + input logic [31:0] lsu_imprecise_error_addr_any, // store bus error address + + input logic dec_csr_wen_unq_d, // valid csr with write - for csr legal + input logic dec_csr_any_unq_d, // valid csr - for csr legal + input logic [11:0] dec_csr_rdaddr_d, // read address for csr + + input logic dec_csr_wen_r, // csr write enable at wb + input logic [11:0] dec_csr_wraddr_r, // write address for csr + input logic [31:0] dec_csr_wrdata_r, // csr write data at wb + + input logic dec_csr_stall_int_ff, // csr is mie/mstatus + + input logic dec_tlu_i0_valid_r, // pipe 0 op at e4 is valid + + input logic [31:1] exu_npc_r, // for NPC tracking + + input logic [31:1] dec_tlu_i0_pc_r, // for PC/NPC tracking + + input el2_trap_pkt_t dec_tlu_packet_r, // exceptions known at decode + + input logic [31:0] dec_illegal_inst, // For mtval + input logic dec_i0_decode_d, // decode valid, used for clean icache diagnostics + + // branch info from pipe0 for errors or counter updates + input logic [1:0] exu_i0_br_hist_r, // 
history + input logic exu_i0_br_error_r, // error + input logic exu_i0_br_start_error_r, // start error + input logic exu_i0_br_valid_r, // valid + input logic exu_i0_br_mp_r, // mispredict + input logic exu_i0_br_middle_r, // middle of bank + + // branch info from pipe1 for errors or counter updates + + input logic exu_i0_br_way_r, // way hit or repl + + // Debug start + output logic dec_dbg_cmd_done, // abstract command done + output logic dec_dbg_cmd_fail, // abstract command failed + output logic dec_tlu_dbg_halted, // Core is halted and ready for debug command + output logic dec_tlu_debug_mode, // Core is in debug mode + output logic dec_tlu_resume_ack, // Resume acknowledge + output logic dec_tlu_debug_stall, // stall decode while waiting on core to empty + + output logic dec_tlu_flush_noredir_r , // Tell fetch to idle on this flush + output logic dec_tlu_mpc_halted_only, // Core is halted only due to MPC + output logic dec_tlu_flush_leak_one_r, // single step + output logic dec_tlu_flush_err_r, // iside perr/ecc rfpc. 
This is the D stage of the error + + output logic dec_tlu_flush_extint, // fast ext int started + output logic [31:2] dec_tlu_meihap, // meihap for fast int + + input logic dbg_halt_req, // DM requests a halt + input logic dbg_resume_req, // DM requests a resume + input logic ifu_miss_state_idle, // I-side miss buffer empty + input logic lsu_idle_any, // lsu is idle + input logic dec_div_active, // oop div is active + output el2_trigger_pkt_t [3:0] trigger_pkt_any, // trigger info for trigger blocks + + input logic ifu_ic_error_start, // IC single bit error + input logic ifu_iccm_rd_ecc_single_err, // ICCM single bit error + + + input logic [70:0] ifu_ic_debug_rd_data, // diagnostic icache read data + input logic ifu_ic_debug_rd_data_valid, // diagnostic icache read data valid + output el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics + // Debug end + + input logic [7:0] pic_claimid, // pic claimid for csr + input logic [3:0] pic_pl, // pic priv level for csr + input logic mhwakeup, // high priority external int, wakeup if halted + + input logic mexintpend, // external interrupt pending + input logic timer_int, // timer interrupt pending + input logic soft_int, // software interrupt pending + + output logic o_cpu_halt_status, // PMU interface, halted + output logic o_cpu_halt_ack, // halt req ack + output logic o_cpu_run_ack, // run req ack + output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. 
When core is in debug mode, the PMU should refrain from sendng a halt or run request + + input logic [31:4] core_id, // Core ID + + // external MPC halt/run interface + input logic mpc_debug_halt_req, // Async halt request + input logic mpc_debug_run_req, // Async run request + input logic mpc_reset_run_req, // Run/halt after reset + output logic mpc_debug_halt_ack, // Halt ack + output logic mpc_debug_run_ack, // Run ack + output logic debug_brkpt_status, // debug breakpoint + + output logic [3:0] dec_tlu_meicurpl, // to PIC + output logic [3:0] dec_tlu_meipt, // to PIC + + + output logic [31:0] dec_csr_rddata_d, // csr read data at wb + output logic dec_csr_legal_d, // csr indicates legal operation + + output el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // branch pkt to bp + + output logic dec_tlu_i0_kill_writeb_wb, // I0 is flushed, don't writeback any results to arch state + output logic dec_tlu_flush_lower_wb, // commit has a flush (exception, int, mispredict at e4) + output logic dec_tlu_i0_commit_cmt, // committed an instruction + + output logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state + output logic dec_tlu_flush_lower_r, // commit has a flush (exception, int) + output logic [31:1] dec_tlu_flush_path_r, // flush pc + output logic dec_tlu_fence_i_r, // flush is a fence_i rfnpc, flush icache + output logic dec_tlu_wr_pause_r, // CSR write to pause reg is at R. 
+ output logic dec_tlu_flush_pause_r, // Flush is due to pause + + output logic dec_tlu_presync_d, // CSR read needs to be presync'd + output logic dec_tlu_postsync_d, // CSR needs to be presync'd + + + output logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control + + output logic dec_tlu_force_halt, // halt has been forced + + output logic dec_tlu_perfcnt0, // toggles when pipe0 perf counter 0 has an event inc + output logic dec_tlu_perfcnt1, // toggles when pipe0 perf counter 1 has an event inc + output logic dec_tlu_perfcnt2, // toggles when pipe0 perf counter 2 has an event inc + output logic dec_tlu_perfcnt3, // toggles when pipe0 perf counter 3 has an event inc + + output logic dec_tlu_i0_exc_valid_wb1, // pipe 0 exception valid + output logic dec_tlu_i0_valid_wb1, // pipe 0 valid + output logic dec_tlu_int_valid_wb1, // pipe 2 int valid + output logic [4:0] dec_tlu_exc_cause_wb1, // exception or int cause + output logic [31:0] dec_tlu_mtval_wb1, // MTVAL value + + // feature disable from mfdc + output logic dec_tlu_external_ldfwd_disable, // disable external load forwarding + output logic dec_tlu_sideeffect_posted_disable, // disable posted stores to side-effect address + output logic dec_tlu_core_ecc_disable, // disable core ECC + output logic dec_tlu_bpred_disable, // disable branch prediction + output logic dec_tlu_wb_coalescing_disable, // disable writebuffer coalescing + output logic dec_tlu_pipelining_disable, // disable pipelining + output logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:16] + + // clock gating overrides from mcgc + output logic dec_tlu_misc_clk_override, // override misc clock domain gating + output logic dec_tlu_dec_clk_override, // override decode clock domain gating + output logic dec_tlu_ifu_clk_override, // override fetch clock domain gating + output logic dec_tlu_lsu_clk_override, // override load/store clock domain gating + output logic dec_tlu_bus_clk_override, // override bus clock domain 
gating + output logic dec_tlu_pic_clk_override, // override PIC clock domain gating + output logic dec_tlu_dccm_clk_override, // override DCCM clock domain gating + output logic dec_tlu_icm_clk_override // override ICCM clock domain gating + + ); + + logic clk_override, e4e5_int_clk, nmi_lsu_load_type, nmi_lsu_store_type, nmi_int_detected_f, nmi_lsu_load_type_f, + nmi_lsu_store_type_f, allow_dbg_halt_csr_write, dbg_cmd_done_ns, i_cpu_run_req_d1_raw, debug_mode_status, lsu_single_ecc_error_r_d1, + sel_npc_r, sel_npc_resume, ce_int, + nmi_in_debug_mode, dpc_capture_npc, dpc_capture_pc, tdata_load, tdata_opcode, tdata_action, perfcnt_halted; + + + logic reset_delayed, reset_detect, reset_detected; + logic wr_mstatus_r, wr_mtvec_r, wr_mcyclel_r, wr_mcycleh_r, + wr_minstretl_r, wr_minstreth_r, wr_mscratch_r, wr_mepc_r, wr_mcause_r, wr_mscause_r, wr_mtval_r, + wr_mrac_r, wr_meihap_r, wr_meicurpl_r, wr_meipt_r, wr_dcsr_r, + wr_dpc_r, wr_meicidpl_r, wr_meivt_r, wr_meicpct_r, wr_micect_r, wr_miccmect_r, wr_mfdht_r, wr_mfdhs_r, + wr_mdccmect_r,wr_mhpme3_r, wr_mhpme4_r, wr_mhpme5_r, wr_mhpme6_r; + logic wr_mpmc_r; + logic [1:1] mpmc_b_ns, mpmc, mpmc_b; + logic set_mie_pmu_fw_halt, fw_halted_ns, fw_halted; + logic wr_mcountinhibit_r; + logic [6:0] mcountinhibit; + logic wr_mtsel_r, wr_mtdata1_t0_r, wr_mtdata1_t1_r, wr_mtdata1_t2_r, wr_mtdata1_t3_r, wr_mtdata2_t0_r, wr_mtdata2_t1_r, wr_mtdata2_t2_r, wr_mtdata2_t3_r; + logic [31:0] mtdata2_t0, mtdata2_t1, mtdata2_t2, mtdata2_t3, mtdata2_tsel_out, mtdata1_tsel_out; + logic [9:0] mtdata1_t0_ns, mtdata1_t0, mtdata1_t1_ns, mtdata1_t1, mtdata1_t2_ns, mtdata1_t2, mtdata1_t3_ns, mtdata1_t3; + logic [9:0] tdata_wrdata_r; + logic [1:0] mtsel_ns, mtsel; + logic tlu_i0_kill_writeb_r; + logic [1:0] mstatus_ns, mstatus; + logic [1:0] mfdhs_ns, mfdhs; + logic [31:0] force_halt_ctr, force_halt_ctr_f; + logic force_halt; + logic [5:0] mfdht, mfdht_ns; + logic mstatus_mie_ns; + logic [30:0] mtvec_ns, mtvec; + logic [15:2] dcsr_ns, dcsr; + logic 
[3:0] mip_ns, mip; + logic [3:0] mie_ns, mie; + logic [31:0] mcyclel_ns, mcyclel; + logic [31:0] mcycleh_ns, mcycleh; + logic [31:0] minstretl_ns, minstretl; + logic [31:0] minstreth_ns, minstreth; + logic [31:0] micect_ns, micect, miccmect_ns, miccmect, mdccmect_ns, mdccmect; + logic [26:0] micect_inc, miccmect_inc, mdccmect_inc; + logic [31:0] mscratch; + logic [31:0] mhpmc3, mhpmc3_ns, mhpmc4, mhpmc4_ns, mhpmc5, mhpmc5_ns, mhpmc6, mhpmc6_ns; + logic [31:0] mhpmc3h, mhpmc3h_ns, mhpmc4h, mhpmc4h_ns, mhpmc5h, mhpmc5h_ns, mhpmc6h, mhpmc6h_ns; + logic [9:0] mhpme3, mhpme4, mhpme5, mhpme6; + logic [31:0] mrac; + logic [9:2] meihap; + logic [31:10] meivt; + logic [3:0] meicurpl_ns, meicurpl; + logic [3:0] meicidpl_ns, meicidpl; + logic [3:0] meipt_ns, meipt; + logic [31:0] mdseac; + logic mdseac_locked_ns, mdseac_locked_f, mdseac_en, nmi_lsu_detected; + logic [31:1] mepc_ns, mepc; + logic [31:1] dpc_ns, dpc; + logic [31:0] mcause_ns, mcause; + logic [2:0] mscause_ns, mscause, mscause_type; + logic [31:0] mtval_ns, mtval; + logic dec_pause_state_f, dec_tlu_wr_pause_r_d1, pause_expired_r, pause_expired_wb; + logic tlu_flush_lower_r, tlu_flush_lower_r_d1; + logic [31:1] tlu_flush_path_r, tlu_flush_path_r_d1; + logic i0_valid_wb; + logic tlu_i0_commit_cmt; + logic [31:1] vectored_path, interrupt_path; + logic [16:0] dicawics_ns, dicawics; + logic wr_dicawics_r, wr_dicad0_r, wr_dicad1_r, wr_dicad0h_r; + logic [31:0] dicad0_ns, dicad0, dicad0h_ns, dicad0h; + logic [6:0] dicad1_ns, dicad1_raw; + logic [31:0] dicad1; + logic ebreak_r, ebreak_to_debug_mode_r, ecall_r, illegal_r, mret_r, inst_acc_r, fence_i_r, + ic_perr_r, iccm_sbecc_r, ebreak_to_debug_mode_r_d1, kill_ebreak_count_r, inst_acc_second_r; + logic ic_perr_r_d1, iccm_sbecc_r_d1; + logic ce_int_ready, ext_int_ready, timer_int_ready, soft_int_ready, mhwakeup_ready, + take_ext_int, take_ce_int, take_timer_int, take_soft_int, take_nmi, take_nmi_r_d1; + logic i0_exception_valid_r, interrupt_valid_r, 
i0_exception_valid_r_d1, interrupt_valid_r_d1, exc_or_int_valid_r, exc_or_int_valid_r_d1, + mdccme_ce_req, miccme_ce_req, mice_ce_req; + logic synchronous_flush_r; + logic [4:0] exc_cause_r, exc_cause_wb; + logic mcyclel_cout, mcyclel_cout_f; + logic [31:0] mcyclel_inc; + logic [31:0] mcycleh_inc; + logic minstretl_cout, minstretl_cout_f, minstret_enable; + logic [31:0] minstretl_inc, minstretl_read; + logic [31:0] minstreth_inc, minstreth_read; + logic [31:1] pc_r, pc_r_d1, npc_r, npc_r_d1; + logic valid_csr; + logic rfpc_i0_r; + logic lsu_i0_rfnpc_r; + logic dec_tlu_br0_error_r, dec_tlu_br0_start_error_r, dec_tlu_br0_v_r; + logic lsu_i0_exc_r, lsu_i0_exc_r_raw, lsu_exc_ma_r, lsu_exc_acc_r, lsu_exc_st_r, + lsu_exc_valid_r, lsu_exc_valid_r_raw, lsu_exc_valid_r_d1, lsu_i0_exc_r_d1, block_interrupts; + logic i0_trigger_eval_r; + logic request_debug_mode_r, request_debug_mode_r_d1, request_debug_mode_done, request_debug_mode_done_f; + logic take_halt, halt_taken, halt_taken_f, internal_dbg_halt_mode, dbg_tlu_halted_f, take_reset, + dbg_tlu_halted, core_empty, lsu_idle_any_f, ifu_miss_state_idle_f, resume_ack_ns, + debug_halt_req_f, debug_resume_req_f, enter_debug_halt_req, dcsr_single_step_done, dcsr_single_step_done_f, + debug_halt_req_d1, debug_halt_req_ns, dcsr_single_step_running, dcsr_single_step_running_f, internal_dbg_halt_timers; + + logic [3:0] i0_trigger_r, trigger_action, trigger_enabled, + i0_trigger_chain_masked_r; + logic i0_trigger_hit_r, i0_trigger_hit_raw_r, i0_trigger_action_r, + trigger_hit_r_d1, + mepc_trigger_hit_sel_pc_r; + logic [3:0] update_hit_bit_r, i0_iside_trigger_has_pri_r,i0trigger_qual_r, i0_lsu_trigger_has_pri_r; + logic cpu_halt_status, cpu_halt_ack, cpu_run_ack, ext_halt_pulse, i_cpu_halt_req_d1, i_cpu_run_req_d1; + + logic inst_acc_r_raw, trigger_hit_dmode_r, trigger_hit_dmode_r_d1; + logic [8:0] mcgc; + logic [18:0] mfdc; + logic i_cpu_halt_req_sync_qual, i_cpu_run_req_sync_qual, pmu_fw_halt_req_ns, pmu_fw_halt_req_f, + fw_halt_req, 
enter_pmu_fw_halt_req, pmu_fw_tlu_halted, pmu_fw_tlu_halted_f, internal_pmu_fw_halt_mode, + internal_pmu_fw_halt_mode_f; + logic nmi_int_delayed, nmi_int_detected; + logic [3:0] trigger_execute, trigger_data, trigger_store; + logic dec_tlu_pmu_fw_halted; + + logic mpc_run_state_ns, debug_brkpt_status_ns, mpc_debug_halt_ack_ns, mpc_debug_run_ack_ns, dbg_halt_state_ns, dbg_run_state_ns, + dbg_halt_state_f, mpc_debug_halt_req_sync_f, mpc_debug_run_req_sync_f, mpc_halt_state_f, mpc_halt_state_ns, mpc_run_state_f, debug_brkpt_status_f, + mpc_debug_halt_ack_f, mpc_debug_run_ack_f, dbg_run_state_f, mpc_debug_halt_req_sync_pulse, + mpc_debug_run_req_sync_pulse, debug_brkpt_valid, debug_halt_req, debug_resume_req, dec_tlu_mpc_halted_only_ns; + logic take_ext_int_start, ext_int_freeze, take_ext_int_start_d1, take_ext_int_start_d2, + take_ext_int_start_d3, ext_int_freeze_d1, csr_meicpct, ignore_ext_int_due_to_lsu_stall; + logic mcause_sel_nmi_store, mcause_sel_nmi_load, mcause_sel_nmi_ext, fast_int_meicpct; + logic [1:0] mcause_fir_error_type; + logic dbg_halt_req_held_ns, dbg_halt_req_held, dbg_halt_req_final; + logic iccm_repair_state_ns, iccm_repair_state_d1, iccm_repair_state_rfnpc; + + logic nmi_int_sync, timer_int_sync, soft_int_sync, i_cpu_halt_req_sync, i_cpu_run_req_sync, mpc_debug_halt_req_sync, mpc_debug_run_req_sync, mpc_debug_halt_req_sync_raw; + logic csr_wr_clk; + logic lsu_r_wb_clk; + logic e4e5_clk, e4_valid, e5_valid, e4e5_valid, internal_dbg_halt_mode_f, internal_dbg_halt_mode_f2; + logic lsu_pmu_load_external_r, lsu_pmu_store_external_r; + logic dec_tlu_flush_noredir_r_d1, dec_tlu_flush_pause_r_d1; + logic lsu_single_ecc_error_r; + logic [31:0] lsu_error_pkt_addr_r; + logic mcyclel_cout_in; + logic i0_valid_no_ebreak_ecall_r; + logic minstret_enable_f; + logic sel_exu_npc_r, sel_flush_npc_r, sel_hold_npc_r; + logic pc0_valid_r; + logic [14:0] mfdc_int, mfdc_ns; + logic [31:0] mrac_in; + logic [31:27] csr_sat; + logic [8:6] dcsr_cause; + logic 
enter_debug_halt_req_le, dcsr_cause_upgradeable; + logic icache_rd_valid, icache_wr_valid, icache_rd_valid_f, icache_wr_valid_f; + logic [3:0] mhpmc_inc_r, mhpmc_inc_r_d1; + + logic [3:0][9:0] mhpme_vec; + logic mhpmc3_wr_en0, mhpmc3_wr_en1, mhpmc3_wr_en; + logic mhpmc4_wr_en0, mhpmc4_wr_en1, mhpmc4_wr_en; + logic mhpmc5_wr_en0, mhpmc5_wr_en1, mhpmc5_wr_en; + logic mhpmc6_wr_en0, mhpmc6_wr_en1, mhpmc6_wr_en; + logic mhpmc3h_wr_en0, mhpmc3h_wr_en; + logic mhpmc4h_wr_en0, mhpmc4h_wr_en; + logic mhpmc5h_wr_en0, mhpmc5h_wr_en; + logic mhpmc6h_wr_en0, mhpmc6h_wr_en; + logic [63:0] mhpmc3_incr, mhpmc4_incr, mhpmc5_incr, mhpmc6_incr; + logic perfcnt_halted_d1; + logic [3:0] perfcnt_during_sleep; + logic [9:0] event_saturate_r; + logic trace_tclk; + + el2_inst_pkt_t pmu_i0_itype_qual; + + logic csr_mfdht; + logic csr_mfdhs; + logic csr_misa; + logic csr_mvendorid; + logic csr_marchid; + logic csr_mimpid; + logic csr_mhartid; + logic csr_mstatus; + logic csr_mtvec; + logic csr_mip; + logic csr_mie; + logic csr_mcyclel; + logic csr_mcycleh; + logic csr_minstretl; + logic csr_minstreth; + logic csr_mscratch; + logic csr_mepc; + logic csr_mcause; + logic csr_mscause; + logic csr_mtval; + logic csr_mrac; + logic csr_dmst; + logic csr_mdseac; + logic csr_meihap; + logic csr_meivt; + logic csr_meipt; + logic csr_meicurpl; + logic csr_meicidpl; + logic csr_dcsr; + logic csr_mcgc; + logic csr_mfdc; + logic csr_dpc; + logic csr_mtsel; + logic csr_mtdata1; + logic csr_mtdata2; + logic csr_mhpmc3; + logic csr_mhpmc4; + logic csr_mhpmc5; + logic csr_mhpmc6; + logic csr_mhpmc3h; + logic csr_mhpmc4h; + logic csr_mhpmc5h; + logic csr_mhpmc6h; + logic csr_mhpme3; + logic csr_mhpme4; + logic csr_mhpme5; + logic csr_mhpme6; + logic csr_mcountinhibit; + logic csr_mpmc; + logic csr_mcpc; + logic csr_mdeau; + logic csr_micect; + logic csr_miccmect; + logic csr_mdccmect; + logic csr_dicawics; + logic csr_dicad0h; + logic csr_dicad0; + logic csr_dicad1; + logic csr_dicago; + logic presync; + 
logic postsync; + logic legal; + logic dec_csr_wen_r_mod; + + logic flush_clkvalid; + logic sel_fir_addr; + logic wr_mie_r; + logic mtval_capture_pc_r; + logic mtval_capture_pc_plus2_r; + logic mtval_capture_inst_r; + logic mtval_capture_lsu_r; + logic mtval_clear_r; + logic wr_mcgc_r; + logic wr_mfdc_r; + logic wr_mdeau_r; + logic trigger_hit_for_dscr_cause_r_d1; + + + assign clk_override = dec_tlu_dec_clk_override; + + // Async inputs to the core have to be sync'd to the core clock. + rvsyncss #(7) syncro_ff(.*, + .clk(free_clk), + .din ({nmi_int, timer_int, soft_int, i_cpu_halt_req, i_cpu_run_req, mpc_debug_halt_req, mpc_debug_run_req}), + .dout({nmi_int_sync, timer_int_sync, soft_int_sync, i_cpu_halt_req_sync, i_cpu_run_req_sync, mpc_debug_halt_req_sync_raw, mpc_debug_run_req_sync})); + + // for CSRs that have inpipe writes only + + rvoclkhdr csrwr_r_cgc ( .en(dec_csr_wen_r_mod | clk_override), .l1clk(csr_wr_clk), .* ); + rvoclkhdr lsu_r_wb_cgc ( .en(lsu_error_pkt_r.exc_valid | lsu_exc_valid_r_d1 | clk_override), .l1clk(lsu_r_wb_clk), .* ); + + assign e4_valid = dec_tlu_i0_valid_r; + assign e4e5_valid = e4_valid | e5_valid; + assign flush_clkvalid = internal_dbg_halt_mode_f | i_cpu_run_req_d1 | interrupt_valid_r | interrupt_valid_r_d1 | + reset_delayed | pause_expired_r | pause_expired_wb | ic_perr_r | ic_perr_r_d1 | iccm_sbecc_r | iccm_sbecc_r_d1 | + clk_override; + rvoclkhdr e4e5_cgc ( .en(e4e5_valid | clk_override), .l1clk(e4e5_clk), .* ); + rvoclkhdr e4e5_int_cgc ( .en(e4e5_valid | flush_clkvalid), .l1clk(e4e5_int_clk), .* ); + + rvdff #(11) freeff (.*, .clk(free_clk), .din ({iccm_repair_state_ns, ic_perr_r, iccm_sbecc_r, e4_valid, internal_dbg_halt_mode, + lsu_pmu_load_external_m, lsu_pmu_store_external_m, tlu_flush_lower_r, tlu_i0_kill_writeb_r, + internal_dbg_halt_mode_f, force_halt}), + .dout({iccm_repair_state_d1, ic_perr_r_d1, iccm_sbecc_r_d1, e5_valid, internal_dbg_halt_mode_f, + lsu_pmu_load_external_r, lsu_pmu_store_external_r, 
tlu_flush_lower_r_d1, dec_tlu_i0_kill_writeb_wb, + internal_dbg_halt_mode_f2, dec_tlu_force_halt})); + + assign dec_tlu_i0_kill_writeb_r = tlu_i0_kill_writeb_r; + + rvdff #(2) reset_ff (.*, .clk(free_clk), .din({1'b1, reset_detect}), .dout({reset_detect, reset_detected})); + assign reset_delayed = reset_detect ^ reset_detected; + + rvdff #(4) nmi_ff (.*, .clk(free_clk), .din({nmi_int_sync, nmi_int_detected, nmi_lsu_load_type, nmi_lsu_store_type}), .dout({nmi_int_delayed, nmi_int_detected_f, nmi_lsu_load_type_f, nmi_lsu_store_type_f})); + + // Filter subsequent bus errors after the first, until the lock on MDSEAC is cleared + assign nmi_lsu_detected = ~mdseac_locked_f & (lsu_imprecise_error_load_any | lsu_imprecise_error_store_any); + + assign nmi_int_detected = (nmi_int_sync & ~nmi_int_delayed) | nmi_lsu_detected | (nmi_int_detected_f & ~take_nmi_r_d1) | (take_ext_int_start_d3 & |lsu_fir_error[1:0]); + // if the first nmi is a lsu type, note it. If there's already an nmi pending, ignore + assign nmi_lsu_load_type = (nmi_lsu_detected & lsu_imprecise_error_load_any & ~(nmi_int_detected_f & ~take_nmi_r_d1)) | (nmi_lsu_load_type_f & ~take_nmi_r_d1); + assign nmi_lsu_store_type = (nmi_lsu_detected & lsu_imprecise_error_store_any & ~(nmi_int_detected_f & ~take_nmi_r_d1)) | (nmi_lsu_store_type_f & ~take_nmi_r_d1); + +`define MSTATUS_MIE 0 +`define MIP_MCEIP 3 +`define MIP_MEIP 2 +`define MIP_MTIP 1 +`define MIP_MSIP 0 + +`define MIE_MCEIE 3 +`define MIE_MEIE 2 +`define MIE_MTIE 1 +`define MIE_MSIE 0 + +`define DCSR_EBREAKM 15 +`define DCSR_STEPIE 11 +`define DCSR_STOPC 10 +`define DCSR_STEP 2 + // ---------------------------------------------------------------------- + // MPC halt + // - can interact with debugger halt and v-v + + // fast ints in progress have priority + assign mpc_debug_halt_req_sync = mpc_debug_halt_req_sync_raw & ~ext_int_freeze_d1; + + rvdff #(10) mpvhalt_ff (.*, .clk(free_clk), + .din({mpc_debug_halt_req_sync, mpc_debug_run_req_sync, + 
mpc_halt_state_ns, mpc_run_state_ns, debug_brkpt_status_ns, + mpc_debug_halt_ack_ns, mpc_debug_run_ack_ns, + dbg_halt_state_ns, dbg_run_state_ns, + dec_tlu_mpc_halted_only_ns}), + .dout({mpc_debug_halt_req_sync_f, mpc_debug_run_req_sync_f, + mpc_halt_state_f, mpc_run_state_f, debug_brkpt_status_f, + mpc_debug_halt_ack_f, mpc_debug_run_ack_f, + dbg_halt_state_f, dbg_run_state_f, + dec_tlu_mpc_halted_only})); + + // turn level sensitive requests into pulses + assign mpc_debug_halt_req_sync_pulse = mpc_debug_halt_req_sync & ~mpc_debug_halt_req_sync_f; + assign mpc_debug_run_req_sync_pulse = mpc_debug_run_req_sync & ~mpc_debug_run_req_sync_f; + + // states + assign mpc_halt_state_ns = (mpc_halt_state_f | mpc_debug_halt_req_sync_pulse | (reset_delayed & ~mpc_reset_run_req)) & ~mpc_debug_run_req_sync; + assign mpc_run_state_ns = (mpc_run_state_f | (mpc_debug_run_req_sync_pulse & ~mpc_debug_run_ack_f)) & (internal_dbg_halt_mode_f & ~dcsr_single_step_running_f); + + assign dbg_halt_state_ns = (dbg_halt_state_f | (dbg_halt_req_final | dcsr_single_step_done_f | trigger_hit_dmode_r_d1 | ebreak_to_debug_mode_r_d1)) & ~dbg_resume_req; + assign dbg_run_state_ns = (dbg_run_state_f | dbg_resume_req) & (internal_dbg_halt_mode_f & ~dcsr_single_step_running_f); + + // tell dbg we are only MPC halted + assign dec_tlu_mpc_halted_only_ns = ~dbg_halt_state_f & mpc_halt_state_f; + + // this asserts from detection of bkpt until after we leave debug mode + assign debug_brkpt_valid = ebreak_to_debug_mode_r_d1 | trigger_hit_dmode_r_d1; + assign debug_brkpt_status_ns = (debug_brkpt_valid | debug_brkpt_status_f) & (internal_dbg_halt_mode & ~dcsr_single_step_running_f); + + // acks back to interface + assign mpc_debug_halt_ack_ns = mpc_halt_state_f & internal_dbg_halt_mode_f & mpc_debug_halt_req_sync & core_empty; + assign mpc_debug_run_ack_ns = (mpc_debug_run_req_sync & ~dbg_halt_state_ns & ~mpc_debug_halt_req_sync) | (mpc_debug_run_ack_f & mpc_debug_run_req_sync) ; + + // Pins + assign 
mpc_debug_halt_ack = mpc_debug_halt_ack_f; + assign mpc_debug_run_ack = mpc_debug_run_ack_f; + assign debug_brkpt_status = debug_brkpt_status_f; + + // DBG halt req is a pulse, fast ext int in progress has priority + assign dbg_halt_req_held_ns = (dbg_halt_req | dbg_halt_req_held) & ext_int_freeze_d1; + assign dbg_halt_req_final = (dbg_halt_req | dbg_halt_req_held) & ~ext_int_freeze_d1; + + // combine MPC and DBG halt requests + assign debug_halt_req = (dbg_halt_req_final | mpc_debug_halt_req_sync | (reset_delayed & ~mpc_reset_run_req)) & ~internal_dbg_halt_mode_f & ~ext_int_freeze_d1; + + assign debug_resume_req = ~debug_resume_req_f & // squash back to back resumes + ((mpc_run_state_ns & ~dbg_halt_state_ns) | // MPC run req + (dbg_run_state_ns & ~mpc_halt_state_ns)); // dbg request is a pulse + + + // HALT + // dbg/pmu/fw requests halt, service as soon as lsu is not blocking interrupts + assign take_halt = (debug_halt_req_f | pmu_fw_halt_req_f) & ~synchronous_flush_r & ~mret_r & ~halt_taken_f & ~dec_tlu_flush_noredir_r_d1 & ~take_reset; + + // hold after we take a halt, so we don't keep taking halts + assign halt_taken = (dec_tlu_flush_noredir_r_d1 & ~dec_tlu_flush_pause_r_d1 & ~take_ext_int_start_d1) | (halt_taken_f & ~dbg_tlu_halted_f & ~pmu_fw_tlu_halted_f & ~interrupt_valid_r_d1); + + // After doing halt flush (RFNPC) wait until core is idle before asserting a particular halt mode + // It takes a cycle for mb_empty to assert after a fetch, take_halt covers that cycle + assign core_empty = force_halt | + (lsu_idle_any & lsu_idle_any_f & ifu_miss_state_idle & ifu_miss_state_idle_f & ~debug_halt_req & ~debug_halt_req_d1 & ~dec_div_active); + +//-------------------------------------------------------------------------------- +// Debug start +// + + assign enter_debug_halt_req = (~internal_dbg_halt_mode_f & debug_halt_req) | dcsr_single_step_done_f | trigger_hit_dmode_r_d1 | ebreak_to_debug_mode_r_d1; + + // dbg halt state active from request until non-step resume 
+ assign internal_dbg_halt_mode = debug_halt_req_ns | (internal_dbg_halt_mode_f & ~(debug_resume_req_f & ~dcsr[`DCSR_STEP])); + // debug halt can write csrs as long as we are not single stepping + assign allow_dbg_halt_csr_write = internal_dbg_halt_mode_f & ~dcsr_single_step_running_f; + + + // hold debug_halt_req_ns high until dbg_tlu_halted asserts, i.e. we enter debug halt + assign debug_halt_req_ns = enter_debug_halt_req | (debug_halt_req_f & ~dbg_tlu_halted); + + assign dbg_tlu_halted = (debug_halt_req_f & core_empty & halt_taken) | (dbg_tlu_halted_f & ~debug_resume_req_f); + + assign resume_ack_ns = (debug_resume_req_f & dbg_tlu_halted_f & dbg_run_state_ns); + + assign dcsr_single_step_done = dec_tlu_i0_valid_r & ~dec_tlu_dbg_halted & dcsr[`DCSR_STEP] & ~rfpc_i0_r; + + assign dcsr_single_step_running = (debug_resume_req_f & dcsr[`DCSR_STEP]) | (dcsr_single_step_running_f & ~dcsr_single_step_done_f); + + assign dbg_cmd_done_ns = dec_tlu_i0_valid_r & dec_tlu_dbg_halted; + + // used to hold off commits after an in-pipe debug mode request (triggers, DCSR) + assign request_debug_mode_r = (trigger_hit_dmode_r | ebreak_to_debug_mode_r) | (request_debug_mode_r_d1 & ~dec_tlu_flush_lower_wb); + + assign request_debug_mode_done = (request_debug_mode_r_d1 | request_debug_mode_done_f) & ~dbg_tlu_halted_f; + + rvdff #(18) halt_ff (.*, .clk(free_clk), + .din({dec_tlu_flush_noredir_r, halt_taken, lsu_idle_any, ifu_miss_state_idle, dbg_tlu_halted, + resume_ack_ns, debug_halt_req_ns, debug_resume_req, trigger_hit_dmode_r, + dcsr_single_step_done, debug_halt_req, dec_tlu_wr_pause_r, dec_pause_state, + request_debug_mode_r, request_debug_mode_done, dcsr_single_step_running, dec_tlu_flush_pause_r, + dbg_halt_req_held_ns}), + .dout({dec_tlu_flush_noredir_r_d1, halt_taken_f, lsu_idle_any_f, ifu_miss_state_idle_f, dbg_tlu_halted_f, + dec_tlu_resume_ack , debug_halt_req_f, debug_resume_req_f, trigger_hit_dmode_r_d1, + dcsr_single_step_done_f, debug_halt_req_d1, dec_tlu_wr_pause_r_d1, dec_pause_state_f, +
request_debug_mode_r_d1, request_debug_mode_done_f, dcsr_single_step_running_f, dec_tlu_flush_pause_r_d1, + dbg_halt_req_held})); + + assign dec_tlu_debug_stall = debug_halt_req_f; + assign dec_tlu_dbg_halted = dbg_tlu_halted_f; + assign dec_tlu_debug_mode = internal_dbg_halt_mode_f; + assign dec_tlu_pmu_fw_halted = pmu_fw_tlu_halted_f; + + // kill fetch redirection on flush if going to halt, if there's a fence during db-halt, on a pause flush, on a trigger hit to debug mode, or when take_ext_int_start asserts + assign dec_tlu_flush_noredir_r = take_halt | (fence_i_r & internal_dbg_halt_mode) | dec_tlu_flush_pause_r | (i0_trigger_hit_r & trigger_hit_dmode_r) | take_ext_int_start; + + assign dec_tlu_flush_extint = take_ext_int_start; + + // 1 cycle after writing the PAUSE counter, flush with noredir to idle F1-D. + assign dec_tlu_flush_pause_r = dec_tlu_wr_pause_r_d1 & ~interrupt_valid_r & ~take_ext_int_start; + + // detect end of pause counter and rfpc + assign pause_expired_r = ~dec_pause_state & dec_pause_state_f & ~(ext_int_ready | ce_int_ready | timer_int_ready | soft_int_ready | nmi_int_detected | ext_int_freeze_d1) & ~interrupt_valid_r_d1 & ~debug_halt_req_f & ~pmu_fw_halt_req_f & ~halt_taken_f; + + assign dec_tlu_flush_leak_one_r = dec_tlu_flush_lower_r & dcsr[`DCSR_STEP] & (dec_tlu_resume_ack | dcsr_single_step_running) & ~dec_tlu_flush_noredir_r; + assign dec_tlu_flush_err_r = dec_tlu_flush_lower_r & (ic_perr_r_d1 | iccm_sbecc_r_d1); + + // If DM attempts to access an illegal CSR, send cmd_fail back + assign dec_dbg_cmd_done = dbg_cmd_done_ns; + assign dec_dbg_cmd_fail = illegal_r & dec_dbg_cmd_done; + + + //-------------------------------------------------------------------------------- + //-------------------------------------------------------------------------------- + // Triggers + // +`define MTDATA1_DMODE 9 +`define MTDATA1_SEL 7 +`define MTDATA1_ACTION 6 +`define MTDATA1_CHAIN 5 +`define MTDATA1_MATCH 4 +`define MTDATA1_M_ENABLED 3 +`define MTDATA1_EXE 2 +`define MTDATA1_ST 1 +`define MTDATA1_LD 0 + + // Prioritize trigger hits
with other exceptions. + // + // Trigger should have highest priority except: + // - trigger is an execute-data and there is an inst_access exception (lsu triggers won't fire, inst. is nop'd by decode) + // - trigger is a store-data and there is a lsu_acc_exc or lsu_ma_exc. + assign trigger_execute[3:0] = {mtdata1_t3[`MTDATA1_EXE], mtdata1_t2[`MTDATA1_EXE], mtdata1_t1[`MTDATA1_EXE], mtdata1_t0[`MTDATA1_EXE]}; + assign trigger_data[3:0] = {mtdata1_t3[`MTDATA1_SEL], mtdata1_t2[`MTDATA1_SEL], mtdata1_t1[`MTDATA1_SEL], mtdata1_t0[`MTDATA1_SEL]}; + assign trigger_store[3:0] = {mtdata1_t3[`MTDATA1_ST], mtdata1_t2[`MTDATA1_ST], mtdata1_t1[`MTDATA1_ST], mtdata1_t0[`MTDATA1_ST]}; + + // MSTATUS[MIE] needs to be on to take triggers unless the action is trigger to debug mode. + assign trigger_enabled[3:0] = {(mtdata1_t3[`MTDATA1_ACTION] | mstatus[`MSTATUS_MIE]) & mtdata1_t3[`MTDATA1_M_ENABLED], + (mtdata1_t2[`MTDATA1_ACTION] | mstatus[`MSTATUS_MIE]) & mtdata1_t2[`MTDATA1_M_ENABLED], + (mtdata1_t1[`MTDATA1_ACTION] | mstatus[`MSTATUS_MIE]) & mtdata1_t1[`MTDATA1_M_ENABLED], + (mtdata1_t0[`MTDATA1_ACTION] | mstatus[`MSTATUS_MIE]) & mtdata1_t0[`MTDATA1_M_ENABLED]}; + + // iside exceptions are always in i0 + assign i0_iside_trigger_has_pri_r[3:0] = ~( (trigger_execute[3:0] & trigger_data[3:0] & {4{inst_acc_r_raw}}) | // exe-data with inst_acc + ({4{exu_i0_br_error_r | exu_i0_br_start_error_r}})); // branch error in i0 + + // lsu excs have to line up with their respective triggers since the lsu op can be i0 + assign i0_lsu_trigger_has_pri_r[3:0] = ~(trigger_store[3:0] & trigger_data[3:0] & {4{lsu_i0_exc_r_raw}}); + + // trigger hits have to be eval'd to cancel side effect lsu ops even though the pipe is already frozen + assign i0_trigger_eval_r = dec_tlu_i0_valid_r; + + assign i0trigger_qual_r[3:0] = {4{i0_trigger_eval_r}} & dec_tlu_packet_r.i0trigger[3:0] & i0_iside_trigger_has_pri_r[3:0] & i0_lsu_trigger_has_pri_r[3:0] & trigger_enabled[3:0]; + + // Qual trigger hits + assign
i0_trigger_r[3:0] = ~{4{dec_tlu_flush_lower_wb | dec_tlu_dbg_halted}} & i0trigger_qual_r[3:0]; + + // chaining can mask raw trigger info: triggers 3/2 are paired via mtdata1_t2[CHAIN] and triggers 1/0 via mtdata1_t0[CHAIN]; when the chain bit is set both triggers of the pair must hit + assign i0_trigger_chain_masked_r[3:0] = {i0_trigger_r[3] & (~mtdata1_t2[`MTDATA1_CHAIN] | i0_trigger_r[2]), + i0_trigger_r[2] & (~mtdata1_t2[`MTDATA1_CHAIN] | i0_trigger_r[3]), + i0_trigger_r[1] & (~mtdata1_t0[`MTDATA1_CHAIN] | i0_trigger_r[0]), + i0_trigger_r[0] & (~mtdata1_t0[`MTDATA1_CHAIN] | i0_trigger_r[1])}; + + // This is the highest priority by this point. + assign i0_trigger_hit_raw_r = |i0_trigger_chain_masked_r[3:0]; + + assign i0_trigger_hit_r = i0_trigger_hit_raw_r; + + // Actions include breakpoint, or dmode. Dmode is only possible if the DMODE bit is set. + // Otherwise, take a breakpoint. + assign trigger_action[3:0] = {mtdata1_t3[`MTDATA1_ACTION] & mtdata1_t3[`MTDATA1_DMODE], + mtdata1_t2[`MTDATA1_ACTION] & mtdata1_t2[`MTDATA1_DMODE], + mtdata1_t1[`MTDATA1_ACTION] & mtdata1_t1[`MTDATA1_DMODE], + mtdata1_t0[`MTDATA1_ACTION] & mtdata1_t0[`MTDATA1_DMODE]}; + + // this is needed to set the HIT bit in the triggers + assign update_hit_bit_r[3:0] = ({4{i0_trigger_hit_r}} & i0_trigger_chain_masked_r[3:0]); + + // action, 1 means dmode. Simultaneous triggers with at least 1 set for dmode force entire action to dmode.
+ assign i0_trigger_action_r = |(i0_trigger_chain_masked_r[3:0] & trigger_action[3:0]); + + assign trigger_hit_dmode_r = (i0_trigger_hit_r & i0_trigger_action_r); + + assign mepc_trigger_hit_sel_pc_r = i0_trigger_hit_r & ~trigger_hit_dmode_r; + + +// +// Debug end +//-------------------------------------------------------------------------------- + + //---------------------------------------------------------------------- + // + // Commit + // + //---------------------------------------------------------------------- + + + + //-------------------------------------------------------------------------------- + // External halt (not debug halt) + // - Fully interlocked handshake + // i_cpu_halt_req ____|--------------|_______________ + // core_empty ---------------|___________ + // o_cpu_halt_ack _________________|----|__________ + // o_cpu_halt_status _______________|---------------------|_________ + // i_cpu_run_req ______|----------|____ + // o_cpu_run_ack ____________|------|________ + // + + + // debug mode has priority, ignore PMU/FW halt/run while in debug mode or while ext_int_freeze_d1 is set + assign i_cpu_halt_req_sync_qual = i_cpu_halt_req_sync & ~dec_tlu_debug_mode & ~ext_int_freeze_d1; + assign i_cpu_run_req_sync_qual = i_cpu_run_req_sync & ~dec_tlu_debug_mode & pmu_fw_tlu_halted_f & ~ext_int_freeze_d1; + + rvdff #(8) exthaltff (.*, .clk(free_clk), .din({i_cpu_halt_req_sync_qual, i_cpu_run_req_sync_qual, cpu_halt_status, + cpu_halt_ack, cpu_run_ack, internal_pmu_fw_halt_mode, + pmu_fw_halt_req_ns, pmu_fw_tlu_halted}), + .dout({i_cpu_halt_req_d1, i_cpu_run_req_d1_raw, o_cpu_halt_status, + o_cpu_halt_ack, o_cpu_run_ack, internal_pmu_fw_halt_mode_f, + pmu_fw_halt_req_f, pmu_fw_tlu_halted_f})); + + // only happens if we aren't in dbg_halt + assign ext_halt_pulse = i_cpu_halt_req_sync_qual & ~i_cpu_halt_req_d1; + + assign enter_pmu_fw_halt_req = ext_halt_pulse | fw_halt_req; + + assign pmu_fw_halt_req_ns = (enter_pmu_fw_halt_req | (pmu_fw_halt_req_f & ~pmu_fw_tlu_halted)) & ~debug_halt_req_f;
+ + assign internal_pmu_fw_halt_mode = pmu_fw_halt_req_ns | (internal_pmu_fw_halt_mode_f & ~i_cpu_run_req_d1 & ~debug_halt_req_f); + + // debug halt has priority + assign pmu_fw_tlu_halted = ((pmu_fw_halt_req_f & core_empty & halt_taken & ~enter_debug_halt_req) | (pmu_fw_tlu_halted_f & ~i_cpu_run_req_d1)) & ~debug_halt_req_f; + + assign cpu_halt_ack = i_cpu_halt_req_d1 & pmu_fw_tlu_halted_f; + assign cpu_halt_status = (pmu_fw_tlu_halted_f & ~i_cpu_run_req_d1) | (o_cpu_halt_status & ~i_cpu_run_req_d1 & ~internal_dbg_halt_mode_f); + assign cpu_run_ack = (o_cpu_halt_status & i_cpu_run_req_sync_qual) | (o_cpu_run_ack & i_cpu_run_req_sync_qual); + assign debug_mode_status = internal_dbg_halt_mode_f; + assign o_debug_mode_status = debug_mode_status; + +`ifdef ASSERT_ON + assert_commit_while_halted: assert #0 (~(tlu_i0_commit_cmt & o_cpu_halt_status)) else $display("ERROR: Commiting while cpu_halt_status asserted!"); + assert_flush_while_fastint: assert #0 (~((take_ext_int_start_d1 | take_ext_int_start_d2) & dec_tlu_flush_lower_r)) else $display("ERROR: TLU Flushing inside fast interrupt procedure!"); +`endif + + // NMI, ready timer and software interrupts, and a ready external interrupt with mhwakeup set can wake up from external halt while no halt request is pending + assign i_cpu_run_req_d1 = i_cpu_run_req_d1_raw | ((nmi_int_detected | timer_int_ready | soft_int_ready | (mhwakeup & mhwakeup_ready)) & o_cpu_halt_status & ~i_cpu_halt_req_d1); + + //-------------------------------------------------------------------------------- + //-------------------------------------------------------------------------------- + + assign lsu_single_ecc_error_r = lsu_single_ecc_error_incr; + rvdff #(2) lsu_dccm_errorff (.*, .clk(free_clk), .din({mdseac_locked_ns, lsu_single_ecc_error_r}), .dout({mdseac_locked_f, lsu_single_ecc_error_r_d1})); + + assign lsu_error_pkt_addr_r[31:0] = lsu_error_pkt_r.addr[31:0]; + rvdff #(2) lsu_error_wbff (.*, .clk(lsu_r_wb_clk), .din({lsu_exc_valid_r, lsu_i0_exc_r}), .dout({lsu_exc_valid_r_d1, lsu_i0_exc_r_d1})); + + +
assign lsu_exc_valid_r_raw = lsu_error_pkt_r.exc_valid & ~dec_tlu_flush_lower_wb; + + assign lsu_i0_exc_r_raw = lsu_error_pkt_r.exc_valid; + + assign lsu_i0_exc_r = lsu_i0_exc_r_raw & lsu_exc_valid_r_raw & ~i0_trigger_hit_r & ~rfpc_i0_r; + + assign lsu_exc_valid_r = lsu_i0_exc_r; + + assign lsu_exc_ma_r = lsu_i0_exc_r & ~lsu_error_pkt_r.exc_type; + assign lsu_exc_acc_r = lsu_i0_exc_r & lsu_error_pkt_r.exc_type; + assign lsu_exc_st_r = lsu_i0_exc_r & lsu_error_pkt_r.inst_type; + + // Single bit ECC errors on loads are RFNPC corrected, with the corrected data written to the GPR. + // LSU turns the load into a store and patches the data in the DCCM + assign lsu_i0_rfnpc_r = dec_tlu_i0_valid_r & ~i0_trigger_hit_r & + (~lsu_error_pkt_r.inst_type & lsu_error_pkt_r.single_ecc_error); + + // Final commit valids + assign tlu_i0_commit_cmt = dec_tlu_i0_valid_r & + ~rfpc_i0_r & + ~lsu_i0_exc_r & + ~inst_acc_r & + ~dec_tlu_dbg_halted & + ~request_debug_mode_r_d1 & + ~i0_trigger_hit_r; + + // unified place to manage the killing of arch state writebacks + assign tlu_i0_kill_writeb_r = rfpc_i0_r | lsu_i0_exc_r | inst_acc_r | (illegal_r & dec_tlu_dbg_halted) | i0_trigger_hit_r; + assign dec_tlu_i0_commit_cmt = tlu_i0_commit_cmt; + + + // refetch PC, microarch flush + // ic errors only in pipe0 + assign rfpc_i0_r = ((dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1 & (exu_i0_br_error_r | exu_i0_br_start_error_r)) | // inst commit with rfpc + ((ic_perr_r_d1 | iccm_sbecc_r_d1) & ~ext_int_freeze_d1)) & // ic/iccm without inst commit + ~i0_trigger_hit_r & // unless there's a trigger. Err signal to ic/iccm will assert anyway to clear the error. + ~lsu_i0_rfnpc_r; + + // From the indication of an iccm single bit error until the first commit or flush, maintain a repair state. In the repair state, rfnpc i0 commits.
+ assign iccm_repair_state_ns = iccm_sbecc_r_d1 | (iccm_repair_state_d1 & ~dec_tlu_flush_lower_r); + + + `define MCPC 12'h7c2 + + // this is a flush of last resort, meaning only assert it if there is no other flush happening. + assign iccm_repair_state_rfnpc = tlu_i0_commit_cmt & iccm_repair_state_d1 & + ~(ebreak_r | ecall_r | mret_r | take_reset | illegal_r | (dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MCPC))); + + // go ahead and repair the branch error on other flushes, doesn't have to be the rfpc flush + assign dec_tlu_br0_error_r = exu_i0_br_error_r & dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1; + assign dec_tlu_br0_start_error_r = exu_i0_br_start_error_r & dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1; + assign dec_tlu_br0_v_r = exu_i0_br_valid_r & dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1 & (~exu_i0_br_mp_r | ~exu_pmu_i0_br_ataken); + + + assign dec_tlu_br0_r_pkt.hist[1:0] = exu_i0_br_hist_r[1:0]; + assign dec_tlu_br0_r_pkt.br_error = dec_tlu_br0_error_r; + assign dec_tlu_br0_r_pkt.br_start_error = dec_tlu_br0_start_error_r; + assign dec_tlu_br0_r_pkt.valid = dec_tlu_br0_v_r; + assign dec_tlu_br0_r_pkt.way = exu_i0_br_way_r; + assign dec_tlu_br0_r_pkt.middle = exu_i0_br_middle_r; + + + assign ebreak_r = (dec_tlu_packet_r.pmu_i0_itype == EBREAK) & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~dcsr[`DCSR_EBREAKM] & ~rfpc_i0_r; + assign ecall_r = (dec_tlu_packet_r.pmu_i0_itype == ECALL) & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~rfpc_i0_r; + assign illegal_r = ~dec_tlu_packet_r.legal & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~rfpc_i0_r; + assign mret_r = (dec_tlu_packet_r.pmu_i0_itype == MRET) & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~rfpc_i0_r; + // fence_i includes debug only fence_i's + assign fence_i_r = (dec_tlu_packet_r.fence_i & dec_tlu_i0_valid_r & ~i0_trigger_hit_r) & ~rfpc_i0_r; + assign ic_perr_r = ifu_ic_error_start & ~ext_int_freeze_d1 & (~internal_dbg_halt_mode_f | dcsr_single_step_running) & ~internal_pmu_fw_halt_mode_f; + assign iccm_sbecc_r =
ifu_iccm_rd_ecc_single_err & ~ext_int_freeze_d1 & (~internal_dbg_halt_mode_f | dcsr_single_step_running) & ~internal_pmu_fw_halt_mode_f; + assign inst_acc_r_raw = dec_tlu_packet_r.icaf & dec_tlu_i0_valid_r; + assign inst_acc_r = inst_acc_r_raw & ~rfpc_i0_r & ~i0_trigger_hit_r; + assign inst_acc_second_r = dec_tlu_packet_r.icaf_f1; + + assign ebreak_to_debug_mode_r = (dec_tlu_packet_r.pmu_i0_itype == EBREAK) & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & dcsr[`DCSR_EBREAKM] & ~rfpc_i0_r; + + rvdff #(1) exctype_wb_ff (.*, .clk(e4e5_clk), + .din (ebreak_to_debug_mode_r ), + .dout(ebreak_to_debug_mode_r_d1)); + + assign dec_tlu_fence_i_r = fence_i_r; + // + // Exceptions + // + // - MEPC <- PC + // - PC <- MTVEC, assert flush_lower + // - MCAUSE <- cause + // - MSCAUSE <- secondary cause + // - MTVAL <- + // - MPIE <- MIE + // - MIE <- 0 + // + assign i0_exception_valid_r = (ebreak_r | ecall_r | illegal_r | inst_acc_r) & ~rfpc_i0_r & ~dec_tlu_dbg_halted; + + // Cause: + // + // 0x2 : illegal + // 0x3 : breakpoint (ebreak or trigger hit) + // 0xb : Environment call M-mode + + + assign exc_cause_r[4:0] = ( ({5{take_ext_int}} & 5'h0b) | + ({5{take_timer_int}} & 5'h07) | + ({5{take_soft_int}} & 5'h03) | + ({5{take_ce_int}} & 5'h1e) | + ({5{illegal_r}} & 5'h02) | + ({5{ecall_r}} & 5'h0b) | + ({5{inst_acc_r}} & 5'h01) | + ({5{ebreak_r | i0_trigger_hit_r}} & 5'h03) | + ({5{lsu_exc_ma_r & ~lsu_exc_st_r}} & 5'h04) | + ({5{lsu_exc_acc_r & ~lsu_exc_st_r}} & 5'h05) | + ({5{lsu_exc_ma_r & lsu_exc_st_r}} & 5'h06) | + ({5{lsu_exc_acc_r & lsu_exc_st_r}} & 5'h07) + ) & ~{5{take_nmi}}; + + // + // Interrupts + // + // exceptions that are committed have already happened and will cause an int at E4 to wait a cycle + // or more if MSTATUS[MIE] is cleared. + // + // -in priority order, highest to lowest + // -single cycle window where a csr write to MIE/MSTATUS is at E4 when the other conditions for externals are met.
+ // Hold off externals for a cycle to make sure we are consistent with what was just written + assign mhwakeup_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[`MIP_MEIP] & mie_ns[`MIE_MEIE]; + assign ext_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[`MIP_MEIP] & mie_ns[`MIE_MEIE] & ~ignore_ext_int_due_to_lsu_stall; + assign ce_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[`MIP_MCEIP] & mie_ns[`MIE_MCEIE]; + assign soft_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[`MIP_MSIP] & mie_ns[`MIE_MSIE]; + assign timer_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[`MIP_MTIP] & mie_ns[`MIE_MTIE]; + + assign internal_dbg_halt_timers = internal_dbg_halt_mode_f & ~dcsr_single_step_running; + + + assign block_interrupts = ( (internal_dbg_halt_mode & (~dcsr_single_step_running | dec_tlu_i0_valid_r)) | // No ints in db-halt unless we are single stepping + internal_pmu_fw_halt_mode | i_cpu_halt_req_d1 |// No ints in PMU/FW halt. First we exit halt + take_nmi | // NMI is top priority + ebreak_to_debug_mode_r | // Heading to debug mode, hold off ints + synchronous_flush_r | // exception flush this cycle + exc_or_int_valid_r_d1 | // exc/int past cycle (need time for MIE to update) + mret_r | // mret in progress, for cases where ISR enables ints before mret + ext_int_freeze_d1 // Fast interrupt in progress (optional) + ); + + +if (pt.FAST_INTERRUPT_REDIRECT) begin + + rvdff #(4) fastint_ff (.*, .clk(free_clk), + .din({take_ext_int_start, take_ext_int_start_d1, take_ext_int_start_d2, ext_int_freeze}), + .dout({take_ext_int_start_d1, take_ext_int_start_d2, take_ext_int_start_d3, ext_int_freeze_d1})); + + assign take_ext_int_start = ext_int_ready & ~block_interrupts; + + assign ext_int_freeze = take_ext_int_start | take_ext_int_start_d1 | take_ext_int_start_d2 | take_ext_int_start_d3; + assign take_ext_int = take_ext_int_start_d3 & ~|lsu_fir_error[1:0]; + assign fast_int_meicpct = csr_meicpct & dec_csr_any_unq_d; // MEICPCT becomes illegal
if fast ints are enabled + + assign ignore_ext_int_due_to_lsu_stall = lsu_fastint_stall_any; +end +else begin + assign take_ext_int_start = 1'b0; + assign ext_int_freeze = 1'b0; + assign ext_int_freeze_d1 = 1'b0; + assign take_ext_int_start_d1 = 1'b0; + assign take_ext_int_start_d2 = 1'b0; + assign take_ext_int_start_d3 = 1'b0; + assign fast_int_meicpct = 1'b0; + assign ignore_ext_int_due_to_lsu_stall = 1'b0; + + assign take_ext_int = ext_int_ready & ~block_interrupts; +end + + assign take_ce_int = ce_int_ready & ~ext_int_ready & ~block_interrupts; + assign take_soft_int = soft_int_ready & ~ext_int_ready & ~ce_int_ready & ~block_interrupts; + assign take_timer_int = timer_int_ready & ~soft_int_ready & ~ext_int_ready & ~ce_int_ready & ~block_interrupts; + + assign take_reset = reset_delayed & mpc_reset_run_req; + assign take_nmi = nmi_int_detected & ~internal_pmu_fw_halt_mode & (~internal_dbg_halt_mode | (dcsr_single_step_running_f & dcsr[`DCSR_STEPIE] & ~dec_tlu_i0_valid_r & ~dcsr_single_step_done_f)) & + ~synchronous_flush_r & ~mret_r & ~take_reset & ~ebreak_to_debug_mode_r & (~ext_int_freeze_d1 | (take_ext_int_start_d3 & |lsu_fir_error[1:0])); + + assign interrupt_valid_r = take_ext_int | take_timer_int | take_soft_int | take_nmi | take_ce_int; + + + // Compute interrupt path: + // If vectored async is set in mtvec, flush path for interrupts is MTVEC + (4 * CAUSE); + assign vectored_path[31:1] = {mtvec[30:1], 1'b0} + {25'b0, exc_cause_r[4:0], 1'b0}; + assign interrupt_path[31:1] = take_nmi ? nmi_vec[31:1] : ((mtvec[0] == 1'b1) ?
vectored_path[31:1] : {mtvec[30:1], 1'b0}); + + assign sel_npc_r = lsu_i0_rfnpc_r | fence_i_r | iccm_repair_state_rfnpc | (i_cpu_run_req_d1 & ~interrupt_valid_r) | (rfpc_i0_r & ~dec_tlu_i0_valid_r); + assign sel_npc_resume = (i_cpu_run_req_d1 & pmu_fw_tlu_halted_f) | pause_expired_r; + + assign sel_fir_addr = take_ext_int_start_d3 & ~|lsu_fir_error[1:0]; + + assign synchronous_flush_r = i0_exception_valid_r | // exception + rfpc_i0_r | // rfpc + lsu_exc_valid_r | // lsu exception in either pipe 0 or pipe 1 + fence_i_r | // fence, a rfnpc + lsu_i0_rfnpc_r | // lsu dccm sb ecc + iccm_repair_state_rfnpc | // Iccm sb ecc + debug_resume_req_f | // resume from debug halt, fetch the dpc + sel_npc_resume | // resume from pmu/fw halt, or from pause and fetch the NPC + dec_tlu_wr_pause_r_d1 | // flush at start of pause + i0_trigger_hit_r; // trigger hit, ebreak or goto debug mode + + assign tlu_flush_lower_r = interrupt_valid_r | mret_r | synchronous_flush_r | take_halt | take_reset | take_ext_int_start; + + assign tlu_flush_path_r[31:1] = take_reset ? rst_vec[31:1] : + + ( ({31{sel_fir_addr}} & lsu_fir_addr[31:1]) | + ({31{~take_nmi & sel_npc_r}} & npc_r[31:1]) | + ({31{~take_nmi & rfpc_i0_r & dec_tlu_i0_valid_r & ~sel_npc_r}} & dec_tlu_i0_pc_r[31:1]) | + ({31{interrupt_valid_r & ~sel_fir_addr}} & interrupt_path[31:1]) | + ({31{(i0_exception_valid_r | lsu_exc_valid_r | + (i0_trigger_hit_r & ~trigger_hit_dmode_r)) & ~interrupt_valid_r & ~sel_fir_addr}} & {mtvec[30:1],1'b0}) | + ({31{~take_nmi & mret_r}} & mepc[31:1]) | + ({31{~take_nmi & debug_resume_req_f}} & dpc[31:1]) | + ({31{~take_nmi & sel_npc_resume}} & npc_r_d1[31:1]) ); + + rvdff #(31) flush_lower_ff (.*, .clk(e4e5_int_clk), + .din({tlu_flush_path_r[31:1]}), + .dout({tlu_flush_path_r_d1[31:1]})); + + assign dec_tlu_flush_lower_wb = tlu_flush_lower_r_d1; + assign dec_tlu_flush_lower_r = tlu_flush_lower_r; + assign dec_tlu_flush_path_r[31:1] = tlu_flush_path_r[31:1]; + + + // this is used to capture mepc, etc.
+ assign exc_or_int_valid_r = lsu_exc_valid_r | i0_exception_valid_r | interrupt_valid_r | (i0_trigger_hit_r & ~trigger_hit_dmode_r); + + + rvdff #(12) excinfo_wb_ff (.*, .clk(e4e5_int_clk), + .din({interrupt_valid_r, i0_exception_valid_r, exc_or_int_valid_r, + exc_cause_r[4:0], tlu_i0_commit_cmt & ~illegal_r, i0_trigger_hit_r, + take_nmi, pause_expired_r }), + .dout({interrupt_valid_r_d1, i0_exception_valid_r_d1, exc_or_int_valid_r_d1, + exc_cause_wb[4:0], i0_valid_wb, trigger_hit_r_d1, + take_nmi_r_d1, pause_expired_wb})); + + //---------------------------------------------------------------------- + // + // CSRs + // + //---------------------------------------------------------------------- + + + // ---------------------------------------------------------------------- + // MISA (RO) + // [31:30] XLEN - implementation width, 2'b01 - 32 bits + // [12] M - integer mul/div + // [8] I - RV32I + // [2] C - Compressed extension + `define MISA 12'h301 + + // MVENDORID, MARCHID, MIMPID, MHARTID + `define MVENDORID 12'hf11 + `define MARCHID 12'hf12 + `define MIMPID 12'hf13 + `define MHARTID 12'hf14 + + + // ---------------------------------------------------------------------- + // MSTATUS (RW) + // [12:11] MPP : Prior priv level, always 2'b11, not flopped + // [7] MPIE : Int enable previous [1] + // [3] MIE : Int enable [0] + `define MSTATUS 12'h300 + + + // When executing a MRET instruction, supposing MPP holds the value 3, MIE + // is set to MPIE; the privilege mode is changed to 3; MPIE is set to 1; and MPP is set to 3 + + assign dec_csr_wen_r_mod = dec_csr_wen_r & ~i0_trigger_hit_r & ~rfpc_i0_r; + assign wr_mstatus_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MSTATUS); + + // set this even if we don't go to fwhalt due to debug halt. We committed the inst, so ...
+ assign set_mie_pmu_fw_halt = ~mpmc_b_ns[1] & fw_halt_req; + + assign mstatus_ns[1:0] = ( ({2{~wr_mstatus_r & exc_or_int_valid_r}} & {mstatus[`MSTATUS_MIE], 1'b0}) | + ({2{ wr_mstatus_r & exc_or_int_valid_r}} & {dec_csr_wrdata_r[3], 1'b0}) | + ({2{mret_r & ~exc_or_int_valid_r}} & {1'b1, mstatus[1]}) | + ({2{set_mie_pmu_fw_halt}} & {mstatus[1], 1'b1}) | + ({2{wr_mstatus_r & ~exc_or_int_valid_r}} & {dec_csr_wrdata_r[7], dec_csr_wrdata_r[3]}) | + ({2{~wr_mstatus_r & ~exc_or_int_valid_r & ~mret_r & ~set_mie_pmu_fw_halt}} & mstatus[1:0]) ); + + // gate MIE if we are single stepping and DCSR[STEPIE] is off + assign mstatus_mie_ns = mstatus[`MSTATUS_MIE] & (~dcsr_single_step_running_f | dcsr[`DCSR_STEPIE]); + rvdff #(2) mstatus_ff (.*, .clk(free_clk), .din(mstatus_ns[1:0]), .dout(mstatus[1:0])); + + // ---------------------------------------------------------------------- + // MTVEC (RW) + // [31:2] BASE : Trap vector base address + // [1] - Reserved, not implemented, reads zero + // [0] MODE : 0 = Direct, 1 = Asyncs are vectored to BASE + (4 * CAUSE) + `define MTVEC 12'h305 + + assign wr_mtvec_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MTVEC); + assign mtvec_ns[30:0] = {dec_csr_wrdata_r[31:2], dec_csr_wrdata_r[0]} ; + rvdffe #(31) mtvec_ff (.*, .en(wr_mtvec_r), .din(mtvec_ns[30:0]), .dout(mtvec[30:0])); + + // ---------------------------------------------------------------------- + // MIP (RW) + // + // [30] MCEIP : (RO) M-Mode Correctable Error interrupt pending + // [11] MEIP : (RO) M-Mode external interrupt pending + // [7] MTIP : (RO) M-Mode timer interrupt pending + // [3] MSIP : (RO) M-Mode software interrupt pending + `define MIP 12'h344 + + assign ce_int = (mdccme_ce_req | miccme_ce_req | mice_ce_req); + + assign mip_ns[3:0] = {ce_int, mexintpend, timer_int_sync, soft_int_sync}; + rvdff #(4) mip_ff (.*, .clk(free_clk), .din(mip_ns[3:0]), .dout(mip[3:0])); + + // ---------------------------------------------------------------------- + // MIE (RW) + //
[30] MCEIE : (RW) M-Mode Correctable Error interrupt enable + // [11] MEIE : (RW) M-Mode external interrupt enable + // [7] MTIE : (RW) M-Mode timer interrupt enable + // [3] MSIE : (RW) M-Mode software interrupt enable + `define MIE 12'h304 + + assign wr_mie_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MIE); + assign mie_ns[3:0] = wr_mie_r ? {dec_csr_wrdata_r[30], dec_csr_wrdata_r[11], dec_csr_wrdata_r[7], dec_csr_wrdata_r[3]} : mie[3:0]; + rvdff #(4) mie_ff (.*, .clk(csr_wr_clk), .din(mie_ns[3:0]), .dout(mie[3:0])); + + + // ---------------------------------------------------------------------- + // MCYCLEL (RW) + // [31:0] : Lower Cycle count + + `define MCYCLEL 12'hb00 + + assign kill_ebreak_count_r = ebreak_to_debug_mode_r & dcsr[`DCSR_STOPC]; + + assign wr_mcyclel_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MCYCLEL); + + assign mcyclel_cout_in = ~(kill_ebreak_count_r | (dec_tlu_dbg_halted & dcsr[`DCSR_STOPC]) | dec_tlu_pmu_fw_halted | mcountinhibit[0]); + + assign {mcyclel_cout, mcyclel_inc[31:0]} = mcyclel[31:0] + {31'b0, mcyclel_cout_in}; + assign mcyclel_ns[31:0] = wr_mcyclel_r ? dec_csr_wrdata_r[31:0] : mcyclel_inc[31:0]; + + rvdffe #(32) mcyclel_ff (.*, .en(wr_mcyclel_r | mcyclel_cout_in), .din(mcyclel_ns[31:0]), .dout(mcyclel[31:0])); + rvdff #(1) mcyclef_cout_ff (.*, .clk(free_clk), .din(mcyclel_cout & ~wr_mcycleh_r), .dout(mcyclel_cout_f)); + // ---------------------------------------------------------------------- + // MCYCLEH (RW) + // [63:32] : Higher Cycle count + // Chained with mcyclel. Note: mcyclel overflow due to a mcycleh write gets ignored. + + `define MCYCLEH 12'hb80 + + assign wr_mcycleh_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MCYCLEH); + + assign mcycleh_inc[31:0] = mcycleh[31:0] + {31'b0, mcyclel_cout_f}; + assign mcycleh_ns[31:0] = wr_mcycleh_r ?
dec_csr_wrdata_r[31:0] : mcycleh_inc[31:0]; + + rvdffe #(32) mcycleh_ff (.*, .en(wr_mcycleh_r | mcyclel_cout_f), .din(mcycleh_ns[31:0]), .dout(mcycleh[31:0])); + + // ---------------------------------------------------------------------- + // MINSTRETL (RW) + // [31:0] : Lower Instruction retired count + // From the spec "Some CSRs, such as the instructions retired counter, instret, may be modified as side effects + // of instruction execution. In these cases, if a CSR access instruction reads a CSR, it reads the + // value prior to the execution of the instruction. If a CSR access instruction writes a CSR, the + // update occurs after the execution of the instruction. In particular, a value written to instret by + // one instruction will be the value read by the following instruction (i.e., the increment of instret + // caused by the first instruction retiring happens before the write of the new value)." + `define MINSTRETL 12'hb02 + + assign i0_valid_no_ebreak_ecall_r = tlu_i0_commit_cmt & ~(ebreak_r | ecall_r | ebreak_to_debug_mode_r | illegal_r | mcountinhibit[2]); + + assign wr_minstretl_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MINSTRETL); + + assign {minstretl_cout, minstretl_inc[31:0]} = minstretl[31:0] + {31'b0,i0_valid_no_ebreak_ecall_r}; + + assign minstret_enable = i0_valid_no_ebreak_ecall_r | wr_minstretl_r; + + assign minstretl_ns[31:0] = wr_minstretl_r ? dec_csr_wrdata_r[31:0] : minstretl_inc[31:0]; + rvdffe #(32) minstretl_ff (.*, .en(minstret_enable), .din(minstretl_ns[31:0]), .dout(minstretl[31:0])); + rvdff #(2) minstretf_cout_ff (.*, .clk(free_clk), .din({minstret_enable, minstretl_cout & ~wr_minstreth_r}), .dout({minstret_enable_f, minstretl_cout_f})); + + assign minstretl_read[31:0] = minstretl[31:0]; + // ---------------------------------------------------------------------- + // MINSTRETH (RW) + // [63:32] : Higher Instret count + // Chained with minstretl. Note: minstretl overflow due to a minstreth write gets ignored.
+ + `define MINSTRETH 12'hb82 + + assign wr_minstreth_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MINSTRETH); + + assign minstreth_inc[31:0] = minstreth[31:0] + {31'b0, minstretl_cout_f}; + assign minstreth_ns[31:0] = wr_minstreth_r ? dec_csr_wrdata_r[31:0] : minstreth_inc[31:0]; + rvdffe #(32) minstreth_ff (.*, .en(minstret_enable_f | wr_minstreth_r), .din(minstreth_ns[31:0]), .dout(minstreth[31:0])); + + assign minstreth_read[31:0] = minstreth_inc[31:0]; + + // ---------------------------------------------------------------------- + // MSCRATCH (RW) + // [31:0] : Scratch register + `define MSCRATCH 12'h340 + + assign wr_mscratch_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MSCRATCH); + + rvdffe #(32) mscratch_ff (.*, .en(wr_mscratch_r), .din(dec_csr_wrdata_r[31:0]), .dout(mscratch[31:0])); + + // ---------------------------------------------------------------------- + // MEPC (RW) + // [31:1] : Exception PC + `define MEPC 12'h341 + + // NPC: next PC, muxed from exu_npc_r, the flopped flush path, the reset vector, or the held value npc_r_d1 + + assign sel_exu_npc_r = ~dec_tlu_dbg_halted & ~tlu_flush_lower_r_d1 & dec_tlu_i0_valid_r; + assign sel_flush_npc_r = ~dec_tlu_dbg_halted & tlu_flush_lower_r_d1 & ~dec_tlu_flush_noredir_r_d1; + assign sel_hold_npc_r = ~sel_exu_npc_r & ~sel_flush_npc_r; + + assign npc_r[31:1] = ( ({31{sel_exu_npc_r}} & exu_npc_r[31:1]) | + ({31{~mpc_reset_run_req & reset_delayed}} & rst_vec[31:1]) | // init to reset vector for mpc halt on reset case + ({31{(sel_flush_npc_r)}} & tlu_flush_path_r_d1[31:1]) | + ({31{(sel_hold_npc_r)}} & npc_r_d1[31:1]) ); + + rvdffe #(31) npwbc_ff (.*, .en(sel_exu_npc_r | sel_flush_npc_r | reset_delayed), .din(npc_r[31:1]), .dout(npc_r_d1[31:1])); + + // PC has to be captured for exceptions and interrupts. For MRET, we could execute it and then take an + // interrupt before the next instruction.
+ assign pc0_valid_r = ~dec_tlu_dbg_halted & dec_tlu_i0_valid_r; + + assign pc_r[31:1] = ( ({31{ pc0_valid_r}} & dec_tlu_i0_pc_r[31:1]) | + ({31{~pc0_valid_r}} & pc_r_d1[31:1])); + + rvdffe #(31) pwbc_ff (.*, .en(pc0_valid_r), .din(pc_r[31:1]), .dout(pc_r_d1[31:1])); + + assign wr_mepc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MEPC); + + assign mepc_ns[31:1] = ( ({31{i0_exception_valid_r | lsu_exc_valid_r | mepc_trigger_hit_sel_pc_r}} & pc_r[31:1]) | + ({31{interrupt_valid_r}} & npc_r[31:1]) | + ({31{wr_mepc_r & ~exc_or_int_valid_r}} & dec_csr_wrdata_r[31:1]) | + ({31{~wr_mepc_r & ~exc_or_int_valid_r}} & mepc[31:1]) ); + + + rvdff #(31) mepc_ff (.*, .clk(e4e5_int_clk), .din(mepc_ns[31:1]), .dout(mepc[31:1])); + + // ---------------------------------------------------------------------- + // MCAUSE (RW) + // [31:0] : Exception Cause + `define MCAUSE 12'h342 + + assign wr_mcause_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MCAUSE); + assign mcause_sel_nmi_store = exc_or_int_valid_r & take_nmi & nmi_lsu_store_type; + assign mcause_sel_nmi_load = exc_or_int_valid_r & take_nmi & nmi_lsu_load_type; + assign mcause_sel_nmi_ext = exc_or_int_valid_r & take_nmi & |lsu_fir_error[1:0]; + // FIR value decoder + // 0 –no error + // 1 –uncorrectable ecc => f000_1000 + // 2 –dccm region access error => f000_1001 + // 3 –non dccm region access error => f000_1002 + assign mcause_fir_error_type[1:0] = {&lsu_fir_error[1:0], lsu_fir_error[1] & ~lsu_fir_error[0]}; + + assign mcause_ns[31:0] = ( ({32{mcause_sel_nmi_store}} & {32'hf000_0000}) | + ({32{mcause_sel_nmi_load}} & {32'hf000_0001}) | + ({32{mcause_sel_nmi_ext}} & {28'hf000_100, 2'b0, mcause_fir_error_type[1:0]}) | + ({32{exc_or_int_valid_r & ~take_nmi}} & {interrupt_valid_r, 26'b0, exc_cause_r[4:0]}) | + ({32{wr_mcause_r & ~exc_or_int_valid_r}} & dec_csr_wrdata_r[31:0]) | + ({32{~wr_mcause_r & ~exc_or_int_valid_r}} & mcause[31:0]) ); + + rvdff #(32) mcause_ff (.*, .clk(e4e5_int_clk), .din(mcause_ns[31:0]), 
.dout(mcause[31:0])); + // ---------------------------------------------------------------------- + // MSCAUSE (RW) + // [2:0] : Secondary exception Cause + `define MSCAUSE 12'h7ff + + assign wr_mscause_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MSCAUSE); + + assign mscause_type[2:0] = ( ({3{lsu_i0_exc_r}} & lsu_error_pkt_r.mscause[2:0]) | + ({3{i0_trigger_hit_r}} & 3'b001) | + ({3{inst_acc_r}} & {1'b0,dec_tlu_packet_r.icaf_type[1:0]}) + ); + + + assign mscause_ns[2:0] = ( ({3{exc_or_int_valid_r}} & mscause_type[2:0]) | + ({3{ wr_mscause_r & ~exc_or_int_valid_r}} & dec_csr_wrdata_r[2:0]) | + ({3{~wr_mscause_r & ~exc_or_int_valid_r}} & mscause[2:0]) + ); + + rvdff #(3) mscause_ff (.*, .clk(e4e5_int_clk), .din(mscause_ns[2:0]), .dout(mscause[2:0])); + // ---------------------------------------------------------------------- + // MTVAL (RW) + // [31:0] : Exception address if relevant + `define MTVAL 12'h343 + + assign wr_mtval_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MTVAL); + assign mtval_capture_pc_r = exc_or_int_valid_r & (ebreak_r | (inst_acc_r & ~inst_acc_second_r) | mepc_trigger_hit_sel_pc_r) & ~take_nmi; + assign mtval_capture_pc_plus2_r = exc_or_int_valid_r & (inst_acc_r & inst_acc_second_r) & ~take_nmi; + assign mtval_capture_inst_r = exc_or_int_valid_r & illegal_r & ~take_nmi; + assign mtval_capture_lsu_r = exc_or_int_valid_r & lsu_exc_valid_r & ~take_nmi; + assign mtval_clear_r = exc_or_int_valid_r & ~mtval_capture_pc_r & ~mtval_capture_inst_r & ~mtval_capture_lsu_r & ~mepc_trigger_hit_sel_pc_r; + + + assign mtval_ns[31:0] = (({32{mtval_capture_pc_r}} & {pc_r[31:1], 1'b0}) | + ({32{mtval_capture_pc_plus2_r}} & {pc_r[31:1] + 31'b1, 1'b0}) | + ({32{mtval_capture_inst_r}} & dec_illegal_inst[31:0]) | + ({32{mtval_capture_lsu_r}} & lsu_error_pkt_addr_r[31:0]) | + ({32{wr_mtval_r & ~interrupt_valid_r}} & dec_csr_wrdata_r[31:0]) | + ({32{~take_nmi & ~wr_mtval_r & ~mtval_capture_pc_r & ~mtval_capture_inst_r & ~mtval_clear_r & 
~mtval_capture_lsu_r}} & mtval[31:0]) ); + + + rvdff #(32) mtval_ff (.*, .clk(e4e5_int_clk), .din(mtval_ns[31:0]), .dout(mtval[31:0])); + + // ---------------------------------------------------------------------- + // MCGC (RW) Clock gating control + // [31:9] : Reserved, reads 0x0 + // [8] : misc_clk_override + // [7] : dec_clk_override + // [6] : unused + // [5] : ifu_clk_override + // [4] : lsu_clk_override + // [3] : bus_clk_override + // [2] : pic_clk_override + // [1] : dccm_clk_override + // [0] : icm_clk_override + // + `define MCGC 12'h7f8 + assign wr_mcgc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MCGC); + + rvdffe #(9) mcgc_ff (.*, .en(wr_mcgc_r), .din(dec_csr_wrdata_r[8:0]), .dout(mcgc[8:0])); + + assign dec_tlu_misc_clk_override = mcgc[8]; + assign dec_tlu_dec_clk_override = mcgc[7]; + assign dec_tlu_ifu_clk_override = mcgc[5]; + assign dec_tlu_lsu_clk_override = mcgc[4]; + assign dec_tlu_bus_clk_override = mcgc[3]; + assign dec_tlu_pic_clk_override = mcgc[2]; + assign dec_tlu_dccm_clk_override = mcgc[1]; + assign dec_tlu_icm_clk_override = mcgc[0]; + + // ---------------------------------------------------------------------- + // MFDC (RW) Feature Disable Control + // [31:19] : Reserved, reads 0x0 + // [18:16] : DMA QoS Prty + // [15:12] : Reserved, reads 0x0 + // [11] : Disable external load forwarding + // [10] : Disable dual issue + // [9] : Disable pic multiple ints + // [8] : Disable core ecc + // [7] : Unused, 0x0 + // [6] : Disable Sideeffect lsu posting + // [5:4] : Unused, 0x0 + // [3] : Disable branch prediction and return stack + // [2] : Disable write buffer coalescing + // [1] : Unused, 0x0 + // [0] : Disable pipelining - Enable single instruction execution + // + `define MFDC 12'h7f9 + + assign wr_mfdc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MFDC); + + rvdffe #(15) mfdc_ff (.*, .en(wr_mfdc_r), .din({mfdc_ns[14:0]}), .dout(mfdc_int[14:0])); + +if(pt.BUILD_AXI4==1) begin : axi4 + // flip poweron value of bit 6 for AXI 
build + assign mfdc_ns[14:0] = {~dec_csr_wrdata_r[18:16],dec_csr_wrdata_r[11:7], ~dec_csr_wrdata_r[6], dec_csr_wrdata_r[5:0]}; + assign mfdc[18:0] = {~mfdc_int[14:12], 4'b0, mfdc_int[11:7], ~mfdc_int[6], mfdc_int[5:0]}; +end +else begin + assign mfdc_ns[14:0] = {~dec_csr_wrdata_r[18:16],dec_csr_wrdata_r[11:0]}; + assign mfdc[18:0] = {~mfdc_int[14:12], 4'b0, mfdc_int[11:0]}; +end + + + assign dec_tlu_dma_qos_prty[2:0] = mfdc[18:16]; + assign dec_tlu_external_ldfwd_disable = mfdc[11]; + assign dec_tlu_core_ecc_disable = mfdc[8]; + assign dec_tlu_sideeffect_posted_disable = mfdc[6]; + assign dec_tlu_bpred_disable = mfdc[3]; + assign dec_tlu_wb_coalescing_disable = mfdc[2]; + assign dec_tlu_pipelining_disable = mfdc[0]; + + // ---------------------------------------------------------------------- + // MCPC (RW) Pause counter + // [31:0] : Reads 0x0, decs in the wb register in decode_ctl + // The write strobe below is suppressed when interrupt_valid_r or take_ext_int_start is asserted + + assign dec_tlu_wr_pause_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MCPC) & ~interrupt_valid_r & ~take_ext_int_start; + + // ---------------------------------------------------------------------- + // MRAC (RW) + // [31:0] : Region Access Control Register, 16 regions, {side_effect, cacheable} pairs + // (odd bit of each pair = side_effect, even bit = cacheable) + `define MRAC 12'h7c0 + + assign wr_mrac_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MRAC); + + // prevent pairs of 2'b11 (side_effect and cacheable both set): the cacheable bit is forced to 0 whenever side_effect is written as 1 + assign mrac_in[31:0] = {dec_csr_wrdata_r[31], dec_csr_wrdata_r[30] & ~dec_csr_wrdata_r[31], + dec_csr_wrdata_r[29], dec_csr_wrdata_r[28] & ~dec_csr_wrdata_r[29], + dec_csr_wrdata_r[27], dec_csr_wrdata_r[26] & ~dec_csr_wrdata_r[27], + dec_csr_wrdata_r[25], dec_csr_wrdata_r[24] & ~dec_csr_wrdata_r[25], + dec_csr_wrdata_r[23], dec_csr_wrdata_r[22] & ~dec_csr_wrdata_r[23], + dec_csr_wrdata_r[21], dec_csr_wrdata_r[20] & ~dec_csr_wrdata_r[21], + dec_csr_wrdata_r[19], dec_csr_wrdata_r[18] & ~dec_csr_wrdata_r[19], + dec_csr_wrdata_r[17], dec_csr_wrdata_r[16] & ~dec_csr_wrdata_r[17], + dec_csr_wrdata_r[15], dec_csr_wrdata_r[14] & 
~dec_csr_wrdata_r[15], + dec_csr_wrdata_r[13], dec_csr_wrdata_r[12] & ~dec_csr_wrdata_r[13], + dec_csr_wrdata_r[11], dec_csr_wrdata_r[10] & ~dec_csr_wrdata_r[11], + dec_csr_wrdata_r[9], dec_csr_wrdata_r[8] & ~dec_csr_wrdata_r[9], + dec_csr_wrdata_r[7], dec_csr_wrdata_r[6] & ~dec_csr_wrdata_r[7], + dec_csr_wrdata_r[5], dec_csr_wrdata_r[4] & ~dec_csr_wrdata_r[5], + dec_csr_wrdata_r[3], dec_csr_wrdata_r[2] & ~dec_csr_wrdata_r[3], + dec_csr_wrdata_r[1], dec_csr_wrdata_r[0] & ~dec_csr_wrdata_r[1]}; + + rvdffe #(32) mrac_ff (.*, .en(wr_mrac_r), .din(mrac_in[31:0]), .dout(mrac[31:0])); + + // drive the registered MRAC value to LSU/IFU + assign dec_tlu_mrac_ff[31:0] = mrac[31:0]; + + // ---------------------------------------------------------------------- + // MDEAU (WAR0) + // [31:0] : Dbus Error Address Unlock register + // No storage: only the write strobe is used, to clear the MDSEAC lock below + // + `define MDEAU 12'hbc0 + + assign wr_mdeau_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MDEAU); + + + // ---------------------------------------------------------------------- + // MDSEAC (R) + // [31:0] : Dbus Store Error Address Capture register + // (the capture enable below fires for imprecise load errors as well as store errors) + // + `define MDSEAC 12'hfc0 + + // only capture the error address if the MDSEAC reg is not locked; a capture sets the lock and a write to MDEAU clears it + assign mdseac_locked_ns = mdseac_en | (mdseac_locked_f & ~wr_mdeau_r); + + assign mdseac_en = (lsu_imprecise_error_store_any | lsu_imprecise_error_load_any) & ~nmi_int_detected_f & ~mdseac_locked_f; + + rvdffe #(32) mdseac_ff (.*, .en(mdseac_en), .din(lsu_imprecise_error_addr_any[31:0]), .dout(mdseac[31:0])); + + // ---------------------------------------------------------------------- + // MPMC (R0W1) + // [0] : FW halt + // [1] : Set MSTATUS[MIE] on halt + + `define MPMC 12'h7c6 + + assign wr_mpmc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MPMC); + + // allow the cycle of the dbg halt flush that contains the wr_mpmc_r to + // set the mstatus bit potentially, use delayed version of internal dbg halt.
+ assign fw_halt_req = wr_mpmc_r & dec_csr_wrdata_r[0] & ~internal_dbg_halt_mode_f2 & ~ext_int_freeze_d1; + + assign fw_halted_ns = (fw_halt_req | fw_halted) & ~set_mie_pmu_fw_halt; + assign mpmc_b_ns[1] = wr_mpmc_r ? ~dec_csr_wrdata_r[1] : ~mpmc[1]; + rvdff #(1) mpmc_ff (.*, .clk(csr_wr_clk), .din(mpmc_b_ns[1]), .dout(mpmc_b[1])); + rvdff #(1) fwh_ff (.*, .clk(free_clk), .din(fw_halted_ns), .dout(fw_halted)); + assign mpmc[1] = ~mpmc_b[1]; + + // ---------------------------------------------------------------------- + // MICECT (I-Cache error counter/threshold) + // [31:27] : Icache parity error threshold + // [26:0] : Icache parity error count + // Written thresholds above 26 saturate to 26 (csr_sat, shared by the other *ECT registers). + // mice_ce_req asserts once the count reaches 2**threshold (any count bit at or above bit position 'threshold' is set). + `define MICECT 12'h7f0 + + assign csr_sat[31:27] = (dec_csr_wrdata_r[31:27] > 5'd26) ? 5'd26 : dec_csr_wrdata_r[31:27]; + + assign wr_micect_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MICECT); + assign micect_inc[26:0] = micect[26:0] + {26'b0, ic_perr_r_d1}; + assign micect_ns = wr_micect_r ? {csr_sat[31:27], dec_csr_wrdata_r[26:0]} : {micect[31:27], micect_inc[26:0]}; + + rvdffe #(32) micect_ff (.*, .en(wr_micect_r | ic_perr_r_d1), .din(micect_ns[31:0]), .dout(micect[31:0])); + + assign mice_ce_req = |({32'hffffffff << micect[31:27]} & {5'b0, micect[26:0]}); + + // ---------------------------------------------------------------------- + // MICCMECT (ICCM error counter/threshold) + // [31:27] : ICCM parity error threshold + // [26:0] : ICCM parity error count + // The count increments on iccm_sbecc_r_d1 or iccm_dma_sb_error; a CSR write takes priority over an increment. + // NOTE(review): the increment sources are named sbecc/sb_error, which suggests single-bit ECC errors rather than parity - confirm. + `define MICCMECT 12'h7f1 + + assign wr_miccmect_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MICCMECT); + assign miccmect_inc[26:0] = miccmect[26:0] + {26'b0, iccm_sbecc_r_d1 | iccm_dma_sb_error}; + assign miccmect_ns = wr_miccmect_r ? 
{csr_sat[31:27], dec_csr_wrdata_r[26:0]} : {miccmect[31:27], miccmect_inc[26:0]}; + + rvdffe #(32) miccmect_ff (.*, .en(wr_miccmect_r | iccm_sbecc_r_d1 | iccm_dma_sb_error), .din(miccmect_ns[31:0]), .dout(miccmect[31:0])); + + assign miccme_ce_req = |({32'hffffffff << miccmect[31:27]} & {5'b0, miccmect[26:0]}); + + // ---------------------------------------------------------------------- + // MDCCMECT (DCCM error counter/threshold) + // [31:27] : DCCM parity error threshold + // [26:0] : DCCM parity error count + // The written threshold is the saturated csr_sat value; the count increments on lsu_single_ecc_error_r_d1. + // mdccme_ce_req asserts once the count reaches 2**threshold. + // NOTE(review): the increment source is a single-bit ECC error signal, not parity - confirm the header wording. + `define MDCCMECT 12'h7f2 + + assign wr_mdccmect_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MDCCMECT); + assign mdccmect_inc[26:0] = mdccmect[26:0] + {26'b0, lsu_single_ecc_error_r_d1}; + assign mdccmect_ns = wr_mdccmect_r ? {csr_sat[31:27], dec_csr_wrdata_r[26:0]} : {mdccmect[31:27], mdccmect_inc[26:0]}; + + rvdffe #(32) mdccmect_ff (.*, .en(wr_mdccmect_r | lsu_single_ecc_error_r_d1), .din(mdccmect_ns[31:0]), .dout(mdccmect[31:0])); + + assign mdccme_ce_req = |({32'hffffffff << mdccmect[31:27]} & {5'b0, mdccmect[26:0]}); + + + // ---------------------------------------------------------------------- + // MFDHT (Force Debug Halt Threshold) + // [5:1] : Halt timeout threshold (power of 2): force_halt asserts once force_halt_ctr_f reaches 2**mfdht[5:1] + // [0] : Halt timeout enabled (also the enable of the force-halt counter flop) + `define MFDHT 12'h7ce + + assign wr_mfdht_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MFDHT); + + assign mfdht_ns[5:0] = wr_mfdht_r ? dec_csr_wrdata_r[5:0] : mfdht[5:0]; + + rvdff #(6) mfdht_ff (.*, .clk(active_clk), .din(mfdht_ns[5:0]), .dout(mfdht[5:0])); + + // ---------------------------------------------------------------------- + // MFDHS(RW) + // [1] : LSU operation pending when debug halt threshold reached + // [0] : IFU operation pending when debug halt threshold reached + // Unless written by a CSR access, both bits are sampled on the rising edge of dbg_tlu_halted + // from the inverted lsu_idle_any_f / ifu_miss_state_idle_f flags. + + `define MFDHS 12'h7cf + + assign wr_mfdhs_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MFDHS); + + assign mfdhs_ns[1:0] = wr_mfdhs_r ? dec_csr_wrdata_r[1:0] : ((dbg_tlu_halted & ~dbg_tlu_halted_f) ? 
{~lsu_idle_any_f, ~ifu_miss_state_idle_f} : mfdhs[1:0]); + + rvdffs #(2) mfdhs_ff (.*, .clk(active_clk), .en(wr_mfdhs_r | dbg_tlu_halted), .din(mfdhs_ns[1:0]), .dout(mfdhs[1:0])); + + assign force_halt_ctr[31:0] = debug_halt_req_f ? (force_halt_ctr_f[31:0] + 32'b1) : (dbg_tlu_halted_f ? 32'b0 : force_halt_ctr_f[31:0]); + + rvdffs #(32) forcehaltctr_ff (.*, .clk(active_clk), .en(mfdht[0]), .din(force_halt_ctr[31:0]), .dout(force_halt_ctr_f[31:0])); + + assign force_halt = mfdht[0] & |(force_halt_ctr_f[31:0] & (32'hffffffff << mfdht[5:1])); + + + // ---------------------------------------------------------------------- + // MEIVT (External Interrupt Vector Table (R/W)) + // [31:10]: Base address (R/W) + // [9:0] : Reserved, reads 0x0 + `define MEIVT 12'hbc8 + + assign wr_meivt_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MEIVT); + + rvdffe #(22) meivt_ff (.*, .en(wr_meivt_r), .din(dec_csr_wrdata_r[31:10]), .dout(meivt[31:10])); + + + // ---------------------------------------------------------------------- + // MEIHAP (External Interrupt Handler Access Pointer (R)) + // [31:10]: Base address (mirrors MEIVT[31:10]; not separately stored or writable here) + // [9:2] : ClaimID (R), captured from pic_claimid whenever wr_meicpct_r fires + // [1:0] : Reserved, 0x0 + `define MEIHAP 12'hfc8 + + assign wr_meihap_r = wr_meicpct_r; + + rvdffe #(8) meihap_ff (.*, .en(wr_meihap_r), .din(pic_claimid[7:0]), .dout(meihap[9:2])); + + assign dec_tlu_meihap[31:2] = {meivt[31:10], meihap[9:2]}; + // ---------------------------------------------------------------------- + // MEICURPL (R/W) + // [31:4] : Reserved (read 0x0) + // [3:0] : CURRPRI - Priority level of current interrupt service routine (R/W) + `define MEICURPL 12'hbcc + + assign wr_meicurpl_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MEICURPL); + assign meicurpl_ns[3:0] = wr_meicurpl_r ? 
dec_csr_wrdata_r[3:0] : meicurpl[3:0]; + + rvdff #(4) meicurpl_ff (.*, .clk(csr_wr_clk), .din(meicurpl_ns[3:0]), .dout(meicurpl[3:0])); + + // PIC needs this reg + assign dec_tlu_meicurpl[3:0] = meicurpl[3:0]; + + + // ---------------------------------------------------------------------- + // MEICIDPL (R/W) + // [31:4] : Reserved (read 0x0) + // [3:0] : External Interrupt Claim ID's Priority Level Register + // A MEICPCT capture (pic_pl) takes priority over a direct CSR write to this register. + `define MEICIDPL 12'hbcb + + assign wr_meicidpl_r = (dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MEICIDPL)) | take_ext_int_start; + + assign meicidpl_ns[3:0] = wr_meicpct_r ? pic_pl[3:0] : (wr_meicidpl_r ? dec_csr_wrdata_r[3:0] : meicidpl[3:0]); + + rvdff #(4) meicidpl_ff (.*, .clk(free_clk), .din(meicidpl_ns[3:0]), .dout(meicidpl[3:0])); + + // ---------------------------------------------------------------------- + // MEICPCT (Capture CLAIMID in MEIHAP and PL in MEICIDPL) + // [31:1] : Reserved (read 0x0) + // [0] : Capture (W1, Read 0) + // The capture strobe also fires on take_ext_int_start. + // NOTE(review): the strobe is not qualified by dec_csr_wrdata_r[0]; any write to this address captures - confirm this is intended. + `define MEICPCT 12'hbca + + assign wr_meicpct_r = (dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MEICPCT)) | take_ext_int_start; + + // ---------------------------------------------------------------------- + // MEIPT (External Interrupt Priority Threshold) + // [31:4] : Reserved (read 0x0) + // [3:0] : PRITHRESH + `define MEIPT 12'hbc9 + + assign wr_meipt_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MEIPT); + assign meipt_ns[3:0] = wr_meipt_r ? 
dec_csr_wrdata_r[3:0] : meipt[3:0]; + + rvdff #(4) meipt_ff (.*, .clk(active_clk), .din(meipt_ns[3:0]), .dout(meipt[3:0])); + + // to PIC + assign dec_tlu_meipt[3:0] = meipt[3:0]; + // ---------------------------------------------------------------------- + // DCSR (R/W) (Only accessible in debug mode) + // [31:28] : xdebugver (hard coded to 0x4) RO + // [27:16] : 0x0, reserved + // [15] : ebreakm + // [14] : 0x0, reserved + // [13] : ebreaks (0x0 for this core) + // [12] : ebreaku (0x0 for this core) + // [11] : stepie + // [10] : stopcount + // [9] : 0x0 //stoptime + // [8:6] : cause (RO) + // [5:4] : 0x0, reserved + // [3] : nmip + // [2] : step + // [1:0] : prv (0x3 for this core) + // + `define DCSR 12'h7b0 + + // RV has clarified that 'priority 4' in the spec means top priority. + // 4. single step. 3. Debugger request. 2. Ebreak. 1. Trigger. + + // RV debug spec indicates a cause priority change for trigger hits during single step. + assign trigger_hit_for_dscr_cause_r_d1 = trigger_hit_dmode_r_d1 | (trigger_hit_r_d1 & dcsr_single_step_done_f); + + assign dcsr_cause[8:6] = ( ({3{dcsr_single_step_done_f & ~ebreak_to_debug_mode_r_d1 & ~trigger_hit_for_dscr_cause_r_d1 & ~debug_halt_req}} & 3'b100) | + ({3{debug_halt_req & ~ebreak_to_debug_mode_r_d1 & ~trigger_hit_for_dscr_cause_r_d1}} & 3'b011) | + ({3{ebreak_to_debug_mode_r_d1 & ~trigger_hit_for_dscr_cause_r_d1}} & 3'b001) | + ({3{trigger_hit_for_dscr_cause_r_d1}} & 3'b010)); + + assign wr_dcsr_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `DCSR); + + + + // Multiple halt enter requests can happen before we are halted. + // We have to continue to upgrade based on dcsr_cause priority but we can't downgrade. 
+ assign dcsr_cause_upgradeable = internal_dbg_halt_mode_f & (dcsr[8:6] == 3'b011); + assign enter_debug_halt_req_le = enter_debug_halt_req & (~dbg_tlu_halted | dcsr_cause_upgradeable); + + assign nmi_in_debug_mode = nmi_int_detected_f & internal_dbg_halt_mode_f; + assign dcsr_ns[15:2] = enter_debug_halt_req_le ? {dcsr[15:9], dcsr_cause[8:6], dcsr[5:2]} : + (wr_dcsr_r ? {dec_csr_wrdata_r[15], 3'b0, dec_csr_wrdata_r[11:10], 1'b0, dcsr[8:6], 2'b00, nmi_in_debug_mode | dcsr[3], dec_csr_wrdata_r[2]} : + {dcsr[15:4], nmi_in_debug_mode, dcsr[2]}); + + rvdffe #(14) dcsr_ff (.*, .en(enter_debug_halt_req_le | wr_dcsr_r | internal_dbg_halt_mode | take_nmi), .din(dcsr_ns[15:2]), .dout(dcsr[15:2])); + + // ---------------------------------------------------------------------- + // DPC (R/W) (Only accessible in debug mode) + // [31:0] : Debug PC + `define DPC 12'h7b1 + + assign wr_dpc_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `DPC); + assign dpc_capture_npc = dbg_tlu_halted & ~dbg_tlu_halted_f & ~request_debug_mode_done; + assign dpc_capture_pc = request_debug_mode_r; + + assign dpc_ns[31:1] = ( ({31{~dpc_capture_pc & ~dpc_capture_npc & wr_dpc_r}} & dec_csr_wrdata_r[31:1]) | + ({31{dpc_capture_pc}} & pc_r[31:1]) | + ({31{~dpc_capture_pc & dpc_capture_npc}} & npc_r[31:1]) ); + + rvdffe #(31) dpc_ff (.*, .en(wr_dpc_r | dpc_capture_pc | dpc_capture_npc), .din(dpc_ns[31:1]), .dout(dpc[31:1])); + + // ---------------------------------------------------------------------- + // DICAWICS (R/W) (Only accessible in debug mode) + // [31:25] : Reserved + // [24] : Array select, 0 is data, 1 is tag + // [23:22] : Reserved + // [21:20] : Way select + // [19:17] : Reserved + // [16:3] : Index + // [2:0] : Reserved + `define DICAWICS 12'h7c8 + + assign dicawics_ns[16:0] = {dec_csr_wrdata_r[24], dec_csr_wrdata_r[21:20], dec_csr_wrdata_r[16:3]}; + assign wr_dicawics_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `DICAWICS); + + 
rvdffe #(17) dicawics_ff (.*, .en(wr_dicawics_r), .din(dicawics_ns[16:0]), .dout(dicawics[16:0])); + + // ---------------------------------------------------------------------- + // DICAD0 (R/W) (Only accessible in debug mode) + // + // If dicawics[array] is 0 + // [31:0] : inst data + // + // If dicawics[array] is 1 + // [31:16] : Tag + // [15:7] : Reserved + // [6:4] : LRU + // [3:1] : Reserved + // [0] : Valid + `define DICAD0 12'h7c9 + + assign dicad0_ns[31:0] = wr_dicad0_r ? dec_csr_wrdata_r[31:0] : ifu_ic_debug_rd_data[31:0]; + + assign wr_dicad0_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `DICAD0); + + rvdffe #(32) dicad0_ff (.*, .en(wr_dicad0_r | ifu_ic_debug_rd_data_valid), .din(dicad0_ns[31:0]), .dout(dicad0[31:0])); + + // ---------------------------------------------------------------------- + // DICAD0H (R/W) (Only accessible in debug mode) + // + // If dicawics[array] is 0 + // [63:32] : inst data + // + `define DICAD0H 12'h7cc + + assign dicad0h_ns[31:0] = wr_dicad0h_r ? dec_csr_wrdata_r[31:0] : ifu_ic_debug_rd_data[63:32]; + + assign wr_dicad0h_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `DICAD0H); + + rvdffe #(32) dicad0h_ff (.*, .en(wr_dicad0h_r | ifu_ic_debug_rd_data_valid), .din(dicad0h_ns[31:0]), .dout(dicad0h[31:0])); + + +if (pt.ICACHE_ECC == 1) begin + // ---------------------------------------------------------------------- + // DICAD1 (R/W) (Only accessible in debug mode) + // [6:0] : ECC + `define DICAD1 12'h7ca + + assign dicad1_ns[6:0] = wr_dicad1_r ? 
dec_csr_wrdata_r[6:0] : ifu_ic_debug_rd_data[70:64]; + + assign wr_dicad1_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `DICAD1); + + rvdffs #(7) dicad1_ff (.*, .clk(active_clk), .en(wr_dicad1_r | ifu_ic_debug_rd_data_valid), .din(dicad1_ns[6:0]), .dout(dicad1_raw[6:0])); + + assign dicad1[31:0] = {25'b0, dicad1_raw[6:0]}; + +end +else begin + // ---------------------------------------------------------------------- + // DICAD1 (R/W) (Only accessible in debug mode) + // [3:0] : Parity + `define DICAD1 12'h7ca + + assign dicad1_ns[3:0] = wr_dicad1_r ? dec_csr_wrdata_r[3:0] : ifu_ic_debug_rd_data[67:64]; + + assign wr_dicad1_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `DICAD1); + + rvdffs #(4) dicad1_ff (.*, .clk(active_clk), .en(wr_dicad1_r | ifu_ic_debug_rd_data_valid), .din(dicad1_ns[3:0]), .dout(dicad1_raw[3:0])); + + assign dicad1[31:0] = {28'b0, dicad1_raw[3:0]}; +end + // ---------------------------------------------------------------------- + // DICAGO (R/W) (Only accessible in debug mode) + // [0] : Go + `define DICAGO 12'h7cb + +if (pt.ICACHE_ECC == 1) + assign dec_tlu_ic_diag_pkt.icache_wrdata[70:0] = {dicad1[6:0], dicad0h[31:0], dicad0[31:0]}; +else + assign dec_tlu_ic_diag_pkt.icache_wrdata[67:0] = {dicad1[3:0], dicad0h[31:0], dicad0[31:0]}; + + + assign dec_tlu_ic_diag_pkt.icache_dicawics[16:0] = dicawics[16:0]; + + assign icache_rd_valid = allow_dbg_halt_csr_write & dec_csr_any_unq_d & dec_i0_decode_d & ~dec_csr_wen_unq_d & (dec_csr_rdaddr_d[11:0] == `DICAGO); + assign icache_wr_valid = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `DICAGO); + + rvdff #(2) dicgo_ff (.*, .clk(active_clk), .din({icache_rd_valid, icache_wr_valid}), .dout({icache_rd_valid_f, icache_wr_valid_f})); + + assign dec_tlu_ic_diag_pkt.icache_rd_valid = icache_rd_valid_f; + assign dec_tlu_ic_diag_pkt.icache_wr_valid = icache_wr_valid_f; + + // 
---------------------------------------------------------------------- + // MTSEL (R/W) + // [1:0] : Trigger select : 00, 01, 10 are data/address triggers. 11 is inst count + `define MTSEL 12'h7a0 + + assign wr_mtsel_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MTSEL); + assign mtsel_ns[1:0] = wr_mtsel_r ? {dec_csr_wrdata_r[1:0]} : mtsel[1:0]; + + rvdff #(2) mtsel_ff (.*, .clk(csr_wr_clk), .din(mtsel_ns[1:0]), .dout(mtsel[1:0])); + + // ---------------------------------------------------------------------- + // MTDATA1 (R/W) + // [31:0] : Trigger Data 1 + `define MTDATA1 12'h7a1 + + // for triggers 0, 1, 2 and 3 aka Match Control + // [31:28] : type, hard coded to 0x2 + // [27] : dmode + // [26:21] : hard coded to 0x1f + // [20] : hit + // [19] : select (0 - address, 1 - data) + // [18] : timing, always 'before', reads 0x0 + // [17:12] : action, bits [17:13] not implemented and reads 0x0 + // [11] : chain + // [10:7] : match, bits [10:8] not implemented and reads 0x0 + // [6] : M + // [5:3] : not implemented, reads 0x0 + // [2] : execute + // [1] : store + // [0] : load + // + // decoder ring + // [27] : => 9 + // [20] : => 8 + // [19] : => 7 + // [12] : => 6 + // [11] : => 5 + // [7] : => 4 + // [6] : => 3 + // [2] : => 2 + // [1] : => 1 + // [0] : => 0 + + + // don't allow setting load-data. + assign tdata_load = dec_csr_wrdata_r[0] & ~dec_csr_wrdata_r[19]; + // don't allow setting execute-data. 
+ assign tdata_opcode = dec_csr_wrdata_r[2] & ~dec_csr_wrdata_r[19]; + // action=1 is only accepted together with DMODE, and DMODE itself is only accepted while halted in debug mode (dbg_tlu_halted_f) + assign tdata_action = (dec_csr_wrdata_r[27] & dbg_tlu_halted_f) & dec_csr_wrdata_r[12]; + + assign tdata_wrdata_r[9:0] = {dec_csr_wrdata_r[27] & dbg_tlu_halted_f, + dec_csr_wrdata_r[20:19], + tdata_action, + dec_csr_wrdata_r[11], + dec_csr_wrdata_r[7:6], + tdata_opcode, + dec_csr_wrdata_r[1], + tdata_load}; + + // If the DMODE bit is set, tdata1 can only be updated in debug_mode + // When a trigger is not being written, its packed hit bit (index 8) is sticky: it is ORed with update_hit_bit_r[n] + assign wr_mtdata1_t0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MTDATA1) & (mtsel[1:0] == 2'b0) & (~mtdata1_t0[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign mtdata1_t0_ns[9:0] = wr_mtdata1_t0_r ? tdata_wrdata_r[9:0] : + {mtdata1_t0[9], update_hit_bit_r[0] | mtdata1_t0[8], mtdata1_t0[7:0]}; + + assign wr_mtdata1_t1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MTDATA1) & (mtsel[1:0] == 2'b01) & (~mtdata1_t1[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign mtdata1_t1_ns[9:0] = wr_mtdata1_t1_r ? tdata_wrdata_r[9:0] : + {mtdata1_t1[9], update_hit_bit_r[1] | mtdata1_t1[8], mtdata1_t1[7:0]}; + + assign wr_mtdata1_t2_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MTDATA1) & (mtsel[1:0] == 2'b10) & (~mtdata1_t2[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign mtdata1_t2_ns[9:0] = wr_mtdata1_t2_r ? tdata_wrdata_r[9:0] : + {mtdata1_t2[9], update_hit_bit_r[2] | mtdata1_t2[8], mtdata1_t2[7:0]}; + + assign wr_mtdata1_t3_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MTDATA1) & (mtsel[1:0] == 2'b11) & (~mtdata1_t3[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign mtdata1_t3_ns[9:0] = wr_mtdata1_t3_r ? 
tdata_wrdata_r[9:0] : + {mtdata1_t3[9], update_hit_bit_r[3] | mtdata1_t3[8], mtdata1_t3[7:0]}; + + + rvdff #(10) mtdata1_t0_ff (.*, .clk(active_clk), .din(mtdata1_t0_ns[9:0]), .dout(mtdata1_t0[9:0])); + rvdff #(10) mtdata1_t1_ff (.*, .clk(active_clk), .din(mtdata1_t1_ns[9:0]), .dout(mtdata1_t1[9:0])); + rvdff #(10) mtdata1_t2_ff (.*, .clk(active_clk), .din(mtdata1_t2_ns[9:0]), .dout(mtdata1_t2[9:0])); + rvdff #(10) mtdata1_t3_ff (.*, .clk(active_clk), .din(mtdata1_t3_ns[9:0]), .dout(mtdata1_t3[9:0])); + + assign mtdata1_tsel_out[31:0] = ( ({32{(mtsel[1:0] == 2'b00)}} & {4'h2, mtdata1_t0[9], 6'b011111, mtdata1_t0[8:7], 6'b0, mtdata1_t0[6:5], 3'b0, mtdata1_t0[4:3], 3'b0, mtdata1_t0[2:0]}) | + ({32{(mtsel[1:0] == 2'b01)}} & {4'h2, mtdata1_t1[9], 6'b011111, mtdata1_t1[8:7], 6'b0, mtdata1_t1[6:5], 3'b0, mtdata1_t1[4:3], 3'b0, mtdata1_t1[2:0]}) | + ({32{(mtsel[1:0] == 2'b10)}} & {4'h2, mtdata1_t2[9], 6'b011111, mtdata1_t2[8:7], 6'b0, mtdata1_t2[6:5], 3'b0, mtdata1_t2[4:3], 3'b0, mtdata1_t2[2:0]}) | + ({32{(mtsel[1:0] == 2'b11)}} & {4'h2, mtdata1_t3[9], 6'b011111, mtdata1_t3[8:7], 6'b0, mtdata1_t3[6:5], 3'b0, mtdata1_t3[4:3], 3'b0, mtdata1_t3[2:0]})); + + assign trigger_pkt_any[0].select = mtdata1_t0[`MTDATA1_SEL]; + assign trigger_pkt_any[0].match = mtdata1_t0[`MTDATA1_MATCH]; + assign trigger_pkt_any[0].store = mtdata1_t0[`MTDATA1_ST]; + assign trigger_pkt_any[0].load = mtdata1_t0[`MTDATA1_LD]; + assign trigger_pkt_any[0].execute = mtdata1_t0[`MTDATA1_EXE]; + assign trigger_pkt_any[0].m = mtdata1_t0[`MTDATA1_M_ENABLED]; + + assign trigger_pkt_any[1].select = mtdata1_t1[`MTDATA1_SEL]; + assign trigger_pkt_any[1].match = mtdata1_t1[`MTDATA1_MATCH]; + assign trigger_pkt_any[1].store = mtdata1_t1[`MTDATA1_ST]; + assign trigger_pkt_any[1].load = mtdata1_t1[`MTDATA1_LD]; + assign trigger_pkt_any[1].execute = mtdata1_t1[`MTDATA1_EXE]; + assign trigger_pkt_any[1].m = mtdata1_t1[`MTDATA1_M_ENABLED]; + + assign trigger_pkt_any[2].select = mtdata1_t2[`MTDATA1_SEL]; + assign 
trigger_pkt_any[2].match = mtdata1_t2[`MTDATA1_MATCH]; + assign trigger_pkt_any[2].store = mtdata1_t2[`MTDATA1_ST]; + assign trigger_pkt_any[2].load = mtdata1_t2[`MTDATA1_LD]; + assign trigger_pkt_any[2].execute = mtdata1_t2[`MTDATA1_EXE]; + assign trigger_pkt_any[2].m = mtdata1_t2[`MTDATA1_M_ENABLED]; + + assign trigger_pkt_any[3].select = mtdata1_t3[`MTDATA1_SEL]; + assign trigger_pkt_any[3].match = mtdata1_t3[`MTDATA1_MATCH]; + assign trigger_pkt_any[3].store = mtdata1_t3[`MTDATA1_ST]; + assign trigger_pkt_any[3].load = mtdata1_t3[`MTDATA1_LD]; + assign trigger_pkt_any[3].execute = mtdata1_t3[`MTDATA1_EXE]; + assign trigger_pkt_any[3].m = mtdata1_t3[`MTDATA1_M_ENABLED]; + + // ---------------------------------------------------------------------- + // MTDATA2 (R/W) + // [31:0] : Trigger Data 2 + `define MTDATA2 12'h7a2 + + // If the DMODE bit is set, tdata2 can only be updated in debug_mode + assign wr_mtdata2_t0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MTDATA2) & (mtsel[1:0] == 2'b0) & (~mtdata1_t0[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign wr_mtdata2_t1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MTDATA2) & (mtsel[1:0] == 2'b01) & (~mtdata1_t1[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign wr_mtdata2_t2_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MTDATA2) & (mtsel[1:0] == 2'b10) & (~mtdata1_t2[`MTDATA1_DMODE] | dbg_tlu_halted_f); + assign wr_mtdata2_t3_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MTDATA2) & (mtsel[1:0] == 2'b11) & (~mtdata1_t3[`MTDATA1_DMODE] | dbg_tlu_halted_f); + + rvdffe #(32) mtdata2_t0_ff (.*, .en(wr_mtdata2_t0_r), .din(dec_csr_wrdata_r[31:0]), .dout(mtdata2_t0[31:0])); + rvdffe #(32) mtdata2_t1_ff (.*, .en(wr_mtdata2_t1_r), .din(dec_csr_wrdata_r[31:0]), .dout(mtdata2_t1[31:0])); + rvdffe #(32) mtdata2_t2_ff (.*, .en(wr_mtdata2_t2_r), .din(dec_csr_wrdata_r[31:0]), .dout(mtdata2_t2[31:0])); + rvdffe #(32) mtdata2_t3_ff (.*, .en(wr_mtdata2_t3_r), .din(dec_csr_wrdata_r[31:0]), .dout(mtdata2_t3[31:0])); + + 
assign mtdata2_tsel_out[31:0] = ( ({32{(mtsel[1:0] == 2'b00)}} & mtdata2_t0[31:0]) | + ({32{(mtsel[1:0] == 2'b01)}} & mtdata2_t1[31:0]) | + ({32{(mtsel[1:0] == 2'b10)}} & mtdata2_t2[31:0]) | + ({32{(mtsel[1:0] == 2'b11)}} & mtdata2_t3[31:0])); + + assign trigger_pkt_any[0].tdata2[31:0] = mtdata2_t0[31:0]; + assign trigger_pkt_any[1].tdata2[31:0] = mtdata2_t1[31:0]; + assign trigger_pkt_any[2].tdata2[31:0] = mtdata2_t2[31:0]; + assign trigger_pkt_any[3].tdata2[31:0] = mtdata2_t3[31:0]; + + + //---------------------------------------------------------------------- + // Performance Monitor Counters section starts + //---------------------------------------------------------------------- + `define MHPME_NOEVENT 10'd0 + `define MHPME_CLK_ACTIVE 10'd1 // OOP - out of pipe + `define MHPME_ICACHE_HIT 10'd2 // OOP + `define MHPME_ICACHE_MISS 10'd3 // OOP + `define MHPME_INST_COMMIT 10'd4 + `define MHPME_INST_COMMIT_16B 10'd5 + `define MHPME_INST_COMMIT_32B 10'd6 + `define MHPME_INST_ALIGNED 10'd7 // OOP + `define MHPME_INST_DECODED 10'd8 // OOP + `define MHPME_INST_MUL 10'd9 + `define MHPME_INST_DIV 10'd10 + `define MHPME_INST_LOAD 10'd11 + `define MHPME_INST_STORE 10'd12 + `define MHPME_INST_MALOAD 10'd13 + `define MHPME_INST_MASTORE 10'd14 + `define MHPME_INST_ALU 10'd15 + `define MHPME_INST_CSRREAD 10'd16 + `define MHPME_INST_CSRRW 10'd17 + `define MHPME_INST_CSRWRITE 10'd18 + `define MHPME_INST_EBREAK 10'd19 + `define MHPME_INST_ECALL 10'd20 + `define MHPME_INST_FENCE 10'd21 + `define MHPME_INST_FENCEI 10'd22 + `define MHPME_INST_MRET 10'd23 + `define MHPME_INST_BRANCH 10'd24 + `define MHPME_BRANCH_MP 10'd25 + `define MHPME_BRANCH_TAKEN 10'd26 + `define MHPME_BRANCH_NOTP 10'd27 + `define MHPME_FETCH_STALL 10'd28 // OOP + `define MHPME_ALGNR_STALL 10'd29 // OOP + `define MHPME_DECODE_STALL 10'd30 // OOP + `define MHPME_POSTSYNC_STALL 10'd31 // OOP + `define MHPME_PRESYNC_STALL 10'd32 // OOP + `define MHPME_LSU_SB_WB_STALL 10'd34 // OOP + `define MHPME_DMA_DCCM_STALL 
10'd35 // OOP + `define MHPME_DMA_ICCM_STALL 10'd36 // OOP + `define MHPME_EXC_TAKEN 10'd37 + `define MHPME_TIMER_INT_TAKEN 10'd38 + `define MHPME_EXT_INT_TAKEN 10'd39 + `define MHPME_FLUSH_LOWER 10'd40 + `define MHPME_BR_ERROR 10'd41 + `define MHPME_IBUS_TRANS 10'd42 // OOP + `define MHPME_DBUS_TRANS 10'd43 // OOP + `define MHPME_DBUS_MA_TRANS 10'd44 // OOP + `define MHPME_IBUS_ERROR 10'd45 // OOP + `define MHPME_DBUS_ERROR 10'd46 // OOP + `define MHPME_IBUS_STALL 10'd47 // OOP + `define MHPME_DBUS_STALL 10'd48 // OOP + `define MHPME_INT_DISABLED 10'd49 // OOP + `define MHPME_INT_STALLED 10'd50 // OOP + `define MHPME_INST_BITMANIP 10'd54 + `define MHPME_DBUS_LOAD 10'd55 + `define MHPME_DBUS_STORE 10'd56 + // Counts even during sleep state: these event IDs are >= 512, i.e. select bit [9] is set, which is what perfcnt_during_sleep tests + `define MHPME_SLEEP_CYC 10'd512 // OOP + `define MHPME_DMA_READ_ALL 10'd513 // OOP + `define MHPME_DMA_WRITE_ALL 10'd514 // OOP + `define MHPME_DMA_READ_DCCM 10'd515 // OOP + `define MHPME_DMA_WRITE_DCCM 10'd516 // OOP + + // Pack the event selects into a vector for genvar (index 0..3 corresponds to mhpme3..mhpme6) + assign mhpme_vec[0][9:0] = mhpme3[9:0]; + assign mhpme_vec[1][9:0] = mhpme4[9:0]; + assign mhpme_vec[2][9:0] = mhpme5[9:0]; + assign mhpme_vec[3][9:0] = mhpme6[9:0]; + + // only consider committed itypes: the itype is forced to 0 unless tlu_i0_commit_cmt is set + //logic [3:0] pmu_i0_itype_qual; + assign pmu_i0_itype_qual[3:0] = dec_tlu_packet_r.pmu_i0_itype[3:0] & {4{tlu_i0_commit_cmt}}; + + // Generate the muxed incs for all counters based on event type; counter i is gated off by mcountinhibit[i+3]. NOTE(review): MHPME_DECODE_STALL is decoded twice in the mux below; the second term is redundant (harmless in an OR) and could be dropped. + for (genvar i=0 ; i < 4; i++) begin + assign mhpmc_inc_r[i] = {{~mcountinhibit[i+3]}} & + ( + ({1{(mhpme_vec[i][9:0] == `MHPME_CLK_ACTIVE )}} & 1'b1) | + ({1{(mhpme_vec[i][9:0] == `MHPME_ICACHE_HIT )}} & {ifu_pmu_ic_hit}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_ICACHE_MISS )}} & {ifu_pmu_ic_miss}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_COMMIT )}} & {tlu_i0_commit_cmt & ~illegal_r}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_COMMIT_16B )}} & {tlu_i0_commit_cmt & ~exu_pmu_i0_pc4 & ~illegal_r}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_COMMIT_32B )}} & 
{tlu_i0_commit_cmt & exu_pmu_i0_pc4 & ~illegal_r}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_ALIGNED )}} & ifu_pmu_instr_aligned) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_DECODED )}} & dec_pmu_instr_decoded) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DECODE_STALL )}} & {dec_pmu_decode_stall}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_MUL )}} & {(pmu_i0_itype_qual == MUL)}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_DIV )}} & {dec_tlu_packet_r.pmu_divide & tlu_i0_commit_cmt}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_LOAD )}} & {(pmu_i0_itype_qual == LOAD)}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_STORE )}} & {(pmu_i0_itype_qual == STORE)}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_MALOAD )}} & {(pmu_i0_itype_qual == LOAD)} & + {1{dec_tlu_packet_r.pmu_lsu_misaligned}}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_MASTORE )}} & {(pmu_i0_itype_qual == STORE)} & + {1{dec_tlu_packet_r.pmu_lsu_misaligned}}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_ALU )}} & {(pmu_i0_itype_qual == ALU)}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_CSRREAD )}} & {(pmu_i0_itype_qual == CSRREAD)}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_CSRWRITE )}} & {(pmu_i0_itype_qual == CSRWRITE)})| + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_CSRRW )}} & {(pmu_i0_itype_qual == CSRRW)}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_EBREAK )}} & {(pmu_i0_itype_qual == EBREAK)}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_ECALL )}} & {(pmu_i0_itype_qual == ECALL)}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_FENCE )}} & {(pmu_i0_itype_qual == FENCE)}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_FENCEI )}} & {(pmu_i0_itype_qual == FENCEI)}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_MRET )}} & {(pmu_i0_itype_qual == MRET)}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_BRANCH )}} & { + ((pmu_i0_itype_qual == CONDBR) | (pmu_i0_itype_qual == JAL))}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_BRANCH_MP )}} & {exu_pmu_i0_br_misp & tlu_i0_commit_cmt}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_BRANCH_TAKEN )}} & {exu_pmu_i0_br_ataken & 
tlu_i0_commit_cmt}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_BRANCH_NOTP )}} & {dec_tlu_packet_r.pmu_i0_br_unpred & tlu_i0_commit_cmt}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_FETCH_STALL )}} & { ifu_pmu_fetch_stall}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DECODE_STALL )}} & { dec_pmu_decode_stall}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_POSTSYNC_STALL )}} & {dec_pmu_postsync_stall}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_PRESYNC_STALL )}} & {dec_pmu_presync_stall}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_LSU_SB_WB_STALL )}} & { lsu_store_stall_any}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DMA_DCCM_STALL )}} & { dma_dccm_stall_any}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DMA_ICCM_STALL )}} & { dma_iccm_stall_any}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_EXC_TAKEN )}} & { (i0_exception_valid_r | i0_trigger_hit_r | lsu_exc_valid_r)}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_TIMER_INT_TAKEN )}} & { take_timer_int}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_EXT_INT_TAKEN )}} & { take_ext_int}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_FLUSH_LOWER )}} & { tlu_flush_lower_r}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_BR_ERROR )}} & {(dec_tlu_br0_error_r | dec_tlu_br0_start_error_r) & rfpc_i0_r}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_IBUS_TRANS )}} & {ifu_pmu_bus_trxn}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DBUS_TRANS )}} & {lsu_pmu_bus_trxn}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DBUS_MA_TRANS )}} & {lsu_pmu_bus_misaligned}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_IBUS_ERROR )}} & {ifu_pmu_bus_error}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DBUS_ERROR )}} & {lsu_pmu_bus_error}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_IBUS_STALL )}} & {ifu_pmu_bus_busy}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DBUS_STALL )}} & {lsu_pmu_bus_busy}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INT_DISABLED )}} & {~mstatus[`MSTATUS_MIE]}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INT_STALLED )}} & {~mstatus[`MSTATUS_MIE] & |(mip[3:0] & mie[3:0])}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_INST_BITMANIP )}} & {(pmu_i0_itype_qual == BITMANIPU)}) | + 
({1{(mhpme_vec[i][9:0] == `MHPME_DBUS_LOAD )}} & {tlu_i0_commit_cmt & lsu_pmu_load_external_r}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DBUS_STORE )}} & {tlu_i0_commit_cmt & lsu_pmu_store_external_r}) | + // These count even during sleep + ({1{(mhpme_vec[i][9:0] == `MHPME_SLEEP_CYC )}} & {dec_tlu_pmu_fw_halted}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DMA_READ_ALL )}} & {dma_pmu_any_read}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DMA_WRITE_ALL )}} & {dma_pmu_any_write}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DMA_READ_DCCM )}} & {dma_pmu_dccm_read}) | + ({1{(mhpme_vec[i][9:0] == `MHPME_DMA_WRITE_DCCM )}} & {dma_pmu_dccm_write}) + ); + end + + + rvdff #(1) pmu0inc_ff (.*, .clk(free_clk), .din(mhpmc_inc_r[0]), .dout(mhpmc_inc_r_d1[0])); + rvdff #(1) pmu1inc_ff (.*, .clk(free_clk), .din(mhpmc_inc_r[1]), .dout(mhpmc_inc_r_d1[1])); + rvdff #(1) pmu2inc_ff (.*, .clk(free_clk), .din(mhpmc_inc_r[2]), .dout(mhpmc_inc_r_d1[2])); + rvdff #(1) pmu3inc_ff (.*, .clk(free_clk), .din(mhpmc_inc_r[3]), .dout(mhpmc_inc_r_d1[3])); + rvdff #(1) perfhlt_ff (.*, .clk(free_clk), .din(perfcnt_halted), .dout(perfcnt_halted_d1)); + + assign perfcnt_halted = ((dec_tlu_dbg_halted & dcsr[`DCSR_STOPC]) | dec_tlu_pmu_fw_halted); + assign perfcnt_during_sleep[3:0] = {4{~(dec_tlu_dbg_halted & dcsr[`DCSR_STOPC])}} & {mhpme_vec[3][9],mhpme_vec[2][9],mhpme_vec[1][9],mhpme_vec[0][9]}; + + assign dec_tlu_perfcnt0 = mhpmc_inc_r_d1[0] & ~(perfcnt_halted_d1 & ~perfcnt_during_sleep[0]); + assign dec_tlu_perfcnt1 = mhpmc_inc_r_d1[1] & ~(perfcnt_halted_d1 & ~perfcnt_during_sleep[1]); + assign dec_tlu_perfcnt2 = mhpmc_inc_r_d1[2] & ~(perfcnt_halted_d1 & ~perfcnt_during_sleep[2]); + assign dec_tlu_perfcnt3 = mhpmc_inc_r_d1[3] & ~(perfcnt_halted_d1 & ~perfcnt_during_sleep[3]); + + // ---------------------------------------------------------------------- + // MHPMC3H(RW), MHPMC3(RW) + // [63:32][31:0] : Hardware Performance Monitor Counter 3 + `define MHPMC3 12'hB03 + `define MHPMC3H 12'hB83 + + assign mhpmc3_wr_en0 = 
dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MHPMC3); + assign mhpmc3_wr_en1 = (~perfcnt_halted | perfcnt_during_sleep[0]) & (|(mhpmc_inc_r[0])); + assign mhpmc3_wr_en = mhpmc3_wr_en0 | mhpmc3_wr_en1; + assign mhpmc3_incr[63:0] = {mhpmc3h[31:0],mhpmc3[31:0]} + {63'b0,mhpmc_inc_r[0]}; + assign mhpmc3_ns[31:0] = mhpmc3_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc3_incr[31:0]; + rvdffe #(32) mhpmc3_ff (.*, .en(mhpmc3_wr_en), .din(mhpmc3_ns[31:0]), .dout(mhpmc3[31:0])); + + assign mhpmc3h_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MHPMC3H); + assign mhpmc3h_wr_en = mhpmc3h_wr_en0 | mhpmc3_wr_en1; + assign mhpmc3h_ns[31:0] = mhpmc3h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc3_incr[63:32]; + rvdffe #(32) mhpmc3h_ff (.*, .en(mhpmc3h_wr_en), .din(mhpmc3h_ns[31:0]), .dout(mhpmc3h[31:0])); + + // ---------------------------------------------------------------------- + // MHPMC4H(RW), MHPMC4(RW) + // [63:32][31:0] : Hardware Performance Monitor Counter 4 + `define MHPMC4 12'hB04 + `define MHPMC4H 12'hB84 + + assign mhpmc4_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MHPMC4); + assign mhpmc4_wr_en1 = (~perfcnt_halted | perfcnt_during_sleep[1]) & (|(mhpmc_inc_r[1])); + assign mhpmc4_wr_en = mhpmc4_wr_en0 | mhpmc4_wr_en1; + assign mhpmc4_incr[63:0] = {mhpmc4h[31:0],mhpmc4[31:0]} + {63'b0,mhpmc_inc_r[1]}; + assign mhpmc4_ns[31:0] = mhpmc4_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc4_incr[31:0]; + rvdffe #(32) mhpmc4_ff (.*, .en(mhpmc4_wr_en), .din(mhpmc4_ns[31:0]), .dout(mhpmc4[31:0])); + + assign mhpmc4h_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MHPMC4H); + assign mhpmc4h_wr_en = mhpmc4h_wr_en0 | mhpmc4_wr_en1; + assign mhpmc4h_ns[31:0] = mhpmc4h_wr_en0 ? 
dec_csr_wrdata_r[31:0] : mhpmc4_incr[63:32]; + rvdffe #(32) mhpmc4h_ff (.*, .en(mhpmc4h_wr_en), .din(mhpmc4h_ns[31:0]), .dout(mhpmc4h[31:0])); + + // ---------------------------------------------------------------------- + // MHPMC5H(RW), MHPMC5(RW) + // [63:32][31:0] : Hardware Performance Monitor Counter 5 + `define MHPMC5 12'hB05 + `define MHPMC5H 12'hB85 + + assign mhpmc5_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MHPMC5); + assign mhpmc5_wr_en1 = (~perfcnt_halted | perfcnt_during_sleep[2]) & (|(mhpmc_inc_r[2])); + assign mhpmc5_wr_en = mhpmc5_wr_en0 | mhpmc5_wr_en1; + assign mhpmc5_incr[63:0] = {mhpmc5h[31:0],mhpmc5[31:0]} + {63'b0,mhpmc_inc_r[2]}; + assign mhpmc5_ns[31:0] = mhpmc5_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc5_incr[31:0]; + rvdffe #(32) mhpmc5_ff (.*, .en(mhpmc5_wr_en), .din(mhpmc5_ns[31:0]), .dout(mhpmc5[31:0])); + + assign mhpmc5h_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MHPMC5H); + assign mhpmc5h_wr_en = mhpmc5h_wr_en0 | mhpmc5_wr_en1; + assign mhpmc5h_ns[31:0] = mhpmc5h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc5_incr[63:32]; + rvdffe #(32) mhpmc5h_ff (.*, .en(mhpmc5h_wr_en), .din(mhpmc5h_ns[31:0]), .dout(mhpmc5h[31:0])); + + // ---------------------------------------------------------------------- + // MHPMC6H(RW), MHPMC6(RW) + // [63:32][31:0] : Hardware Performance Monitor Counter 6 + `define MHPMC6 12'hB06 + `define MHPMC6H 12'hB86 + + assign mhpmc6_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MHPMC6); + assign mhpmc6_wr_en1 = (~perfcnt_halted | perfcnt_during_sleep[3]) & (|(mhpmc_inc_r[3])); + assign mhpmc6_wr_en = mhpmc6_wr_en0 | mhpmc6_wr_en1; + assign mhpmc6_incr[63:0] = {mhpmc6h[31:0],mhpmc6[31:0]} + {63'b0,mhpmc_inc_r[3]}; + assign mhpmc6_ns[31:0] = mhpmc6_wr_en0 ? 
dec_csr_wrdata_r[31:0] : mhpmc6_incr[31:0]; + rvdffe #(32) mhpmc6_ff (.*, .en(mhpmc6_wr_en), .din(mhpmc6_ns[31:0]), .dout(mhpmc6[31:0])); + + assign mhpmc6h_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MHPMC6H); + assign mhpmc6h_wr_en = mhpmc6h_wr_en0 | mhpmc6_wr_en1; + assign mhpmc6h_ns[31:0] = mhpmc6h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc6_incr[63:32]; + rvdffe #(32) mhpmc6h_ff (.*, .en(mhpmc6h_wr_en), .din(mhpmc6h_ns[31:0]), .dout(mhpmc6h[31:0])); + + // ---------------------------------------------------------------------- + // MHPME3(RW) + // [9:0] : Hardware Performance Monitor Event 3 + `define MHPME3 12'h323 + + // we only have events 0-56, 512-516, HPME* are WARL so saturate otherwise + assign event_saturate_r[9:0] = ((dec_csr_wrdata_r[9:0] > 10'd516) | (|dec_csr_wrdata_r[31:10])) ? 10'd516 : dec_csr_wrdata_r[9:0]; + + assign wr_mhpme3_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MHPME3); + rvdffs #(10) mhpme3_ff (.*, .clk(active_clk), .en(wr_mhpme3_r), .din(event_saturate_r[9:0]), .dout(mhpme3[9:0])); + // ---------------------------------------------------------------------- + // MHPME4(RW) + // [9:0] : Hardware Performance Monitor Event 4 + `define MHPME4 12'h324 + + assign wr_mhpme4_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MHPME4); + rvdffs #(10) mhpme4_ff (.*, .clk(active_clk), .en(wr_mhpme4_r), .din(event_saturate_r[9:0]), .dout(mhpme4[9:0])); + // ---------------------------------------------------------------------- + // MHPME5(RW) + // [9:0] : Hardware Performance Monitor Event 5 + `define MHPME5 12'h325 + + assign wr_mhpme5_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MHPME5); + rvdffs #(10) mhpme5_ff (.*, .clk(active_clk), .en(wr_mhpme5_r), .din(event_saturate_r[9:0]), .dout(mhpme5[9:0])); + // ---------------------------------------------------------------------- + // MHPME6(RW) + // [9:0] : Hardware Performance Monitor Event 6 + `define MHPME6 12'h326 + + assign wr_mhpme6_r = dec_csr_wen_r_mod & 
(dec_csr_wraddr_r[11:0] == `MHPME6); + rvdffs #(10) mhpme6_ff (.*, .clk(active_clk), .en(wr_mhpme6_r), .din(event_saturate_r[9:0]), .dout(mhpme6[9:0])); + + //---------------------------------------------------------------------- + // Performance Monitor Counters section ends + //---------------------------------------------------------------------- + // ---------------------------------------------------------------------- + + // MCOUNTINHIBIT(RW) + // [31:7] : Reserved, read 0x0 + // [6] : HPM6 disable + // [5] : HPM5 disable + // [4] : HPM4 disable + // [3] : HPM3 disable + // [2] : MINSTRET disable + // [1] : reserved, read 0x0 + // [0] : MCYCLE disable + + `define MCOUNTINHIBIT 12'h320 + + assign wr_mcountinhibit_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == `MCOUNTINHIBIT); + rvdffs #(6) mcountinhibit_ff (.*, .clk(active_clk), .en(wr_mcountinhibit_r), .din({dec_csr_wrdata_r[6:2], dec_csr_wrdata_r[0]}), .dout({mcountinhibit[6:2], mcountinhibit[0]})); + assign mcountinhibit[1] = 1'b0; + + //-------------------------------------------------------------------------------- + // trace + //-------------------------------------------------------------------------------- + + rvoclkhdr trace_cgc ( .en(i0_valid_wb | exc_or_int_valid_r_d1 | interrupt_valid_r_d1 | dec_tlu_i0_valid_wb1 | + dec_tlu_i0_exc_valid_wb1 | dec_tlu_int_valid_wb1 | clk_override), .l1clk(trace_tclk), .* ); + rvdff #(8) traceff (.*, .clk(trace_tclk), + .din ({i0_valid_wb, + i0_exception_valid_r_d1 | lsu_i0_exc_r_d1 | (trigger_hit_r_d1 & ~trigger_hit_dmode_r_d1), + exc_cause_wb[4:0], + interrupt_valid_r_d1}), + .dout({dec_tlu_i0_valid_wb1, + dec_tlu_i0_exc_valid_wb1, + dec_tlu_exc_cause_wb1[4:0], + dec_tlu_int_valid_wb1})); + + assign dec_tlu_mtval_wb1 = mtval[31:0]; + + // end trace + //-------------------------------------------------------------------------------- + + + // ---------------------------------------------------------------------- + // CSR read mux + // 
---------------------------------------------------------------------- + +// file "csrdecode" is human readable file that has all of the CSR decodes defined and is part of git repo +// modify this file as needed + +// to generate all the equations below from "csrdecode" except legal equation: + +// 1) coredecode -in csrdecode > corecsrdecode.e + +// 2) espresso -Dso -oeqntott corecsrdecode.e | addassign > csrequations + +// to generate the legal CSR equation below: + +// 1) coredecode -in csrdecode -legal > csrlegal.e + +// 2) espresso -Dso -oeqntott csrlegal.e | addassign > csrlegal_equation +// coredecode -in csrdecode > corecsrdecode.e; espresso -Dso -oeqntott corecsrdecode.e | addassign > csrequations; coredecode -in csrdecode -legal > csrlegal.e; espresso -Dso -oeqntott csrlegal.e | addassign > csrlegal_equation + +assign csr_misa = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[0]); + +assign csr_mvendorid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); + +assign csr_marchid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mimpid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[3] + &dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); + +assign csr_mhartid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7] + &dec_csr_rdaddr_d[2]); + +assign csr_mstatus = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]); + +assign csr_mtvec = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); + +assign csr_mip = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[2]); + +assign csr_mie = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]); + +assign csr_mcyclel = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] + 
&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]); + +assign csr_mcycleh = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]); + +assign csr_minstretl = (!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_minstreth = (!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mscratch = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mepc = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_mcause = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mscause = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[2]); + +assign csr_mtval = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_mrac = (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]); + +assign csr_dmst = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[3] + &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]); + +assign csr_mdseac = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]); + +assign csr_meihap = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10] + &dec_csr_rdaddr_d[3]); + +assign csr_meivt = (!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] + &!dec_csr_rdaddr_d[0]); + +assign csr_meipt = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_meicurpl = 
(dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[2]); + +assign csr_meicidpl = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); + +assign csr_dcsr = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[0]); + +assign csr_mcgc = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[0]); + +assign csr_mfdc = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); + +assign csr_dpc = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[0]); + +assign csr_mtsel = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mtdata1 = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[0]); + +assign csr_mtdata2 = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[1]); + +assign csr_mhpmc3 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[0]); + +assign csr_mhpmc4 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mhpmc5 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_mhpmc6 = (!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mhpmc3h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_mhpmc4h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + 
&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mhpmc5h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_mhpmc6h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mhpme3 = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[0]); + +assign csr_mhpme4 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] + &!dec_csr_rdaddr_d[0]); + +assign csr_mhpme5 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]); + +assign csr_mhpme6 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1] + &!dec_csr_rdaddr_d[0]); + +assign csr_mcountinhibit = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[0]); + +assign csr_mpmc = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]); + +assign csr_mcpc = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]); + +assign csr_meicpct = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mdeau = (!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[7] + &dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[3]); + +assign csr_micect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_miccmect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[0]); + +assign csr_mdccmect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[4] + 
&dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mfdht = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_mfdhs = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[2] + &dec_csr_rdaddr_d[0]); + +assign csr_dicawics = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] + &!dec_csr_rdaddr_d[0]); + +assign csr_dicad0h = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[3] + &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]); + +assign csr_dicad0 = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); + +assign csr_dicad1 = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); + +assign csr_dicago = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); + +assign presync = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[7] + &dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]) | (dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]) | (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | ( + dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]); + +assign postsync = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] + &dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + 
&!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[10] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[0]) | ( + !dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]) | ( + dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]); + +assign legal = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | ( + dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] + &!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10] + &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]) | (dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[0]) | ( + !dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + &dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | ( + !dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]) | (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10] + &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7] + &dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4] + 
&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]) | ( + !dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]) | (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11] + &dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[1]) | ( + dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[11] + &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4] + &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]) | ( + !dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[2]) | (dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]) | (!dec_csr_rdaddr_d[11] + &dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[11] + 
&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | ( + !dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + &!dec_csr_rdaddr_d[2]) | (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10] + &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[2]) | (!dec_csr_rdaddr_d[11] + &dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] + &!dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10] + &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[6] + &!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[1]) | (!dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]) | ( + !dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5] + &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1] + &!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10] + &dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7] + &!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[3]) | ( + !dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + &dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + &dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4]) | (dec_csr_rdaddr_d[11] + &!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9]&dec_csr_rdaddr_d[8] + &!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[3]) | ( + dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + 
&dec_csr_rdaddr_d[8]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + &dec_csr_rdaddr_d[4]); + +assign dec_tlu_presync_d = presync & dec_csr_any_unq_d & ~dec_csr_wen_unq_d; +assign dec_tlu_postsync_d = postsync & dec_csr_any_unq_d; + +assign valid_csr = ( legal & (~(csr_dcsr | csr_dpc | csr_dmst | csr_dicawics | csr_dicad0 | csr_dicad0h | csr_dicad1 | csr_dicago) | dbg_tlu_halted_f) + & ~fast_int_meicpct); + +assign dec_csr_legal_d = ( dec_csr_any_unq_d & + valid_csr & // of a valid CSR + ~(dec_csr_wen_unq_d & (csr_mvendorid | csr_marchid | csr_mimpid | csr_mhartid | csr_mdseac | csr_meihap)) // that's not a write to a RO CSR + ); + // CSR read mux +assign dec_csr_rddata_d[31:0] = ( ({32{csr_misa}} & 32'h40001104) | + ({32{csr_mvendorid}} & 32'h00000045) | + ({32{csr_marchid}} & 32'h00000010) | + ({32{csr_mimpid}} & 32'h1) | + ({32{csr_mhartid}} & {core_id[31:4], 4'b0}) | + ({32{csr_mstatus}} & {19'b0, 2'b11, 3'b0, mstatus[1], 3'b0, mstatus[0], 3'b0}) | + ({32{csr_mtvec}} & {mtvec[30:1], 1'b0, mtvec[0]}) | + ({32{csr_mip}} & {1'b0, mip[3], 18'b0, mip[2], 3'b0, mip[1], 3'b0, mip[0], 3'b0}) | + ({32{csr_mie}} & {1'b0, mie[3], 18'b0, mie[2], 3'b0, mie[1], 3'b0, mie[0], 3'b0}) | + ({32{csr_mcyclel}} & mcyclel[31:0]) | + ({32{csr_mcycleh}} & mcycleh_inc[31:0]) | + ({32{csr_minstretl}} & minstretl_read[31:0]) | + ({32{csr_minstreth}} & minstreth_read[31:0]) | + ({32{csr_mscratch}} & mscratch[31:0]) | + ({32{csr_mepc}} & {mepc[31:1], 1'b0}) | + ({32{csr_mcause}} & mcause[31:0]) | + ({32{csr_mscause}} & {29'b0, mscause[2:0]}) | + ({32{csr_mtval}} & mtval[31:0]) | + ({32{csr_mrac}} & mrac[31:0]) | + ({32{csr_mdseac}} & mdseac[31:0]) | + ({32{csr_meivt}} & {meivt[31:10], 10'b0}) | + ({32{csr_meihap}} & {meivt[31:10], meihap[9:2], 2'b0}) | + ({32{csr_meicurpl}} & {28'b0, meicurpl[3:0]}) | + ({32{csr_meicidpl}} & {28'b0, meicidpl[3:0]}) | + ({32{csr_meipt}} & {28'b0, meipt[3:0]}) | + ({32{csr_mcgc}} & {23'b0, mcgc[8:0]}) | + ({32{csr_mfdc}} & {13'b0, mfdc[18:0]}) | + 
({32{csr_dcsr}} & {16'h4000, dcsr[15:2], 2'b11}) | + ({32{csr_dpc}} & {dpc[31:1], 1'b0}) | + ({32{csr_dicad0}} & dicad0[31:0]) | + ({32{csr_dicad0h}} & dicad0h[31:0]) | + ({32{csr_dicad1}} & dicad1[31:0]) | + ({32{csr_dicawics}} & {7'b0, dicawics[16], 2'b0, dicawics[15:14], 3'b0, dicawics[13:0], 3'b0}) | + ({32{csr_mtsel}} & {30'b0, mtsel[1:0]}) | + ({32{csr_mtdata1}} & {mtdata1_tsel_out[31:0]}) | + ({32{csr_mtdata2}} & {mtdata2_tsel_out[31:0]}) | + ({32{csr_micect}} & {micect[31:0]}) | + ({32{csr_miccmect}} & {miccmect[31:0]}) | + ({32{csr_mdccmect}} & {mdccmect[31:0]}) | + ({32{csr_mhpmc3}} & mhpmc3[31:0]) | + ({32{csr_mhpmc4}} & mhpmc4[31:0]) | + ({32{csr_mhpmc5}} & mhpmc5[31:0]) | + ({32{csr_mhpmc6}} & mhpmc6[31:0]) | + ({32{csr_mhpmc3h}} & mhpmc3h[31:0]) | + ({32{csr_mhpmc4h}} & mhpmc4h[31:0]) | + ({32{csr_mhpmc5h}} & mhpmc5h[31:0]) | + ({32{csr_mhpmc6h}} & mhpmc6h[31:0]) | + ({32{csr_mfdht}} & {26'b0, mfdht[5:0]}) | + ({32{csr_mfdhs}} & {30'b0, mfdhs[1:0]}) | + ({32{csr_mhpme3}} & {22'b0,mhpme3[9:0]}) | + ({32{csr_mhpme4}} & {22'b0,mhpme4[9:0]}) | + ({32{csr_mhpme5}} & {22'b0,mhpme5[9:0]}) | + ({32{csr_mhpme6}} & {22'b0,mhpme6[9:0]}) | + ({32{csr_mcountinhibit}} & {25'b0, mcountinhibit[6:0]}) | + ({32{csr_mpmc}} & {30'b0, mpmc[1], 1'b0}) + ); + +endmodule // el2_dec_tlu_ctl diff --git a/design/dec/el2_dec_trigger.sv b/design/dec/el2_dec_trigger.sv new file mode 100644 index 0000000..6a8b165 --- /dev/null +++ b/design/dec/el2_dec_trigger.sv @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: DEC Trigger Logic
+// Comments: Compares the i0 PC against each of the 4 trigger packets and
+//           reports, per trigger, whether an enabled execute trigger matched.
+//********************************************************************************
+module el2_dec_trigger
+import el2_pkg::*;
+#(
+`include "el2_param.vh"
+ )(
+   // Trigger packets from the tlu. 'select': 0-pc, 1-opcode. 'execute' must be set for dec triggers to fire. 'match': 1-masked compare, 0-full compare
+   input el2_trigger_pkt_t [3:0] trigger_pkt_any,
+   input logic [31:1]            dec_i0_pc_d,               // i0 pc
+   output logic [3:0]            dec_i0_trigger_match_d     // bit i: trigger i fired for i0
+);
+
+   logic [3:0]        dec_i0_trigger_data_match;   // raw comparator result, one bit per trigger
+   logic [3:0][31:0]  dec_i0_match_data;           // value handed to each comparator
+
+   for (genvar i=0; i<4; i++) begin
+      logic pc_match_armed;                        // execute trigger with select=0 -> do a PC match
+
+      assign pc_match_armed = trigger_pkt_any[i].execute & ~trigger_pkt_any[i].select;
+      // PC with bit 0 taken from tdata2[0]; forced to all zeros when the PC match is not armed
+      assign dec_i0_match_data[i][31:0] = {32{pc_match_armed}} & {dec_i0_pc_d[31:1], trigger_pkt_any[i].tdata2[0]};
+      rvmaskandmatch trigger_i0_match (.match(dec_i0_trigger_data_match[i]), .masken(trigger_pkt_any[i].match), .mask(trigger_pkt_any[i].tdata2[31:0]), .data(dec_i0_match_data[i][31:0]));
+      assign dec_i0_trigger_match_d[i] = dec_i0_trigger_data_match[i] & trigger_pkt_any[i].m & trigger_pkt_any[i].execute;
+   end
+endmodule // el2_dec_trigger
+
diff --git a/design/dmi/dmi_jtag_to_core_sync.v b/design/dmi/dmi_jtag_to_core_sync.v
new file mode 100644
index 0000000..562f815
--- /dev/null
+++ b/design/dmi/dmi_jtag_to_core_sync.v
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2018 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//------------------------------------------------------------------------------------
+//
+// Copyright Western Digital, 2019
+// Owner : Alex Grobman
+// Description:
+// This module synchronizes the rd_en/wr_en strobes from JTAG (TCK) into the
+// processor clock domain (Core_clk) and converts each rising edge of a
+// synchronized strobe into a pulse that is one core clock wide.
+//-------------------------------------------------------------------------------------
+
+module dmi_jtag_to_core_sync (
+// JTAG signals
+input       rd_en,      // 1 bit Read Enable from JTAG
+input       wr_en,      // 1 bit Write enable from JTAG
+
+// Processor Signals
+input       rst_n,      // Core reset (asynchronous, active low)
+input       clk,        // Core clock
+
+output      reg_en,     // 1 bit register access enable to Processor (pulses for a read or a write)
+output      reg_wr_en   // 1 bit Write enable to Processor (pulses for a write only)
+);
+
+wire        c_rd_en;    // one-clk pulse for a rising edge seen on the sampled rd_en
+wire        c_wr_en;    // one-clk pulse for a rising edge seen on the sampled wr_en
+reg [2:0]   rden, wren; // shift registers sampling rd_en / wr_en on clk; bit 0 holds the newest sample
+
+
+// Outputs
+assign reg_en    = c_wr_en | c_rd_en;
+assign reg_wr_en = c_wr_en;
+
+
+// synchronizers
+always @ ( posedge clk or negedge rst_n) begin
+    if(!rst_n) begin
+        rden <= 3'b000;   // sized literal: the SystemVerilog-only '0 fill literal does not parse as Verilog-2001
+        wren <= 3'b000;
+    end
+    else begin
+        rden <= {rden[1:0], rd_en};
+        wren <= {wren[1:0], wr_en};
+    end
+end
+// rising-edge detect: second sample is 1 while the third (older) sample is still 0
+assign c_rd_en = rden[1] & ~rden[2];
+assign c_wr_en = wren[1] & ~wren[2];
+
+
+endmodule
diff --git a/design/dmi/dmi_wrapper.v b/design/dmi/dmi_wrapper.v
new file mode 100644
index 0000000..d9fd741
--- /dev/null
+++ b/design/dmi/dmi_wrapper.v
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2018 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//------------------------------------------------------------------------------------
+//
+// Copyright Western Digital, 2018
+// Owner : Anusha Narayanamoorthy
+// Description:
+// Wrapper that pairs the JTAG TAP (rvjtag_tap) with the JTAG-to-core
+// strobe synchronizer (dmi_jtag_to_core_sync).
+//
+//-------------------------------------------------------------------------------------
+
+module dmi_wrapper(
+
+   // JTAG signals
+   input         trst_n,          // JTAG reset (active low)
+   input         tck,             // JTAG clock
+   input         tms,             // Test mode select
+   input         tdi,             // Test data input
+   output        tdo,             // Test data output
+   output        tdoEnable,       // Test data output enable
+
+   // Processor signals
+   input         core_rst_n,      // Core reset
+   input         core_clk,        // Core clock
+   input  [31:1] jtag_id,         // JTAG ID (bits 31:1; the TAP supplies the LSB)
+   input  [31:0] rd_data,         // 32 bit read data from processor
+   output [31:0] reg_wr_data,     // 32 bit write data to processor (driven on tck by the TAP)
+   output [6:0]  reg_wr_addr,     // 7 bit register address to processor (driven on tck by the TAP)
+   output        reg_en,          // 1 bit access strobe to processor (read or write), on core_clk
+   output        reg_wr_en,       // 1 bit write strobe to processor, on core_clk
+   output        dmi_hard_reset   // hard-reset request from the TAP (tck domain)
+);
+
+   // Request strobes produced by the TAP on tck, consumed by the synchronizer.
+   wire tap_rd_en;
+   wire tap_wr_en;
+
+   // dmi_reset output of the TAP; it has no consumer inside this wrapper.
+   wire tap_dmi_reset;
+
+
+   // JTAG TAP
+   rvjtag_tap i_jtag_tap(
+      // JTAG pads
+      .trst           (trst_n),          // dedicated JTAG TRST (active low) pad signal or asynchronous active low power on reset
+      .tck            (tck),             // dedicated JTAG TCK pad signal
+      .tms            (tms),             // dedicated JTAG TMS pad signal
+      .tdi            (tdi),             // dedicated JTAG TDI pad signal
+      .tdo            (tdo),             // dedicated JTAG TDO pad signal
+      .tdoEnable      (tdoEnable),       // enable for TDO pad
+
+      // Register access interface
+      .wr_addr        (reg_wr_addr),     // 7 bit write address
+      .wr_data        (reg_wr_data),     // 32 bit write data
+      .wr_en          (tap_wr_en),       // 1 bit write enable
+      .rd_en          (tap_rd_en),       // 1 bit read enable
+      .rd_data        (rd_data),         // 32 bit read data
+
+      // Status / identification fields, tied off or passed through
+      .rd_status      (2'b0),
+      .dmi_stat       (2'b0),            // no need to wait or error possible
+      .idle           (3'h0),            // no need to wait to sample data
+      .version        (4'h1),            // debug spec 0.13 compliant
+      .jtag_id        (jtag_id),
+
+      // Reset requests
+      .dmi_hard_reset (dmi_hard_reset),
+      .dmi_reset      (tap_dmi_reset)
+   );
+
+
+   // JTAG-to-core strobe synchronizer
+   dmi_jtag_to_core_sync i_dmi_jtag_to_core_sync(
+      .clk       (core_clk),
+      .rst_n     (core_rst_n),
+
+      .rd_en     (tap_rd_en),            // 1 bit read request from the TAP
+      .wr_en     (tap_wr_en),            // 1 bit write request from the TAP
+
+      .reg_en    (reg_en),               // 1 bit access strobe (read or write)
+      .reg_wr_en (reg_wr_en)             // 1 bit write strobe
+   );
+
+endmodule
diff --git a/design/dmi/rvjtag_tap.v b/design/dmi/rvjtag_tap.v
new file mode 100644
index 0000000..2463434
--- /dev/null
+++ b/design/dmi/rvjtag_tap.v
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2019 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License
+
+// JTAG TAP with a 5-bit instruction register and three selectable data
+// registers:
+//   IR == 5'b00001 : device ID (also the reset value of IR)
+//   IR == 5'b10000 : control/status register (dr_en[0])
+//   IR == 5'b10001 : access register carrying address, data and a 2-bit op (dr_en[1])
+//   anything else  : 1-bit bypass
+// NOTE(review): 0x10 / 0x11 look like the dtmcs / dmi DTM registers of the
+// RISC-V debug spec - confirm against the spec revision in use.
+module rvjtag_tap #(
+parameter AWIDTH = 7                // width of wr_addr
+)
+(
+input               trst,           // asynchronous reset, active low
+input               tck,            // JTAG clock
+input               tms,            // TAP state machine control
+input               tdi,            // serial data in
+output reg          tdo,            // serial data out, registered on the falling edge of tck
+output              tdoEnable,      // high while in SHIFT_DR or SHIFT_IR
+
+output [31:0]       wr_data,        // dr[33:2]
+output [AWIDTH-1:0] wr_addr,        // upper AWIDTH bits of dr
+output              wr_en,          // dr[1]
+output              rd_en,          // dr[0]
+
+input  [31:0]       rd_data,        // captured into the access register in CAPTURE_DR
+input  [1:0]        rd_status,      // captured into the low two bits of the access register
+
+output reg          dmi_reset,      // loaded from sr[16] on an update of the control/status register
+output reg          dmi_hard_reset, // loaded from sr[17] on an update of the control/status register
+
+input  [2:0]        idle,           // captured into control/status bits [14:12]
+input  [1:0]        dmi_stat,       // captured into control/status bits [11:10]
+/*
+--  revisionCode        : 4'h0;
+--  manufacturersIdCode : 11'h45;
+--  deviceIdCode        : 16'h0001;
+--  order MSB .. LSB -> [4 bit version or revision] [16 bit part number] [11 bit manufacturer id] [value of 1'b1 in LSB]
+*/
+input  [31:1]       jtag_id,        // device ID without its LSB; a constant 1'b1 is appended on capture
+input  [3:0]        version         // captured into control/status bits [3:0]
+);
+
+// Length of the longest data register: address + 32 data bits + 2 op/status bits.
+localparam USER_DR_LENGTH = AWIDTH + 34;
+
+
+// sr  : shift register shared by IR and all DR scans
+// nsr : combinational next value of sr
+// dr  : parallel holding register for the access register, source of the wr_*/rd_en outputs
+reg [USER_DR_LENGTH-1:0] sr, nsr, dr;
+
+///////////////////////////////////////////////////////
+//                      Tap controller
+///////////////////////////////////////////////////////
+logic[3:0] state, nstate;
+logic [4:0] ir;
+wire jtag_reset;
+wire shift_dr;
+wire pause_dr;          // decoded below but not used elsewhere in this module
+wire update_dr;
+wire capture_dr;
+wire shift_ir;
+wire pause_ir ;         // decoded below but not used elsewhere in this module
+wire update_ir ;
+wire capture_ir;
+wire[1:0] dr_en;
+wire devid_sel;
+wire [5:0] abits;
+
+// Low six bits of the AWIDTH parameter, reported in control/status bits [9:4].
+assign abits = AWIDTH[5:0];
+
+
+// State encodings of the 16-state TAP controller.
+localparam TEST_LOGIC_RESET_STATE = 0;
+localparam RUN_TEST_IDLE_STATE    = 1;
+localparam SELECT_DR_SCAN_STATE   = 2;
+localparam CAPTURE_DR_STATE       = 3;
+localparam SHIFT_DR_STATE         = 4;
+localparam EXIT1_DR_STATE         = 5;
+localparam PAUSE_DR_STATE         = 6;
+localparam EXIT2_DR_STATE         = 7;
+localparam UPDATE_DR_STATE        = 8;
+localparam SELECT_IR_SCAN_STATE   = 9;
+localparam CAPTURE_IR_STATE       = 10;
+localparam SHIFT_IR_STATE         = 11;
+localparam EXIT1_IR_STATE         = 12;
+localparam PAUSE_IR_STATE         = 13;
+localparam EXIT2_IR_STATE         = 14;
+localparam UPDATE_IR_STATE        = 15;
+
+// Next-state decode: every state has exactly two successors, chosen by tms
+// (first value when tms is 1, second when tms is 0).
+always_comb  begin
+    nstate = state;
+    case(state)
+    TEST_LOGIC_RESET_STATE: nstate = tms ? TEST_LOGIC_RESET_STATE : RUN_TEST_IDLE_STATE;
+    RUN_TEST_IDLE_STATE:    nstate = tms ? SELECT_DR_SCAN_STATE   : RUN_TEST_IDLE_STATE;
+    SELECT_DR_SCAN_STATE:   nstate = tms ? SELECT_IR_SCAN_STATE   : CAPTURE_DR_STATE;
+    CAPTURE_DR_STATE:       nstate = tms ? EXIT1_DR_STATE         : SHIFT_DR_STATE;
+    SHIFT_DR_STATE:         nstate = tms ? EXIT1_DR_STATE         : SHIFT_DR_STATE;
+    EXIT1_DR_STATE:         nstate = tms ? UPDATE_DR_STATE        : PAUSE_DR_STATE;
+    PAUSE_DR_STATE:         nstate = tms ? EXIT2_DR_STATE         : PAUSE_DR_STATE;
+    EXIT2_DR_STATE:         nstate = tms ? UPDATE_DR_STATE        : SHIFT_DR_STATE;
+    UPDATE_DR_STATE:        nstate = tms ? SELECT_DR_SCAN_STATE   : RUN_TEST_IDLE_STATE;
+    SELECT_IR_SCAN_STATE:   nstate = tms ? TEST_LOGIC_RESET_STATE : CAPTURE_IR_STATE;
+    CAPTURE_IR_STATE:       nstate = tms ? EXIT1_IR_STATE         : SHIFT_IR_STATE;
+    SHIFT_IR_STATE:         nstate = tms ? EXIT1_IR_STATE         : SHIFT_IR_STATE;
+    EXIT1_IR_STATE:         nstate = tms ? UPDATE_IR_STATE        : PAUSE_IR_STATE;
+    PAUSE_IR_STATE:         nstate = tms ? EXIT2_IR_STATE         : PAUSE_IR_STATE;
+    EXIT2_IR_STATE:         nstate = tms ? UPDATE_IR_STATE        : SHIFT_IR_STATE;
+    UPDATE_IR_STATE:        nstate = tms ? SELECT_DR_SCAN_STATE   : RUN_TEST_IDLE_STATE;
+    default:                nstate = TEST_LOGIC_RESET_STATE;
+    endcase
+end
+
+// State register: advances on the rising edge of tck, forced to
+// TEST_LOGIC_RESET while trst is low.
+always @ (posedge tck or negedge trst) begin
+    if(!trst) state <= TEST_LOGIC_RESET_STATE;
+    else state <= nstate;
+end
+
+// One-hot style decodes of the current state.
+assign jtag_reset = state == TEST_LOGIC_RESET_STATE;
+assign shift_dr   = state == SHIFT_DR_STATE;
+assign pause_dr   = state == PAUSE_DR_STATE;
+assign update_dr  = state == UPDATE_DR_STATE;
+assign capture_dr = state == CAPTURE_DR_STATE;
+assign shift_ir   = state == SHIFT_IR_STATE;
+assign pause_ir   = state == PAUSE_IR_STATE;
+assign update_ir  = state == UPDATE_IR_STATE;
+assign capture_ir = state == CAPTURE_IR_STATE;
+
+assign tdoEnable = shift_dr | shift_ir;
+
+///////////////////////////////////////////////////////
+//                      IR register
+///////////////////////////////////////////////////////
+
+// IR changes on the falling edge of tck. It resets to 5'b1 (device ID) on trst
+// or while the TAP sits in TEST_LOGIC_RESET. In UPDATE_IR it takes sr[4:0],
+// except that an all-zero value is stored as 5'h1f, which selects none of the
+// decoded registers below and therefore behaves as bypass.
+always @ (negedge tck or negedge trst) begin
+    if (!trst) ir <= 5'b1;
+    else begin
+        if (jtag_reset) ir <= 5'b1;
+        else if (update_ir) ir <= (sr[4:0] == '0) ? 5'h1f :sr[4:0];
+    end
+end
+
+
+// Data-register select decodes.
+assign devid_sel  = ir == 5'b00001;
+assign dr_en[0]   = ir == 5'b10000;
+assign dr_en[1]   = ir == 5'b10001;
+
+///////////////////////////////////////////////////////
+//                      Shift register
+///////////////////////////////////////////////////////
+always @ (posedge tck or negedge trst) begin
+    if(!trst)begin
+        sr <= '0;
+    end
+    else begin
+        sr <= nsr;
+    end
+end
+
+// SR next value
+// case(1) picks the first listed condition that is 1; sr holds its value
+// (nsr = sr) when no condition applies.
+always_comb begin
+    nsr = sr;
+    case(1)
+    shift_dr:   begin
+                    // Shift right by one, tdi entering at the MSB of the selected register.
+                    case(1)
+                    dr_en[1]:   nsr = {tdi, sr[USER_DR_LENGTH-1:1]};
+
+                    dr_en[0],
+                    devid_sel:  nsr = {{USER_DR_LENGTH-32{1'b0}},tdi, sr[31:1]};
+                    default:    nsr = {{USER_DR_LENGTH-1{1'b0}},tdi}; // bypass
+                    endcase
+                end
+    capture_dr: begin
+                    // Parallel load of the selected register; no default, so for any
+                    // other IR value sr is left unchanged.
+                    nsr[0] = 1'b0;
+                    case(1)
+                    dr_en[0]:   nsr = {{USER_DR_LENGTH-15{1'b0}}, idle, dmi_stat, abits, version};
+                    dr_en[1]:   nsr = {{AWIDTH{1'b0}}, rd_data, rd_status};
+                    devid_sel:  nsr = {{USER_DR_LENGTH-32{1'b0}}, jtag_id, 1'b1};
+                    endcase
+                end
+    shift_ir:   nsr = {{USER_DR_LENGTH-5{1'b0}},tdi, sr[4:1]};   // 5-bit IR shift
+    capture_ir: nsr = {{USER_DR_LENGTH-1{1'b0}},1'b1};           // IR capture value is 5'b00001
+    endcase
+end
+
+// TDO retiming: sr[0] is re-registered on the falling edge of tck (no reset).
+always @ (negedge tck ) tdo <= sr[0];
+
+// DMI CS register
+// The two reset requests are taken from sr[17] / sr[16] when the control/status
+// register is updated and are cleared on every other rising edge of tck, so
+// they are high for at most the tck period following UPDATE_DR.
+always @ (posedge tck or negedge trst) begin
+    if(!trst) begin
+        dmi_hard_reset <= 1'b0;
+        dmi_reset      <= 1'b0;
+    end
+    else if (update_dr & dr_en[0]) begin
+        dmi_hard_reset <= sr[17];
+        dmi_reset      <= sr[16];
+    end
+    else begin
+        dmi_hard_reset <= 1'b0;
+        dmi_reset      <= 1'b0;
+    end
+end
+
+// DR register
+// Loaded from sr when the access register is updated. On every other rising
+// edge of tck the low two bits (wr_en, rd_en) are cleared while address and
+// data are held, so the enables last for the tck period following UPDATE_DR.
+always @ (posedge tck or negedge trst) begin
+    if(!trst)
+        dr <=  '0;
+    else begin
+        if (update_dr & dr_en[1])
+            dr <= sr;
+        else
+            dr <= {dr[USER_DR_LENGTH-1:2],2'b0};
+    end
+end
+
+// Field split of dr, MSB to LSB: address, 32-bit data, write enable, read enable.
+assign {wr_addr, wr_data, wr_en, rd_en} = dr;
+
+
+
+
+endmodule
diff --git a/design/el2_dma_ctrl.sv b/design/el2_dma_ctrl.sv
new file mode 100644
index 0000000..13ff113
--- /dev/null
+++ b/design/el2_dma_ctrl.sv
@@ -0,0 +1,609 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// Function: Top level SWERV core file +// Comments: +// +//******************************************************************************** + +module el2_dma_ctrl #( +`include "el2_param.vh" + )( + input logic clk, + input logic free_clk, + input logic rst_l, + input logic dma_bus_clk_en, // slave bus clock enable + input logic clk_override, + input logic scan_mode, + + // Debug signals + input logic [31:0] dbg_cmd_addr, + input logic [31:0] dbg_cmd_wrdata, + input logic dbg_cmd_valid, + input logic dbg_cmd_write, // 1: write command, 0: read_command + input logic [1:0] dbg_cmd_type, // 0:gpr 1:csr 2: memory + input logic [1:0] dbg_cmd_size, // size of the abstract mem access debug command + + input logic dbg_dma_bubble, // Debug needs a bubble to send a valid + output logic dma_dbg_ready, // DMA is ready to accept debug request + + output logic dma_dbg_cmd_done, + output logic dma_dbg_cmd_fail, + output logic [31:0] dma_dbg_rddata, + + // Core side signals + output logic dma_dccm_req, // DMA dccm request (only one of dccm/iccm will be set) + output logic dma_iccm_req, // DMA iccm request + output logic [2:0] dma_mem_tag, // DMA Buffer entry number + output logic [31:0] dma_mem_addr, // DMA request address + output logic [2:0] dma_mem_sz, // DMA request size + output logic dma_mem_write, // DMA write to 
dccm/iccm + output logic [63:0] dma_mem_wdata, // DMA write data + + input logic dccm_dma_rvalid, // dccm data valid for DMA read + input logic dccm_dma_ecc_error, // ECC error on DMA read + input logic [2:0] dccm_dma_rtag, // Tag of the DMA req + input logic [63:0] dccm_dma_rdata, // dccm data for DMA read + input logic iccm_dma_rvalid, // iccm data valid for DMA read + input logic iccm_dma_ecc_error, // ECC error on DMA read + input logic [2:0] iccm_dma_rtag, // Tag of the DMA req + input logic [63:0] iccm_dma_rdata, // iccm data for DMA read + + output logic dma_dccm_stall_any, // stall dccm pipe (bubble) so that DMA can proceed + output logic dma_iccm_stall_any, // stall iccm pipe (bubble) so that DMA can proceed + input logic dccm_ready, // dccm ready to accept DMA request + input logic iccm_ready, // iccm ready to accept DMA request + input logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:15] + + // PMU signals + output logic dma_pmu_dccm_read, + output logic dma_pmu_dccm_write, + output logic dma_pmu_any_read, + output logic dma_pmu_any_write, + + // AXI Write Channels + input logic dma_axi_awvalid, + output logic dma_axi_awready, + input logic [pt.DMA_BUS_TAG-1:0] dma_axi_awid, + input logic [31:0] dma_axi_awaddr, + input logic [2:0] dma_axi_awsize, + + + input logic dma_axi_wvalid, + output logic dma_axi_wready, + input logic [63:0] dma_axi_wdata, + input logic [7:0] dma_axi_wstrb, + + output logic dma_axi_bvalid, + input logic dma_axi_bready, + output logic [1:0] dma_axi_bresp, + output logic [pt.DMA_BUS_TAG-1:0] dma_axi_bid, + + // AXI Read Channels + input logic dma_axi_arvalid, + output logic dma_axi_arready, + input logic [pt.DMA_BUS_TAG-1:0] dma_axi_arid, + input logic [31:0] dma_axi_araddr, + input logic [2:0] dma_axi_arsize, + + output logic dma_axi_rvalid, + input logic dma_axi_rready, + output logic [pt.DMA_BUS_TAG-1:0] dma_axi_rid, + output logic [63:0] dma_axi_rdata, + output logic [1:0] dma_axi_rresp, + output logic 
dma_axi_rlast +); + + + localparam DEPTH = pt.DMA_BUF_DEPTH; + localparam DEPTH_PTR = $clog2(DEPTH); + localparam NACK_COUNT = 7; + + logic [DEPTH-1:0] fifo_valid; + logic [DEPTH-1:0][1:0] fifo_error; + logic [DEPTH-1:0] fifo_error_bus; + logic [DEPTH-1:0] fifo_rpend; + logic [DEPTH-1:0] fifo_done; // DMA trxn is done in core + logic [DEPTH-1:0] fifo_done_bus; // DMA trxn is done in core but synced to bus clock + logic [DEPTH-1:0][31:0] fifo_addr; + logic [DEPTH-1:0][2:0] fifo_sz; + logic [DEPTH-1:0][7:0] fifo_byteen; + logic [DEPTH-1:0] fifo_write; + logic [DEPTH-1:0] fifo_posted_write; + logic [DEPTH-1:0] fifo_dbg; + logic [DEPTH-1:0][63:0] fifo_data; + logic [DEPTH-1:0][pt.DMA_BUS_TAG-1:0] fifo_tag; + logic [DEPTH-1:0][pt.DMA_BUS_ID-1:0] fifo_mid; + logic [DEPTH-1:0][pt.DMA_BUS_PRTY-1:0] fifo_prty; + + logic [DEPTH-1:0] fifo_cmd_en; + logic [DEPTH-1:0] fifo_data_en; + logic [DEPTH-1:0] fifo_pend_en; + logic [DEPTH-1:0] fifo_done_en; + logic [DEPTH-1:0] fifo_done_bus_en; + logic [DEPTH-1:0] fifo_error_en; + logic [DEPTH-1:0] fifo_error_bus_en; + logic [DEPTH-1:0] fifo_reset; + logic [DEPTH-1:0][1:0] fifo_error_in; + logic [DEPTH-1:0][63:0] fifo_data_in; + + logic fifo_write_in; + logic fifo_posted_write_in; + logic fifo_dbg_in; + logic [31:0] fifo_addr_in; + logic [2:0] fifo_sz_in; + logic [7:0] fifo_byteen_in; + + logic [DEPTH_PTR-1:0] RspPtr, NxtRspPtr; + logic [DEPTH_PTR-1:0] WrPtr, NxtWrPtr; + logic [DEPTH_PTR-1:0] RdPtr, NxtRdPtr; + logic WrPtrEn, RdPtrEn, RspPtrEn; + + logic dma_dbg_cmd_error; + logic dma_dbg_cmd_done_q; + + logic fifo_full, fifo_full_spec, fifo_empty; + logic dma_address_error, dma_alignment_error; + logic [3:0] num_fifo_vld; + logic dma_mem_req; + logic [31:0] dma_mem_addr_int; + logic [2:0] dma_mem_sz_int; + logic [7:0] dma_mem_byteen; + logic dma_mem_addr_in_dccm; + logic dma_mem_addr_in_iccm; + logic dma_mem_addr_in_pic; + logic dma_mem_addr_in_pic_region_nc; + logic dma_mem_addr_in_dccm_region_nc; + logic 
dma_mem_addr_in_iccm_region_nc; + + logic [2:0] dma_nack_count, dma_nack_count_d, dma_nack_count_csr; + + logic dma_buffer_c1_clken; + logic dma_free_clken; + logic dma_buffer_c1_clk; + logic dma_free_clk; + logic dma_bus_clk; + + logic bus_rsp_valid, bus_rsp_sent; + logic bus_cmd_valid, bus_cmd_sent; + logic bus_cmd_write, bus_cmd_posted_write; + logic [7:0] bus_cmd_byteen; + logic [2:0] bus_cmd_sz; + logic [31:0] bus_cmd_addr; + logic [63:0] bus_cmd_wdata; + logic [pt.DMA_BUS_TAG-1:0] bus_cmd_tag; + logic [pt.DMA_BUS_ID-1:0] bus_cmd_mid; + logic [pt.DMA_BUS_PRTY-1:0] bus_cmd_prty; + logic bus_posted_write_done; + + logic fifo_full_spec_bus; + logic dbg_dma_bubble_bus; + logic dma_fifo_ready; + + logic wrbuf_en, wrbuf_data_en; + logic wrbuf_cmd_sent, wrbuf_rst, wrbuf_data_rst; + logic wrbuf_vld, wrbuf_data_vld; + logic [pt.DMA_BUS_TAG-1:0] wrbuf_tag; + logic [2:0] wrbuf_sz; + logic [31:0] wrbuf_addr; + logic [63:0] wrbuf_data; + logic [7:0] wrbuf_byteen; + + logic rdbuf_en; + logic rdbuf_cmd_sent, rdbuf_rst; + logic rdbuf_vld; + logic [pt.DMA_BUS_TAG-1:0] rdbuf_tag; + logic [2:0] rdbuf_sz; + logic [31:0] rdbuf_addr; + + logic axi_mstr_prty_in, axi_mstr_prty_en; + logic axi_mstr_priority; + logic axi_mstr_sel; + + logic axi_rsp_valid, axi_rsp_sent; + logic axi_rsp_write; + logic [pt.DMA_BUS_TAG-1:0] axi_rsp_tag; + logic [1:0] axi_rsp_error; + logic [63:0] axi_rsp_rdata; + + //------------------------LOGIC STARTS HERE--------------------------------- + + // FIFO inputs + assign fifo_addr_in[31:0] = dbg_cmd_valid ? dbg_cmd_addr[31:0] : bus_cmd_addr[31:0]; + assign fifo_byteen_in[7:0] = dbg_cmd_valid ? (8'h0f << 4*dbg_cmd_addr[2]) : bus_cmd_byteen[7:0]; + assign fifo_sz_in[2:0] = dbg_cmd_valid ? {1'b0,dbg_cmd_size[1:0]} : bus_cmd_sz[2:0]; + assign fifo_write_in = dbg_cmd_valid ? 
dbg_cmd_write : bus_cmd_write; + assign fifo_posted_write_in = ~dbg_cmd_valid & bus_cmd_posted_write; + assign fifo_dbg_in = dbg_cmd_valid; + + for (genvar i=0 ;i<32'(DEPTH); i++) begin: GenFifo + assign fifo_cmd_en[i] = ((bus_cmd_sent & dma_bus_clk_en) | (dbg_cmd_valid & dbg_cmd_type[1])) & (DEPTH_PTR'(i) == WrPtr[DEPTH_PTR-1:0]); + assign fifo_data_en[i] = (((bus_cmd_sent & fifo_write_in & dma_bus_clk_en) | (dbg_cmd_valid & dbg_cmd_type[1] & dbg_cmd_write)) & (i == WrPtr[DEPTH_PTR-1:0])) | + ((dma_address_error | dma_alignment_error) & (i == RdPtr[DEPTH_PTR-1:0])) | + (dccm_dma_rvalid & (i == DEPTH_PTR'(dccm_dma_rtag[2:0]))) | + (iccm_dma_rvalid & (i == DEPTH_PTR'(iccm_dma_rtag[2:0]))); + assign fifo_pend_en[i] = (dma_dccm_req | dma_iccm_req) & ~dma_mem_write & (i == RdPtr[DEPTH_PTR-1:0]); + assign fifo_error_en[i] = ((dma_address_error | dma_alignment_error | dma_dbg_cmd_error) & (i == RdPtr[DEPTH_PTR-1:0])) | + ((dccm_dma_rvalid & dccm_dma_ecc_error) & (i == DEPTH_PTR'(dccm_dma_rtag[2:0]))) | + ((iccm_dma_rvalid & iccm_dma_ecc_error) & (i == DEPTH_PTR'(iccm_dma_rtag[2:0]))); + assign fifo_error_bus_en[i] = (((|fifo_error_in[i][1:0]) & fifo_error_en[i]) | (|fifo_error[i])) & dma_bus_clk_en; + assign fifo_done_en[i] = ((|fifo_error[i] | fifo_error_en[i] | ((dma_dccm_req | dma_iccm_req) & dma_mem_write)) & (i == RdPtr[DEPTH_PTR-1:0])) | + (dccm_dma_rvalid & (i == DEPTH_PTR'(dccm_dma_rtag[2:0]))) | + (iccm_dma_rvalid & (i == DEPTH_PTR'(iccm_dma_rtag[2:0]))); + assign fifo_done_bus_en[i] = (fifo_done_en[i] | fifo_done[i]) & dma_bus_clk_en; + assign fifo_reset[i] = (((bus_rsp_sent | bus_posted_write_done) & dma_bus_clk_en) | dma_dbg_cmd_done) & (i == RspPtr[DEPTH_PTR-1:0]); + assign fifo_error_in[i] = (dccm_dma_rvalid & (i == DEPTH_PTR'(dccm_dma_rtag[2:0]))) ? {1'b0,dccm_dma_ecc_error} : (iccm_dma_rvalid & (i == DEPTH_PTR'(iccm_dma_rtag[2:0]))) ? 
{1'b0,iccm_dma_ecc_error} : + {(dma_address_error | dma_alignment_error | dma_dbg_cmd_error), dma_alignment_error}; + assign fifo_data_in[i] = (fifo_error_en[i] & (|fifo_error_in[i])) ? {32'b0,fifo_addr[i]} : + ((dccm_dma_rvalid & (i == DEPTH_PTR'(dccm_dma_rtag[2:0]))) ? dccm_dma_rdata[63:0] : (iccm_dma_rvalid & (i == DEPTH_PTR'(iccm_dma_rtag[2:0]))) ? iccm_dma_rdata[63:0] : + (dbg_cmd_valid ? {2{dbg_cmd_wrdata[31:0]}} : bus_cmd_wdata[63:0])); + + rvdffsc #(1) fifo_valid_dff (.din(1'b1), .dout(fifo_valid[i]), .en(fifo_cmd_en[i]), .clear(fifo_reset[i]), .clk(dma_free_clk), .*); + rvdffsc #(2) fifo_error_dff (.din(fifo_error_in[i]), .dout(fifo_error[i]), .en(fifo_error_en[i]), .clear(fifo_reset[i]), .clk(dma_free_clk), .*); + rvdffsc #(1) fifo_error_bus_dff (.din(1'b1), .dout(fifo_error_bus[i]), .en(fifo_error_bus_en[i]), .clear(fifo_reset[i]), .clk(dma_free_clk), .*); + rvdffsc #(1) fifo_rpend_dff (.din(1'b1), .dout(fifo_rpend[i]), .en(fifo_pend_en[i]), .clear(fifo_reset[i]), .clk(dma_free_clk), .*); + rvdffsc #(1) fifo_done_dff (.din(1'b1), .dout(fifo_done[i]), .en(fifo_done_en[i]), .clear(fifo_reset[i]), .clk(dma_free_clk), .*); + rvdffsc #(1) fifo_done_bus_dff (.din(1'b1), .dout(fifo_done_bus[i]), .en(fifo_done_bus_en[i]), .clear(fifo_reset[i]), .clk(dma_free_clk), .*); + rvdffe #(32) fifo_addr_dff (.din(fifo_addr_in[31:0]), .dout(fifo_addr[i]), .en(fifo_cmd_en[i]), .*); + rvdffs #(3) fifo_sz_dff (.din(fifo_sz_in[2:0]), .dout(fifo_sz[i]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*); + rvdffs #(8) fifo_byteen_dff (.din(fifo_byteen_in[7:0]), .dout(fifo_byteen[i]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*); + rvdffs #(1) fifo_write_dff (.din(fifo_write_in), .dout(fifo_write[i]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*); + rvdffs #(1) fifo_posted_write_dff (.din(fifo_posted_write_in), .dout(fifo_posted_write[i]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*); + rvdffs #(1) fifo_dbg_dff (.din(fifo_dbg_in), .dout(fifo_dbg[i]), 
.en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*); + rvdffe #(64) fifo_data_dff (.din(fifo_data_in[i]), .dout(fifo_data[i]), .en(fifo_data_en[i]), .*); + rvdffs #(pt.DMA_BUS_TAG) fifo_tag_dff(.din(bus_cmd_tag[pt.DMA_BUS_TAG-1:0]), .dout(fifo_tag[i][pt.DMA_BUS_TAG-1:0]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*); + rvdffs #(pt.DMA_BUS_ID) fifo_mid_dff(.din(bus_cmd_mid[pt.DMA_BUS_ID-1:0]), .dout(fifo_mid[i][pt.DMA_BUS_ID-1:0]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*); + rvdffs #(pt.DMA_BUS_PRTY) fifo_prty_dff(.din(bus_cmd_prty[pt.DMA_BUS_PRTY-1:0]), .dout(fifo_prty[i][pt.DMA_BUS_PRTY-1:0]), .en(fifo_cmd_en[i]), .clk(dma_buffer_c1_clk), .*); + end + + // Pointer logic + assign NxtWrPtr[DEPTH_PTR-1:0] = (WrPtr[DEPTH_PTR-1:0] == (DEPTH-1)) ? '0 : WrPtr[DEPTH_PTR-1:0] + 1'b1; + assign NxtRdPtr[DEPTH_PTR-1:0] = (RdPtr[DEPTH_PTR-1:0] == (DEPTH-1)) ? '0 : RdPtr[DEPTH_PTR-1:0] + 1'b1; + assign NxtRspPtr[DEPTH_PTR-1:0] = (RspPtr[DEPTH_PTR-1:0] == (DEPTH-1)) ? '0 : RspPtr[DEPTH_PTR-1:0] + 1'b1; + + assign WrPtrEn = |fifo_cmd_en[DEPTH-1:0]; + assign RdPtrEn = dma_dccm_req | dma_iccm_req | (dma_address_error | dma_alignment_error | dma_dbg_cmd_error); + assign RspPtrEn = (dma_dbg_cmd_done | (bus_rsp_sent | bus_posted_write_done) & dma_bus_clk_en); + + rvdffs #(DEPTH_PTR) WrPtr_dff(.din(NxtWrPtr[DEPTH_PTR-1:0]), .dout(WrPtr[DEPTH_PTR-1:0]), .en(WrPtrEn), .clk(dma_free_clk), .*); + rvdffs #(DEPTH_PTR) RdPtr_dff(.din(NxtRdPtr[DEPTH_PTR-1:0]), .dout(RdPtr[DEPTH_PTR-1:0]), .en(RdPtrEn), .clk(dma_free_clk), .*); + rvdffs #(DEPTH_PTR) RspPtr_dff(.din(NxtRspPtr[DEPTH_PTR-1:0]), .dout(RspPtr[DEPTH_PTR-1:0]), .en(RspPtrEn), .clk(dma_free_clk), .*); + + // Miscellaneous signals + assign fifo_full = fifo_full_spec_bus; + + always_comb begin + num_fifo_vld[3:0] = {3'b0,bus_cmd_sent} - {3'b0,bus_rsp_sent}; + for (int i=0; i= DEPTH); + + assign dma_fifo_ready = ~(fifo_full | dbg_dma_bubble_bus); + + // Error logic + assign dma_address_error = fifo_valid[RdPtr] & 
~fifo_done[RdPtr] & ~fifo_dbg[RdPtr] & (~(dma_mem_addr_in_dccm | dma_mem_addr_in_iccm)); // request not for ICCM or DCCM + assign dma_alignment_error = fifo_valid[RdPtr] & ~fifo_done[RdPtr] & ~dma_address_error & + (((dma_mem_sz_int[2:0] == 3'h1) & dma_mem_addr_int[0]) | // HW size but unaligned + ((dma_mem_sz_int[2:0] == 3'h2) & (|dma_mem_addr_int[1:0])) | // W size but unaligned + ((dma_mem_sz_int[2:0] == 3'h3) & (|dma_mem_addr_int[2:0])) | // DW size but unaligned + (dma_mem_addr_in_iccm & ~((dma_mem_sz_int[1:0] == 2'b10) | (dma_mem_sz_int[1:0] == 2'b11))) | // ICCM access not word size + (dma_mem_addr_in_dccm & dma_mem_write & ~((dma_mem_sz_int[1:0] == 2'b10) | (dma_mem_sz_int[1:0] == 2'b11))) | // DCCM write not word size + (dma_mem_write & (dma_mem_sz_int[2:0] == 3'h2) & (dma_mem_byteen[dma_mem_addr_int[2:0]+:4] != 4'hf)) | // Write byte enables not aligned for word store + (dma_mem_write & (dma_mem_sz_int[2:0] == 3'h3) & ~((dma_mem_byteen[7:0] == 8'h0f) | (dma_mem_byteen[7:0] == 8'hf0) | (dma_mem_byteen[7:0] == 8'hff)))); // Write byte enables not aligned for dword store + + + //Dbg outputs + assign dma_dbg_ready = fifo_empty & dbg_dma_bubble_bus; + assign dma_dbg_cmd_done = (fifo_valid[RspPtr] & fifo_dbg[RspPtr] & fifo_done[RspPtr]); + assign dma_dbg_rddata[31:0] = fifo_addr[RspPtr][2] ? 
fifo_data[RspPtr][63:32] : fifo_data[RspPtr][31:0]; + assign dma_dbg_cmd_fail = |fifo_error[RspPtr]; + + assign dma_dbg_cmd_error = fifo_valid[RdPtr] & ~fifo_done[RdPtr] & fifo_dbg[RdPtr] & + ((~(dma_mem_addr_in_dccm | dma_mem_addr_in_iccm | dma_mem_addr_in_pic)) | (dma_mem_sz_int[1:0] != 2'b10)); // Only word accesses allowed + + // Block the decode if fifo full + assign dma_dccm_stall_any = dma_mem_req & (dma_mem_addr_in_dccm | dma_mem_addr_in_pic) & (dma_nack_count >= dma_nack_count_csr); + assign dma_iccm_stall_any = dma_mem_req & dma_mem_addr_in_iccm & (dma_nack_count >= dma_nack_count_csr); + + // Used to indicate ready to debug + assign fifo_empty = ~(|(fifo_valid[DEPTH-1:0])); + + // Nack counter, stall the lsu pipe if 7 nacks + assign dma_nack_count_csr[2:0] = dec_tlu_dma_qos_prty[2:0]; + assign dma_nack_count_d[2:0] = (dma_nack_count[2:0] >= dma_nack_count_csr[2:0]) ? ({3{~(dma_dccm_req | dma_iccm_req)}} & dma_nack_count[2:0]) : + (dma_mem_req & ~(dma_dccm_req | dma_iccm_req)) ? (dma_nack_count[2:0] + 1'b1) : 3'b0; + + rvdffs #(3) nack_count_dff(.din(dma_nack_count_d[2:0]), .dout(dma_nack_count[2:0]), .en(dma_mem_req), .clk(dma_free_clk), .*); + + // Core outputs + assign dma_mem_req = fifo_valid[RdPtr] & ~fifo_rpend[RdPtr] & ~fifo_done[RdPtr] & ~(dma_address_error | dma_alignment_error | dma_dbg_cmd_error); + assign dma_dccm_req = dma_mem_req & (dma_mem_addr_in_dccm | dma_mem_addr_in_pic) & dccm_ready; + assign dma_iccm_req = dma_mem_req & dma_mem_addr_in_iccm & iccm_ready; + assign dma_mem_tag[2:0] = 3'(RdPtr); + assign dma_mem_addr_int[31:0] = fifo_addr[RdPtr]; + assign dma_mem_sz_int[2:0] = fifo_sz[RdPtr]; + assign dma_mem_addr[31:0] = (dma_mem_write & (dma_mem_byteen[7:0] == 8'hf0)) ? {dma_mem_addr_int[31:3],1'b1,dma_mem_addr_int[1:0]} : dma_mem_addr_int[31:0]; + assign dma_mem_sz[2:0] = (dma_mem_write & ((dma_mem_byteen[7:0] == 8'h0f) | (dma_mem_byteen[7:0] == 8'hf0))) ? 
3'h2 : dma_mem_sz_int[2:0]; + assign dma_mem_byteen[7:0] = fifo_byteen[RdPtr]; + assign dma_mem_write = fifo_write[RdPtr]; + assign dma_mem_wdata[63:0] = fifo_data[RdPtr]; + + // PMU outputs + assign dma_pmu_dccm_read = dma_dccm_req & ~dma_mem_write; + assign dma_pmu_dccm_write = dma_dccm_req & dma_mem_write; + assign dma_pmu_any_read = (dma_dccm_req | dma_iccm_req) & ~dma_mem_write; + assign dma_pmu_any_write = (dma_dccm_req | dma_iccm_req) & dma_mem_write; + + // Address check dccm + rvrangecheck #(.CCM_SADR(pt.DCCM_SADR), + .CCM_SIZE(pt.DCCM_SIZE)) addr_dccm_rangecheck ( + .addr(dma_mem_addr_int[31:0]), + .in_range(dma_mem_addr_in_dccm), + .in_region(dma_mem_addr_in_dccm_region_nc) + ); + + // Address check iccm + if (pt.ICCM_ENABLE) begin + rvrangecheck #(.CCM_SADR(pt.ICCM_SADR), + .CCM_SIZE(pt.ICCM_SIZE)) addr_iccm_rangecheck ( + .addr(dma_mem_addr_int[31:0]), + .in_range(dma_mem_addr_in_iccm), + .in_region(dma_mem_addr_in_iccm_region_nc) + ); + end + else begin + assign dma_mem_addr_in_iccm = '0; + assign dma_mem_addr_in_iccm_region_nc = '0; + end // else: !if(pt.ICCM_ENABLE) + + + // PIC memory address check + rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR), + .CCM_SIZE(pt.PIC_SIZE)) addr_pic_rangecheck ( + .addr(dma_mem_addr_int[31:0]), + .in_range(dma_mem_addr_in_pic), + .in_region(dma_mem_addr_in_pic_region_nc) + ); + + + // Inputs + rvdff #(1) fifo_full_bus_ff (.din(fifo_full_spec), .dout(fifo_full_spec_bus), .clk(dma_bus_clk), .*); + rvdff #(1) dbg_dma_bubble_ff (.din(dbg_dma_bubble), .dout(dbg_dma_bubble_bus), .clk(dma_bus_clk), .*); + rvdff #(1) dma_dbg_cmd_doneff (.din(dma_dbg_cmd_done), .dout(dma_dbg_cmd_done_q), .clk(free_clk), .*); + + // Clock Gating logic + assign dma_buffer_c1_clken = (bus_cmd_valid & dma_bus_clk_en) | dbg_cmd_valid | clk_override; + assign dma_free_clken = (bus_cmd_valid | bus_rsp_valid | dbg_cmd_valid | dma_dbg_cmd_done | dma_dbg_cmd_done_q | (|fifo_valid[DEPTH-1:0]) | clk_override); + + rvoclkhdr dma_buffer_c1cgc ( 
.en(dma_buffer_c1_clken), .l1clk(dma_buffer_c1_clk), .* ); + rvoclkhdr dma_free_cgc (.en(dma_free_clken), .l1clk(dma_free_clk), .*); + rvclkhdr dma_bus_cgc (.en(dma_bus_clk_en), .l1clk(dma_bus_clk), .*); + + // Write channel buffer + assign wrbuf_en = dma_axi_awvalid & dma_axi_awready; + assign wrbuf_data_en = dma_axi_wvalid & dma_axi_wready; + assign wrbuf_cmd_sent = bus_cmd_sent & bus_cmd_write; + assign wrbuf_rst = wrbuf_cmd_sent & ~wrbuf_en; + assign wrbuf_data_rst = wrbuf_cmd_sent & ~wrbuf_data_en; + + rvdffsc #(.WIDTH(1)) wrbuf_vldff(.din(1'b1), .dout(wrbuf_vld), .en(wrbuf_en), .clear(wrbuf_rst), .clk(dma_bus_clk), .*); + rvdffsc #(.WIDTH(1)) wrbuf_data_vldff(.din(1'b1), .dout(wrbuf_data_vld), .en(wrbuf_data_en), .clear(wrbuf_data_rst), .clk(dma_bus_clk), .*); + rvdffs #(.WIDTH(pt.DMA_BUS_TAG)) wrbuf_tagff(.din(dma_axi_awid[pt.DMA_BUS_TAG-1:0]), .dout(wrbuf_tag[pt.DMA_BUS_TAG-1:0]), .en(wrbuf_en), .clk(dma_bus_clk), .*); + rvdffs #(.WIDTH(3)) wrbuf_szff(.din(dma_axi_awsize[2:0]), .dout(wrbuf_sz[2:0]), .en(wrbuf_en), .clk(dma_bus_clk), .*); + rvdffe #(.WIDTH(32)) wrbuf_addrff(.din(dma_axi_awaddr[31:0]), .dout(wrbuf_addr[31:0]), .en(wrbuf_en & dma_bus_clk_en), .*); + rvdffe #(.WIDTH(64)) wrbuf_dataff(.din(dma_axi_wdata[63:0]), .dout(wrbuf_data[63:0]), .en(wrbuf_data_en & dma_bus_clk_en), .*); + rvdffs #(.WIDTH(8)) wrbuf_byteenff(.din(dma_axi_wstrb[7:0]), .dout(wrbuf_byteen[7:0]), .en(wrbuf_data_en), .clk(dma_bus_clk), .*); + + // Read channel buffer + assign rdbuf_en = dma_axi_arvalid & dma_axi_arready; + assign rdbuf_cmd_sent = bus_cmd_sent & ~bus_cmd_write; + assign rdbuf_rst = rdbuf_cmd_sent & ~rdbuf_en; + + rvdffsc #(.WIDTH(1)) rdbuf_vldff(.din(1'b1), .dout(rdbuf_vld), .en(rdbuf_en), .clear(rdbuf_rst), .clk(dma_bus_clk), .*); + rvdffs #(.WIDTH(pt.DMA_BUS_TAG)) rdbuf_tagff(.din(dma_axi_arid[pt.DMA_BUS_TAG-1:0]), .dout(rdbuf_tag[pt.DMA_BUS_TAG-1:0]), .en(rdbuf_en), .clk(dma_bus_clk), .*); + rvdffs #(.WIDTH(3)) rdbuf_szff(.din(dma_axi_arsize[2:0]), 
.dout(rdbuf_sz[2:0]), .en(rdbuf_en), .clk(dma_bus_clk), .*); + rvdffe #(.WIDTH(32)) rdbuf_addrff(.din(dma_axi_araddr[31:0]), .dout(rdbuf_addr[31:0]), .en(rdbuf_en & dma_bus_clk_en), .*); + + assign dma_axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent); + assign dma_axi_wready = ~(wrbuf_data_vld & ~wrbuf_cmd_sent); + assign dma_axi_arready = ~(rdbuf_vld & ~rdbuf_cmd_sent); + + //Generate a single request from read/write channel + assign bus_cmd_valid = (wrbuf_vld & wrbuf_data_vld) | rdbuf_vld; + assign bus_cmd_sent = bus_cmd_valid & dma_fifo_ready; + assign bus_cmd_write = axi_mstr_sel; + assign bus_cmd_posted_write = '0; + assign bus_cmd_addr[31:0] = axi_mstr_sel ? wrbuf_addr[31:0] : rdbuf_addr[31:0]; + assign bus_cmd_sz[2:0] = axi_mstr_sel ? wrbuf_sz[2:0] : rdbuf_sz[2:0]; + assign bus_cmd_wdata[63:0] = wrbuf_data[63:0]; + assign bus_cmd_byteen[7:0] = wrbuf_byteen[7:0]; + assign bus_cmd_tag[pt.DMA_BUS_TAG-1:0] = axi_mstr_sel ? wrbuf_tag[pt.DMA_BUS_TAG-1:0] : rdbuf_tag[pt.DMA_BUS_TAG-1:0]; + assign bus_cmd_mid[pt.DMA_BUS_ID-1:0] = '0; + assign bus_cmd_prty[pt.DMA_BUS_PRTY-1:0] = '0; + + // Sel=1 -> write has higher priority + assign axi_mstr_sel = (wrbuf_vld & wrbuf_data_vld & rdbuf_vld) ? axi_mstr_priority : (wrbuf_vld & wrbuf_data_vld); + assign axi_mstr_prty_in = ~axi_mstr_priority; + assign axi_mstr_prty_en = bus_cmd_sent; + rvdffs #(.WIDTH(1)) mstr_prtyff(.din(axi_mstr_prty_in), .dout(axi_mstr_priority), .en(axi_mstr_prty_en), .clk(dma_bus_clk), .*); + + assign axi_rsp_valid = fifo_valid[RspPtr] & ~fifo_dbg[RspPtr] & fifo_done_bus[RspPtr]; + assign axi_rsp_rdata[63:0] = fifo_data[RspPtr]; + assign axi_rsp_write = fifo_write[RspPtr]; + assign axi_rsp_error[1:0] = fifo_error[RspPtr][0] ? 2'b10 : (fifo_error[RspPtr][1] ? 
2'b11 : 2'b0); + assign axi_rsp_tag[pt.DMA_BUS_TAG-1:0] = fifo_tag[RspPtr]; + + // AXI response channel signals + assign dma_axi_bvalid = axi_rsp_valid & axi_rsp_write; + assign dma_axi_bresp[1:0] = axi_rsp_error[1:0]; + assign dma_axi_bid[pt.DMA_BUS_TAG-1:0] = axi_rsp_tag[pt.DMA_BUS_TAG-1:0]; + + assign dma_axi_rvalid = axi_rsp_valid & ~axi_rsp_write; + assign dma_axi_rresp[1:0] = axi_rsp_error; + assign dma_axi_rdata[63:0] = axi_rsp_rdata[63:0]; + assign dma_axi_rlast = 1'b1; + assign dma_axi_rid[pt.DMA_BUS_TAG-1:0] = axi_rsp_tag[pt.DMA_BUS_TAG-1:0]; + + assign bus_posted_write_done = 1'b0; + assign bus_rsp_valid = (dma_axi_bvalid | dma_axi_rvalid); + assign bus_rsp_sent = (dma_axi_bvalid & dma_axi_bready) | (dma_axi_rvalid & dma_axi_rready); + +`ifdef ASSERT_ON + + for (genvar i=0; i $past(dma_bus_clk_en); + endproperty + assert_dma_axi_awvalid_stable: assert property (dma_axi_awvalid_stable) else + $display("DMA AXI awvalid changed in middle of bus clock"); + + // Assertion to check awid stays stable during entire bus clock + property dma_axi_awid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_awvalid & (dma_axi_awid[pt.DMA_BUS_TAG-1:0] != $past(dma_axi_awid[pt.DMA_BUS_TAG-1:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_awid_stable: assert property (dma_axi_awid_stable) else + $display("DMA AXI awid changed in middle of bus clock"); + + // Assertion to check awaddr stays stable during entire bus clock + property dma_axi_awaddr_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_awvalid & (dma_axi_awaddr[31:0] != $past(dma_axi_awaddr[31:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_awaddr_stable: assert property (dma_axi_awaddr_stable) else + $display("DMA AXI awaddr changed in middle of bus clock"); + + // Assertion to check awsize stays stable during entire bus clock + property dma_axi_awsize_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_awvalid & (dma_axi_awsize[2:0] != $past(dma_axi_awsize[2:0]))) |-> 
$past(dma_bus_clk_en); + endproperty + assert_dma_axi_awsize_stable: assert property (dma_axi_awsize_stable) else + $display("DMA AXI awsize changed in middle of bus clock"); + + // Assertion to check wstrb stays stable during entire bus clock + property dma_axi_wstrb_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_wvalid & (dma_axi_wstrb[7:0] != $past(dma_axi_wstrb[7:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_wstrb_stable: assert property (dma_axi_wstrb_stable) else + $display("DMA AXI wstrb changed in middle of bus clock"); + + // Assertion to check wdata stays stable during entire bus clock + property dma_axi_wdata_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_wvalid & (dma_axi_wdata[63:0] != $past(dma_axi_wdata[63:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_wdata_stable: assert property (dma_axi_wdata_stable) else + $display("DMA AXI wdata changed in middle of bus clock"); + + // Assertion to check arvalid stays stable during entire bus clock + property dma_axi_arvalid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_arvalid != $past(dma_axi_arvalid)) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_arvalid_stable: assert property (dma_axi_arvalid_stable) else + $display("DMA AXI arvalid changed in middle of bus clock"); + + // Assertion to check arid stays stable during entire bus clock + property dma_axi_arid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_arvalid & (dma_axi_arid[pt.DMA_BUS_TAG-1:0] != $past(dma_axi_arid[pt.DMA_BUS_TAG-1:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_arid_stable: assert property (dma_axi_arid_stable) else + $display("DMA AXI arid changed in middle of bus clock"); + + // Assertion to check araddr stays stable during entire bus clock + property dma_axi_araddr_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_arvalid & (dma_axi_araddr[31:0] != $past(dma_axi_araddr[31:0]))) |-> $past(dma_bus_clk_en); + endproperty + 
assert_dma_axi_araddr_stable: assert property (dma_axi_araddr_stable) else + $display("DMA AXI araddr changed in middle of bus clock"); + + // Assertion to check arsize stays stable during entire bus clock (qualified by arvalid, the read address channel's own valid) + property dma_axi_arsize_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_arvalid & (dma_axi_arsize[2:0] != $past(dma_axi_arsize[2:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_arsize_stable: assert property (dma_axi_arsize_stable) else + $display("DMA AXI arsize changed in middle of bus clock"); + + // Assertion to check bvalid stays stable during entire bus clock + property dma_axi_bvalid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_bvalid != $past(dma_axi_bvalid)) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_bvalid_stable: assert property (dma_axi_bvalid_stable) else + $display("DMA AXI bvalid changed in middle of bus clock"); + + // Assertion to check bvalid stays stable if bready is low + property dma_axi_bvalid_stable_till_bready; + @(posedge clk) disable iff(~rst_l) (~dma_axi_bvalid && $past(dma_axi_bvalid)) |-> $past(dma_axi_bready); + endproperty + assert_dma_axi_bvalid_stable_till_bready: assert property (dma_axi_bvalid_stable_till_bready) else + $display("DMA AXI bvalid deasserted without bready"); + + // Assertion to check bresp stays stable during entire bus clock + property dma_axi_bresp_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_bvalid & (dma_axi_bresp[1:0] != $past(dma_axi_bresp[1:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_bresp_stable: assert property (dma_axi_bresp_stable) else + $display("DMA AXI bresp changed in middle of bus clock"); + + // Assertion to check bid stays stable during entire bus clock + property dma_axi_bid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_bvalid & (dma_axi_bid[pt.DMA_BUS_TAG-1:0] != $past(dma_axi_bid[pt.DMA_BUS_TAG-1:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_bid_stable: assert property (dma_axi_bid_stable) 
else + $display("DMA AXI bid changed in middle of bus clock"); + + // Assertion to check rvalid stays stable during entire bus clock + property dma_axi_rvalid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_rvalid != $past(dma_axi_rvalid)) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_rvalid_stable: assert property (dma_axi_rvalid_stable) else + $display("DMA AXI rvalid changed in middle of bus clock"); + + // Assertion to check rvalid stays stable if rready is low + property dma_axi_rvalid_stable_till_ready; + @(posedge clk) disable iff(~rst_l) (~dma_axi_rvalid && $past(dma_axi_rvalid)) |-> $past(dma_axi_rready); + endproperty + assert_dma_axi_rvalid_stable_till_ready: assert property (dma_axi_rvalid_stable_till_ready) else + $display("DMA AXI rvalid deasserted without rready"); + + // Assertion to check rresp stays stable during entire bus clock + property dma_axi_rresp_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_rvalid & (dma_axi_rresp[1:0] != $past(dma_axi_rresp[1:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_rresp_stable: assert property (dma_axi_rresp_stable) else + $display("DMA AXI rresp changed in middle of bus clock"); + + // Assertion to check rid stays stable during entire bus clock + property dma_axi_rid_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_rvalid & (dma_axi_rid[pt.DMA_BUS_TAG-1:0] != $past(dma_axi_rid[pt.DMA_BUS_TAG-1:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_rid_stable: assert property (dma_axi_rid_stable) else + $display("DMA AXI rid changed in middle of bus clock"); + + // Assertion to check rdata stays stable during entire bus clock + property dma_axi_rdata_stable; + @(posedge clk) disable iff(~rst_l) (dma_axi_rvalid & (dma_axi_rdata[63:0] != $past(dma_axi_rdata[63:0]))) |-> $past(dma_bus_clk_en); + endproperty + assert_dma_axi_rdata_stable: assert property (dma_axi_rdata_stable) else + $display("DMA AXI rdata changed in middle of bus clock"); + +`endif + 
+endmodule // el2_dma_ctrl diff --git a/design/el2_mem.sv b/design/el2_mem.sv new file mode 100644 index 0000000..49186e7 --- /dev/null +++ b/design/el2_mem.sv @@ -0,0 +1,142 @@ +//******************************************************************************** +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//******************************************************************************** + +//******************************************************************************** +// Function: Wrapper around the core memories (DCCM, Icache, ICCM) +// Comments: Each memory is instantiated only when its pt.*_ENABLE parameter is set; +// otherwise the outputs it would have driven are tied to zero. +//******************************************************************************** + +module el2_mem +import el2_pkg::*; +#( +`include "el2_param.vh" + ) +( + input logic clk, + input logic rst_l, + input logic dccm_clk_override, + input logic icm_clk_override, + input logic dec_tlu_core_ecc_disable, + + //DCCM ports + input logic dccm_wren, + input logic dccm_rden, + input logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, + input logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, + input logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, + input logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, + + + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, + +//`ifdef pt.DCCM_ENABLE + +//`endif + + //ICCM ports + + input logic [pt.ICCM_BITS-1:1] iccm_rw_addr, + input logic iccm_buf_correct_ecc, // ICCM is doing a single bit error correct cycle + input logic iccm_correction_state, // ICCM is doing a single bit 
error correct cycle + input logic iccm_wren, + input logic iccm_rden, + input logic [2:0] iccm_wr_size, + input logic [77:0] iccm_wr_data, + + output logic [63:0] iccm_rd_data, + output logic [77:0] iccm_rd_data_ecc, + + // Icache and Itag Ports + + input logic [31:1] ic_rw_addr, + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, + input logic ic_rd_en, + input logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. + input logic ic_sel_premux_data, // Premux data sel + + input logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC + input logic [70:0] ic_debug_wr_data, // Debug wr cache. + output logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write address to the Icache. + input logic ic_debug_rd_en, // Icache debug rd + input logic ic_debug_wr_en, // Icache debug wr + input logic ic_debug_tag_array, // Debug tag array + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. + + output logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + output logic [25:0] ictag_debug_rd_data,// Debug icache tag. 
+ + + output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // ecc error per bank + output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, // parity error per bank + output logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, + output logic ic_tag_perr, // Icache Tag parity error + + + input logic scan_mode + +); + + // DCCM Instantiation + if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable + el2_lsu_dccm_mem #(.pt(pt)) dccm ( + .clk_override(dccm_clk_override), + .* + ); + end else begin: Gen_dccm_disable + assign dccm_rd_data_lo = '0; + assign dccm_rd_data_hi = '0; + end + +if ( pt.ICACHE_ENABLE ) begin: icache + el2_ifu_ic_mem #(.pt(pt)) icm ( + .clk_override(icm_clk_override), + .* + ); +end +else begin + assign ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0] = '0; + assign ic_tag_perr = '0 ; + assign ic_rd_data = '0 ; + assign ictag_debug_rd_data = '0 ; + // Tie off the remaining Icache outputs so that none is left undriven when the Icache is disabled + assign ic_debug_rd_data = '0 ; + assign ic_eccerr = '0 ; + assign ic_parerr = '0 ; +end // else: !if( pt.ICACHE_ENABLE ) + + + +if (pt.ICCM_ENABLE) begin : iccm + el2_ifu_iccm_mem #(.pt(pt)) iccm (.*, + .clk_override(icm_clk_override), + .iccm_rw_addr(iccm_rw_addr[pt.ICCM_BITS-1:1]), + .iccm_rd_data(iccm_rd_data[63:0]) + ); +end +else begin + assign iccm_rd_data = '0 ; + assign iccm_rd_data_ecc = '0 ; +end + + +endmodule diff --git a/design/el2_pic_ctrl.sv b/design/el2_pic_ctrl.sv new file mode 100644 index 0000000..b39dae3 --- /dev/null +++ b/design/el2_pic_ctrl.sv @@ -0,0 +1,502 @@ +//******************************************************************************** +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +//******************************************************************************** + +//******************************************************************************** +// Function: Programmable Interrupt Controller +// Comments: +//******************************************************************************** + +module el2_pic_ctrl #( +`include "el2_param.vh" + ) + ( + + input logic clk, // Core clock + input logic free_clk, // free clock + input logic active_clk, // active clock + input logic rst_l, // Reset for all flops + input logic clk_override, // Clock over-ride for gating + input logic [pt.PIC_TOTAL_INT_PLUS1-1:0] extintsrc_req, // Interrupt requests + input logic [31:0] picm_rdaddr, // Read address of the register + input logic [31:0] picm_wraddr, // Write address of the register + input logic [31:0] picm_wr_data, // Data to be written to the register + input logic picm_wren, // Write enable to the register + input logic picm_rden, // Read enable for the register + input logic picm_mken, // Read the Mask for the register + input logic [3:0] meicurpl, // Current Priority Level + input logic [3:0] meipt, // Current Priority Threshold + + output logic mexintpend, // External Interrupt request to the core + output logic [7:0] claimid, // Claim Id of the requested interrupt + output logic [3:0] pl, // Priority level of the requested interrupt + output logic [31:0] picm_rd_data, // Read data of the register + output logic mhwakeup, // Wake-up interrupt request + input logic scan_mode // scan mode + +); + +localparam NUM_LEVELS = $clog2(pt.PIC_TOTAL_INT_PLUS1); +localparam INTPRIORITY_BASE_ADDR = pt.PIC_BASE_ADDR ; +localparam INTPEND_BASE_ADDR = pt.PIC_BASE_ADDR + 32'h00001000 ; +localparam INTENABLE_BASE_ADDR = pt.PIC_BASE_ADDR + 32'h00002000 ; +localparam EXT_INTR_PIC_CONFIG = pt.PIC_BASE_ADDR + 32'h00003000 ; +localparam EXT_INTR_GW_CONFIG = pt.PIC_BASE_ADDR + 
32'h00004000 ; +localparam EXT_INTR_GW_CLEAR = pt.PIC_BASE_ADDR + 32'h00005000 ; + + +localparam INTPEND_SIZE = (pt.PIC_TOTAL_INT_PLUS1 < 32) ? 32 : + (pt.PIC_TOTAL_INT_PLUS1 < 64) ? 64 : + (pt.PIC_TOTAL_INT_PLUS1 < 128) ? 128 : + (pt.PIC_TOTAL_INT_PLUS1 < 256) ? 256 : + (pt.PIC_TOTAL_INT_PLUS1 < 512) ? 512 : 1024 ; + +localparam INT_GRPS = INTPEND_SIZE / 32 ; +localparam INTPRIORITY_BITS = 4 ; +localparam ID_BITS = 8 ; +localparam int GW_CONFIG[pt.PIC_TOTAL_INT_PLUS1-1:0] = '{default:0} ; + +logic addr_intpend_base_match; + +logic raddr_config_pic_match ; +logic raddr_intenable_base_match; +logic raddr_intpriority_base_match; +logic raddr_config_gw_base_match ; + +logic waddr_config_pic_match ; +logic waddr_intpriority_base_match; +logic waddr_intenable_base_match; +logic waddr_config_gw_base_match ; +logic addr_clear_gw_base_match ; + +logic mexintpend_in; +logic mhwakeup_in ; +logic intpend_reg_read ; + +logic [31:0] picm_rd_data_in, intpend_rd_out; +logic intenable_rd_out ; +logic [INTPRIORITY_BITS-1:0] intpriority_rd_out; +logic [1:0] gw_config_rd_out; + +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [INTPRIORITY_BITS-1:0] intpriority_reg; +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [INTPRIORITY_BITS-1:0] intpriority_reg_inv; +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intpriority_reg_we; +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intpriority_reg_re; +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [1:0] gw_config_reg; + +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_reg; +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_reg_we; +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_reg_re; +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] gw_config_reg_we; +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] gw_config_reg_re; +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] gw_clear_reg_we; + +logic [INTPEND_SIZE-1:0] intpend_reg_extended; + +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [INTPRIORITY_BITS-1:0] intpend_w_prior_en; +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [ID_BITS-1:0] intpend_id; +logic [INTPRIORITY_BITS-1:0] maxint; +logic [INTPRIORITY_BITS-1:0] 
selected_int_priority; +logic [INT_GRPS-1:0] [31:0] intpend_rd_part_out ; + + +logic [NUM_LEVELS:NUM_LEVELS/2] [(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2))+1:0] [INTPRIORITY_BITS-1:0] levelx_intpend_w_prior_en; +logic [NUM_LEVELS:NUM_LEVELS/2] [(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2))+1:0] [ID_BITS-1:0] levelx_intpend_id; +logic [(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2)):0] [INTPRIORITY_BITS-1:0] l2_intpend_w_prior_en_ff; +logic [(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2)):0] [ID_BITS-1:0] l2_intpend_id_ff; + +logic config_reg; +logic intpriord; +logic config_reg_we ; +logic config_reg_re ; +logic config_reg_in ; +logic prithresh_reg_write , prithresh_reg_read; +logic intpriority_reg_read ; +logic intenable_reg_read ; +logic gw_config_reg_read ; +logic picm_wren_ff , picm_rden_ff ; +logic [31:0] picm_raddr_ff; +logic [31:0] picm_waddr_ff; +logic [31:0] picm_wr_data_ff; +logic [3:0] mask; +logic picm_mken_ff; +logic [ID_BITS-1:0] claimid_in ; +logic [INTPRIORITY_BITS-1:0] pl_in ; +logic [INTPRIORITY_BITS-1:0] pl_in_q ; + +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] extintsrc_req_sync; +logic [pt.PIC_TOTAL_INT_PLUS1-1:0] extintsrc_req_gw; + logic picm_bypass_ff; + +// clkens + logic pic_raddr_c1_clken; + logic pic_waddr_c1_clken; + logic pic_data_c1_clken; + logic pic_pri_c1_clken; + logic pic_int_c1_clken; + logic gw_config_c1_clken; + +// clocks + logic pic_raddr_c1_clk; + logic pic_data_c1_clk; + logic pic_pri_c1_clk; + logic pic_int_c1_clk; + logic gw_config_c1_clk; + +// ---- Clock gating section ------ +// c1 clock enables + assign pic_raddr_c1_clken = picm_mken | picm_rden | clk_override; + assign pic_data_c1_clken = picm_wren | clk_override; + assign pic_pri_c1_clken = (waddr_intpriority_base_match & picm_wren_ff) | (raddr_intpriority_base_match & picm_rden_ff) | clk_override; + assign pic_int_c1_clken = (waddr_intenable_base_match & picm_wren_ff) | (raddr_intenable_base_match & picm_rden_ff) | clk_override; + assign gw_config_c1_clken = (waddr_config_gw_base_match & 
picm_wren_ff) | (raddr_config_gw_base_match & picm_rden_ff) | clk_override; + + // C1 - 1 clock pulse for data + rvoclkhdr pic_addr_c1_cgc ( .en(pic_raddr_c1_clken), .l1clk(pic_raddr_c1_clk), .* ); + rvoclkhdr pic_data_c1_cgc ( .en(pic_data_c1_clken), .l1clk(pic_data_c1_clk), .* ); + rvoclkhdr pic_pri_c1_cgc ( .en(pic_pri_c1_clken), .l1clk(pic_pri_c1_clk), .* ); + rvoclkhdr pic_int_c1_cgc ( .en(pic_int_c1_clken), .l1clk(pic_int_c1_clk), .* ); + rvoclkhdr gw_config_c1_cgc ( .en(gw_config_c1_clken), .l1clk(gw_config_c1_clk), .* ); + +// ------ end clock gating section ------------------------ + +assign raddr_intenable_base_match = (picm_raddr_ff[31:NUM_LEVELS+2] == INTENABLE_BASE_ADDR[31:NUM_LEVELS+2]) ; +assign raddr_intpriority_base_match = (picm_raddr_ff[31:NUM_LEVELS+2] == INTPRIORITY_BASE_ADDR[31:NUM_LEVELS+2]) ; +assign raddr_config_gw_base_match = (picm_raddr_ff[31:NUM_LEVELS+2] == EXT_INTR_GW_CONFIG[31:NUM_LEVELS+2]) ; +assign raddr_config_pic_match = (picm_raddr_ff[31:0] == EXT_INTR_PIC_CONFIG[31:0]) ; + +assign addr_intpend_base_match = (picm_raddr_ff[31:6] == INTPEND_BASE_ADDR[31:6]) ; + +assign waddr_config_pic_match = (picm_waddr_ff[31:0] == EXT_INTR_PIC_CONFIG[31:0]) ; +assign addr_clear_gw_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == EXT_INTR_GW_CLEAR[31:NUM_LEVELS+2]) ; +assign waddr_intpriority_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == INTPRIORITY_BASE_ADDR[31:NUM_LEVELS+2]) ; +assign waddr_intenable_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == INTENABLE_BASE_ADDR[31:NUM_LEVELS+2]) ; +assign waddr_config_gw_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == EXT_INTR_GW_CONFIG[31:NUM_LEVELS+2]) ; + + assign picm_bypass_ff = picm_rden_ff & picm_wren_ff & ( picm_raddr_ff[31:0] == picm_waddr_ff[31:0] ); // pic writes and reads to same address together + + +rvdff #(32) picm_radd_flop (.*, .din (picm_rdaddr), .dout(picm_raddr_ff), .clk(pic_raddr_c1_clk)); +rvdff #(32) picm_wadd_flop (.*, .din (picm_wraddr), .dout(picm_waddr_ff), 
.clk(pic_data_c1_clk)); +rvdff #(1) picm_wre_flop (.*, .din (picm_wren), .dout(picm_wren_ff), .clk(active_clk)); +rvdff #(1) picm_rde_flop (.*, .din (picm_rden), .dout(picm_rden_ff), .clk(active_clk)); +rvdff #(1) picm_mke_flop (.*, .din (picm_mken), .dout(picm_mken_ff), .clk(active_clk)); +rvdff #(32) picm_dat_flop (.*, .din (picm_wr_data[31:0]), .dout(picm_wr_data_ff[31:0]), .clk(pic_data_c1_clk)); + +rvsyncss #(pt.PIC_TOTAL_INT_PLUS1-1) sync_inst +( + .clk (free_clk), + .dout(extintsrc_req_sync[pt.PIC_TOTAL_INT_PLUS1-1:1]), + .din (extintsrc_req[pt.PIC_TOTAL_INT_PLUS1-1:1]), + .*) ; + +assign extintsrc_req_sync[0] = extintsrc_req[0]; + +genvar i ; +for (i=0; i 0 ) begin : NON_ZERO_INT + assign intpriority_reg_we[i] = waddr_intpriority_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff; + assign intpriority_reg_re[i] = raddr_intpriority_base_match & (picm_raddr_ff[NUM_LEVELS+1:2] == i) & picm_rden_ff; + + assign intenable_reg_we[i] = waddr_intenable_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff; + assign intenable_reg_re[i] = raddr_intenable_base_match & (picm_raddr_ff[NUM_LEVELS+1:2] == i) & picm_rden_ff; + + assign gw_config_reg_we[i] = waddr_config_gw_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff; + assign gw_config_reg_re[i] = raddr_config_gw_base_match & (picm_raddr_ff[NUM_LEVELS+1:2] == i) & picm_rden_ff; + + assign gw_clear_reg_we[i] = addr_clear_gw_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff ; + + rvdffs #(INTPRIORITY_BITS) intpriority_ff (.*, .en( intpriority_reg_we[i]), .din (picm_wr_data_ff[INTPRIORITY_BITS-1:0]), .dout(intpriority_reg[i]), .clk(pic_pri_c1_clk)); + rvdffs #(1) intenable_ff (.*, .en( intenable_reg_we[i]), .din (picm_wr_data_ff[0]), .dout(intenable_reg[i]), .clk(pic_int_c1_clk)); + + + rvdffs #(2) gw_config_ff (.*, .en( gw_config_reg_we[i]), .din (picm_wr_data_ff[1:0]), .dout(gw_config_reg[i]), .clk(gw_config_c1_clk)); + el2_configurable_gw config_gw_inst(.*, 
.clk(free_clk), + .extintsrc_req_sync(extintsrc_req_sync[i]) , + .meigwctrl_polarity(gw_config_reg[i][0]) , + .meigwctrl_type(gw_config_reg[i][1]) , + .meigwclr(gw_clear_reg_we[i]) , + .extintsrc_req_config(extintsrc_req_gw[i]) + ); + + end else begin : INT_ZERO + assign intpriority_reg_we[i] = 1'b0 ; + assign intpriority_reg_re[i] = 1'b0 ; + assign intenable_reg_we[i] = 1'b0 ; + assign intenable_reg_re[i] = 1'b0 ; + + assign gw_config_reg_we[i] = 1'b0 ; + assign gw_config_reg_re[i] = 1'b0 ; + assign gw_clear_reg_we[i] = 1'b0 ; + + assign gw_config_reg[i] = '0 ; + + assign intpriority_reg[i] = {INTPRIORITY_BITS{1'b0}} ; + assign intenable_reg[i] = 1'b0 ; + assign extintsrc_req_gw[i] = 1'b0 ; + end + + + assign intpriority_reg_inv[i] = intpriord ? ~intpriority_reg[i] : intpriority_reg[i] ; + + assign intpend_w_prior_en[i] = {INTPRIORITY_BITS{(extintsrc_req_gw[i] & intenable_reg[i])}} & intpriority_reg_inv[i] ; + assign intpend_id[i] = i ; +end + + + assign pl_in[INTPRIORITY_BITS-1:0] = selected_int_priority[INTPRIORITY_BITS-1:0] ; + + + genvar l, m , j, k; + +if (pt.PIC_2CYCLE == 1) begin : genblock + logic [NUM_LEVELS/2:0] [pt.PIC_TOTAL_INT_PLUS1+2:0] [INTPRIORITY_BITS-1:0] level_intpend_w_prior_en; + logic [NUM_LEVELS/2:0] [pt.PIC_TOTAL_INT_PLUS1+2:0] [ID_BITS-1:0] level_intpend_id; + + assign level_intpend_w_prior_en[0][pt.PIC_TOTAL_INT_PLUS1+2:0] = {4'b0,4'b0,4'b0,intpend_w_prior_en[pt.PIC_TOTAL_INT_PLUS1-1:0]} ; + assign level_intpend_id[0][pt.PIC_TOTAL_INT_PLUS1+2:0] = {8'b0,8'b0,8'b0,intpend_id[pt.PIC_TOTAL_INT_PLUS1-1:0]} ; + + + assign levelx_intpend_w_prior_en[NUM_LEVELS/2][(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2))+1:0] = {{1*INTPRIORITY_BITS{1'b0}},l2_intpend_w_prior_en_ff[(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2)):0]} ; + assign levelx_intpend_id[NUM_LEVELS/2][(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2))+1:0] = {{1*ID_BITS{1'b1}},l2_intpend_id_ff[(pt.PIC_TOTAL_INT_PLUS1/2**(NUM_LEVELS/2)):0]} ; +/// Do the prioritization of the interrupts here 
//////////// + for (l=0; l meipt_inv[INTPRIORITY_BITS-1:0]) & + ( selected_int_priority[INTPRIORITY_BITS-1:0] > meicurpl_inv[INTPRIORITY_BITS-1:0]) ); +rvdff #(1) mexintpend_ff (.*, .clk(free_clk), .din (mexintpend_in), .dout(mexintpend)); + +assign maxint[INTPRIORITY_BITS-1:0] = intpriord ? 0 : 15 ; +assign mhwakeup_in = ( pl_in_q[INTPRIORITY_BITS-1:0] == maxint) ; +rvdff #(1) wake_up_ff (.*, .clk(free_clk), .din (mhwakeup_in), .dout(mhwakeup)); + + + +////////////////////////////////////////////////////////////////////////// +// Reads of register. +// 1- intpending +////////////////////////////////////////////////////////////////////////// + +assign intpend_reg_read = addr_intpend_base_match & picm_rden_ff ; +assign intpriority_reg_read = raddr_intpriority_base_match & picm_rden_ff; +assign intenable_reg_read = raddr_intenable_base_match & picm_rden_ff; +assign gw_config_reg_read = raddr_config_gw_base_match & picm_rden_ff; + +assign intpend_reg_extended[INTPEND_SIZE-1:0] = {{INTPEND_SIZE-pt.PIC_TOTAL_INT_PLUS1{1'b0}},extintsrc_req_gw[pt.PIC_TOTAL_INT_PLUS1-1:0]} ; + + for (i=0; i<(INT_GRPS); i++) begin + assign intpend_rd_part_out[i] = (({32{intpend_reg_read & picm_raddr_ff[5:2] == i}}) & intpend_reg_extended[((32*i)+31):(32*i)]) ; + end + + always_comb begin : INTPEND_RD + intpend_rd_out = '0 ; + for (int i=0; i AHB Gasket for LSU + axi4_to_ahb #(.pt(pt), + .TAG(pt.LSU_BUS_TAG)) lsu_axi4_to_ahb ( + .clk_override(dec_tlu_bus_clk_override), + .bus_clk_en(lsu_bus_clk_en), + + // AXI Write Channels + .axi_awvalid(lsu_axi_awvalid), + .axi_awready(lsu_axi_awready_ahb), + .axi_awid(lsu_axi_awid[pt.LSU_BUS_TAG-1:0]), + .axi_awaddr(lsu_axi_awaddr[31:0]), + .axi_awsize(lsu_axi_awsize[2:0]), + .axi_awprot(lsu_axi_awprot[2:0]), + + .axi_wvalid(lsu_axi_wvalid), + .axi_wready(lsu_axi_wready_ahb), + .axi_wdata(lsu_axi_wdata[63:0]), + .axi_wstrb(lsu_axi_wstrb[7:0]), + .axi_wlast(lsu_axi_wlast), + + .axi_bvalid(lsu_axi_bvalid_ahb), + .axi_bready(lsu_axi_bready), + 
.axi_bresp(lsu_axi_bresp_ahb[1:0]), + .axi_bid(lsu_axi_bid_ahb[pt.LSU_BUS_TAG-1:0]), + + // AXI Read Channels + .axi_arvalid(lsu_axi_arvalid), + .axi_arready(lsu_axi_arready_ahb), + .axi_arid(lsu_axi_arid[pt.LSU_BUS_TAG-1:0]), + .axi_araddr(lsu_axi_araddr[31:0]), + .axi_arsize(lsu_axi_arsize[2:0]), + .axi_arprot(lsu_axi_arprot[2:0]), + + .axi_rvalid(lsu_axi_rvalid_ahb), + .axi_rready(lsu_axi_rready), + .axi_rid(lsu_axi_rid_ahb[pt.LSU_BUS_TAG-1:0]), + .axi_rdata(lsu_axi_rdata_ahb[63:0]), + .axi_rresp(lsu_axi_rresp_ahb[1:0]), + .axi_rlast(lsu_axi_rlast_ahb), + + // AHB-LITE signals + .ahb_haddr(lsu_haddr[31:0]), + .ahb_hburst(lsu_hburst), + .ahb_hmastlock(lsu_hmastlock), + .ahb_hprot(lsu_hprot[3:0]), + .ahb_hsize(lsu_hsize[2:0]), + .ahb_htrans(lsu_htrans[1:0]), + .ahb_hwrite(lsu_hwrite), + .ahb_hwdata(lsu_hwdata[63:0]), + + .ahb_hrdata(lsu_hrdata[63:0]), + .ahb_hready(lsu_hready), + .ahb_hresp(lsu_hresp), + + .* + ); + + axi4_to_ahb #(.pt(pt), + .TAG(pt.IFU_BUS_TAG)) ifu_axi4_to_ahb ( + .clk(clk), + .clk_override(dec_tlu_bus_clk_override), + .bus_clk_en(ifu_bus_clk_en), + + // AHB-Lite signals + .ahb_haddr(haddr[31:0]), + .ahb_hburst(hburst), + .ahb_hmastlock(hmastlock), + .ahb_hprot(hprot[3:0]), + .ahb_hsize(hsize[2:0]), + .ahb_htrans(htrans[1:0]), + .ahb_hwrite(hwrite), + .ahb_hwdata(hwdata_nc[63:0]), + + .ahb_hrdata(hrdata[63:0]), + .ahb_hready(hready), + .ahb_hresp(hresp), + + // AXI Write Channels + .axi_awvalid(ifu_axi_awvalid), + .axi_awready(ifu_axi_awready_ahb), + .axi_awid(ifu_axi_awid[pt.IFU_BUS_TAG-1:0]), + .axi_awaddr(ifu_axi_awaddr[31:0]), + .axi_awsize(ifu_axi_awsize[2:0]), + .axi_awprot(ifu_axi_awprot[2:0]), + + .axi_wvalid(ifu_axi_wvalid), + .axi_wready(ifu_axi_wready_ahb), + .axi_wdata(ifu_axi_wdata[63:0]), + .axi_wstrb(ifu_axi_wstrb[7:0]), + .axi_wlast(ifu_axi_wlast), + + .axi_bvalid(ifu_axi_bvalid_ahb), + .axi_bready(1'b1), + .axi_bresp(ifu_axi_bresp_ahb[1:0]), + .axi_bid(ifu_axi_bid_ahb[pt.IFU_BUS_TAG-1:0]), + + // AXI Read Channels + 
.axi_arvalid(ifu_axi_arvalid), + .axi_arready(ifu_axi_arready_ahb), + .axi_arid(ifu_axi_arid[pt.IFU_BUS_TAG-1:0]), + .axi_araddr(ifu_axi_araddr[31:0]), + .axi_arsize(ifu_axi_arsize[2:0]), + .axi_arprot(ifu_axi_arprot[2:0]), + + .axi_rvalid(ifu_axi_rvalid_ahb), + .axi_rready(ifu_axi_rready), + .axi_rid(ifu_axi_rid_ahb[pt.IFU_BUS_TAG-1:0]), + .axi_rdata(ifu_axi_rdata_ahb[63:0]), + .axi_rresp(ifu_axi_rresp_ahb[1:0]), + .axi_rlast(ifu_axi_rlast_ahb), + .* + ); + + // AXI4 -> AHB Gasket for System Bus + axi4_to_ahb #(.pt(pt), + .TAG(pt.SB_BUS_TAG)) sb_axi4_to_ahb ( + .clk_override(dec_tlu_bus_clk_override), + .bus_clk_en(dbg_bus_clk_en), + + // AXI Write Channels + .axi_awvalid(sb_axi_awvalid), + .axi_awready(sb_axi_awready_ahb), + .axi_awid(sb_axi_awid[pt.SB_BUS_TAG-1:0]), + .axi_awaddr(sb_axi_awaddr[31:0]), + .axi_awsize(sb_axi_awsize[2:0]), + .axi_awprot(sb_axi_awprot[2:0]), + + .axi_wvalid(sb_axi_wvalid), + .axi_wready(sb_axi_wready_ahb), + .axi_wdata(sb_axi_wdata[63:0]), + .axi_wstrb(sb_axi_wstrb[7:0]), + .axi_wlast(sb_axi_wlast), + + .axi_bvalid(sb_axi_bvalid_ahb), + .axi_bready(sb_axi_bready), + .axi_bresp(sb_axi_bresp_ahb[1:0]), + .axi_bid(sb_axi_bid_ahb[pt.SB_BUS_TAG-1:0]), + + // AXI Read Channels + .axi_arvalid(sb_axi_arvalid), + .axi_arready(sb_axi_arready_ahb), + .axi_arid(sb_axi_arid[pt.SB_BUS_TAG-1:0]), + .axi_araddr(sb_axi_araddr[31:0]), + .axi_arsize(sb_axi_arsize[2:0]), + .axi_arprot(sb_axi_arprot[2:0]), + + .axi_rvalid(sb_axi_rvalid_ahb), + .axi_rready(sb_axi_rready), + .axi_rid(sb_axi_rid_ahb[pt.SB_BUS_TAG-1:0]), + .axi_rdata(sb_axi_rdata_ahb[63:0]), + .axi_rresp(sb_axi_rresp_ahb[1:0]), + .axi_rlast(sb_axi_rlast_ahb), + // AHB-LITE signals + .ahb_haddr(sb_haddr[31:0]), + .ahb_hburst(sb_hburst), + .ahb_hmastlock(sb_hmastlock), + .ahb_hprot(sb_hprot[3:0]), + .ahb_hsize(sb_hsize[2:0]), + .ahb_htrans(sb_htrans[1:0]), + .ahb_hwrite(sb_hwrite), + .ahb_hwdata(sb_hwdata[63:0]), + + .ahb_hrdata(sb_hrdata[63:0]), + .ahb_hready(sb_hready), + 
.ahb_hresp(sb_hresp), + + .* + ); + + //AHB -> AXI4 Gasket for DMA + ahb_to_axi4 #(.pt(pt), + .TAG(pt.DMA_BUS_TAG)) dma_ahb_to_axi4 ( + .clk_override(dec_tlu_bus_clk_override), + .bus_clk_en(dma_bus_clk_en), + + // AXI Write Channels + .axi_awvalid(dma_axi_awvalid_ahb), + .axi_awready(dma_axi_awready), + .axi_awid(dma_axi_awid_ahb[pt.DMA_BUS_TAG-1:0]), + .axi_awaddr(dma_axi_awaddr_ahb[31:0]), + .axi_awsize(dma_axi_awsize_ahb[2:0]), + .axi_awprot(dma_axi_awprot_ahb[2:0]), + .axi_awlen(dma_axi_awlen_ahb[7:0]), + .axi_awburst(dma_axi_awburst_ahb[1:0]), + + .axi_wvalid(dma_axi_wvalid_ahb), + .axi_wready(dma_axi_wready), + .axi_wdata(dma_axi_wdata_ahb[63:0]), + .axi_wstrb(dma_axi_wstrb_ahb[7:0]), + .axi_wlast(dma_axi_wlast_ahb), + + .axi_bvalid(dma_axi_bvalid), + .axi_bready(dma_axi_bready_ahb), + .axi_bresp(dma_axi_bresp[1:0]), + .axi_bid(dma_axi_bid[pt.DMA_BUS_TAG-1:0]), + + // AXI Read Channels + .axi_arvalid(dma_axi_arvalid_ahb), + .axi_arready(dma_axi_arready), + .axi_arid(dma_axi_arid_ahb[pt.DMA_BUS_TAG-1:0]), + .axi_araddr(dma_axi_araddr_ahb[31:0]), + .axi_arsize(dma_axi_arsize_ahb[2:0]), + .axi_arprot(dma_axi_arprot_ahb[2:0]), + .axi_arlen(dma_axi_arlen_ahb[7:0]), + .axi_arburst(dma_axi_arburst_ahb[1:0]), + + .axi_rvalid(dma_axi_rvalid), + .axi_rready(dma_axi_rready_ahb), + .axi_rid(dma_axi_rid[pt.DMA_BUS_TAG-1:0]), + .axi_rdata(dma_axi_rdata[63:0]), + .axi_rresp(dma_axi_rresp[1:0]), + + // AHB signals + .ahb_haddr(dma_haddr[31:0]), + .ahb_hburst(dma_hburst), + .ahb_hmastlock(dma_hmastlock), + .ahb_hprot(dma_hprot[3:0]), + .ahb_hsize(dma_hsize[2:0]), + .ahb_htrans(dma_htrans[1:0]), + .ahb_hwrite(dma_hwrite), + .ahb_hwdata(dma_hwdata[63:0]), + + .ahb_hrdata(dma_hrdata[63:0]), + .ahb_hreadyout(dma_hreadyout), + .ahb_hresp(dma_hresp), + .ahb_hreadyin(dma_hreadyin), + .ahb_hsel(dma_hsel), + .* + ); + + end + + // Drive the final AXI inputs + assign lsu_axi_awready_int = pt.BUILD_AHB_LITE ? 
lsu_axi_awready_ahb : lsu_axi_awready; + assign lsu_axi_wready_int = pt.BUILD_AHB_LITE ? lsu_axi_wready_ahb : lsu_axi_wready; + assign lsu_axi_bvalid_int = pt.BUILD_AHB_LITE ? lsu_axi_bvalid_ahb : lsu_axi_bvalid; + assign lsu_axi_bready_int = pt.BUILD_AHB_LITE ? lsu_axi_bready_ahb : lsu_axi_bready; + assign lsu_axi_bresp_int[1:0] = pt.BUILD_AHB_LITE ? lsu_axi_bresp_ahb[1:0] : lsu_axi_bresp[1:0]; + assign lsu_axi_bid_int[pt.LSU_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? lsu_axi_bid_ahb[pt.LSU_BUS_TAG-1:0] : lsu_axi_bid[pt.LSU_BUS_TAG-1:0]; + assign lsu_axi_arready_int = pt.BUILD_AHB_LITE ? lsu_axi_arready_ahb : lsu_axi_arready; + assign lsu_axi_rvalid_int = pt.BUILD_AHB_LITE ? lsu_axi_rvalid_ahb : lsu_axi_rvalid; + assign lsu_axi_rid_int[pt.LSU_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? lsu_axi_rid_ahb[pt.LSU_BUS_TAG-1:0] : lsu_axi_rid[pt.LSU_BUS_TAG-1:0]; + assign lsu_axi_rdata_int[63:0] = pt.BUILD_AHB_LITE ? lsu_axi_rdata_ahb[63:0] : lsu_axi_rdata[63:0]; + assign lsu_axi_rresp_int[1:0] = pt.BUILD_AHB_LITE ? lsu_axi_rresp_ahb[1:0] : lsu_axi_rresp[1:0]; + assign lsu_axi_rlast_int = pt.BUILD_AHB_LITE ? lsu_axi_rlast_ahb : lsu_axi_rlast; + + assign ifu_axi_awready_int = pt.BUILD_AHB_LITE ? ifu_axi_awready_ahb : ifu_axi_awready; + assign ifu_axi_wready_int = pt.BUILD_AHB_LITE ? ifu_axi_wready_ahb : ifu_axi_wready; + assign ifu_axi_bvalid_int = pt.BUILD_AHB_LITE ? ifu_axi_bvalid_ahb : ifu_axi_bvalid; + assign ifu_axi_bready_int = pt.BUILD_AHB_LITE ? ifu_axi_bready_ahb : ifu_axi_bready; + assign ifu_axi_bresp_int[1:0] = pt.BUILD_AHB_LITE ? ifu_axi_bresp_ahb[1:0] : ifu_axi_bresp[1:0]; + assign ifu_axi_bid_int[pt.IFU_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? ifu_axi_bid_ahb[pt.IFU_BUS_TAG-1:0] : ifu_axi_bid[pt.IFU_BUS_TAG-1:0]; + assign ifu_axi_arready_int = pt.BUILD_AHB_LITE ? ifu_axi_arready_ahb : ifu_axi_arready; + assign ifu_axi_rvalid_int = pt.BUILD_AHB_LITE ? ifu_axi_rvalid_ahb : ifu_axi_rvalid; + assign ifu_axi_rid_int[pt.IFU_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? 
ifu_axi_rid_ahb[pt.IFU_BUS_TAG-1:0] : ifu_axi_rid[pt.IFU_BUS_TAG-1:0]; + assign ifu_axi_rdata_int[63:0] = pt.BUILD_AHB_LITE ? ifu_axi_rdata_ahb[63:0] : ifu_axi_rdata[63:0]; + assign ifu_axi_rresp_int[1:0] = pt.BUILD_AHB_LITE ? ifu_axi_rresp_ahb[1:0] : ifu_axi_rresp[1:0]; + assign ifu_axi_rlast_int = pt.BUILD_AHB_LITE ? ifu_axi_rlast_ahb : ifu_axi_rlast; + + assign sb_axi_awready_int = pt.BUILD_AHB_LITE ? sb_axi_awready_ahb : sb_axi_awready; + assign sb_axi_wready_int = pt.BUILD_AHB_LITE ? sb_axi_wready_ahb : sb_axi_wready; + assign sb_axi_bvalid_int = pt.BUILD_AHB_LITE ? sb_axi_bvalid_ahb : sb_axi_bvalid; + assign sb_axi_bready_int = pt.BUILD_AHB_LITE ? sb_axi_bready_ahb : sb_axi_bready; + assign sb_axi_bresp_int[1:0] = pt.BUILD_AHB_LITE ? sb_axi_bresp_ahb[1:0] : sb_axi_bresp[1:0]; + assign sb_axi_bid_int[pt.SB_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? sb_axi_bid_ahb[pt.SB_BUS_TAG-1:0] : sb_axi_bid[pt.SB_BUS_TAG-1:0]; + assign sb_axi_arready_int = pt.BUILD_AHB_LITE ? sb_axi_arready_ahb : sb_axi_arready; + assign sb_axi_rvalid_int = pt.BUILD_AHB_LITE ? sb_axi_rvalid_ahb : sb_axi_rvalid; + assign sb_axi_rid_int[pt.SB_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? sb_axi_rid_ahb[pt.SB_BUS_TAG-1:0] : sb_axi_rid[pt.SB_BUS_TAG-1:0]; + assign sb_axi_rdata_int[63:0] = pt.BUILD_AHB_LITE ? sb_axi_rdata_ahb[63:0] : sb_axi_rdata[63:0]; + assign sb_axi_rresp_int[1:0] = pt.BUILD_AHB_LITE ? sb_axi_rresp_ahb[1:0] : sb_axi_rresp[1:0]; + assign sb_axi_rlast_int = pt.BUILD_AHB_LITE ? sb_axi_rlast_ahb : sb_axi_rlast; + + assign dma_axi_awvalid_int = pt.BUILD_AHB_LITE ? dma_axi_awvalid_ahb : dma_axi_awvalid; + assign dma_axi_awid_int[pt.DMA_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? dma_axi_awid_ahb[pt.DMA_BUS_TAG-1:0] : dma_axi_awid[pt.DMA_BUS_TAG-1:0]; + assign dma_axi_awaddr_int[31:0] = pt.BUILD_AHB_LITE ? dma_axi_awaddr_ahb[31:0] : dma_axi_awaddr[31:0]; + assign dma_axi_awsize_int[2:0] = pt.BUILD_AHB_LITE ? dma_axi_awsize_ahb[2:0] : dma_axi_awsize[2:0]; + assign dma_axi_awprot_int[2:0] = pt.BUILD_AHB_LITE ? 
dma_axi_awprot_ahb[2:0] : dma_axi_awprot[2:0]; + assign dma_axi_awlen_int[7:0] = pt.BUILD_AHB_LITE ? dma_axi_awlen_ahb[7:0] : dma_axi_awlen[7:0]; + assign dma_axi_awburst_int[1:0] = pt.BUILD_AHB_LITE ? dma_axi_awburst_ahb[1:0] : dma_axi_awburst[1:0]; + assign dma_axi_wvalid_int = pt.BUILD_AHB_LITE ? dma_axi_wvalid_ahb : dma_axi_wvalid; + assign dma_axi_wdata_int[63:0] = pt.BUILD_AHB_LITE ? dma_axi_wdata_ahb[63:0] : dma_axi_wdata; + assign dma_axi_wstrb_int[7:0] = pt.BUILD_AHB_LITE ? dma_axi_wstrb_ahb[7:0] : dma_axi_wstrb[7:0]; + assign dma_axi_wlast_int = pt.BUILD_AHB_LITE ? dma_axi_wlast_ahb : dma_axi_wlast; + assign dma_axi_bready_int = pt.BUILD_AHB_LITE ? dma_axi_bready_ahb : dma_axi_bready; + assign dma_axi_arvalid_int = pt.BUILD_AHB_LITE ? dma_axi_arvalid_ahb : dma_axi_arvalid; + assign dma_axi_arid_int[pt.DMA_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? dma_axi_arid_ahb[pt.DMA_BUS_TAG-1:0] : dma_axi_arid[pt.DMA_BUS_TAG-1:0]; + assign dma_axi_araddr_int[31:0] = pt.BUILD_AHB_LITE ? dma_axi_araddr_ahb[31:0] : dma_axi_araddr[31:0]; + assign dma_axi_arsize_int[2:0] = pt.BUILD_AHB_LITE ? dma_axi_arsize_ahb[2:0] : dma_axi_arsize[2:0]; + assign dma_axi_arprot_int[2:0] = pt.BUILD_AHB_LITE ? dma_axi_arprot_ahb[2:0] : dma_axi_arprot[2:0]; + assign dma_axi_arlen_int[7:0] = pt.BUILD_AHB_LITE ? dma_axi_arlen_ahb[7:0] : dma_axi_arlen[7:0]; + assign dma_axi_arburst_int[1:0] = pt.BUILD_AHB_LITE ? dma_axi_arburst_ahb[1:0] : dma_axi_arburst[1:0]; + assign dma_axi_rready_int = pt.BUILD_AHB_LITE ? 
dma_axi_rready_ahb : dma_axi_rready; + + +if (pt.BUILD_AHB_LITE == 1) begin +`ifdef ASSERT_ON + property ahb_trxn_aligned; + @(posedge clk) disable iff(~rst_l) (lsu_htrans[1:0] != 2'b0) |-> ((lsu_hsize[2:0] == 3'h0) | + ((lsu_hsize[2:0] == 3'h1) & (lsu_haddr[0] == 1'b0)) | + ((lsu_hsize[2:0] == 3'h2) & (lsu_haddr[1:0] == 2'b0)) | + ((lsu_hsize[2:0] == 3'h3) & (lsu_haddr[2:0] == 3'b0))); + endproperty + assert_ahb_trxn_aligned: assert property (ahb_trxn_aligned) else + $display("Assertion ahb_trxn_aligned failed: lsu_htrans=2'h%h, lsu_hsize=3'h%h, lsu_haddr=32'h%h",lsu_htrans[1:0], lsu_hsize[2:0], lsu_haddr[31:0]); + + property dma_trxn_aligned; + @(posedge clk) disable iff(~rst_l) (dma_htrans[1:0] != 2'b0) |-> ((dma_hsize[2:0] == 3'h0) | + ((dma_hsize[2:0] == 3'h1) & (dma_haddr[0] == 1'b0)) | + ((dma_hsize[2:0] == 3'h2) & (dma_haddr[1:0] == 2'b0)) | + ((dma_hsize[2:0] == 3'h3) & (dma_haddr[2:0] == 3'b0))); + endproperty + + +`endif + end // if (pt.BUILD_AHB_LITE == 1) + + + // unpack packet + // also need retires_p==3 + + assign trace_rv_i_insn_ip[31:0] = rv_trace_pkt.rv_i_insn_ip[31:0]; + assign trace_rv_i_address_ip[31:0] = rv_trace_pkt.rv_i_address_ip[31:0]; + assign trace_rv_i_valid_ip[1:0] = rv_trace_pkt.rv_i_valid_ip[1:0]; + assign trace_rv_i_exception_ip[1:0] = rv_trace_pkt.rv_i_exception_ip[1:0]; + assign trace_rv_i_ecause_ip[4:0] = rv_trace_pkt.rv_i_ecause_ip[4:0]; + assign trace_rv_i_interrupt_ip[2:0] = rv_trace_pkt.rv_i_interrupt_ip[2:0]; + assign trace_rv_i_tval_ip[31:0] = rv_trace_pkt.rv_i_tval_ip[31:0]; + + + + + +endmodule // el2_swerv + diff --git a/design/el2_swerv_wrapper.sv b/design/el2_swerv_wrapper.sv new file mode 100644 index 0000000..b52194e --- /dev/null +++ b/design/el2_swerv_wrapper.sv @@ -0,0 +1,712 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// Function: Top wrapper file with el2_swerv/mem instantiated inside +// Comments: Top-level bus ports are AXI4 under RV_BUILD_AXI4 or AHB-Lite under RV_BUILD_AHB_LITE; the other bus family is declared internally +// +//******************************************************************************** +module el2_swerv_wrapper +import el2_pkg::*; + #( +`include "el2_param.vh" +) +( + input logic clk, + input logic rst_l, + input logic [31:1] rst_vec, + input logic nmi_int, + input logic [31:1] nmi_vec, + input logic [31:1] jtag_id, + + + output logic [31:0] trace_rv_i_insn_ip, + output logic [31:0] trace_rv_i_address_ip, + output logic [1:0] trace_rv_i_valid_ip, + output logic [1:0] trace_rv_i_exception_ip, + output logic [4:0] trace_rv_i_ecause_ip, + output logic [2:0] trace_rv_i_interrupt_ip, + output logic [31:0] trace_rv_i_tval_ip, + + // Bus signals +`ifdef RV_BUILD_AXI4 + //-------------------------- LSU AXI signals-------------------------- + // AXI Write Channels + output logic lsu_axi_awvalid, + input logic lsu_axi_awready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid, + output logic [31:0] lsu_axi_awaddr, + output logic [3:0] lsu_axi_awregion, + output logic [7:0] lsu_axi_awlen, + output logic [2:0] lsu_axi_awsize, + output logic [1:0] lsu_axi_awburst, + output logic lsu_axi_awlock, + output logic [3:0] lsu_axi_awcache, + output logic [2:0] lsu_axi_awprot, + output logic [3:0] lsu_axi_awqos, + + output logic lsu_axi_wvalid, +
input logic lsu_axi_wready, + output logic [63:0] lsu_axi_wdata, + output logic [7:0] lsu_axi_wstrb, + output logic lsu_axi_wlast, + + input logic lsu_axi_bvalid, + output logic lsu_axi_bready, + input logic [1:0] lsu_axi_bresp, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid, + + // AXI Read Channels + output logic lsu_axi_arvalid, + input logic lsu_axi_arready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid, + output logic [31:0] lsu_axi_araddr, + output logic [3:0] lsu_axi_arregion, + output logic [7:0] lsu_axi_arlen, + output logic [2:0] lsu_axi_arsize, + output logic [1:0] lsu_axi_arburst, + output logic lsu_axi_arlock, + output logic [3:0] lsu_axi_arcache, + output logic [2:0] lsu_axi_arprot, + output logic [3:0] lsu_axi_arqos, + + input logic lsu_axi_rvalid, + output logic lsu_axi_rready, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid, + input logic [63:0] lsu_axi_rdata, + input logic [1:0] lsu_axi_rresp, + input logic lsu_axi_rlast, + + //-------------------------- IFU AXI signals-------------------------- + // AXI Write Channels + output logic ifu_axi_awvalid, + input logic ifu_axi_awready, + output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid, + output logic [31:0] ifu_axi_awaddr, + output logic [3:0] ifu_axi_awregion, + output logic [7:0] ifu_axi_awlen, + output logic [2:0] ifu_axi_awsize, + output logic [1:0] ifu_axi_awburst, + output logic ifu_axi_awlock, + output logic [3:0] ifu_axi_awcache, + output logic [2:0] ifu_axi_awprot, + output logic [3:0] ifu_axi_awqos, + + output logic ifu_axi_wvalid, + input logic ifu_axi_wready, + output logic [63:0] ifu_axi_wdata, + output logic [7:0] ifu_axi_wstrb, + output logic ifu_axi_wlast, + + input logic ifu_axi_bvalid, + output logic ifu_axi_bready, + input logic [1:0] ifu_axi_bresp, + input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_bid, + + // AXI Read Channels + output logic ifu_axi_arvalid, + input logic ifu_axi_arready, + output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid, + output logic [31:0] ifu_axi_araddr, + output
logic [3:0] ifu_axi_arregion, + output logic [7:0] ifu_axi_arlen, + output logic [2:0] ifu_axi_arsize, + output logic [1:0] ifu_axi_arburst, + output logic ifu_axi_arlock, + output logic [3:0] ifu_axi_arcache, + output logic [2:0] ifu_axi_arprot, + output logic [3:0] ifu_axi_arqos, + + input logic ifu_axi_rvalid, + output logic ifu_axi_rready, + input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid, + input logic [63:0] ifu_axi_rdata, + input logic [1:0] ifu_axi_rresp, + input logic ifu_axi_rlast, + + //-------------------------- SB AXI signals-------------------------- + // AXI Write Channels + output logic sb_axi_awvalid, + input logic sb_axi_awready, + output logic [pt.SB_BUS_TAG-1:0] sb_axi_awid, + output logic [31:0] sb_axi_awaddr, + output logic [3:0] sb_axi_awregion, + output logic [7:0] sb_axi_awlen, + output logic [2:0] sb_axi_awsize, + output logic [1:0] sb_axi_awburst, + output logic sb_axi_awlock, + output logic [3:0] sb_axi_awcache, + output logic [2:0] sb_axi_awprot, + output logic [3:0] sb_axi_awqos, + + output logic sb_axi_wvalid, + input logic sb_axi_wready, + output logic [63:0] sb_axi_wdata, + output logic [7:0] sb_axi_wstrb, + output logic sb_axi_wlast, + + input logic sb_axi_bvalid, + output logic sb_axi_bready, + input logic [1:0] sb_axi_bresp, + input logic [pt.SB_BUS_TAG-1:0] sb_axi_bid, + + // AXI Read Channels + output logic sb_axi_arvalid, + input logic sb_axi_arready, + output logic [pt.SB_BUS_TAG-1:0] sb_axi_arid, + output logic [31:0] sb_axi_araddr, + output logic [3:0] sb_axi_arregion, + output logic [7:0] sb_axi_arlen, + output logic [2:0] sb_axi_arsize, + output logic [1:0] sb_axi_arburst, + output logic sb_axi_arlock, + output logic [3:0] sb_axi_arcache, + output logic [2:0] sb_axi_arprot, + output logic [3:0] sb_axi_arqos, + + input logic sb_axi_rvalid, + output logic sb_axi_rready, + input logic [pt.SB_BUS_TAG-1:0] sb_axi_rid, + input logic [63:0] sb_axi_rdata, + input logic [1:0] sb_axi_rresp, + input logic sb_axi_rlast, + +
//-------------------------- DMA AXI signals-------------------------- + // AXI Write Channels + input logic dma_axi_awvalid, + output logic dma_axi_awready, + input logic [pt.DMA_BUS_TAG-1:0] dma_axi_awid, + input logic [31:0] dma_axi_awaddr, + input logic [2:0] dma_axi_awsize, + input logic [2:0] dma_axi_awprot, + input logic [7:0] dma_axi_awlen, + input logic [1:0] dma_axi_awburst, + + + input logic dma_axi_wvalid, + output logic dma_axi_wready, + input logic [63:0] dma_axi_wdata, + input logic [7:0] dma_axi_wstrb, + input logic dma_axi_wlast, + + output logic dma_axi_bvalid, + input logic dma_axi_bready, + output logic [1:0] dma_axi_bresp, + output logic [pt.DMA_BUS_TAG-1:0] dma_axi_bid, + + // AXI Read Channels + input logic dma_axi_arvalid, + output logic dma_axi_arready, + input logic [pt.DMA_BUS_TAG-1:0] dma_axi_arid, + input logic [31:0] dma_axi_araddr, + input logic [2:0] dma_axi_arsize, + input logic [2:0] dma_axi_arprot, + input logic [7:0] dma_axi_arlen, + input logic [1:0] dma_axi_arburst, + + output logic dma_axi_rvalid, + input logic dma_axi_rready, + output logic [pt.DMA_BUS_TAG-1:0] dma_axi_rid, + output logic [63:0] dma_axi_rdata, + output logic [1:0] dma_axi_rresp, + output logic dma_axi_rlast, +`endif + +`ifdef RV_BUILD_AHB_LITE + //// AHB LITE BUS + output logic [31:0] haddr, + output logic [2:0] hburst, + output logic hmastlock, + output logic [3:0] hprot, + output logic [2:0] hsize, + output logic [1:0] htrans, + output logic hwrite, + + input logic [63:0] hrdata, + input logic hready, + input logic hresp, + + // LSU AHB Master + output logic [31:0] lsu_haddr, + output logic [2:0] lsu_hburst, + output logic lsu_hmastlock, + output logic [3:0] lsu_hprot, + output logic [2:0] lsu_hsize, + output logic [1:0] lsu_htrans, + output logic lsu_hwrite, + output logic [63:0] lsu_hwdata, + + input logic [63:0] lsu_hrdata, + input logic lsu_hready, + input logic lsu_hresp, + // Debug System Bus AHB + output logic [31:0] sb_haddr, + output logic [2:0]
sb_hburst, + output logic sb_hmastlock, + output logic [3:0] sb_hprot, + output logic [2:0] sb_hsize, + output logic [1:0] sb_htrans, + output logic sb_hwrite, + output logic [63:0] sb_hwdata, + + input logic [63:0] sb_hrdata, + input logic sb_hready, + input logic sb_hresp, + + // DMA Slave + input logic dma_hsel, + input logic [31:0] dma_haddr, + input logic [2:0] dma_hburst, + input logic dma_hmastlock, + input logic [3:0] dma_hprot, + input logic [2:0] dma_hsize, + input logic [1:0] dma_htrans, + input logic dma_hwrite, + input logic [63:0] dma_hwdata, + input logic dma_hreadyin, + + output logic [63:0] dma_hrdata, + output logic dma_hreadyout, + output logic dma_hresp, +`endif + // clk ratio signals + input logic lsu_bus_clk_en, // Clock ratio b/w cpu core clk & AHB master interface + input logic ifu_bus_clk_en, // Clock ratio b/w cpu core clk & AHB master interface + input logic dbg_bus_clk_en, // Clock ratio b/w cpu core clk & AHB master interface + input logic dma_bus_clk_en, // Clock ratio b/w cpu core clk & AHB slave interface + + +// input logic ext_int, + input logic timer_int, + input logic soft_int, + input logic [pt.PIC_TOTAL_INT:1] extintsrc_req, + + output logic dec_tlu_perfcnt0, // toggles when slot0 perf counter 0 has an event inc + output logic dec_tlu_perfcnt1, + output logic dec_tlu_perfcnt2, + output logic dec_tlu_perfcnt3, + + input logic jtag_tck, // JTAG clk + input logic jtag_tms, // JTAG TMS + input logic jtag_tdi, // JTAG tdi + input logic jtag_trst_n, // JTAG Reset + output logic jtag_tdo, // JTAG TDO + + input logic [31:4] core_id, + + // external MPC halt/run interface + input logic mpc_debug_halt_req, // Async halt request + input logic mpc_debug_run_req, // Async run request + input logic mpc_reset_run_req, // Run/halt after reset + output logic mpc_debug_halt_ack, // Halt ack + output logic mpc_debug_run_ack, // Run ack + output logic debug_brkpt_status, // debug breakpoint + + input logic i_cpu_halt_req, // Async halt req to CPU
+ output logic o_cpu_halt_ack, // core response to halt + output logic o_cpu_halt_status, // 1'b1 indicates core is halted + output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sending a halt or run request + input logic i_cpu_run_req, // Async restart req to CPU + output logic o_cpu_run_ack, // Core response to run req + input logic scan_mode, // To enable scan mode + input logic mbist_mode // to enable mbist +); + + + // DCCM ports + logic dccm_wren; + logic dccm_rden; + logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo; + logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi; + logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo; + logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi; + logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo; + logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi; + + logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo; + logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi; + + // PIC ports + + // Icache & Itag ports + logic [31:1] ic_rw_addr; + logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en ; // Which way to write + logic ic_rd_en ; + + + logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid; // Valid from the I$ tag valid outside (in flops). + + logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit; // ic_rd_hit[3:0] + logic ic_tag_perr; // Ic tag parity error + + logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr; // Read/Write address to the Icache. + logic ic_debug_rd_en; // Icache debug rd + logic ic_debug_wr_en; // Icache debug wr + logic ic_debug_tag_array; // Debug tag array + logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way; // Debug way. Rd or Wr. + + logic [25:0] ictag_debug_rd_data;// Debug icache tag. + logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data; + logic [63:0] ic_rd_data; + logic [70:0] ic_debug_rd_data; // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + logic [70:0] ic_debug_wr_data; // Debug wr cache.
+ + logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr; // ecc error per bank + logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr; // parity error per bank + + logic [63:0] ic_premux_data; + logic ic_sel_premux_data; + + // ICCM ports + logic [pt.ICCM_BITS-1:1] iccm_rw_addr; + logic iccm_wren; + logic iccm_rden; + logic [2:0] iccm_wr_size; + logic [77:0] iccm_wr_data; + logic iccm_buf_correct_ecc; + logic iccm_correction_state; + + logic [63:0] iccm_rd_data; + logic [77:0] iccm_rd_data_ecc; + + logic core_rst_l; // Core reset including rst_l and dbg_rst_l + logic jtag_tdoEn; + logic dmi_reg_en; + logic [6:0] dmi_reg_addr; + logic dmi_reg_wr_en; + logic [31:0] dmi_reg_wdata; + logic [31:0] dmi_reg_rdata; + logic dmi_hard_reset; + + logic dccm_clk_override; + logic icm_clk_override; + logic dec_tlu_core_ecc_disable; + + + // zero out the signals not presented at the wrapper instantiation level +`ifdef RV_BUILD_AXI4 + + //// AHB LITE BUS + logic [31:0] haddr; + logic [2:0] hburst; + logic hmastlock; + logic [3:0] hprot; + logic [2:0] hsize; + logic [1:0] htrans; + logic hwrite; + + logic [63:0] hrdata; + logic hready; + logic hresp; + + // LSU AHB Master + logic [31:0] lsu_haddr; + logic [2:0] lsu_hburst; + logic lsu_hmastlock; + logic [3:0] lsu_hprot; + logic [2:0] lsu_hsize; + logic [1:0] lsu_htrans; + logic lsu_hwrite; + logic [63:0] lsu_hwdata; + + logic [63:0] lsu_hrdata; + logic lsu_hready; + logic lsu_hresp; + // Debug System Bus AHB + logic [31:0] sb_haddr; + logic [2:0] sb_hburst; + logic sb_hmastlock; + logic [3:0] sb_hprot; + logic [2:0] sb_hsize; + logic [1:0] sb_htrans; + logic sb_hwrite; + logic [63:0] sb_hwdata; + + logic [63:0] sb_hrdata; + logic sb_hready; + logic sb_hresp; + + // DMA Slave + logic dma_hsel; + logic [31:0] dma_haddr; + logic [2:0] dma_hburst; + logic dma_hmastlock; + logic [3:0] dma_hprot; + logic [2:0] dma_hsize; + logic [1:0] dma_htrans; + logic dma_hwrite; + logic [63:0] dma_hwdata; + logic dma_hreadyin; + + logic [63:0] dma_hrdata; + logic
dma_hreadyout; + logic dma_hresp; + + + // AHB + assign hrdata[63:0] = '0; + assign hready = '0; + assign hresp = '0; + // LSU + assign lsu_hrdata[63:0] = '0; + assign lsu_hready = '0; + assign lsu_hresp = '0; + // Debug system bus + assign sb_hrdata[63:0] = '0; + assign sb_hready = '0; + assign sb_hresp = '0; + + // DMA + assign dma_hsel = '0; + assign dma_haddr[31:0] = '0; + assign dma_hburst[2:0] = '0; + assign dma_hmastlock = '0; + assign dma_hprot[3:0] = '0; + assign dma_hsize[2:0] = '0; + assign dma_htrans[1:0] = '0; + assign dma_hwrite = '0; + assign dma_hwdata[63:0] = '0; + assign dma_hreadyin = '0; + +`endif // `ifdef RV_BUILD_AXI4 + +`ifdef RV_BUILD_AHB_LITE + wire lsu_axi_awvalid; + wire lsu_axi_awready; + wire [pt.LSU_BUS_TAG-1:0] lsu_axi_awid; + wire [31:0] lsu_axi_awaddr; + wire [3:0] lsu_axi_awregion; + wire [7:0] lsu_axi_awlen; + wire [2:0] lsu_axi_awsize; + wire [1:0] lsu_axi_awburst; + wire lsu_axi_awlock; + wire [3:0] lsu_axi_awcache; + wire [2:0] lsu_axi_awprot; + wire [3:0] lsu_axi_awqos; + + wire lsu_axi_wvalid; + wire lsu_axi_wready; + wire [63:0] lsu_axi_wdata; + wire [7:0] lsu_axi_wstrb; + wire lsu_axi_wlast; + + wire lsu_axi_bvalid; + wire lsu_axi_bready; + wire [1:0] lsu_axi_bresp; + wire [pt.LSU_BUS_TAG-1:0] lsu_axi_bid; + + // AXI Read Channels + wire lsu_axi_arvalid; + wire lsu_axi_arready; + wire [pt.LSU_BUS_TAG-1:0] lsu_axi_arid; + wire [31:0] lsu_axi_araddr; + wire [3:0] lsu_axi_arregion; + wire [7:0] lsu_axi_arlen; + wire [2:0] lsu_axi_arsize; + wire [1:0] lsu_axi_arburst; + wire lsu_axi_arlock; + wire [3:0] lsu_axi_arcache; + wire [2:0] lsu_axi_arprot; + wire [3:0] lsu_axi_arqos; + + wire lsu_axi_rvalid; + wire lsu_axi_rready; + wire [pt.LSU_BUS_TAG-1:0] lsu_axi_rid; + wire [63:0] lsu_axi_rdata; + wire [1:0] lsu_axi_rresp; + wire lsu_axi_rlast; + + //-------------------------- IFU AXI signals-------------------------- + // AXI Write Channels + wire ifu_axi_awvalid; + wire ifu_axi_awready; + wire [pt.IFU_BUS_TAG-1:0] ifu_axi_awid; + wire
[31:0] ifu_axi_awaddr; + wire [3:0] ifu_axi_awregion; + wire [7:0] ifu_axi_awlen; + wire [2:0] ifu_axi_awsize; + wire [1:0] ifu_axi_awburst; + wire ifu_axi_awlock; + wire [3:0] ifu_axi_awcache; + wire [2:0] ifu_axi_awprot; + wire [3:0] ifu_axi_awqos; + + wire ifu_axi_wvalid; + wire ifu_axi_wready; + wire [63:0] ifu_axi_wdata; + wire [7:0] ifu_axi_wstrb; + wire ifu_axi_wlast; + + wire ifu_axi_bvalid; + wire ifu_axi_bready; + wire [1:0] ifu_axi_bresp; + wire [pt.IFU_BUS_TAG-1:0] ifu_axi_bid; + + // AXI Read Channels + wire ifu_axi_arvalid; + wire ifu_axi_arready; + wire [pt.IFU_BUS_TAG-1:0] ifu_axi_arid; + wire [31:0] ifu_axi_araddr; + wire [3:0] ifu_axi_arregion; + wire [7:0] ifu_axi_arlen; + wire [2:0] ifu_axi_arsize; + wire [1:0] ifu_axi_arburst; + wire ifu_axi_arlock; + wire [3:0] ifu_axi_arcache; + wire [2:0] ifu_axi_arprot; + wire [3:0] ifu_axi_arqos; + + wire ifu_axi_rvalid; + wire ifu_axi_rready; + wire [pt.IFU_BUS_TAG-1:0] ifu_axi_rid; + wire [63:0] ifu_axi_rdata; + wire [1:0] ifu_axi_rresp; + wire ifu_axi_rlast; + + //-------------------------- SB AXI signals-------------------------- + // AXI Write Channels + wire sb_axi_awvalid; + wire sb_axi_awready; + wire [pt.SB_BUS_TAG-1:0] sb_axi_awid; + wire [31:0] sb_axi_awaddr; + wire [3:0] sb_axi_awregion; + wire [7:0] sb_axi_awlen; + wire [2:0] sb_axi_awsize; + wire [1:0] sb_axi_awburst; + wire sb_axi_awlock; + wire [3:0] sb_axi_awcache; + wire [2:0] sb_axi_awprot; + wire [3:0] sb_axi_awqos; + + wire sb_axi_wvalid; + wire sb_axi_wready; + wire [63:0] sb_axi_wdata; + wire [7:0] sb_axi_wstrb; + wire sb_axi_wlast; + + wire sb_axi_bvalid; + wire sb_axi_bready; + wire [1:0] sb_axi_bresp; + wire [pt.SB_BUS_TAG-1:0] sb_axi_bid; + + // AXI Read Channels + wire sb_axi_arvalid; + wire sb_axi_arready; + wire [pt.SB_BUS_TAG-1:0] sb_axi_arid; + wire [31:0] sb_axi_araddr; + wire [3:0] sb_axi_arregion; + wire [7:0] sb_axi_arlen; + wire [2:0] sb_axi_arsize; + wire [1:0] sb_axi_arburst; + wire sb_axi_arlock; + wire [3:0]
sb_axi_arcache; + wire [2:0] sb_axi_arprot; + wire [3:0] sb_axi_arqos; + + wire sb_axi_rvalid; + wire sb_axi_rready; + wire [pt.SB_BUS_TAG-1:0] sb_axi_rid; + wire [63:0] sb_axi_rdata; + wire [1:0] sb_axi_rresp; + wire sb_axi_rlast; + + //-------------------------- DMA AXI signals-------------------------- + // AXI Write Channels + wire dma_axi_awvalid; + wire dma_axi_awready; + wire [pt.DMA_BUS_TAG-1:0] dma_axi_awid; + wire [31:0] dma_axi_awaddr; + wire [2:0] dma_axi_awsize; + wire [2:0] dma_axi_awprot; + wire [7:0] dma_axi_awlen; + wire [1:0] dma_axi_awburst; + + + wire dma_axi_wvalid; + wire dma_axi_wready; + wire [63:0] dma_axi_wdata; + wire [7:0] dma_axi_wstrb; + wire dma_axi_wlast; + + wire dma_axi_bvalid; + wire dma_axi_bready; + wire [1:0] dma_axi_bresp; + wire [pt.DMA_BUS_TAG-1:0] dma_axi_bid; + + // AXI Read Channels + wire dma_axi_arvalid; + wire dma_axi_arready; + wire [pt.DMA_BUS_TAG-1:0] dma_axi_arid; + wire [31:0] dma_axi_araddr; + wire [2:0] dma_axi_arsize; + wire [2:0] dma_axi_arprot; + wire [7:0] dma_axi_arlen; + wire [1:0] dma_axi_arburst; + + wire dma_axi_rvalid; + wire dma_axi_rready; + wire [pt.DMA_BUS_TAG-1:0] dma_axi_rid; + wire [63:0] dma_axi_rdata; + wire [1:0] dma_axi_rresp; + wire dma_axi_rlast; + + // AXI: tie off the IFU write channel (awready/wready high, no write response) + assign ifu_axi_awready = 1'b1; + assign ifu_axi_wready = 1'b1; + assign ifu_axi_bvalid = '0; + assign ifu_axi_bresp[1:0] = '0; + assign ifu_axi_bid[pt.IFU_BUS_TAG-1:0] = '0; + +`endif // `ifdef RV_BUILD_AHB_LITE + + // Instantiate the el2_swerv core + el2_swerv #(.pt(pt)) swerv ( + .* + ); + + // Instantiate the mem + el2_mem #(.pt(pt)) mem( + .rst_l(core_rst_l), + .* + ); + + // Instantiate the JTAG/DMI + dmi_wrapper dmi_wrapper ( + // JTAG signals + .trst_n(jtag_trst_n), // JTAG reset + .tck (jtag_tck), // JTAG clock + .tms (jtag_tms), // Test mode select + .tdi (jtag_tdi), // Test Data Input + .tdo (jtag_tdo), // Test Data Output + .tdoEnable (), // Test Data Output enable (left unconnected) + + // Processor Signals + .core_rst_n (core_rst_l), //
Core reset, active low + .core_clk (clk), // Core clock + .jtag_id (jtag_id), // JTAG ID, bits [31:1] + .rd_data (dmi_reg_rdata), // 32 bit Read data from Processor + .reg_wr_data (dmi_reg_wdata), // 32 bit Write data to Processor + .reg_wr_addr (dmi_reg_addr), // 7 bit Write address to Processor + .reg_en (dmi_reg_en), // 1 bit Write interface bit to Processor + .reg_wr_en (dmi_reg_wr_en), // 1 bit Write enable to Processor + .dmi_hard_reset (dmi_hard_reset) //a hard reset of the DTM, causing the DTM to forget about any outstanding DMI transactions +); + + + +endmodule + diff --git a/design/exu/el2_exu.sv b/design/exu/el2_exu.sv new file mode 100644 index 0000000..fc5f547 --- /dev/null +++ b/design/exu/el2_exu.sv @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ + +module el2_exu +import el2_pkg::*; +#( +`include "el2_param.vh" +) + ( + input logic clk, // Top level clock + input logic rst_l, // Reset + input logic scan_mode, // Scan control + + input logic [1:0] dec_data_en, // Clock enable {x,r}, one cycle pulse + input logic [1:0] dec_ctl_en, // Clock enable {x,r}, two cycle pulse + input logic [31:0] dbg_cmd_wrdata, // Debug data to primary I0 RS1 + input el2_alu_pkt_t i0_ap, // DEC alu {valid,predecodes} + + input logic dec_debug_wdata_rs1_d, // Debug select to primary I0 RS1 + + input el2_predict_pkt_t dec_i0_predict_p_d, // DEC branch predict packet + input logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // DEC predict fghr + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // DEC predict index + input logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // DEC predict branch tag + + input logic dec_i0_rs1_en_d, // Qualify GPR RS1 data + input logic dec_i0_rs2_en_d, // Qualify GPR RS2 data + input logic [31:0] gpr_i0_rs1_d, // DEC data gpr + input logic [31:0] gpr_i0_rs2_d, // DEC data gpr + input logic [31:0] dec_i0_immed_d, // DEC data immediate + input logic [31:0] dec_i0_rs1_bypass_data_d, // DEC bypass data + input logic [31:0] dec_i0_rs2_bypass_data_d, // DEC bypass data + input logic [12:1] dec_i0_br_immed_d, // Branch immediate + input logic dec_i0_alu_decode_d, // Valid to X-stage ALU + input logic dec_i0_select_pc_d, // PC select to RS1 + input logic [31:1] dec_i0_pc_d, // Instruction PC + input logic [1:0] dec_i0_rs1_bypass_en_d, // DEC bypass select 1 - X-stage, 0 - dec bypass data + input logic [1:0] dec_i0_rs2_bypass_en_d, // DEC bypass select 1 - X-stage, 0 - dec bypass data + input logic dec_csr_ren_d, // Clear I0 RS1 primary + + input el2_mul_pkt_t mul_p, // DEC {valid, operand signs, low, operand bypass} + input el2_div_pkt_t div_p, // DEC {valid, unsigned, rem} + input logic dec_div_cancel, // Cancel the divide operation + + input logic [31:1] pred_correct_npc_x, // DEC NPC for 
correctly predicted branch + + input logic dec_tlu_flush_lower_r, // Flush divide and secondary ALUs + input logic [31:1] dec_tlu_flush_path_r, // Redirect target + + + input logic dec_extint_stall, // External stall mux select + input logic [31:2] dec_tlu_meihap, // External stall mux data + + + output logic [31:0] exu_lsu_rs1_d, // LSU operand + output logic [31:0] exu_lsu_rs2_d, // LSU operand + + output logic exu_flush_final, // Pipe is being flushed this cycle + output logic [31:1] exu_flush_path_final, // Target for the oldest flush source + + output logic [31:0] exu_i0_result_x, // Primary ALU result to DEC + output logic [31:1] exu_i0_pc_x, // Primary PC result to DEC + output logic [31:0] exu_csr_rs1_x, // RS1 source for a CSR instruction + + output logic [31:1] exu_npc_r, // Divide NPC + output logic [1:0] exu_i0_br_hist_r, // to DEC I0 branch history + output logic exu_i0_br_error_r, // to DEC I0 branch error + output logic exu_i0_br_start_error_r, // to DEC I0 branch start error + output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // to DEC I0 branch index + output logic exu_i0_br_valid_r, // to DEC I0 branch valid + output logic exu_i0_br_mp_r, // to DEC I0 branch mispredict + output logic exu_i0_br_middle_r, // to DEC I0 branch middle + output logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // to DEC I0 branch fghr + output logic exu_i0_br_way_r, // to DEC I0 branch way + + output el2_predict_pkt_t exu_mp_pkt, // Mispredict branch packet + output logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // Mispredict global history + output logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr + output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index + output logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag + + + output logic exu_pmu_i0_br_misp, // to PMU - I0 E4 branch mispredict + output logic exu_pmu_i0_br_ataken, // to PMU - I0 E4 taken + output logic exu_pmu_i0_pc4, // to PMU - I0 E4 PC + + + output logic [31:0] 
exu_div_result, // Divide result + output logic exu_div_wren // Divide write enable to GPR + ); + + + + + logic data_gate_en; + logic [31:0] i0_rs1_bypass_data_d; + logic [31:0] i0_rs2_bypass_data_d; + logic i0_rs1_bypass_en_d; + logic i0_rs2_bypass_en_d; + logic [31:0] i0_rs1_d, i0_rs2_d; + logic [31:0] muldiv_rs1_d, muldiv_rs2_d; + logic [31:1] pred_correct_npc_r; + logic i0_pred_correct_upper_r; + logic [31:0] csr_rs1_in_d; + logic [31:1] i0_flush_path_upper_r; + logic x_data_en, r_data_en; + logic x_ctl_en, r_ctl_en; + + logic [pt.BHT_GHR_SIZE-1:0] ghr_d_ns, ghr_d; + logic [pt.BHT_GHR_SIZE-1:0] ghr_x_ns, ghr_x; + logic i0_taken_d; + logic i0_taken_x; + logic i0_valid_d; + logic i0_valid_x; + logic [pt.BHT_GHR_SIZE-1:0] after_flush_eghr; + + el2_predict_pkt_t final_predict_mp; + el2_predict_pkt_t i0_predict_newp_d; + + logic flush_lower_ff; + logic flush_in_d; + logic [31:0] alu_result_x; + + logic mul_valid_x; + logic [31:0] mul_result_x; + + el2_predict_pkt_t i0_pp_r; + + logic i0_flush_upper_d; + logic [31:1] i0_flush_path_d; + el2_predict_pkt_t i0_predict_p_d; + logic i0_pred_correct_upper_d; + + logic i0_flush_upper_x; + logic [31:1] i0_flush_path_x; + el2_predict_pkt_t i0_predict_p_x; + logic i0_pred_correct_upper_x; + + localparam PREDPIPESIZE = pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1+pt.BHT_GHR_SIZE+pt.BTB_BTAG_SIZE; + logic [PREDPIPESIZE-1:0] predpipe_d, predpipe_x, predpipe_r, final_predpipe_mp; + + + + + rvdffe #(31) i_flush_path_x_ff (.*, .en ( x_data_en ), .din( i0_flush_path_d[31:1] ), .dout( i0_flush_path_x[31:1] ) ); + rvdffe #(32) i_csr_rs1_x_ff (.*, .en ( x_data_en ), .din( csr_rs1_in_d[31:0] ), .dout( exu_csr_rs1_x[31:0] ) ); + rvdffe #($bits(el2_predict_pkt_t)) i_predictpacket_x_ff (.*, .en ( x_data_en ), .din( i0_predict_p_d ), .dout( i0_predict_p_x ) ); + rvdffe #(PREDPIPESIZE) i_predpipe_x_ff (.*, .en ( x_data_en ), .din( predpipe_d ), .dout( predpipe_x ) ); + rvdffe #(PREDPIPESIZE) i_predpipe_r_ff (.*, .en ( r_data_en ), .din( predpipe_x ), 
.dout( predpipe_r ) ); + + rvdffe #(4+pt.BHT_GHR_SIZE) i_x_ff (.*, .en ( x_ctl_en ), .din ({i0_valid_d,i0_taken_d,i0_flush_upper_d,i0_pred_correct_upper_d,ghr_x_ns[pt.BHT_GHR_SIZE-1:0]} ), + .dout({i0_valid_x,i0_taken_x,i0_flush_upper_x,i0_pred_correct_upper_x,ghr_x[pt.BHT_GHR_SIZE-1:0]} ) ); + + rvdffe #($bits(el2_predict_pkt_t)+7) i_r_ff0 (.*, .en ( r_ctl_en ), .din ({i0_predict_p_x ,pred_correct_npc_x[6:1],i0_pred_correct_upper_x}), + .dout({i0_pp_r ,pred_correct_npc_r[6:1],i0_pred_correct_upper_r}) ); + + rvdffe #(56) i_r_ff1 (.*, .en ( r_data_en ), .din ({i0_flush_path_x[31:1] ,pred_correct_npc_x[31:7]}), + .dout({i0_flush_path_upper_r[31:1],pred_correct_npc_r[31:7]}) ); + + if (pt.BHT_SIZE==32 || pt.BHT_SIZE==64) + begin + rvdffs #(pt.BHT_GHR_SIZE+2) i_data_gate_ff (.*, .en( data_gate_en ), .din ({ghr_d_ns[pt.BHT_GHR_SIZE-1:0],mul_p.valid,dec_tlu_flush_lower_r}), + .dout({ghr_d[pt.BHT_GHR_SIZE-1:0] ,mul_valid_x,flush_lower_ff} ) ); + end + else + begin + rvdffe #(pt.BHT_GHR_SIZE+2) i_data_gate_ff (.*, .en( data_gate_en ), .din ({ghr_d_ns[pt.BHT_GHR_SIZE-1:0],mul_p.valid,dec_tlu_flush_lower_r}), + .dout({ghr_d[pt.BHT_GHR_SIZE-1:0] ,mul_valid_x,flush_lower_ff} ) ); + end + + + + + + assign data_gate_en = ( ghr_d_ns[pt.BHT_GHR_SIZE-1:0] != ghr_d[pt.BHT_GHR_SIZE-1:0]) | + ( mul_p.valid != mul_valid_x ) | + ( dec_tlu_flush_lower_r != flush_lower_ff ); + + assign predpipe_d[PREDPIPESIZE-1:0] + = {i0_predict_fghr_d, i0_predict_index_d, i0_predict_btag_d}; + + + assign i0_rs1_bypass_en_d = dec_i0_rs1_bypass_en_d[0] | dec_i0_rs1_bypass_en_d[1]; + assign i0_rs2_bypass_en_d = dec_i0_rs2_bypass_en_d[0] | dec_i0_rs2_bypass_en_d[1]; + + assign i0_rs1_bypass_data_d[31:0]=({32{dec_i0_rs1_bypass_en_d[0]}} & dec_i0_rs1_bypass_data_d[31:0]) | + ({32{dec_i0_rs1_bypass_en_d[1]}} & exu_i0_result_x[31:0] ); + + assign i0_rs2_bypass_data_d[31:0]=({32{dec_i0_rs2_bypass_en_d[0]}} & dec_i0_rs2_bypass_data_d[31:0]) | + ({32{dec_i0_rs2_bypass_en_d[1]}} & exu_i0_result_x[31:0] ); + + + 
assign i0_rs1_d[31:0] = ({32{ i0_rs1_bypass_en_d }} & i0_rs1_bypass_data_d[31:0]) | + ({32{~i0_rs1_bypass_en_d & dec_i0_select_pc_d }} & {dec_i0_pc_d[31:1],1'b0} ) | // for jal's + ({32{~i0_rs1_bypass_en_d & dec_debug_wdata_rs1_d }} & dbg_cmd_wrdata[31:0] ) | + ({32{~i0_rs1_bypass_en_d & ~dec_debug_wdata_rs1_d & dec_i0_rs1_en_d}} & gpr_i0_rs1_d[31:0] ); + + assign i0_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & dec_i0_rs2_en_d}} & gpr_i0_rs2_d[31:0] ) | + ({32{~i0_rs2_bypass_en_d }} & dec_i0_immed_d[31:0] ) | + ({32{ i0_rs2_bypass_en_d }} & i0_rs2_bypass_data_d[31:0]); + + + assign exu_lsu_rs1_d[31:0] = ({32{~i0_rs1_bypass_en_d & ~dec_extint_stall & dec_i0_rs1_en_d}} & gpr_i0_rs1_d[31:0] ) | + ({32{ i0_rs1_bypass_en_d & ~dec_extint_stall }} & i0_rs1_bypass_data_d[31:0]) | + ({32{ dec_extint_stall }} & {dec_tlu_meihap[31:2],2'b0}); + + assign exu_lsu_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & ~dec_extint_stall & dec_i0_rs2_en_d}} & gpr_i0_rs2_d[31:0] ) | + ({32{ i0_rs2_bypass_en_d & ~dec_extint_stall }} & i0_rs2_bypass_data_d[31:0]); + + + assign muldiv_rs1_d[31:0] = ({32{~i0_rs1_bypass_en_d & dec_i0_rs1_en_d}} & gpr_i0_rs1_d[31:0] ) | + ({32{ i0_rs1_bypass_en_d }} & i0_rs1_bypass_data_d[31:0]); + + assign muldiv_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & dec_i0_rs2_en_d}} & gpr_i0_rs2_d[31:0] ) | + ({32{~i0_rs2_bypass_en_d }} & dec_i0_immed_d[31:0] ) | + ({32{ i0_rs2_bypass_en_d }} & i0_rs2_bypass_data_d[31:0]); + + + assign csr_rs1_in_d[31:0] = ( dec_csr_ren_d ) ? 
i0_rs1_d[31:0] : exu_csr_rs1_x[31:0]; + + assign x_data_en = dec_data_en[1]; + assign r_data_en = dec_data_en[0]; + assign x_ctl_en = dec_ctl_en[1]; + assign r_ctl_en = dec_ctl_en[0]; + + + + + el2_exu_alu_ctl #(.pt(pt)) i_alu (.*, + .enable ( x_ctl_en ), // I + .pp_in ( i0_predict_newp_d ), // I + .valid_in ( dec_i0_alu_decode_d ), // I + .flush_upper_x ( i0_flush_upper_x ), // I + .flush_lower_r ( dec_tlu_flush_lower_r ), // I + .a_in ( i0_rs1_d[31:0] ), // I + .b_in ( i0_rs2_d[31:0] ), // I + .pc_in ( dec_i0_pc_d[31:1] ), // I + .brimm_in ( dec_i0_br_immed_d[12:1] ), // I + .ap ( i0_ap ), // I + .csr_ren_in ( dec_csr_ren_d ), // I + .result_ff ( alu_result_x[31:0] ), // O + .flush_upper_out ( i0_flush_upper_d ), // O + .flush_final_out ( exu_flush_final ), // O + .flush_path_out ( i0_flush_path_d[31:1] ), // O + .predict_p_out ( i0_predict_p_d ), // O + .pred_correct_out ( i0_pred_correct_upper_d ), // O + .pc_ff ( exu_i0_pc_x[31:1] )); // O + + + + el2_exu_mul_ctl #(.pt(pt)) i_mul (.*, + .mul_p ( mul_p ), // I + .rs1_in ( muldiv_rs1_d[31:0] ), // I + .rs2_in ( muldiv_rs2_d[31:0] ), // I + .result_x ( mul_result_x[31:0] )); // O + + + + el2_exu_div_ctl #(.pt(pt)) i_div (.*, + .cancel ( dec_div_cancel ), // I + .dp ( div_p ), // I + .dividend ( muldiv_rs1_d[31:0] ), // I + .divisor ( muldiv_rs2_d[31:0] ), // I + .finish_dly ( exu_div_wren ), // O + .out ( exu_div_result[31:0] )); // O + + + + assign exu_i0_result_x[31:0] = (mul_valid_x) ? 
mul_result_x[31:0] : alu_result_x[31:0]; + + + + + always_comb begin + i0_predict_newp_d = dec_i0_predict_p_d; + i0_predict_newp_d.boffset = dec_i0_pc_d[1]; // from the start of inst + end + + + assign exu_pmu_i0_br_misp = i0_pp_r.misp; + assign exu_pmu_i0_br_ataken = i0_pp_r.ataken; + assign exu_pmu_i0_pc4 = i0_pp_r.pc4; + + + assign i0_valid_d = i0_predict_p_d.valid & dec_i0_alu_decode_d & ~dec_tlu_flush_lower_r; + assign i0_taken_d = (i0_predict_p_d.ataken & dec_i0_alu_decode_d); + + + // maintain GHR at D + assign ghr_d_ns[pt.BHT_GHR_SIZE-1:0] + = ({pt.BHT_GHR_SIZE{~dec_tlu_flush_lower_r & i0_valid_d}} & {ghr_d[pt.BHT_GHR_SIZE-2:0], i0_taken_d}) | + ({pt.BHT_GHR_SIZE{~dec_tlu_flush_lower_r & ~i0_valid_d}} & ghr_d[pt.BHT_GHR_SIZE-1:0] ) | + ({pt.BHT_GHR_SIZE{ dec_tlu_flush_lower_r }} & ghr_x[pt.BHT_GHR_SIZE-1:0] ); + + // maintain GHR at X + assign ghr_x_ns[pt.BHT_GHR_SIZE-1:0] + = ({pt.BHT_GHR_SIZE{ i0_valid_x}} & {ghr_x[pt.BHT_GHR_SIZE-2:0], i0_taken_x}) | + ({pt.BHT_GHR_SIZE{~i0_valid_x}} & ghr_x[pt.BHT_GHR_SIZE-1:0] ) ; + + + assign exu_i0_br_valid_r = i0_pp_r.valid; + assign exu_i0_br_mp_r = i0_pp_r.misp; + assign exu_i0_br_way_r = i0_pp_r.way; + assign exu_i0_br_hist_r[1:0] = i0_pp_r.hist[1:0]; + assign exu_i0_br_error_r = i0_pp_r.br_error; + assign exu_i0_br_middle_r = i0_pp_r.pc4 ^ i0_pp_r.boffset; + assign exu_i0_br_start_error_r = i0_pp_r.br_start_error; + + assign {exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0], + exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]}= predpipe_r[PREDPIPESIZE-1:pt.BTB_BTAG_SIZE]; + + + assign final_predict_mp = (i0_flush_upper_x) ? i0_predict_p_x : '0; + + assign final_predpipe_mp[PREDPIPESIZE-1:0] = (i0_flush_upper_x) ? predpipe_x : '0; + + assign after_flush_eghr[pt.BHT_GHR_SIZE-1:0] = (i0_flush_upper_x & ~dec_tlu_flush_lower_r) ? 
ghr_d[pt.BHT_GHR_SIZE-1:0] : ghr_x[pt.BHT_GHR_SIZE-1:0]; + + + assign exu_mp_pkt.way = final_predict_mp.way; + assign exu_mp_pkt.misp = final_predict_mp.misp; + assign exu_mp_pkt.pcall = final_predict_mp.pcall; + assign exu_mp_pkt.pja = final_predict_mp.pja; + assign exu_mp_pkt.pret = final_predict_mp.pret; + assign exu_mp_pkt.ataken = final_predict_mp.ataken; + assign exu_mp_pkt.boffset = final_predict_mp.boffset; + assign exu_mp_pkt.pc4 = final_predict_mp.pc4; + assign exu_mp_pkt.hist[1:0] = final_predict_mp.hist[1:0]; + assign exu_mp_pkt.toffset[11:0] = final_predict_mp.toffset[11:0]; + + assign exu_mp_fghr[pt.BHT_GHR_SIZE-1:0] = after_flush_eghr[pt.BHT_GHR_SIZE-1:0]; + + assign {exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO], + exu_mp_btag[pt.BTB_BTAG_SIZE-1:0]} = final_predpipe_mp[PREDPIPESIZE-pt.BHT_GHR_SIZE-1:0]; + + assign exu_mp_eghr[pt.BHT_GHR_SIZE-1:0] = final_predpipe_mp[PREDPIPESIZE-1:pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+pt.BTB_BTAG_SIZE+1]; // mp ghr for bht write + + assign exu_flush_path_final[31:1] = (dec_tlu_flush_lower_r) ? dec_tlu_flush_path_r[31:1] : i0_flush_path_d[31:1]; + + assign exu_npc_r[31:1] = (i0_pred_correct_upper_r) ? pred_correct_npc_r[31:1] : + i0_flush_path_upper_r[31:1]; + + +endmodule // el2_exu diff --git a/design/exu/el2_exu_alu_ctl.sv b/design/exu/el2_exu_alu_ctl.sv new file mode 100644 index 0000000..ed195dc --- /dev/null +++ b/design/exu/el2_exu_alu_ctl.sv @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// EXU ALU: add/sub, logic, shift and set-less-than datapath plus branch resolution; result and PC are flopped.
+module el2_exu_alu_ctl
+import el2_pkg::*;
+#(
+`include "el2_param.vh"
+)
+  (
+   input  logic                  clk,                // Top level clock
+   input  logic                  rst_l,              // Reset
+   input  logic                  scan_mode,          // Scan control
+
+   input  logic                  flush_upper_x,      // Branch flush from previous cycle
+   input  logic                  flush_lower_r,      // Master flush of entire pipeline
+   input  logic                  enable,             // Clock enable
+   input  logic                  valid_in,           // Valid
+   input  el2_alu_pkt_t          ap,                 // predecodes (operation selects used throughout this module)
+   input  logic                  csr_ren_in,         // extra decode: ORs b_in into the logic-unit output
+   input  logic signed [31:0]    a_in,               // A operand
+   input  logic        [31:0]    b_in,               // B operand
+   input  logic        [31:1]    pc_in,              // for pc=pc+2,4 calculations
+   input  el2_predict_pkt_t      pp_in,              // Predicted branch structure
+   input  logic        [12:1]    brimm_in,           // Branch offset
+
+
+   output logic        [31:0]    result_ff,          // final result
+   output logic                  flush_upper_out,    // Branch flush
+   output logic                  flush_final_out,    // Branch flush or flush entire pipeline
+   output logic        [31:1]    flush_path_out,     // Branch flush PC
+   output logic        [31:1]    pc_ff,              // flopped PC
+   output logic                  pred_correct_out,   // NPC control
+   output el2_predict_pkt_t      predict_p_out       // Predicted branch structure
+  );
+
+
+   logic        [31:0]    aout;                      // adder sum
+   logic                  cout,ov,neg;               // adder carry-out, signed overflow, sum sign bit
+   logic        [31:0]    lout;                      // logic-unit output
+   logic        [5:0]     shift_amount;              // right-shift distance applied to shift_extend
+   logic        [31:0]    shift_mask;                // clears the low bits of the shifter output for sll
+   logic        [62:0]    shift_extend;              // shifter input
+   logic        [62:0]    shift_long;                // shifter output before masking
+   logic        [31:0]    sout;                      // shifter result
+   logic                  sel_shift;                 // result mux select: shifter
+   logic                  sel_adder;                 // result mux select: adder
+   logic                  slt_one;                   // set-less-than result bit
+   logic                  actual_taken;              // resolved branch direction
+   logic        [31:1]    pcout;                     // output of the rvbradder instance
+   logic                  cond_mispredict;           // predicted direction differs from actual_taken
+   logic                  target_mispredict;         // predicted return target differs from adder output
+   logic                  eq, ne, lt, ge;            // comparison flags
+   logic                  any_jal;                   // ap.jal or a predicted call / jump / return
+   logic        [1:0]     newhist;                   // updated branch history
+   logic                  sel_pc;                    // result mux select: pcout
+   logic        [31:0]    csr_write_data;            // b_in or a_in, chosen by ap.csr_imm
+   logic        [31:0]    result;                    // combined result ahead of i_result_ff
+
+
+
+
+
+   // Output flops for the PC and the result; both use 'enable' as their enable
+   rvdffe #(31) i_pc_ff      (.*, .en(enable), .din(pc_in[31:1]),  .dout(pc_ff[31:1]));   // any PC is run through here - doesn't have to be alu
+   rvdffe #(32) i_result_ff  (.*, .en(enable), .din(result[31:0]), .dout(result_ff[31:0]));
+
+
+
+   // immediates are just muxed into rs2
+
+   // add =>  add=1;
+   // sub =>  add=1; sub=1;
+
+   // and =>  lctl=3
+   // or  =>  lctl=2
+   // xor =>  lctl=1
+
+   // sll =>  sctl=3
+   // srl =>  sctl=2
+   // sra =>  sctl=1
+
+   // slt =>  slt
+
+   // lui   => lctl=2; or x0, imm20 previously << 12
+   // auipc => add;    add pc, imm20 previously << 12
+
+   // beq  => bctl=4; add; add x0, pc, sext(offset[12:1])
+   // bne  => bctl=3; add; add x0, pc, sext(offset[12:1])
+   // blt  => bctl=2; add; add x0, pc, sext(offset[12:1])
+   // bge  => bctl=1; add; add x0, pc, sext(offset[12:1])
+
+   // jal  => rs1=pc {pc[31:1],1'b0},  rs2=sext(offset[20:1]);   rd=pc+[2,4]
+   // jalr => rs1=rs1,                 rs2=sext(offset[20:1]);   rd=pc+[2,4]
+
+   // Adder: a_in + b_in, or a_in + ~b_in + 1 when ap.sub is set
+   logic        [31:0]    bm;                        // b_in, inverted when ap.sub is set
+
+   assign bm[31:0]            = ( ap.sub )  ?  ~b_in[31:0]  :  b_in[31:0];
+
+   assign {cout, aout[31:0]}  = {1'b0, a_in[31:0]} + {1'b0, bm[31:0]} + {32'b0, ap.sub};
+   // ov: a_in and bm have the same sign bit but the sum's sign bit differs
+   assign ov                  = (~a_in[31] & ~bm[31] &  aout[31]) |
+                                ( a_in[31] &  bm[31] & ~aout[31] );
+   // lt: signed uses neg ^ ov, unsigned uses ~cout (presumably only meaningful while ap.sub is set - TODO confirm in decode)
+   assign lt                  = (~ap.unsign & (neg ^ ov)) |
+                                ( ap.unsign & ~cout);
+
+
+   assign eq                  = (a_in[31:0] == b_in[31:0]);
+   assign ne                  = ~eq;
+   assign neg                 =  aout[31];
+   assign ge                  = ~lt;
+
+
+   // Logic unit: AND-OR of the bitwise ops selected by ap.land / ap.lor / ap.lxor; csr_ren_in contributes b_in directly
+   assign lout[31:0]          =  (  {32{csr_ren_in}} &  b_in[31:0]                ) |
+                                 (  {32{ap.land   }} &  a_in[31:0] &  b_in[31:0]  ) |
+                                 (  {32{ap.lor    }} & (a_in[31:0] |  b_in[31:0]) ) |
+                                 (  {32{ap.lxor   }} & (a_in[31:0] ^  b_in[31:0]) );
+
+
+
+   // Shifter: one right shifter serves sll, srl and sra. For sll, a_in[30:0] is placed above a_in[31:0] and the
+   // pair is shifted right by (32 - b_in[4:0]); shift_mask then zeroes the low b_in[4:0] bits of the result.
+   assign shift_amount[5:0]   = ( { 6{ap.sll}}   & (6'd32 - {1'b0,b_in[4:0]}) ) |   // [5] unused
+                                ( { 6{ap.srl}}   &          {1'b0,b_in[4:0]}  ) |
+                                ( { 6{ap.sra}}   &          {1'b0,b_in[4:0]}  );
+
+
+   assign shift_mask[31:0]    = ( 32'hffffffff << ({5{ap.sll}} & b_in[4:0]) );
+
+
+   assign shift_extend[31:0]  =  a_in[31:0];
+   // Upper 31 bits: copies of a_in[31] for sra, a_in[30:0] for sll, zeros otherwise
+   assign shift_extend[62:32] = ( {31{ap.sra}} & {31{a_in[31]}} ) |
+                                ( {31{ap.sll}} &     a_in[30:0]  );
+
+
+   assign shift_long[62:0]    = ( shift_extend[62:0] >> shift_amount[4:0] );   // 62-32 unused
+
+   assign sout[31:0]          = ( shift_long[31:0] & shift_mask[31:0] );
+
+
+
+
+   // Result mux selects
+   assign sel_shift           =  ap.sll  | ap.srl | ap.sra;
+   assign sel_adder           = (ap.add  | ap.sub) & ~ap.slt;
+   assign sel_pc              =  ap.jal  | pp_in.pcall | pp_in.pja | pp_in.pret;   // same terms as any_jal
+   assign csr_write_data[31:0]= (ap.csr_imm)  ?  b_in[31:0]  :  a_in[31:0];
+
+   assign slt_one             =  ap.slt & lt;
+
+   // AND-OR result mux. lout and slt_one carry their own gating (ap.land/lor/lxor/csr_ren_in and ap.slt);
+   // the terms are simply OR-ed, so the selects are presumably mutually exclusive - TODO confirm in decode.
+   assign result[31:0]        =      lout[31:0]                                     |
+                                ({32{sel_shift}}    &  sout[31:0]            )      |
+                                ({32{sel_adder}}    &  aout[31:0]            )      |
+                                ({32{sel_pc}}       & {pcout[31:1],1'b0}     )      |
+                                ({32{ap.csr_write}} &  csr_write_data[31:0]  )      |
+                                                      {31'b0, slt_one}       ;
+
+
+
+   // *** branch handling ***
+
+   assign any_jal             =  ap.jal      |
+                                 pp_in.pcall |
+                                 pp_in.pja   |
+                                 pp_in.pret;
+   // Taken when the decoded branch condition holds, or for any_jal
+   assign actual_taken        = (ap.beq & eq) |
+                                (ap.bne & ne) |
+                                (ap.blt & lt) |
+                                (ap.bge & ge) |
+                                 any_jal;
+
+   // for a conditional br pcout[] will be the opposite of the branch prediction
+   // for jal or pcall, it will be the link address pc+2 or pc+4
+
+   rvbradder ibradder (
+                     .pc     ( pc_in[31:1]    ),
+                     .offset ( brimm_in[12:1] ),
+                     .dout   ( pcout[31:1]    ));
+
+
+   // pred_correct is for the npc logic
+   // pred_correct indicates not to use the flush_path
+   // for any_jal pred_correct==0
+
+   assign pred_correct_out    = (valid_in & ap.predict_nt & ~actual_taken & ~any_jal) |
+                                (valid_in & ap.predict_t  &  actual_taken & ~any_jal);
+
+
+   // for any_jal adder output is the flush path
+   assign flush_path_out[31:1]= (any_jal)  ?  aout[31:1]  :  pcout[31:1];
+
+
+   // pcall and pret are included here
+   assign cond_mispredict     = (ap.predict_t  & ~actual_taken) |
+                                (ap.predict_nt &  actual_taken);
+
+
+   // target mispredicts on ret's
+
+   assign target_mispredict   =  pp_in.pret & (pp_in.prett[31:1] != aout[31:1]);
+   // flush_upper_out is blocked by flush_upper_x and flush_lower_r; flush_final_out additionally asserts on flush_lower_r
+   assign flush_upper_out     =   (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x   & ~flush_lower_r;
+   assign flush_final_out     = ( (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x ) |  flush_lower_r;
+
+
+   // Branch history update, espresso truth table for newhist[] below:
+   // .i 3
+   // .o 2
+   // .ilb hist[1] hist[0] taken
+   // .ob newhist[1] newhist[0]
+   // .type fd
+   //
+   // 00 0 01
+   // 01 0 01
+   // 10 0 00
+   // 11 0 10
+   // 00 1 10
+   // 01 1 00
+   // 10 1 11
+   // 11 1 11
+
+   assign newhist[1]          = ( pp_in.hist[1] &  pp_in.hist[0]) | (~pp_in.hist[0] & actual_taken);
+   assign newhist[0]          = (~pp_in.hist[1] & ~actual_taken)  | ( pp_in.hist[1] & actual_taken);
+   // Outgoing prediction packet: pp_in with misp, ataken and hist overwritten by the resolved values
+   always_comb begin
+      predict_p_out           =  pp_in;
+
+      predict_p_out.misp      = ~flush_upper_x & ~flush_lower_r & (cond_mispredict | target_mispredict);
+      predict_p_out.ataken    =  actual_taken;
+      predict_p_out.hist[1]   =  newhist[1];
+      predict_p_out.hist[0]   =  newhist[0];
+
+   end
+
+
+
+endmodule // el2_exu_alu_ctl
diff --git a/design/exu/el2_exu_div_ctl.sv b/design/exu/el2_exu_div_ctl.sv
new file mode 100644
index 0000000..1beef7d
--- /dev/null
+++ b/design/exu/el2_exu_div_ctl.sv
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Multi-cycle divide/remainder unit with a 4-bit small-number shortcut and a byte-granular "short Q" iteration skip.
+module el2_exu_div_ctl
+import el2_pkg::*;
+#(
+`include "el2_param.vh"
+)
+  (
+   input  logic            clk,                 // Top level clock
+   input  logic            rst_l,               // Reset
+   input  logic            scan_mode,           // Scan mode
+
+   input  el2_div_pkt_t    dp,                  // Divide packet; fields read here: valid, unsign, rem
+   input  logic [31:0]     dividend,            // Numerator
+   input  logic [31:0]     divisor,             // Denominator
+
+   input  logic            cancel,              // Cancel divide
+
+
+   output logic            finish_dly,          // Finish to match data
+   output logic [31:0]     out                  // Result
+  );
+
+
+   logic            div_clken;                  // enable for the exu_div_cgc clock header
+   logic            exu_div_clk;                // clock produced by exu_div_cgc
+   logic            run_in, run_state;          // divide in progress (next, registered)
+   logic [5:0]      count_in, count;            // iteration counter (next, registered)
+   logic [32:0]     m_ff;                       // registered divisor; bit 32 = ~dp.unsign & divisor[31]
+   logic            qff_enable;
+   logic            aff_enable;
+   logic [32:0]     q_in, q_ff;                 // loaded with the dividend, then shifted as quotient bits arrive
+   logic [32:0]     a_in, a_ff;                 // adder result / registered partial remainder
+   logic [32:0]     m_eff;                      // m_ff or ~m_ff, chosen by 'add'
+   logic [32:0]     a_shift;
+   logic            dividend_neg_ff, divisor_neg_ff;   // operand bit 31 captured on dp.valid
+   logic [31:0]     dividend_comp;
+   logic [31:0]     dividend_eff;
+   logic [31:0]     q_ff_comp;
+   logic [31:0]     q_ff_eff;
+   logic [31:0]     a_ff_comp;
+   logic [31:0]     a_ff_eff;
+   logic            sign_ff, sign_eff;
+   logic            rem_ff;                     // dp.rem captured on dp.valid
+   logic            add;                        // 1: add m_ff, 0: add ~m_ff plus a carry-in
+   logic [32:0]     a_eff;
+   logic [55:0]     a_eff_shift;
+   logic            rem_correct;
+   logic            valid_ff_x;                 // dp.valid & ~cancel, registered
+   logic            valid_x;
+   logic            finish;
+   logic            finish_ff;
+
+   logic            smallnum_case, smallnum_case_ff;
+   logic [3:0]      smallnum, smallnum_ff;
+   logic            m_already_comp;
+
+   // exu_div_clk clocks the rvdff/rvdffs flops below; rvoclkhdr is presumably a clock-gating header (not visible here)
+   rvoclkhdr exu_div_cgc   (.*, .en(div_clken), .l1clk(exu_div_clk));
+
+   rvdff  #(1)  e1val_ff     (.*, .clk(exu_div_clk), .din(dp.valid & ~cancel),  .dout(valid_ff_x));
+   rvdff  #(1)  i_finish_ff  (.*, .clk(exu_div_clk), .din(finish   & ~cancel),  .dout(finish_ff));
+
+   rvdff  #(1)  runff        (.*, .clk(exu_div_clk), .din(run_in),              .dout(run_state));
+   rvdff  #(6)  countff      (.*, .clk(exu_div_clk), .din(count_in[5:0]),       .dout(count[5:0]));
+   rvdffs #(4)  miscf        (.*, .clk(exu_div_clk), .din({dividend[31],divisor[31],sign_eff,dp.rem}), .dout({dividend_neg_ff,divisor_neg_ff,sign_ff,rem_ff}), .en(dp.valid));
+   rvdff  #(5)  smallnumff   (.*, .clk(exu_div_clk), .din({smallnum_case,smallnum[3:0]}), .dout({smallnum_case_ff,smallnum_ff[3:0]}));
+   rvdffe #(33) mff          (.*, .en(dp.valid),     .din({ ~dp.unsign & divisor[31], divisor[31:0]}), .dout(m_ff[32:0]));
+   rvdffe #(33) qff          (.*, .en(qff_enable),   .din(q_in[32:0]),          .dout(q_ff[32:0]));
+   rvdffe #(33) aff          (.*, .en(aff_enable),   .din(a_in[32:0]),          .dout(a_ff[32:0]));
+   // rvtwoscomp outputs used for sign handling; note i_dividend_comp and i_q_ff_comp are fed the same q_ff[31:0]
+   rvtwoscomp #(32) i_dividend_comp (.din(q_ff[31:0]),    .dout(dividend_comp[31:0]));
+   rvtwoscomp #(32) i_q_ff_comp     (.din(q_ff[31:0]),    .dout(q_ff_comp[31:0]));
+   rvtwoscomp #(32) i_a_ff_comp     (.din(a_ff[31:0]),    .dout(a_ff_comp[31:0]));
+
+
+   assign valid_x                 = valid_ff_x & ~cancel;
+
+
+   // START - short circuit logic for small numbers {{
+
+   // small number divides - any 4b / 4b is done in 1 cycle (divisor != 0)
+   // to generate espresso equations:
+   // 1) smalldiv > smalldiv.e
+   // 2) espresso -Dso -oeqntott smalldiv.e | addassign > smalldiv
+
+   // smallnum case does not cover divide by 0
+   assign smallnum_case           = ((q_ff[31:4] == 28'b0) & (m_ff[31:4] == 28'b0) & (m_ff[31:0] != 32'b0) & ~rem_ff & valid_x) |
+                                    ((q_ff[31:0] == 32'b0) &                         (m_ff[31:0] != 32'b0) & ~rem_ff & valid_x);
+
+
+   assign smallnum[3]             = ( q_ff[3] &                                  ~m_ff[3] & ~m_ff[2] & ~m_ff[1]           );
+
+
+   assign smallnum[2]             = ( q_ff[3] &                                  ~m_ff[3] & ~m_ff[2] &            ~m_ff[0]) |
+                                    ( q_ff[2] &                                  ~m_ff[3] & ~m_ff[2] & ~m_ff[1]           ) |
+                                    ( q_ff[3] &  q_ff[2] &                       ~m_ff[3] & ~m_ff[2]                      );
+
+
+   assign smallnum[1]             = ( q_ff[2] &                                  ~m_ff[3] & ~m_ff[2] &            ~m_ff[0]) |
+                                    (                       q_ff[1] &            ~m_ff[3] & ~m_ff[2] & ~m_ff[1]           ) |
+                                    ( q_ff[3] &                                  ~m_ff[3] &            ~m_ff[1] & ~m_ff[0]) |
+                                    ( q_ff[3] & ~q_ff[2] &                       ~m_ff[3] & ~m_ff[2] &  m_ff[1] &  m_ff[0]) |
+                                    (~q_ff[3] &  q_ff[2] &  q_ff[1] &            ~m_ff[3] & ~m_ff[2]                      ) |
+                                    ( q_ff[3] &  q_ff[2] &                       ~m_ff[3] &                       ~m_ff[0]) |
+                                    ( q_ff[3] &  q_ff[2] &                       ~m_ff[3] &  m_ff[2] & ~m_ff[1]           ) |
+                                    ( q_ff[3] &             q_ff[1] &            ~m_ff[3] &            ~m_ff[1]           ) |
+                                    ( q_ff[3] &  q_ff[2] &  q_ff[1] &            ~m_ff[3] &  m_ff[2]                      );
+
+
+   assign smallnum[0]             = (            q_ff[2] &  q_ff[1] &  q_ff[0] & ~m_ff[3] &            ~m_ff[1]           ) |
+                                    ( q_ff[3] & ~q_ff[2] &             q_ff[0] & ~m_ff[3] &             m_ff[1] &  m_ff[0]) |
+                                    ( q_ff[2] &                                  ~m_ff[3] &            ~m_ff[1] & ~m_ff[0]) |
+                                    ( q_ff[1] &                                  ~m_ff[3] & ~m_ff[2] &            ~m_ff[0]) |
+                                    ( q_ff[0] &                                  ~m_ff[3] & ~m_ff[2] & ~m_ff[1]           ) |
+                                    (~q_ff[3] &  q_ff[2] & ~q_ff[1] &            ~m_ff[3] & ~m_ff[2] &  m_ff[1] &  m_ff[0]) |
+                                    (~q_ff[3] &  q_ff[2] &  q_ff[1] &            ~m_ff[3] &                       ~m_ff[0]) |
+                                    ( q_ff[3] &                                             ~m_ff[2] & ~m_ff[1] & ~m_ff[0]) |
+                                    ( q_ff[3] & ~q_ff[2] &                       ~m_ff[3] &  m_ff[2] &  m_ff[1]           ) |
+                                    (~q_ff[3] &  q_ff[2] &  q_ff[1] &            ~m_ff[3] &  m_ff[2] & ~m_ff[1]           ) |
+                                    (~q_ff[3] &  q_ff[2] &             q_ff[0] & ~m_ff[3] &            ~m_ff[1]           ) |
+                                    ( q_ff[3] & ~q_ff[2] & ~q_ff[1] &            ~m_ff[3] &  m_ff[2] &             m_ff[0]) |
+                                    (           ~q_ff[2] &  q_ff[1] &  q_ff[0] & ~m_ff[3] & ~m_ff[2]                      ) |
+                                    ( q_ff[3] &  q_ff[2] &                                             ~m_ff[1] & ~m_ff[0]) |
+                                    ( q_ff[3] &             q_ff[1] &                       ~m_ff[2] &            ~m_ff[0]) |
+                                    (~q_ff[3] &  q_ff[2] &  q_ff[1] &  q_ff[0] & ~m_ff[3] &  m_ff[2]                      ) |
+                                    ( q_ff[3] &  q_ff[2] &                        m_ff[3] & ~m_ff[2]                      ) |
+                                    ( q_ff[3] &             q_ff[1] &             m_ff[3] & ~m_ff[2] & ~m_ff[1]           ) |
+                                    ( q_ff[3] &                        q_ff[0] &            ~m_ff[2] & ~m_ff[1]           ) |
+                                    ( q_ff[3] &            ~q_ff[1] &            ~m_ff[3] &  m_ff[2] &  m_ff[1] &  m_ff[0]) |
+                                    ( q_ff[3] &  q_ff[2] &  q_ff[1] &             m_ff[3] &                       ~m_ff[0]) |
+                                    ( q_ff[3] &  q_ff[2] &  q_ff[1] &             m_ff[3] &            ~m_ff[1]           ) |
+                                    ( q_ff[3] &  q_ff[2] &             q_ff[0] &  m_ff[3] &            ~m_ff[1]           ) |
+                                    ( q_ff[3] & ~q_ff[2] &  q_ff[1] &            ~m_ff[3] &             m_ff[1]           ) |
+                                    ( q_ff[3] &             q_ff[1] &  q_ff[0] &            ~m_ff[2]                      ) |
+                                    ( q_ff[3] &  q_ff[2] &  q_ff[1] &  q_ff[0] &  m_ff[3]                                 );
+
+
+   // END   - short circuit logic for small numbers }}
+
+
+// *** Start Short Q *** {{
+
+   logic [2:0]      a_cls;
+   logic [2:0]      b_cls;
+   logic [3:0]      shortq_shift;
+   logic [4:0]      shortq_shift_ff;
+   logic            shortq_enable;
+   logic            shortq_enable_ff;
+   logic [32:0]     short_dividend;
+
+   assign short_dividend[31:0]    =  q_ff[31:0];
+   assign short_dividend[32]      =  sign_ff & q_ff[31];
+
+// a_cls / b_cls flag, per byte-sized window of the dividend (A) and divisor (B), whether the window holds anything
+// other than sign fill (dividend windows are offset by one bit when short_dividend[32] is set). SH is the skip selected below.
+//    A       B
+//   210     210    SH
+//   ---     ---    --
+//   1xx     000     0
+//   1xx     001     8
+//   1xx     01x    16
+//   1xx     1xx    24
+//   01x     000     8
+//   01x     001    16
+//   01x     01x    24
+//   01x     1xx    32
+//   001     000    16
+//   001     001    24
+//   001     01x    32
+//   001     1xx    32
+//   000     000    24
+//   000     001    32
+//   000     01x    32
+//   000     1xx    32
+
+   logic [3:0]      shortq_raw;
+   logic [3:0]      shortq_shift_xx;
+
+   assign a_cls[2]                =  (~short_dividend[32] & (short_dividend[31:24] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[31:23] != {9{1'b1}}));
+   assign a_cls[1]                =  (~short_dividend[32] & (short_dividend[23:16] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[22:15] != {8{1'b1}}));
+   assign a_cls[0]                =  (~short_dividend[32] & (short_dividend[15:08] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[14:07] != {8{1'b1}}));
+
+   assign b_cls[2]                =  (~m_ff[32]           & (          m_ff[31:24] != {8{1'b0}})) | ( m_ff[32]           & (          m_ff[31:24] != {8{1'b1}}));
+   assign b_cls[1]                =  (~m_ff[32]           & (          m_ff[23:16] != {8{1'b0}})) | ( m_ff[32]           & (          m_ff[23:16] != {8{1'b1}}));
+   assign b_cls[0]                =  (~m_ff[32]           & (          m_ff[15:08] != {8{1'b0}})) | ( m_ff[32]           & (          m_ff[15:08] != {8{1'b1}}));
+
+   assign shortq_raw[3]           = ( (a_cls[2:1] == 2'b01 ) & (b_cls[2]   == 1'b1  ) ) |   // Shift by 32
+                                    ( (a_cls[2:0] == 3'b001) & (b_cls[2]   == 1'b1  ) ) |
+                                    ( (a_cls[2:0] == 3'b000) & (b_cls[2]   == 1'b1  ) ) |
+                                    ( (a_cls[2:0] == 3'b001) & (b_cls[2:1] == 2'b01 ) ) |
+                                    ( (a_cls[2:0] == 3'b000) & (b_cls[2:1] == 2'b01 ) ) |
+                                    ( (a_cls[2:0] == 3'b000) & (b_cls[2:0] == 3'b001) );
+
+   assign shortq_raw[2]           = ( (a_cls[2]   == 1'b1  ) & (b_cls[2]   == 1'b1  ) ) |   // Shift by 24
+                                    ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:1] == 2'b01 ) ) |
+                                    ( (a_cls[2:0] == 3'b001) & (b_cls[2:0] == 3'b001) ) |
+                                    ( (a_cls[2:0] == 3'b000) & (b_cls[2:0] == 3'b000) );
+
+   assign shortq_raw[1]           = ( (a_cls[2]   == 1'b1  ) & (b_cls[2:1] == 2'b01 ) ) |   // Shift by 16
+                                    ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:0] == 3'b001) ) |
+                                    ( (a_cls[2:0] == 3'b001) & (b_cls[2:0] == 3'b000) );
+
+   assign shortq_raw[0]           = ( (a_cls[2]   == 1'b1  ) & (b_cls[2:0] == 3'b001) ) |   // Shift by 8
+                                    ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:0] == 3'b000) );
+
+   // Short Q is taken only for a non-zero divisor and when one of the skip selects above is set
+   assign shortq_enable           =  valid_ff_x & (m_ff[31:0] != 32'b0) & (shortq_raw[3:0] != 4'b0);
+
+   assign shortq_shift[3:0]       = ({4{shortq_enable}} & shortq_raw[3:0]);
+
+   rvdff  #(5)  i_shortq_ff       (.*, .clk(exu_div_clk), .din({shortq_enable,shortq_shift[3:0]}), .dout({shortq_enable_ff,shortq_shift_xx[3:0]}));
+   // Decode the registered select into a shift amount; bit 3 decodes to 31 and count_in adds one more
+   assign shortq_shift_ff[4:0]    = ({5{shortq_shift_xx[3]}} & 5'b1_1111) |   // 31
+                                    ({5{shortq_shift_xx[2]}} & 5'b1_1000) |   // 24
+                                    ({5{shortq_shift_xx[1]}} & 5'b1_0000) |   // 16
+                                    ({5{shortq_shift_xx[0]}} & 5'b0_1000);    //  8
+
+`ifdef ASSERT_ON
+
+   logic  div_assert_fail;
+   // Set when any two bits of shortq_shift_xx are on together
+   assign div_assert_fail         =  (shortq_shift_xx[3] & shortq_shift_xx[2]) |
+                                     (shortq_shift_xx[3] & shortq_shift_xx[1]) |
+                                     (shortq_shift_xx[3] & shortq_shift_xx[0]) |
+                                     (shortq_shift_xx[2] & shortq_shift_xx[1]) |
+                                     (shortq_shift_xx[2] & shortq_shift_xx[0]) |
+                                     (shortq_shift_xx[1] & shortq_shift_xx[0]);
+
+   assert_exu_div_shortq_shift_error: assert #0 (~div_assert_fail) else $display("ERROR: SHORTQ_SHIFT_XX with multiple shifts ON!");
+
+`endif
+
+
+// *** End Short *** }}
+
+
+
+
+   // Sequencing: clock enable, run state and iteration counter (counter is zeroed on finish, cancel or a short-Q cycle)
+   assign div_clken               =  dp.valid | run_state | finish | finish_ff;
+
+   assign run_in                  = (dp.valid | run_state) & ~finish & ~cancel;
+
+   assign count_in[5:0]           = {6{run_state & ~finish & ~cancel & ~shortq_enable}} & (count[5:0] + {1'b0,shortq_shift_ff[4:0]} + 6'd1);
+
+   // Done on the small-number shortcut, or when count reaches 32 (33 when rem_ff is set)
+   assign finish                  = (smallnum_case | ((~rem_ff) ? (count[5:0] == 6'd32) : (count[5:0] == 6'd33)));
+
+   assign finish_dly              =  finish_ff & ~cancel;
+
+   assign sign_eff                = ~dp.unsign & (divisor[31:0] != 32'b0);
+
+   // q_ff next value: the dividend while idle, otherwise q_ff (or dividend_eff, shifted by the short-Q amount) with ~a_in[32] shifted in
+   assign q_in[32:0]              = ({33{~run_state                                   }} &  {1'b0,dividend[31:0]}) |
+                                    ({33{ run_state &  (valid_ff_x | shortq_enable_ff)}} & ({dividend_eff[31:0], ~a_in[32]} << shortq_shift_ff[4:0])) |
+                                    ({33{ run_state & ~(valid_ff_x | shortq_enable_ff)}} &  {q_ff[31:0], ~a_in[32]});
+
+   assign qff_enable              =  dp.valid | (run_state & ~shortq_enable);
+
+
+
+   // q_ff, or the i_dividend_comp output when the operation is signed and the captured dividend[31] was set
+   assign dividend_eff[31:0]      = (sign_ff & dividend_neg_ff) ? dividend_comp[31:0] : q_ff[31:0];
+
+   // Divisor term for the adder: m_ff when 'add' is set, otherwise ~m_ff with a +1 carry-in applied in a_in
+   assign m_eff[32:0]             = ( add ) ? m_ff[32:0] : ~m_ff[32:0];
+
+   assign a_eff_shift[55:0]       = {24'b0, dividend_eff[31:0]} << shortq_shift_ff[4:0];
+   // Adder operand: a_ff as-is for rem_correct, a_ff shifted left one bit with q_ff[32] shifted in, or the short-Q pre-shifted dividend bits
+   assign a_eff[32:0]             = ({33{ rem_correct                    }} &  a_ff[32:0]            ) |
+                                    ({33{~rem_correct & ~shortq_enable_ff}} & {a_ff[31:0], q_ff[32]} ) |
+                                    ({33{~rem_correct &  shortq_enable_ff}} & {9'b0,a_eff_shift[55:32]});
+
+   assign a_shift[32:0]           = {33{run_state}} & a_eff[32:0];
+
+   assign a_in[32:0]              = {33{run_state}} & (a_shift[32:0] + m_eff[32:0] + {32'b0,~add});
+
+   assign aff_enable              =  dp.valid | (run_state & ~shortq_enable & (count[5:0]!=6'd33)) | rem_correct;
+
+
+   assign m_already_comp          = (divisor_neg_ff & sign_ff);
+
+   // if m already complemented, then invert operation add->sub, sub->add
+   assign add                     = (a_ff[32] | rem_correct) ^ m_already_comp;
+   // Remainder fix-up: asserted at count == 33 for a remainder op when a_ff[32] is set
+   assign rem_correct             = (count[5:0] == 6'd33) & rem_ff & a_ff[32];
+
+
+   // Output: complement quotient / remainder as the captured signs require, then select small-number, remainder or quotient
+   assign q_ff_eff[31:0]          = (sign_ff & (dividend_neg_ff ^ divisor_neg_ff)) ? q_ff_comp[31:0] : q_ff[31:0];
+
+   assign a_ff_eff[31:0]          = (sign_ff &  dividend_neg_ff) ? a_ff_comp[31:0] : a_ff[31:0];
+
+   assign out[31:0]               = ({32{ smallnum_case_ff          }} & {28'b0, smallnum_ff[3:0]}) |
+                                    ({32{                     rem_ff}} &  a_ff_eff[31:0]          ) |
+                                    ({32{~smallnum_case_ff & ~rem_ff}} &  q_ff_eff[31:0]          );
+
+
+endmodule // el2_exu_div_ctl
diff --git a/design/exu/el2_exu_mul_ctl.sv b/design/exu/el2_exu_mul_ctl.sv
new file mode 100644
index 0000000..b9b68ba
--- /dev/null
+++ b/design/exu/el2_exu_mul_ctl.sv
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Multiplier: operands are sign-qualified to 33 bits, registered, multiplied, and one 32-bit half of the product is returned.
+module el2_exu_mul_ctl
+import el2_pkg::*;
+#(
+`include "el2_param.vh"
+ )
+  (
+   input logic          clk,              // Top level clock
+   input logic          rst_l,            // Reset
+   input logic          scan_mode,        // Scan mode
+
+   input el2_mul_pkt_t  mul_p,            // {Valid, RS1 signed operand, RS2 signed operand, Select low 32-bits of result}
+
+   input logic [31:0]   rs1_in,           // A operand
+   input logic [31:0]   rs2_in,           // B operand
+
+
+   output logic [31:0]  result_x          // Result
+   );
+
+   // Internal signals: '_d' feeds the operand registers, '_x' is their output
+   logic signed [32:0]  op_a_ext_d;
+   logic signed [32:0]  op_b_ext_d;
+   logic signed [32:0]  op_a_x;
+   logic signed [32:0]  op_b_x;
+   logic                sel_low_x;
+   logic                capture_en;
+   logic        [65:0]  product_x;
+
+
+   // Operands are captured only while the multiply packet is valid
+   assign capture_en        = mul_p.valid;
+
+   // Widen each operand to 33 bits. Bit 32 repeats bit 31 only when the packet marks
+   // that source as signed, so unsigned sources stay non-negative in the signed multiply.
+   assign op_a_ext_d[32:0]  = {mul_p.rs1_sign & rs1_in[31], rs1_in[31:0]};
+   assign op_b_ext_d[32:0]  = {mul_p.rs2_sign & rs2_in[31], rs2_in[31:0]};
+
+
+   // --------------------------- Multiply ----------------------------------
+
+   // The A-side register also carries mul_p.low so the half-select travels with the operands
+   rvdffe #(34) i_a_x_ff  (.*, .en(capture_en), .din({mul_p.low, op_a_ext_d[32:0]}), .dout({sel_low_x, op_a_x[32:0]}));
+   rvdffe #(33) i_b_x_ff  (.*, .en(capture_en), .din(op_b_ext_d[32:0]),              .dout(op_b_x[32:0]));
+
+
+   // Both factors are declared signed, so this is a signed multiply; the 66-bit target
+   // sets the width at which the product is evaluated.
+   assign product_x[65:0]   = op_a_x * op_b_x;
+
+
+   // AND-OR select between the two result halves: product bits [31:0] when the
+   // registered low-select is set, product bits [63:32] otherwise.
+   assign result_x[31:0]    = ( {32{ sel_low_x}} & product_x[31:0]  ) |
+                              ( {32{~sel_low_x}} & product_x[63:32] );
+
+
+
+
+endmodule // el2_exu_mul_ctl
diff --git a/design/ifu/el2_ifu.sv b/design/ifu/el2_ifu.sv
new file mode 100644
index 0000000..35588b1
--- /dev/null
+++ b/design/ifu/el2_ifu.sv
@@ -0,0 +1,354 @@
+//********************************************************************************
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//********************************************************************************
+//********************************************************************************
+// Function: Top level file for Icache, Fetch, Branch prediction & Aligner
+// BFF -> F1 -> F2 -> A
+//********************************************************************************
+
+module el2_ifu
+import el2_pkg::*;
+#(
+`include "el2_param.vh"
+ )
+  (
+   input logic free_clk,
+   input logic active_clk,
+   input logic clk,
+   input logic rst_l,
+
+   input logic dec_i0_decode_d,
+
+   input logic exu_flush_final, // flush, includes upper and lower
+   input logic dec_tlu_i0_commit_cmt , // committed i0
+   input logic dec_tlu_flush_err_wb , // flush due to parity error.
+   input logic dec_tlu_flush_noredir_wb, // don't fetch, validated with exu_flush_final
+   input logic [31:1] exu_flush_path_final, // flush fetch address
+
+   input logic [31:0] dec_tlu_mrac_ff ,// Side_effect , cacheable for each region
+   input logic dec_tlu_fence_i_wb, // fence.i, invalidate icache, validated with exu_flush_final
+   input logic dec_tlu_flush_leak_one_wb, // ignore bp for leak one fetches
+
+   input logic dec_tlu_bpred_disable, // disable all branch prediction
+   input logic dec_tlu_core_ecc_disable, // disable ecc checking and flagging
+   input logic dec_tlu_force_halt, // force halt
+
+   //-------------------------- IFU AXI signals--------------------------
+   // AXI Write Channels
+   output logic ifu_axi_awvalid,
+   output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid,
+   output logic [31:0] ifu_axi_awaddr,
+   output logic [3:0] ifu_axi_awregion,
+   output logic [7:0] ifu_axi_awlen,
+   output logic [2:0] ifu_axi_awsize,
+   output logic [1:0] ifu_axi_awburst,
+   output logic ifu_axi_awlock,
+   output logic [3:0] ifu_axi_awcache,
+   output logic [2:0] ifu_axi_awprot,
+   output logic [3:0] ifu_axi_awqos,
+
+   output logic ifu_axi_wvalid,
+   output logic [63:0] ifu_axi_wdata,
+   output logic [7:0] ifu_axi_wstrb,
+   output logic ifu_axi_wlast,
+
+   output logic ifu_axi_bready,
+
+   // AXI Read Channels
+   output logic ifu_axi_arvalid,
+   input logic ifu_axi_arready,
+   output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid,
+   output logic [31:0] ifu_axi_araddr,
+   output logic [3:0] ifu_axi_arregion,
+   output logic [7:0] ifu_axi_arlen,
+   output logic [2:0] ifu_axi_arsize,
+   output logic [1:0] ifu_axi_arburst,
+   output logic ifu_axi_arlock,
+   output logic [3:0] ifu_axi_arcache,
+   output logic [2:0] ifu_axi_arprot,
+   output logic [3:0] ifu_axi_arqos,
+
+   input logic ifu_axi_rvalid,
+   output logic ifu_axi_rready,
+   input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid,
+   input logic [63:0] ifu_axi_rdata,
+   input logic [1:0] ifu_axi_rresp,
+
+
+   input logic ifu_bus_clk_en,
+
+   input logic dma_iccm_req,
+   input logic [31:0] dma_mem_addr,
+   input logic [2:0] dma_mem_sz,
+   input logic dma_mem_write,
+   input logic [63:0] dma_mem_wdata,
+   input logic [2:0] dma_mem_tag, // DMA Buffer entry number
+
+
+   input logic dma_iccm_stall_any,
+   output logic iccm_dma_ecc_error,
+   output logic iccm_dma_rvalid,
+   output logic [63:0] iccm_dma_rdata,
+   output logic [2:0] iccm_dma_rtag, // Tag of the DMA req
+   output logic iccm_ready,
+
+   output logic ifu_pmu_instr_aligned,
+   output logic ifu_pmu_fetch_stall,
+   output logic ifu_ic_error_start, // has all of the I$ ecc/parity for data/tag
+
+// I$ & ITAG Ports
+   output logic [31:1] ic_rw_addr, // Read/Write address to the Icache.
+   output logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, // Icache write enable, when filling the Icache.
+   output logic ic_rd_en, // Icache read enable.
+
+   output logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC
+   input logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
+   input logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
+   input logic [25:0] ictag_debug_rd_data,// Debug icache tag.
+   output logic [70:0] ic_debug_wr_data, // Debug wr cache.
+
+   output logic [70:0] ifu_ic_debug_rd_data,
+
+   input logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, //
+   input logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr,
+   output logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache.
+   output logic ic_sel_premux_data, // Select the premux data.
+
+   output logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write address to the Icache.
+   output logic ic_debug_rd_en, // Icache debug rd
+   output logic ic_debug_wr_en, // Icache debug wr
+   output logic ic_debug_tag_array, // Debug tag array
+   output logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr.
+
+
+   output logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, // Valid bits when accessing the Icache. One valid bit per way. F2 stage
+
+   input logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, // Compare hits from Icache tags. Per way. F2 stage
+   input logic ic_tag_perr, // Icache Tag parity error
+
+
+   // ICCM ports
+   output logic [pt.ICCM_BITS-1:1] iccm_rw_addr, // ICCM read/write address.
+   output logic iccm_wren, // ICCM write enable (through the DMA)
+   output logic iccm_rden, // ICCM read enable.
+   output logic [77:0] iccm_wr_data, // ICCM write data.
+   output logic [2:0] iccm_wr_size, // ICCM write location within DW.
+
+   input logic [63:0] iccm_rd_data, // Data read from ICCM.
+   input logic [77:0] iccm_rd_data_ecc, // Data + ECC read from ICCM.
+
+   output logic ifu_iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc error.
+
+// Perf counter sigs
+   output logic ifu_pmu_ic_miss, // ic miss
+   output logic ifu_pmu_ic_hit, // ic hit
+   output logic ifu_pmu_bus_error, // iside bus error
+   output logic ifu_pmu_bus_busy, // iside bus busy
+   output logic ifu_pmu_bus_trxn, // iside bus transactions
+
+
+   output logic ifu_i0_icaf, // Instruction 0 access fault. From Aligner to Decode
+   output logic [1:0] ifu_i0_icaf_type, // Instruction 0 access fault type
+
+   output logic ifu_i0_valid, // Instruction 0 valid. From Aligner to Decode
+   output logic ifu_i0_icaf_f1, // Instruction 0 has access fault on second fetch group
+   output logic ifu_i0_dbecc, // Instruction 0 has double bit ecc error
+   output logic iccm_dma_sb_error, // Single Bit ECC error from a DMA access
+   output logic[31:0] ifu_i0_instr, // Instruction 0. From Aligner to Decode
+   output logic[31:1] ifu_i0_pc, // Instruction 0 pc. From Aligner to Decode
+   output logic ifu_i0_pc4, // Instruction 0 is 4 byte. From Aligner to Decode
+
+   output logic ifu_miss_state_idle, // There is no outstanding miss. Cache miss state is idle.
+
+
+   output el2_br_pkt_t i0_brp, // Instruction 0 branch packet. From Aligner to Decode
+   output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
+   output logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
+   output logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
+
+   input el2_predict_pkt_t exu_mp_pkt, // mispredict packet
+   input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr
+   input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr
+   input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index
+   input logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag
+
+   input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot0 update/error pkt
+   input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp
+   input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index
+
+   input logic dec_tlu_flush_lower_wb,
+
+   output logic [15:0] ifu_i0_cinst,
+
+/// Icache debug
+   input el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt ,
+   output logic ifu_ic_debug_rd_data_valid,
+   output logic iccm_buf_correct_ecc,
+   output logic iccm_correction_state,
+
+   input logic scan_mode
+   );
+
+   localparam TAGWIDTH = 2 ;
+   localparam IDWIDTH = 2 ;
+
+   logic ifu_fb_consume1, ifu_fb_consume2;
+   logic [31:1] ifc_fetch_addr_f;
+   logic [31:1] ifc_fetch_addr_bf;
+
+   logic [1:0] ifu_fetch_val; // valids on a 2B boundary, right justified; bit 0 set implies a valid fetch
+   logic [31:1] ifu_fetch_pc; // starting pc of fetch
+
+   logic iccm_rd_ecc_single_err, ic_error_start;
+   assign ifu_iccm_rd_ecc_single_err = iccm_rd_ecc_single_err;
+   assign ifu_ic_error_start = ic_error_start;
+
+   // Nets below are not driven in this file; the sub-blocks pick them up by name through their .* connections.
+   logic ic_write_stall;
+   logic ic_dma_active;
+   logic ifc_dma_access_ok;
+   logic ic_access_fault_f;
+   logic [1:0] ic_access_fault_type_f;
+   logic ifu_ic_mb_empty;
+
+
+   logic ic_hit_f;
+
+   // fetch control
+   el2_ifu_ifc_ctl #(.pt(pt)) ifc (.*
+                                   );
+
+   logic [1:0] ifu_bp_way_f; // way indication; right justified
+   logic ifu_bp_hit_taken_f; // kill next fetch; taken target found
+   logic [31:1] ifu_bp_btb_target_f; // predicted target PC
+   logic ifu_bp_inst_mask_f; // tell ic which valids to kill because of a taken branch; right justified
+
+   logic [1:0] ifu_bp_hist1_f; // history counter bit 1, one per 2B slot; right justified
+   logic [1:0] ifu_bp_hist0_f; // history counter bit 0, one per 2B slot; right justified
+   logic [11:0] ifu_bp_poffset_f; // predicted target offset
+   logic [1:0] ifu_bp_ret_f; // predicted ret ; right justified
+   logic [1:0] ifu_bp_pc4_f; // pc4 indication; right justified
+   logic [1:0] ifu_bp_valid_f; // branch valid, right justified
+   logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f;
+
+   // branch predictor
+   el2_ifu_bp_ctl #(.pt(pt)) bp (.*);
+
+
+   logic [1:0] ic_fetch_val_f;
+   logic [31:0] ic_data_f;
+   logic [31:0] ifu_fetch_data_f;
+   logic ifc_fetch_req_f;
+   logic ifc_fetch_req_f_raw;
+   logic iccm_rd_ecc_double_err; // This fetch has an iccm double error.
+
+   logic ifu_async_error_start;
+
+   // The aligner's fetch inputs are the memory controller / fetch control outputs, passed through unchanged.
+   assign ifu_fetch_data_f[31:0] = ic_data_f[31:0];
+   assign ifu_fetch_val[1:0] = ic_fetch_val_f[1:0];
+   assign ifu_fetch_pc[31:1] = ifc_fetch_addr_f[31:1];
+
+   logic ifc_fetch_uncacheable_bf; // The fetch request is uncacheable space. BF stage
+   logic ifc_fetch_req_bf; // Fetch request. Comes with the address. BF stage
+   logic ifc_fetch_req_bf_raw; // Fetch request without some qualifications. Used for clock-gating. BF stage
+   logic ifc_iccm_access_bf; // This request is to the ICCM. Do not generate misses to the bus.
+   logic ifc_region_acc_fault_bf; // Access fault. in ICCM region but offset is outside defined ICCM.
+
+   // aligner
+   el2_ifu_aln_ctl #(.pt(pt)) aln (.*);
+
+
+   // icache
+   el2_ifu_mem_ctl #(.pt(pt)) mem_ctl
+     (.*,
+      .ic_data_f(ic_data_f[31:0])
+      );
+
+
+
+   // Performance debug info
+   // Simulation-only BTB/BHT trace, compiled in when DUMP_BTB_ON is defined.
+   // It relies on hierarchical references (the CPU_TOP macro and the bp instance), so it is not synthesizable.
+`ifdef DUMP_BTB_ON
+   logic exu_mp_valid; // conditional branch mispredict
+   logic exu_mp_way; // way field of the mispredict packet
+   logic exu_mp_ataken; // direction is actual taken
+   logic exu_mp_boffset; // branch offset
+   logic exu_mp_pc4; // branch is a 4B inst
+   logic exu_mp_call; // branch is a call inst
+   logic exu_mp_ret; // branch is a ret inst
+   logic exu_mp_ja; // branch is a jump always
+   logic [1:0] exu_mp_hist; // new history
+   logic [11:0] exu_mp_tgt; // target offset
+   logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address
+
+   assign exu_mp_valid = exu_mp_pkt.misp; // conditional branch mispredict
+   assign exu_mp_ataken = exu_mp_pkt.ataken; // direction is actual taken
+   assign exu_mp_boffset = exu_mp_pkt.boffset; // branch offset
+   assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst
+   assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst
+   assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst
+   assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always
+   assign exu_mp_way = exu_mp_pkt.way; // way field of the mispredict packet
+   assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history
+   assign exu_mp_tgt[11:0] = exu_mp_pkt.toffset[11:0] ; // target offset
+   assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ; // BTB/BHT address
+
+   logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_f;
+   `define DEC `CPU_TOP.dec
+   `define EXU `CPU_TOP.exu
+   el2_btb_addr_hash #(.pt(pt)) f2hash(.pc(ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])); // pt passed down like every other instance
+   logic [31:0] mppc_ns, mppc;
+   logic exu_flush_final_d1;
+   assign mppc_ns[31:1] = `EXU.i0_flush_upper_x ? `EXU.exu_i0_pc_x : `EXU.dec_i0_pc_d;
+   assign mppc_ns[0] = 1'b0;
+   rvdff #(33) mdseal_ff (.*, .din({mppc_ns[31:0], exu_flush_final}), .dout({mppc[31:0], exu_flush_final_d1}));
+   logic tmp_bnk;
+   assign tmp_bnk = bp.btb_sel_f[1];
+
+   always @(negedge clk) begin
+      if(`DEC.tlu.mcyclel[31:0] == 32'h0000_0010) begin
+         $display("BTB_CONFIG: %d",pt.BTB_ARRAY_DEPTH*4);
+         `ifndef BP_NOGSHARE
+         $display("BHT_CONFIG: %d gshare: 1",pt.BHT_ARRAY_DEPTH*4);
+         `else
+         $display("BHT_CONFIG: %d gshare: 0",pt.BHT_ARRAY_DEPTH*4);
+         `endif
+         $display("RS_CONFIG: %d", pt.RET_STACK_SIZE);
+      end
+      if(exu_flush_final_d1 & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error) & (exu_mp_pkt.misp | exu_mp_pkt.ataken))
+        $display("%7d BTB_MP : index: %0h bank: %0h call: %b ret: %b ataken: %b hist: %h valid: %b tag: %h targ: %h eghr: %b pred: %b ghr_index: %h brpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha, exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO], 1'b0, exu_mp_call, exu_mp_ret, exu_mp_ataken, exu_mp_hist[1:0], exu_mp_valid, exu_mp_btag[pt.BTB_BTAG_SIZE-1:0], {exu_flush_path_final[31:1], 1'b0}, exu_mp_eghr[pt.BHT_GHR_SIZE-1:0], exu_mp_valid, bp.bht_wr_addr0, mppc[31:0], exu_mp_pkt.way);
+      // Bound the loop by the width of ifu_bp_valid_f (2 bits); the previous bound of 8 bit-selected out of range.
+      for(int i = 0; i < $bits(ifu_bp_valid_f); i++) begin
+         if(ifu_bp_valid_f[i] & ifc_fetch_req_f)
+           $display("%7d BTB_HIT : index: %0h bank: %0h call: %b ret: %b taken: %b strength: %b tag: %h targ: %0h ghr: %4b ghr_index: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],bp.btb_sel_f[1], bp.btb_rd_call_f, bp.btb_rd_ret_f, ifu_bp_hist1_f[tmp_bnk], ifu_bp_hist0_f[tmp_bnk], bp.fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0], {ifu_bp_btb_target_f[31:1], 1'b0}, bp.fghr[pt.BHT_GHR_SIZE-1:0], bp.bht_rd_addr_f, ifu_bp_way_f[tmp_bnk]);
+      end
+      if(dec_tlu_br0_r_pkt.valid & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error))
+        $display("%7d BTB_UPD0: ghr_index: %0h bank: %0h hist: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,bp.br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO],{dec_tlu_br0_r_pkt.middle}, dec_tlu_br0_r_pkt.hist, dec_tlu_br0_r_pkt.way);
+
+      if(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error)
+        $display("%7d BTB_ERR0: index: %0h bank: %0h start: %b rfpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],1'b0, dec_tlu_br0_r_pkt.br_start_error, {exu_flush_path_final[31:1], 1'b0}, dec_tlu_br0_r_pkt.way);
+   end // always @ (negedge clk)
+   function [1:0] encode4_2;
+      input [3:0] in;
+
+      encode4_2[1] = in[3] | in[2];
+      encode4_2[0] = in[3] | in[1];
+
+   endfunction
+`endif
+endmodule // el2_ifu
diff --git a/design/ifu/el2_ifu_aln_ctl.sv b/design/ifu/el2_ifu_aln_ctl.sv
new file mode 100644
index 0000000..55e17db
--- /dev/null
+++ b/design/ifu/el2_ifu_aln_ctl.sv
@@ -0,0 +1,627 @@
+//********************************************************************************
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//******************************************************************************** + +//******************************************************************************** +// Function: Instruction aligner +//******************************************************************************** +module el2_ifu_aln_ctl +import el2_pkg::*; +#( +`include "el2_param.vh" + ) + ( + + input logic scan_mode, + input logic rst_l, + input logic clk, + input logic active_clk, + + input logic ifu_async_error_start, // ecc/parity related errors with current fetch - not sent down the pipe + + input logic iccm_rd_ecc_double_err, // This fetch has a double ICCM ecc error. + + input logic ic_access_fault_f, // Instruction access fault for the current fetch. + input logic [1:0] ic_access_fault_type_f, // Instruction access fault types + input logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f, // fetch GHR + input logic [31:1] ifu_bp_btb_target_f, // predicted RET target + input logic [11:0] ifu_bp_poffset_f, // predicted target offset + + input logic [1:0] ifu_bp_hist0_f, // history counters for all 4 potential branches, bit 1, right justified + input logic [1:0] ifu_bp_hist1_f, // history counters for all 4 potential branches, bit 1, right justified + input logic [1:0] ifu_bp_pc4_f, // pc4 indication, right justified + input logic [1:0] ifu_bp_way_f, // way indication, right justified + input logic [1:0] ifu_bp_valid_f, // branch valid, right justified + input logic [1:0] ifu_bp_ret_f, // predicted ret indication, right justified + + input logic exu_flush_final, // Flush from the pipeline. 
+ + input logic dec_i0_decode_d, + + input logic [31:0] ifu_fetch_data_f, // fetch data in memory format - not right justified + + input logic [1:0] ifu_fetch_val, // valids on a 2B boundary, right justified + input logic [31:1] ifu_fetch_pc, // starting pc of fetch + + + + output logic ifu_i0_valid, // Instruction 0 is valid + output logic ifu_i0_icaf, // Instruction 0 has access fault + output logic [1:0] ifu_i0_icaf_type, // Instruction 0 access fault type + output logic ifu_i0_icaf_f1, // Instruction 0 has access fault on second fetch group + + output logic ifu_i0_dbecc, // Instruction 0 has double bit ecc error + output logic [31:0] ifu_i0_instr, // Instruction 0 + output logic [31:1] ifu_i0_pc, // Instruction 0 PC + output logic ifu_i0_pc4, + + output logic ifu_fb_consume1, // Consumed one buffer. To fetch control fetch for buffer mass balance + output logic ifu_fb_consume2, // Consumed two buffers.To fetch control fetch for buffer mass balance + output el2_br_pkt_t i0_brp, // Branch packet for I0. 
+ output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index + output logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR + output logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag + output logic ifu_pmu_instr_aligned, // number of inst aligned this cycle + + output logic [15:0] ifu_i0_cinst // 16b compress inst for i0 + ); + + + + logic ifvalid; + logic shift_f1_f0, shift_f2_f0, shift_f2_f1; + logic fetch_to_f0, fetch_to_f1, fetch_to_f2; + + logic [1:0] f2val_in, f2val; + logic [1:0] f1val_in, f1val; + logic [1:0] f0val_in, f0val; + logic [1:0] sf1val, sf0val; + + logic [31:1] f2pc_in, f2pc; + logic [31:1] f1pc_in, f1pc; + logic [31:1] f0pc_in, f0pc; + logic [31:1] sf1pc; + + logic [31:0] aligndata; + logic first4B, first2B; + + logic [31:0] uncompress0; + logic i0_shift; + logic shift_2B, shift_4B; + logic f1_shift_2B; + logic f2_valid, sf1_valid, sf0_valid; + + logic [31:0] ifirst; + logic [31:1] f0pc_plus1; + logic [31:1] f1pc_plus1; + logic [1:0] alignval; + logic [31:1] firstpc, secondpc; + + logic [11:0] f1poffset; + logic [11:0] f0poffset; + logic [pt.BHT_GHR_SIZE-1:0] f1fghr; + logic [pt.BHT_GHR_SIZE-1:0] f0fghr; + logic [1:0] f1hist1; + logic [1:0] f0hist1; + logic [1:0] f1hist0; + logic [1:0] f0hist0; + + logic [1:0] f1ictype; + logic [1:0] f0ictype; + + logic [1:0] f1pc4; + logic [1:0] f0pc4; + + logic [1:0] f1ret; + logic [1:0] f0ret; + logic [1:0] f1way; + logic [1:0] f0way; + + logic [1:0] f1brend; + logic [1:0] f0brend; + + logic [1:0] alignbrend; + logic [1:0] alignpc4; + + logic [1:0] alignret; + logic [1:0] alignway; + logic [1:0] alignhist1; + logic [1:0] alignhist0; + logic [1:1] alignfromf1; + logic i0_ends_f1; + logic i0_br_start_error; + + logic [31:1] f1prett; + logic [31:1] f0prett; + logic f1dbecc; + logic f0dbecc; + logic f1icaf; + logic f0icaf; + + logic [1:0] aligndbecc; + logic [1:0] alignicaf; + logic i0_brp_pc4; + + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] firstpc_hash, secondpc_hash; + + logic first_legal; + + 
logic f2_wr_en; + logic f0_shift_wr_en; + logic f1_shift_wr_en; + + logic [1:0] wrptr, wrptr_in; + logic [1:0] rdptr, rdptr_in; + logic [2:0] qwen; + logic [31:0] q2,q1,q0; + logic q2off_in, q2off; + logic q1off_in, q1off; + logic q0off_in, q0off; + logic f0_shift_2B; + + logic [31:0] q0eff; + logic [31:0] q0final; + logic q0ptr; + logic [1:0] q0sel; + + logic [31:0] q1eff; + logic [15:0] q1final; + logic q1ptr; + logic [1:0] q1sel; + + logic [2:0] qren; + + logic consume_fb1, consume_fb0; + logic [1:1] icaf_eff; + + localparam BRDATA_SIZE = 12; + localparam BRDATA_WIDTH = 6; + logic [BRDATA_SIZE-1:0] brdata_in, brdata2, brdata1, brdata0; + logic [BRDATA_SIZE-1:0] brdata1eff, brdata0eff; + logic [BRDATA_SIZE-1:0] brdata1final, brdata0final; + + localparam MHI = 46+pt.BHT_GHR_SIZE; + localparam MSIZE = 47+pt.BHT_GHR_SIZE; + logic [MHI:0] misc_data_in, misc2, misc1, misc0; + logic [MHI:0] misc1eff, misc0eff; + + logic [pt.BTB_BTAG_SIZE-1:0] firstbrtag_hash, secondbrtag_hash; + + logic error_stall_in, error_stall; + + assign error_stall_in = (error_stall | ifu_async_error_start) & ~exu_flush_final; + + rvdff #(1) error_stallff (.*, .clk(active_clk), .din(error_stall_in), .dout(error_stall)); + + rvdff #(2) wrpff (.*, .clk(active_clk), .din(wrptr_in[1:0]), .dout(wrptr[1:0])); + rvdff #(2) rdpff (.*, .clk(active_clk), .din(rdptr_in[1:0]), .dout(rdptr[1:0])); + + rvdff #(2) f2valff (.*, .clk(active_clk), .din(f2val_in[1:0]), .dout(f2val[1:0])); + rvdff #(2) f1valff (.*, .clk(active_clk), .din(f1val_in[1:0]), .dout(f1val[1:0])); + rvdff #(2) f0valff (.*, .clk(active_clk), .din(f0val_in[1:0]), .dout(f0val[1:0])); + + rvdff #(1) q2offsetff (.*, .clk(active_clk), .din(q2off_in), .dout(q2off)); + rvdff #(1) q1offsetff (.*, .clk(active_clk), .din(q1off_in), .dout(q1off)); + rvdff #(1) q0offsetff (.*, .clk(active_clk), .din(q0off_in), .dout(q0off)); + rvdffe #(31) f2pcff (.*, .en(f2_wr_en), .din(f2pc_in[31:1]), .dout(f2pc[31:1])); + rvdffe #(31) f1pcff (.*, .en(f1_shift_wr_en), 
.din(f1pc_in[31:1]), .dout(f1pc[31:1])); + rvdffe #(31) f0pcff (.*, .en(f0_shift_wr_en), .din(f0pc_in[31:1]), .dout(f0pc[31:1])); + rvdffe #(BRDATA_SIZE) brdata2ff (.*, .en(qwen[2]), .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata2[BRDATA_SIZE-1:0])); + rvdffe #(BRDATA_SIZE) brdata1ff (.*, .en(qwen[1]), .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata1[BRDATA_SIZE-1:0])); + rvdffe #(BRDATA_SIZE) brdata0ff (.*, .en(qwen[0]), .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata0[BRDATA_SIZE-1:0])); + rvdffe #(MSIZE) misc2ff (.*, .en(qwen[2]), .din(misc_data_in[MHI:0]), .dout(misc2[MHI:0])); + rvdffe #(MSIZE) misc1ff (.*, .en(qwen[1]), .din(misc_data_in[MHI:0]), .dout(misc1[MHI:0])); + rvdffe #(MSIZE) misc0ff (.*, .en(qwen[0]), .din(misc_data_in[MHI:0]), .dout(misc0[MHI:0])); + + rvdffe #(32) q2ff (.*, .en(qwen[2]), .din(ifu_fetch_data_f[31:0]), .dout(q2[31:0])); + rvdffe #(32) q1ff (.*, .en(qwen[1]), .din(ifu_fetch_data_f[31:0]), .dout(q1[31:0])); + rvdffe #(32) q0ff (.*, .en(qwen[0]), .din(ifu_fetch_data_f[31:0]), .dout(q0[31:0])); + + + + + + assign f2_wr_en = fetch_to_f2; + assign f1_shift_wr_en = fetch_to_f1 | shift_f2_f1 | f1_shift_2B; + assign f0_shift_wr_en = fetch_to_f0 | shift_f2_f0 | shift_f1_f0 | shift_2B | shift_4B; + + + + // new queue control logic + + assign qren[2:0] = { rdptr[1:0] == 2'b10, + rdptr[1:0] == 2'b01, + rdptr[1:0] == 2'b00 }; + + assign qwen[2:0] = { (wrptr[1:0] == 2'b10) & ifvalid, + (wrptr[1:0] == 2'b01) & ifvalid, + (wrptr[1:0] == 2'b00) & ifvalid }; + + + assign rdptr_in[1:0] = ({2{ qren[0] & ifu_fb_consume1 & ~exu_flush_final}} & 2'b01 ) | + ({2{ qren[1] & ifu_fb_consume1 & ~exu_flush_final}} & 2'b10 ) | + ({2{ qren[2] & ifu_fb_consume1 & ~exu_flush_final}} & 2'b00 ) | + ({2{ qren[0] & ifu_fb_consume2 & ~exu_flush_final}} & 2'b10 ) | + ({2{ qren[1] & ifu_fb_consume2 & ~exu_flush_final}} & 2'b00 ) | + ({2{ qren[2] & ifu_fb_consume2 & ~exu_flush_final}} & 2'b01 ) | + ({2{~ifu_fb_consume1 & ~ifu_fb_consume2 & ~exu_flush_final}} & rdptr[1:0]); + 
+ assign wrptr_in[1:0] = ({2{ qwen[0] & ~exu_flush_final}} & 2'b01 ) | + ({2{ qwen[1] & ~exu_flush_final}} & 2'b10 ) | + ({2{ qwen[2] & ~exu_flush_final}} & 2'b00 ) | + ({2{~ifvalid & ~exu_flush_final}} & wrptr[1:0]); + + + + assign q2off_in = ( ~qwen[2] & (rdptr[1:0]==2'd2) & (q2off | f0_shift_2B) ) | + ( ~qwen[2] & (rdptr[1:0]==2'd1) & (q2off | f1_shift_2B) ) | + ( ~qwen[2] & (rdptr[1:0]==2'd0) & q2off ); + + assign q1off_in = ( ~qwen[1] & (rdptr[1:0]==2'd1) & (q1off | f0_shift_2B) ) | + ( ~qwen[1] & (rdptr[1:0]==2'd0) & (q1off | f1_shift_2B) ) | + ( ~qwen[1] & (rdptr[1:0]==2'd2) & q1off ); + + assign q0off_in = ( ~qwen[0] & (rdptr[1:0]==2'd0) & (q0off | f0_shift_2B) ) | + ( ~qwen[0] & (rdptr[1:0]==2'd2) & (q0off | f1_shift_2B) ) | + ( ~qwen[0] & (rdptr[1:0]==2'd1) & q0off ); + + + + assign q0ptr = ( (rdptr[1:0]==2'b00) & q0off ) | + ( (rdptr[1:0]==2'b01) & q1off ) | + ( (rdptr[1:0]==2'b10) & q2off ); + + assign q1ptr = ( (rdptr[1:0]==2'b00) & q1off ) | + ( (rdptr[1:0]==2'b01) & q2off ) | + ( (rdptr[1:0]==2'b10) & q0off ); + + assign q0sel[1:0] = {q0ptr,~q0ptr}; + + assign q1sel[1:0] = {q1ptr,~q1ptr}; + + // end new queue control logic + + + // misc data that is associated with each fetch buffer + + assign misc_data_in[MHI:0] = { iccm_rd_ecc_double_err, + ic_access_fault_f, + ic_access_fault_type_f[1:0], + ifu_bp_btb_target_f[31:1], + ifu_bp_poffset_f[11:0], + ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0] + }; + + + assign {misc1eff[MHI:0],misc0eff[MHI:0]} = (({MSIZE*2{qren[0]}} & {misc1[MHI:0],misc0[MHI:0]}) | + ({MSIZE*2{qren[1]}} & {misc2[MHI:0],misc1[MHI:0]}) | + ({MSIZE*2{qren[2]}} & {misc0[MHI:0],misc2[MHI:0]})); + + assign { f1dbecc, + f1icaf, + f1ictype[1:0], + f1prett[31:1], + f1poffset[11:0], + f1fghr[pt.BHT_GHR_SIZE-1:0] + } = misc1eff[MHI:0]; + + assign { f0dbecc, + f0icaf, + f0ictype[1:0], + f0prett[31:1], + f0poffset[11:0], + f0fghr[pt.BHT_GHR_SIZE-1:0] + } = misc0eff[MHI:0]; + + + assign brdata_in[BRDATA_SIZE-1:0] = { + 
ifu_bp_hist1_f[1],ifu_bp_hist0_f[1],ifu_bp_pc4_f[1],ifu_bp_way_f[1],ifu_bp_valid_f[1],ifu_bp_ret_f[1], + ifu_bp_hist1_f[0],ifu_bp_hist0_f[0],ifu_bp_pc4_f[0],ifu_bp_way_f[0],ifu_bp_valid_f[0],ifu_bp_ret_f[0] + }; + + + + assign {brdata1eff[BRDATA_SIZE-1:0],brdata0eff[BRDATA_SIZE-1:0]} = (({BRDATA_SIZE*2{qren[0]}} & {brdata1[BRDATA_SIZE-1:0],brdata0[BRDATA_SIZE-1:0]}) | + ({BRDATA_SIZE*2{qren[1]}} & {brdata2[BRDATA_SIZE-1:0],brdata1[BRDATA_SIZE-1:0]}) | + ({BRDATA_SIZE*2{qren[2]}} & {brdata0[BRDATA_SIZE-1:0],brdata2[BRDATA_SIZE-1:0]})); + + assign brdata0final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q0sel[0]}} & { brdata0eff[2*BRDATA_WIDTH-1:0]}) | + ({BRDATA_SIZE{q0sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:BRDATA_WIDTH]})); + + assign brdata1final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q1sel[0]}} & { brdata1eff[2*BRDATA_WIDTH-1:0]}) | + ({BRDATA_SIZE{q1sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:BRDATA_WIDTH]})); + + assign {f0hist1[1],f0hist0[1],f0pc4[1],f0way[1],f0brend[1],f0ret[1], + f0hist1[0],f0hist0[0],f0pc4[0],f0way[0],f0brend[0],f0ret[0]} = brdata0final[BRDATA_SIZE-1:0]; + + assign {f1hist1[1],f1hist0[1],f1pc4[1],f1way[1],f1brend[1],f1ret[1], + f1hist1[0],f1hist0[0],f1pc4[0],f1way[0],f1brend[0],f1ret[0]} = brdata1final[BRDATA_SIZE-1:0]; + + + // possible states of { sf0_valid, sf1_valid, f2_valid } + // + // 000 if->f0 + // 100 if->f1 + // 101 illegal + // 010 if->f1, f1->f0 + // 110 if->f2 + // 001 if->f1, f2->f0 + // 011 if->f2, f2->f1, f1->f0 + // 111 !if, no shift + + assign f2_valid = f2val[0]; + assign sf1_valid = sf1val[0]; + assign sf0_valid = sf0val[0]; + + // interface to fetch + + assign consume_fb0 = ~sf0val[0] & f0val[0]; + + assign consume_fb1 = ~sf1val[0] & f1val[0]; + + assign ifu_fb_consume1 = consume_fb0 & ~consume_fb1 & ~exu_flush_final; + assign ifu_fb_consume2 = consume_fb0 & consume_fb1 & ~exu_flush_final; + + assign ifvalid = ifu_fetch_val[0]; + + assign shift_f1_f0 = ~sf0_valid & sf1_valid; + assign shift_f2_f0 = 
~sf0_valid & ~sf1_valid & f2_valid; + assign shift_f2_f1 = ~sf0_valid & sf1_valid & f2_valid; + + assign fetch_to_f0 = ~sf0_valid & ~sf1_valid & ~f2_valid & ifvalid; + + assign fetch_to_f1 = (~sf0_valid & ~sf1_valid & f2_valid & ifvalid) | + (~sf0_valid & sf1_valid & ~f2_valid & ifvalid) | + ( sf0_valid & ~sf1_valid & ~f2_valid & ifvalid); + + assign fetch_to_f2 = (~sf0_valid & sf1_valid & f2_valid & ifvalid) | + ( sf0_valid & sf1_valid & ~f2_valid & ifvalid); + + + + assign f0pc_plus1[31:1] = f0pc[31:1] + 31'd1; + assign f1pc_plus1[31:1] = f1pc[31:1] + 31'd1; + + assign f2pc_in[31:1] = ifu_fetch_pc[31:1]; + + + assign sf1pc[31:1] = ({31{ f1_shift_2B}} & f1pc_plus1[31:1]) | + ({31{~f1_shift_2B}} & f1pc[31:1] ); + + assign f1pc_in[31:1] = ({31{ fetch_to_f1 }} & ifu_fetch_pc[31:1]) | + ({31{ shift_f2_f1}} & f2pc[31:1] ) | + ({31{~fetch_to_f1 & ~shift_f2_f1}} & sf1pc[31:1] ); + + + assign f0pc_in[31:1] = ({31{ fetch_to_f0 }} & ifu_fetch_pc[31:1]) | + ({31{ shift_f2_f0 }} & f2pc[31:1] ) | + ({31{ shift_f1_f0}} & sf1pc[31:1] ) | + ({31{~fetch_to_f0 & ~shift_f2_f0 & ~shift_f1_f0}} & f0pc_plus1[31:1] ); + + + + assign f2val_in[1:0] = ({2{ fetch_to_f2 & ~exu_flush_final}} & ifu_fetch_val[1:0]) | + ({2{~fetch_to_f2 & ~shift_f2_f1 & ~shift_f2_f0 & ~exu_flush_final}} & f2val[1:0] ); + + + assign sf1val[1:0] = ({2{ f1_shift_2B}} & {1'b0,f1val[1]}) | + ({2{~f1_shift_2B}} & f1val[1:0] ); + + assign f1val_in[1:0] = ({2{ fetch_to_f1 & ~exu_flush_final}} & ifu_fetch_val[1:0]) | + ({2{ shift_f2_f1 & ~exu_flush_final}} & f2val[1:0] ) | + ({2{~fetch_to_f1 & ~shift_f2_f1 & ~shift_f1_f0 & ~exu_flush_final}} & sf1val[1:0] ); + + + + assign sf0val[1:0] = ({2{ shift_2B }} & {1'b0,f0val[1]}) | + ({2{~shift_2B & ~shift_4B}} & f0val[1:0]); + + assign f0val_in[1:0] = ({2{fetch_to_f0 & ~exu_flush_final}} & ifu_fetch_val[1:0]) | + ({2{ shift_f2_f0 & ~exu_flush_final}} & f2val[1:0] ) | + ({2{ shift_f1_f0 & ~exu_flush_final}} & sf1val[1:0] ) | + ({2{~fetch_to_f0 & ~shift_f2_f0 & ~shift_f1_f0 & 
~exu_flush_final}} & sf0val[1:0] ); + + + + + + + assign {q1eff[31:0],q0eff[31:0]} = (({64{qren[0]}} & {q1[31:0],q0[31:0]}) | + ({64{qren[1]}} & {q2[31:0],q1[31:0]}) | + ({64{qren[2]}} & {q0[31:0],q2[31:0]})); + + assign q0final[31:0] = ({32{q0sel[0]}} & { q0eff[31:0]}) | + ({32{q0sel[1]}} & {16'b0,q0eff[31:16]}); + + assign q1final[15:0] = ({16{q1sel[0]}} & q1eff[15:0] ) | + ({16{q1sel[1]}} & q1eff[31:16]); + + assign aligndata[31:0] = ({32{ f0val[1] }} & {q0final[31:0]}) | + ({32{~f0val[1] & f0val[0]}} & {q1final[15:0],q0final[15:0]}); + + assign alignval[1:0] = ({ 2{ f0val[1] }} & {2'b11}) | + ({ 2{~f0val[1] & f0val[0]}} & {f1val[0],1'b1}); + + assign alignicaf[1:0] = ({ 2{ f0val[1] }} & {{2{f0icaf}}}) | + ({ 2{~f0val[1] & f0val[0]}} & {f1icaf,f0icaf}); + + assign aligndbecc[1:0] = ({ 2{ f0val[1] }} & {{2{f0dbecc}}}) | + ({ 2{~f0val[1] & f0val[0]}} & {f1dbecc,f0dbecc}); + + // for branch prediction + assign alignbrend[1:0] = ({ 2{ f0val[1] }} & f0brend[1:0] ) | + ({ 2{~f0val[1] & f0val[0]}} & {f1brend[0],f0brend[0]}); + + assign alignpc4[1:0] = ({ 2{ f0val[1] }} & f0pc4[1:0] ) | + ({ 2{~f0val[1] & f0val[0]}} & {f1pc4[0],f0pc4[0]}); + + + assign alignret[1:0] = ({ 2{ f0val[1] }} & f0ret[1:0] ) | + ({ 2{~f0val[1] & f0val[0]}} & {f1ret[0],f0ret[0]}); + + assign alignway[1:0] = ({ 2{ f0val[1] }} & f0way[1:0] ) | + ({ 2{~f0val[1] & f0val[0]}} & {f1way[0],f0way[0]}); + + assign alignhist1[1:0] = ({ 2{ f0val[1] }} & f0hist1[1:0] ) | + ({ 2{~f0val[1] & f0val[0]}} & {f1hist1[0],f0hist1[0]}); + + assign alignhist0[1:0] = ({ 2{ f0val[1] }} & f0hist0[1:0] ) | + ({ 2{~f0val[1] & f0val[0]}} & {f1hist0[0],f0hist0[0]}); + + assign alignfromf1[1] = ~f0val[1] & f0val[0]; + + assign secondpc[31:1] = ({31{ f0val[1] }} & f0pc_plus1[31:1]) | + ({31{~f0val[1] & f0val[0]}} & f1pc[31:1] ); + + + assign ifu_i0_pc[31:1] = f0pc[31:1]; + + assign firstpc[31:1] = f0pc[31:1]; + + assign ifu_i0_pc4 = first4B; + + + + assign ifu_i0_cinst[15:0] = aligndata[15:0]; + + assign first4B = 
(aligndata[1:0] == 2'b11); + assign first2B = ~first4B; + + assign ifu_i0_valid = (first4B & alignval[1]) | + (first2B & alignval[0]); + + // inst access fault on any byte of inst results in access fault for the inst + assign ifu_i0_icaf = (first4B & (|alignicaf[1:0])) | + (first2B & alignicaf[0] ); + + assign ifu_i0_icaf_type[1:0] = (first4B & ~f0val[1] & f0val[0] & ~alignicaf[0] & ~aligndbecc[0]) ? f1ictype[1:0] : f0ictype[1:0]; + + + assign icaf_eff[1] = alignicaf[1] | aligndbecc[1]; + + assign ifu_i0_icaf_f1 = first4B & icaf_eff[1] & alignfromf1[1]; + + assign ifu_i0_dbecc = (first4B & (|aligndbecc[1:0])) | + (first2B & aligndbecc[0] ); + + + assign ifirst[31:0] = aligndata[31:0]; + + + assign ifu_i0_instr[31:0] = ({32{first4B}} & ifirst[31:0]) | + ({32{first2B}} & uncompress0[31:0]); + + + // if you detect br does not start on instruction boundary + + el2_btb_addr_hash #(.pt(pt)) firsthash (.pc(firstpc [pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(firstpc_hash [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])); + el2_btb_addr_hash #(.pt(pt)) secondhash(.pc(secondpc[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(secondpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])); + +if(pt.BTB_BTAG_FOLD) begin : btbfold + el2_btb_tag_hash_fold #(.pt(pt)) first_brhash (.pc(firstpc [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]), .hash(firstbrtag_hash [pt.BTB_BTAG_SIZE-1:0])); + el2_btb_tag_hash_fold #(.pt(pt)) second_brhash(.pc(secondpc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]), .hash(secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0])); +end +else begin + el2_btb_tag_hash #(.pt(pt)) first_brhash (.pc(firstpc [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]), .hash(firstbrtag_hash [pt.BTB_BTAG_SIZE-1:0])); + el2_btb_tag_hash #(.pt(pt)) second_brhash(.pc(secondpc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]), .hash(secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0])); +end + // start_indexing - you want pc to 
be based on where the end of branch is prediction + // normal indexing pc based that's incorrect now for pc4 cases it's pc4 + 2 + // Build i0_brp, the branch prediction packet for instruction 0: index [0] of the align* vectors is used for a 2-byte instruction, index [1] for a 4-byte one. + always_comb begin + // every field defaults to zero; the assignments below override individual fields + i0_brp = '0; + // start error: a 4-byte instruction with a valid second halfword has a branch end flagged on its first halfword + i0_br_start_error = (first4B & alignval[1] & alignbrend[0]); + // valid when a branch end is flagged on the last halfword of the instruction, or on a start error + i0_brp.valid = (first2B & alignbrend[0]) | + (first4B & alignbrend[1]) | + i0_br_start_error; + // pc4 indication carried with the selected halfword; checked against the real instruction size in br_error below + i0_brp_pc4 = (first2B & alignpc4[0]) | + (first4B & alignpc4[1]); + + i0_brp.ret = (first2B & alignret[0]) | + (first4B & alignret[1]); + // way and bank come from halfword 0 for a 2-byte instruction or whenever halfword 0 carries a branch end, else from halfword 1 + i0_brp.way = (first2B | alignbrend[0]) ? alignway[0] : alignway[1]; + + i0_brp.hist[1] = (first2B & alignhist1[0]) | + (first4B & alignhist1[1]); + + i0_brp.hist[0] = (first2B & alignhist0[0]) | + (first4B & alignhist0[1]); + // a 4-byte instruction with alignfromf1[1] set takes toffset and prett from the f1 copies instead of the f0 ones + i0_ends_f1 = first4B & alignfromf1[1]; + + i0_brp.toffset[11:0] = (i0_ends_f1) ? f1poffset[11:0] : f0poffset[11:0]; + + i0_brp.prett[31:1] = (i0_ends_f1) ? f1prett[31:1] : f0prett[31:1]; + + i0_brp.br_start_error = i0_br_start_error; + + i0_brp.bank = (first2B | alignbrend[0]) ? firstpc[1] : secondpc[1]; + // branch error: a valid prediction whose pc4 indication disagrees with the size of the instruction + i0_brp.br_error = (i0_brp.valid & i0_brp_pc4 & first2B) | + (i0_brp.valid & ~i0_brp_pc4 & first4B); + + end + + // the btb index and tag hashes use the same halfword selection as i0_brp.way; fghr follows i0_ends_f1 + assign ifu_i0_bp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = (first2B | alignbrend[0]) ? firstpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] : + secondpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + + assign ifu_i0_bp_fghr[pt.BHT_GHR_SIZE-1:0] = (i0_ends_f1) ? f1fghr[pt.BHT_GHR_SIZE-1:0] : + f0fghr[pt.BHT_GHR_SIZE-1:0]; + + assign ifu_i0_bp_btag[pt.BTB_BTAG_SIZE-1:0] = (first2B | alignbrend[0]) ? 
firstbrtag_hash[pt.BTB_BTAG_SIZE-1:0] : + secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0]; + + + // decompress + + el2_ifu_compress_ctl compress0 (.din(aligndata[15:0]), .dout(uncompress0[31:0])); + + + + assign i0_shift = dec_i0_decode_d & ~error_stall; + + assign ifu_pmu_instr_aligned = i0_shift; + + + // compute how many bytes are being shifted from f0 + + // assign shift_0B = ~i0_shift; + + assign shift_2B = i0_shift & first2B; + + assign shift_4B = i0_shift & first4B; + + // exact equations for the queue logic + assign f0_shift_2B = (shift_2B & f0val[0] ) | + (shift_4B & f0val[0] & ~f0val[1]); + + + // f0 valid states + // 11 + // 10 + // 00 + + assign f1_shift_2B = f0val[0] & ~f0val[1] & shift_4B; + + + +endmodule diff --git a/design/ifu/el2_ifu_bp_ctl.sv b/design/ifu/el2_ifu_bp_ctl.sv new file mode 100644 index 0000000..ad1c501 --- /dev/null +++ b/design/ifu/el2_ifu_bp_ctl.sv @@ -0,0 +1,752 @@ +//******************************************************************************** +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//******************************************************************************** + +//******************************************************************************** +// Function: Branch predictor +// Comments: +// +// +// Bank3 : Bank2 : Bank1 : Bank0 +// FA C 8 4 0 +//******************************************************************************** + +module el2_ifu_bp_ctl +import el2_pkg::*; +#( +`include "el2_param.vh" + ) + ( + + input logic clk, + input logic active_clk, + input logic rst_l, + + input logic ic_hit_f, // Icache hit, enables F address capture + + input logic [31:1] ifc_fetch_addr_f, // look up btb address + input logic ifc_fetch_req_f, // F1 valid + + input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // BP commit update packet, includes errors + input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index + + input logic dec_tlu_flush_lower_wb, // used to move EX4 RS to EX1 and F + input logic dec_tlu_flush_leak_one_wb, // don't hit for leak one fetches + + input logic dec_tlu_bpred_disable, // disable all branch prediction + + input el2_predict_pkt_t exu_mp_pkt, // mispredict packet + + input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr (for patching fghr) + input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index + input logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag + + input logic exu_flush_final, // all flushes + + output logic ifu_bp_hit_taken_f, // btb hit, select target + output logic [31:1] ifu_bp_btb_target_f, // predicted target PC + output logic ifu_bp_inst_mask_f, // tell ic which valids to kill because of a taken branch, right justified + + output logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f, // fetch ghr + + output logic [1:0] ifu_bp_way_f, // way + output logic [1:0] ifu_bp_ret_f, // predicted ret + output logic [1:0] ifu_bp_hist1_f, // history 
counters for all 4 potential branches, bit 1, right justified + output logic [1:0] ifu_bp_hist0_f, // history counters for all 4 potential branches, bit 0, right justified + output logic [1:0] ifu_bp_pc4_f, // pc4 indication, right justified + output logic [1:0] ifu_bp_valid_f, // branch valid, right justified + output logic [11:0] ifu_bp_poffset_f, // predicted target + + input logic scan_mode + ); + + localparam TAG_START=16+pt.BTB_BTAG_SIZE; + localparam PC4=4; + localparam BOFF=3; + localparam CALL=2; + localparam RET=1; + localparam BV=0; + + localparam LRU_SIZE=pt.BTB_ARRAY_DEPTH; + localparam NUM_BHT_LOOP = (pt.BHT_ARRAY_DEPTH > 16 ) ? 16 : pt.BHT_ARRAY_DEPTH; + localparam NUM_BHT_LOOP_INNER_HI = (pt.BHT_ARRAY_DEPTH > 16 ) ? pt.BHT_ADDR_LO+3 : pt.BHT_ADDR_HI; + localparam NUM_BHT_LOOP_OUTER_LO = (pt.BHT_ARRAY_DEPTH > 16 ) ? pt.BHT_ADDR_LO+4 : pt.BHT_ADDR_LO; + localparam BHT_NO_ADDR_MATCH = ( pt.BHT_ARRAY_DEPTH <= 16 ); + + logic exu_mp_valid_write; + logic exu_mp_ataken; + logic exu_mp_valid; // conditional branch mispredict + logic exu_mp_boffset; // branch offsett + logic exu_mp_pc4; // branch is a 4B inst + logic exu_mp_call; // branch is a call inst + logic exu_mp_ret; // branch is a ret inst + logic exu_mp_ja; // branch is a jump always + logic [1:0] exu_mp_hist; // new history + logic [11:0] exu_mp_tgt; // target offset + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address + logic dec_tlu_br0_v_wb; // WB stage history update + logic [1:0] dec_tlu_br0_hist_wb; // new history + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_tlu_br0_addr_wb; // addr + logic dec_tlu_br0_error_wb; // error; invalidate bank + logic dec_tlu_br0_start_error_wb; // error; invalidate all 4 banks in fg + logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_wb; + + logic use_mp_way, use_mp_way_p1; + logic [pt.RET_STACK_SIZE-1:0][31:0] rets_out, rets_in; + logic [pt.RET_STACK_SIZE-1:0] rsenable; + + + logic [11:0] btb_rd_tgt_f; + logic btb_rd_pc4_f, btb_rd_call_f, btb_rd_ret_f; 
+ logic [1:1] bp_total_branch_offset_f; + + logic [31:1] bp_btb_target_adder_f; + logic [31:1] bp_rs_call_target_f; + logic rs_push, rs_pop, rs_hold; + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_p1_f, btb_wr_addr, btb_rd_addr_f; + logic [pt.BTB_BTAG_SIZE-1:0] btb_wr_tag, fetch_rd_tag_f, fetch_rd_tag_p1_f; + logic [16+pt.BTB_BTAG_SIZE:0] btb_wr_data; + logic btb_wr_en_way0, btb_wr_en_way1; + + + logic dec_tlu_error_wb, btb_valid, dec_tlu_br0_middle_wb; + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_error_addr_wb; + + logic branch_error_collision_f, fetch_mp_collision_f, branch_error_collision_p1_f, fetch_mp_collision_p1_f; + + logic branch_error_bank_conflict_f; + logic [pt.BHT_GHR_SIZE-1:0] merged_ghr, fghr_ns, fghr; + logic [1:0] num_valids; + logic [LRU_SIZE-1:0] btb_lru_b0_f, btb_lru_b0_hold, btb_lru_b0_ns, + fetch_wrindex_dec, fetch_wrindex_p1_dec, fetch_wrlru_b0, fetch_wrlru_p1_b0, + mp_wrindex_dec, mp_wrlru_b0; + logic btb_lru_rd_f, btb_lru_rd_p1_f, lru_update_valid_f; + logic tag_match_way0_f, tag_match_way1_f; + logic [1:0] way_raw, bht_dir_f, btb_sel_f, wayhit_f, vwayhit_f, wayhit_p1_f; + logic [1:0] bht_valid_f, bht_force_taken_f; + + logic leak_one_f, leak_one_f_d1; + + logic [LRU_SIZE-1:0][16+pt.BTB_BTAG_SIZE:0] btb_bank0_rd_data_way0_out ; + + logic [LRU_SIZE-1:0][16+pt.BTB_BTAG_SIZE:0] btb_bank0_rd_data_way1_out ; + + logic [16+pt.BTB_BTAG_SIZE:0] btb_bank0_rd_data_way0_f ; + logic [16+pt.BTB_BTAG_SIZE:0] btb_bank0_rd_data_way1_f ; + + logic [16+pt.BTB_BTAG_SIZE:0] btb_bank0_rd_data_way0_p1_f ; + logic [16+pt.BTB_BTAG_SIZE:0] btb_bank0_rd_data_way1_p1_f ; + + logic [16+pt.BTB_BTAG_SIZE:0] btb_vbank0_rd_data_f, btb_vbank1_rd_data_f; + + logic final_h; + logic btb_fg_crossing_f; + logic middle_of_bank; + + + logic [1:0] bht_vbank0_rd_data_f, bht_vbank1_rd_data_f; + logic branch_error_bank_conflict_p1_f; + logic tag_match_way0_p1_f, tag_match_way1_p1_f; + + logic [1:0] btb_vlru_rd_f, fetch_start_f, tag_match_vway1_expanded_f, 
tag_match_way0_expanded_p1_f, tag_match_way1_expanded_p1_f; + logic [31:2] fetch_addr_p1_f; + + + logic exu_mp_way, exu_mp_way_f, dec_tlu_br0_way_wb, dec_tlu_way_wb, dec_tlu_way_wb_f; + logic [16+pt.BTB_BTAG_SIZE:0] btb_bank0e_rd_data_f, btb_bank0e_rd_data_p1_f; + + logic [16+pt.BTB_BTAG_SIZE:0] btb_bank0o_rd_data_f; + + logic [1:0] tag_match_way0_expanded_f, tag_match_way1_expanded_f; + + + logic [1:0] bht_bank0_rd_data_f; + logic [1:0] bht_bank1_rd_data_f; + logic [1:0] bht_bank0_rd_data_p1_f; +logic exu_flush_final_d1; + + assign exu_mp_valid = exu_mp_pkt.misp & ~leak_one_f; // conditional branch mispredict + assign exu_mp_boffset = exu_mp_pkt.boffset; // branch offset + assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst + assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst + assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst + assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always + assign exu_mp_way = exu_mp_pkt.way; // repl way + assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history + assign exu_mp_tgt[11:0] = exu_mp_pkt.toffset[11:0] ; // target offset + assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ; // BTB/BHT address + assign exu_mp_ataken = exu_mp_pkt.ataken; + + + assign dec_tlu_br0_v_wb = dec_tlu_br0_r_pkt.valid; + assign dec_tlu_br0_hist_wb[1:0] = dec_tlu_br0_r_pkt.hist[1:0]; + assign dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + assign dec_tlu_br0_error_wb = dec_tlu_br0_r_pkt.br_error; + assign dec_tlu_br0_middle_wb = dec_tlu_br0_r_pkt.middle; + assign dec_tlu_br0_way_wb = dec_tlu_br0_r_pkt.way; + assign dec_tlu_br0_start_error_wb = dec_tlu_br0_r_pkt.br_start_error; + assign exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0] = exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0]; + + + + + // ---------------------------------------------------------------------- + // READ + // 
---------------------------------------------------------------------- + + // hash the incoming fetch PC, first guess at hashing algorithm + el2_btb_addr_hash #(.pt(pt)) f1hash(.pc(ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])); + + + assign fetch_addr_p1_f[31:2] = ifc_fetch_addr_f[31:2] + 30'b1; + el2_btb_addr_hash #(.pt(pt)) f1hash_p1(.pc(fetch_addr_p1_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])); + + assign btb_sel_f[1] = ~bht_dir_f[0]; + assign btb_sel_f[0] = bht_dir_f[0]; + + assign fetch_start_f[1:0] = {ifc_fetch_addr_f[1], ~ifc_fetch_addr_f[1]}; + + // Errors colliding with fetches must kill the btb/bht hit. + + assign branch_error_collision_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]); + assign branch_error_collision_p1_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]); + + assign branch_error_bank_conflict_f = branch_error_collision_f & dec_tlu_error_wb; + assign branch_error_bank_conflict_p1_f = branch_error_collision_p1_f & dec_tlu_error_wb; + + assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) & + exu_mp_valid & ifc_fetch_req_f & + (exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]) + ); + assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) & + exu_mp_valid & ifc_fetch_req_f & + (exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]) + ); + // set on leak one, hold until next flush without leak one + assign leak_one_f = (dec_tlu_flush_leak_one_wb & dec_tlu_flush_lower_wb) | (leak_one_f_d1 & ~dec_tlu_flush_lower_wb); + + + rvdff #(4) coll_ff (.*, .clk(active_clk), + .din({exu_flush_final, exu_mp_way, 
dec_tlu_way_wb, leak_one_f}), + .dout({exu_flush_final_d1, exu_mp_way_f, dec_tlu_way_wb_f, leak_one_f_d1})); + + // 2 -way SA, figure out the way hit and mux accordingly + assign tag_match_way0_f = btb_bank0_rd_data_way0_f[BV] & (btb_bank0_rd_data_way0_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) & + ~(dec_tlu_way_wb_f & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f; + + assign tag_match_way1_f = btb_bank0_rd_data_way1_f[BV] & (btb_bank0_rd_data_way1_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) & + ~(dec_tlu_way_wb_f & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f; + + assign tag_match_way0_p1_f = btb_bank0_rd_data_way0_p1_f[BV] & (btb_bank0_rd_data_way0_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) & + ~(dec_tlu_way_wb_f & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f; + + assign tag_match_way1_p1_f = btb_bank0_rd_data_way1_p1_f[BV] & (btb_bank0_rd_data_way1_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) & + ~(dec_tlu_way_wb_f & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f; + + + // Both ways could hit, use the offset bit to reorder + + assign tag_match_way0_expanded_f[1:0] = {tag_match_way0_f & (btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4]), + tag_match_way0_f & ~(btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4])}; + + assign tag_match_way1_expanded_f[1:0] = {tag_match_way1_f & (btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4]), + tag_match_way1_f & ~(btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4])}; + + assign tag_match_way0_expanded_p1_f[1:0] = {tag_match_way0_p1_f & (btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4]), + tag_match_way0_p1_f & ~(btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4])}; + + assign tag_match_way1_expanded_p1_f[1:0] = {tag_match_way1_p1_f & (btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4]), + 
tag_match_way1_p1_f & ~(btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4])}; + + assign wayhit_f[1:0] = tag_match_way0_expanded_f[1:0] | tag_match_way1_expanded_f[1:0]; + assign wayhit_p1_f[1:0] = tag_match_way0_expanded_p1_f[1:0] | tag_match_way1_expanded_p1_f[1:0]; + + assign btb_bank0o_rd_data_f[16+pt.BTB_BTAG_SIZE:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[1]}} & btb_bank0_rd_data_way0_f[16+pt.BTB_BTAG_SIZE:0]) | + ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[1]}} & btb_bank0_rd_data_way1_f[16+pt.BTB_BTAG_SIZE:0]) ); + assign btb_bank0e_rd_data_f[16+pt.BTB_BTAG_SIZE:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[0]}} & btb_bank0_rd_data_way0_f[16+pt.BTB_BTAG_SIZE:0]) | + ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[0]}} & btb_bank0_rd_data_way1_f[16+pt.BTB_BTAG_SIZE:0]) ); + + assign btb_bank0e_rd_data_p1_f[16+pt.BTB_BTAG_SIZE:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_p1_f[0]}} & btb_bank0_rd_data_way0_p1_f[16+pt.BTB_BTAG_SIZE:0]) | + ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_p1_f[0]}} & btb_bank0_rd_data_way1_p1_f[16+pt.BTB_BTAG_SIZE:0]) ); + + // virtual bank order + + assign btb_vbank0_rd_data_f[16+pt.BTB_BTAG_SIZE:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} & btb_bank0e_rd_data_f[16+pt.BTB_BTAG_SIZE:0]) | + ({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} & btb_bank0o_rd_data_f[16+pt.BTB_BTAG_SIZE:0]) ); + assign btb_vbank1_rd_data_f[16+pt.BTB_BTAG_SIZE:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} & btb_bank0o_rd_data_f[16+pt.BTB_BTAG_SIZE:0]) | + ({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} & btb_bank0e_rd_data_p1_f[16+pt.BTB_BTAG_SIZE:0]) ); + + + // -------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------- + // update lru + // mp + + // create a onehot lru write vector + assign mp_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + + // fetch 
+ assign fetch_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + assign fetch_wrindex_p1_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + + assign mp_wrlru_b0[LRU_SIZE-1:0] = mp_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{exu_mp_valid}}; + + genvar j, i; + + + assign lru_update_valid_f = (vwayhit_f[0] | vwayhit_f[1]) & ifc_fetch_req_f & ~leak_one_f; + + + assign fetch_wrlru_b0[LRU_SIZE-1:0] = fetch_wrindex_dec[LRU_SIZE-1:0] & + {LRU_SIZE{lru_update_valid_f}}; + assign fetch_wrlru_p1_b0[LRU_SIZE-1:0] = fetch_wrindex_p1_dec[LRU_SIZE-1:0] & + {LRU_SIZE{lru_update_valid_f}}; + + assign btb_lru_b0_hold[LRU_SIZE-1:0] = ~mp_wrlru_b0[LRU_SIZE-1:0] & ~fetch_wrlru_b0[LRU_SIZE-1:0]; + + // Forward the mp lru information to the fetch, avoids multiple way hits later + assign use_mp_way = fetch_mp_collision_f; + assign use_mp_way_p1 = fetch_mp_collision_p1_f; + + + assign btb_lru_b0_ns[LRU_SIZE-1:0] = ( (btb_lru_b0_hold[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]) | + (mp_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{~exu_mp_way}}) | + (fetch_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_f}}) | + (fetch_wrlru_p1_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_p1_f}}) ); + + assign btb_lru_rd_f = use_mp_way ? exu_mp_way_f : |(fetch_wrindex_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]); + + assign btb_lru_rd_p1_f = use_mp_way_p1 ? 
exu_mp_way_f : |(fetch_wrindex_p1_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]); + + // rotated + assign btb_vlru_rd_f[1:0] = ( ({2{fetch_start_f[0]}} & {btb_lru_rd_f, btb_lru_rd_f}) | + ({2{fetch_start_f[1]}} & {btb_lru_rd_p1_f, btb_lru_rd_f})); + + assign tag_match_vway1_expanded_f[1:0] = ( ({2{fetch_start_f[0]}} & {tag_match_way1_expanded_f[1:0]}) | + ({2{fetch_start_f[1]}} & {tag_match_way1_expanded_p1_f[0], tag_match_way1_expanded_f[1]}) ); + + assign way_raw[1:0] = tag_match_vway1_expanded_f[1:0] | (~vwayhit_f[1:0] & btb_vlru_rd_f[1:0]); + + rvdffe #(LRU_SIZE) btb_lru_ff (.*, .en(ifc_fetch_req_f | exu_mp_valid), + .din(btb_lru_b0_ns[(LRU_SIZE)-1:0]), + .dout(btb_lru_b0_f[(LRU_SIZE)-1:0])); + + // Detect end of cache line and mask as needed + logic eoc_near; + logic eoc_mask; + assign eoc_near = &ifc_fetch_addr_f[pt.ICACHE_BEAT_ADDR_HI:3]; + assign eoc_mask = ~eoc_near| (|(~ifc_fetch_addr_f[2:1])); + + + assign vwayhit_f[1:0] = ( ({2{fetch_start_f[0]}} & {wayhit_f[1:0]}) | + ({2{fetch_start_f[1]}} & {wayhit_p1_f[0], wayhit_f[1]})) & {eoc_mask, 1'b1}; + + // -------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------- + + // mux out critical hit bank for pc computation + // This is only useful for the first taken branch in the fetch group + logic [16:1] btb_sel_data_f; + + assign btb_rd_tgt_f[11:0] = btb_sel_data_f[16:5]; + assign btb_rd_pc4_f = btb_sel_data_f[4]; + assign btb_rd_call_f = btb_sel_data_f[2]; + assign btb_rd_ret_f = btb_sel_data_f[1]; + + assign btb_sel_data_f[16:1] = ( ({16{btb_sel_f[1]}} & btb_vbank1_rd_data_f[16:1]) | + ({16{btb_sel_f[0]}} & btb_vbank0_rd_data_f[16:1]) ); + + + logic [1:0] hist0_raw, hist1_raw, pc4_raw, pret_raw; + + // a valid taken target needs to kill the next fetch as we compute the target address + assign ifu_bp_hit_taken_f = |(vwayhit_f[1:0] & hist1_raw[1:0]) & ifc_fetch_req_f & ~leak_one_f_d1 & 
~dec_tlu_bpred_disable; + + + // Don't put calls/rets/ja in the predictor, force the bht taken instead + assign bht_force_taken_f[1:0] = {(btb_vbank1_rd_data_f[CALL] | btb_vbank1_rd_data_f[RET]), + (btb_vbank0_rd_data_f[CALL] | btb_vbank0_rd_data_f[RET])}; + + + // taken and valid, otherwise, branch errors must clear the bht + assign bht_valid_f[1:0] = vwayhit_f[1:0]; + + assign bht_vbank0_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank0_rd_data_f[1:0]) | + ({2{fetch_start_f[1]}} & bht_bank1_rd_data_f[1:0]) ); + + assign bht_vbank1_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank1_rd_data_f[1:0]) | + ({2{fetch_start_f[1]}} & bht_bank0_rd_data_p1_f[1:0]) ); + + + assign bht_dir_f[1:0] = {(bht_force_taken_f[1] | bht_vbank1_rd_data_f[1]) & bht_valid_f[1], + (bht_force_taken_f[0] | bht_vbank0_rd_data_f[1]) & bht_valid_f[0]}; + + assign ifu_bp_inst_mask_f = (ifu_bp_hit_taken_f & btb_sel_f[1]) | ~ifu_bp_hit_taken_f; + + + + + // Branch prediction info is sent with the 2byte lane associated with the end of the branch. 
+ // Cases + // BANK1 BANK0 + // ------------------------------- + // | : | : | + // ------------------------------- + // <------------> : PC4 branch, offset, should be in B1 (indicated on [2]) + // <------------> : PC4 branch, no offset, indicate PC4, VALID, HIST on [1] + // <------------> : PC4 branch, offset, indicate PC4, VALID, HIST on [0] + // <------> : PC2 branch, offset, indicate VALID, HIST on [1] + // <------> : PC2 branch, no offset, indicate VALID, HIST on [0] + // + + + + assign hist1_raw[1:0] = bht_force_taken_f[1:0] | {bht_vbank1_rd_data_f[1], + bht_vbank0_rd_data_f[1]}; + + assign hist0_raw[1:0] = {bht_vbank1_rd_data_f[0], + bht_vbank0_rd_data_f[0]}; + + + assign pc4_raw[1:0] = {vwayhit_f[1] & btb_vbank1_rd_data_f[PC4], + vwayhit_f[0] & btb_vbank0_rd_data_f[PC4]}; + + assign pret_raw[1:0] = {vwayhit_f[1] & ~btb_vbank1_rd_data_f[CALL] & btb_vbank1_rd_data_f[RET], + vwayhit_f[0] & ~btb_vbank0_rd_data_f[CALL] & btb_vbank0_rd_data_f[RET]}; + + // GHR + + + // count the valids with masking based on first taken + assign num_valids[1:0] = countones(bht_valid_f[1:0]); + + // Note that the following property holds + // P: prior ghr, H: history bit of last valid branch in line (could be 1 or 0) + // Num valid branches What new GHR must be + // 2 0H + // 1 PH + // 0 PP + + assign final_h = |(btb_sel_f[1:0] & bht_dir_f[1:0]); + + assign merged_ghr[pt.BHT_GHR_SIZE-1:0] = ( + ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h2}} & {fghr[pt.BHT_GHR_SIZE-3:0], 1'b0, final_h}) | // 0H + ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h1}} & {fghr[pt.BHT_GHR_SIZE-2:0], final_h}) | // PH + ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h0}} & {fghr[pt.BHT_GHR_SIZE-1:0]}) ); // PP + + logic [pt.BHT_GHR_SIZE-1:0] exu_flush_ghr; + assign exu_flush_ghr[pt.BHT_GHR_SIZE-1:0] = exu_mp_fghr[pt.BHT_GHR_SIZE-1:0]; + + assign fghr_ns[pt.BHT_GHR_SIZE-1:0] = ( ({pt.BHT_GHR_SIZE{exu_flush_final_d1}} & exu_flush_ghr[pt.BHT_GHR_SIZE-1:0]) | + ({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ifc_fetch_req_f & ic_hit_f 
& ~leak_one_f_d1}} & merged_ghr[pt.BHT_GHR_SIZE-1:0]) | + ({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ~(ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1)}} & fghr[pt.BHT_GHR_SIZE-1:0])); + + rvdff #(pt.BHT_GHR_SIZE) fetchghr (.*, .clk(active_clk), .din(fghr_ns[pt.BHT_GHR_SIZE-1:0]), .dout(fghr[pt.BHT_GHR_SIZE-1:0])); + assign ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0] = fghr[pt.BHT_GHR_SIZE-1:0]; + + + assign ifu_bp_way_f[1:0] = way_raw[1:0]; + assign ifu_bp_hist1_f[1:0] = hist1_raw[1:0]; + assign ifu_bp_hist0_f[1:0] = hist0_raw[1:0]; + assign ifu_bp_pc4_f[1:0] = pc4_raw[1:0]; + + assign ifu_bp_valid_f[1:0] = vwayhit_f[1:0] & ~{2{dec_tlu_bpred_disable}}; + assign ifu_bp_ret_f[1:0] = pret_raw[1:0]; + + + // compute target + // Form the fetch group offset based on the btb hit location and the location of the branch within the 4 byte chunk + +// .i 5 +// .o 3 +// .ilb bht_dir_f[1] bht_dir_f[0] fetch_start_f[1] fetch_start_f[0] btb_rd_pc4_f +// .ob bloc_f[1] bloc_f[0] use_fa_plus +// .type fr +// +// +// ## rotdir[1:0] fs pc4 off fapl +// -1 01 - 01 0 +// 10 01 - 10 0 +// +// -1 10 - 10 0 +// 10 10 0 01 1 +// 10 10 1 01 0 +logic [1:0] bloc_f; +logic use_fa_plus; +assign bloc_f[1] = (bht_dir_f[0] & ~fetch_start_f[0]) | (~bht_dir_f[0] + & fetch_start_f[0]); +assign bloc_f[0] = (bht_dir_f[0] & fetch_start_f[0]) | (~bht_dir_f[0] + & ~fetch_start_f[0]); +assign use_fa_plus = (~bht_dir_f[0] & ~fetch_start_f[0] & ~btb_rd_pc4_f); + + + + + assign btb_fg_crossing_f = fetch_start_f[0] & btb_sel_f[0] & btb_rd_pc4_f; + + assign bp_total_branch_offset_f = bloc_f[1] ^ btb_rd_pc4_f; + + logic [31:2] adder_pc_in_f, ifc_fetch_adder_prior; + rvdffe #(30) faddrf_ff (.*, .en(ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f), .din(ifc_fetch_addr_f[31:2]), .dout(ifc_fetch_adder_prior[31:2])); + + assign ifu_bp_poffset_f[11:0] = btb_rd_tgt_f[11:0]; + + assign adder_pc_in_f[31:2] = ( ({30{ use_fa_plus}} & fetch_addr_p1_f[31:2]) | + ({30{ btb_fg_crossing_f}} & ifc_fetch_adder_prior[31:2]) | + 
({30{~btb_fg_crossing_f & ~use_fa_plus}} & ifc_fetch_addr_f[31:2])); + // rvbradder is defined outside this view; here it is fed the selected pc and the 12-bit offset read from the BTB, and its output is the predicted target unless the return stack overrides it below + rvbradder predtgt_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}), + .offset(btb_rd_tgt_f[11:0]), + .dout(bp_btb_target_adder_f[31:1]) + ); + // mux in the return stack address here for a predicted return assuming the RS is valid + assign ifu_bp_btb_target_f[31:1] = (btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0]) ? rets_out[0][31:1] : bp_btb_target_adder_f[31:1]; + + + // ---------------------------------------------------------------------- + // Return Stack + // ---------------------------------------------------------------------- + // second rvbradder on the same pc with offset {11'b0, ~btb_rd_pc4_f}; its result is the value a push writes into entry 0 + rvbradder rs_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}), + .offset({11'b0, ~btb_rd_pc4_f}), + .dout(bp_rs_call_target_f[31:1]) + ); + // push on a taken hit flagged call and not ret, pop on a taken hit flagged ret and not call, hold otherwise + assign rs_push = (btb_rd_call_f & ~btb_rd_ret_f & ifu_bp_hit_taken_f); + assign rs_pop = (btb_rd_ret_f & ~btb_rd_call_f & ifu_bp_hit_taken_f); + assign rs_hold = ~rs_push & ~rs_pop; + + + // entry 0 is the one read for predicted returns: loaded with {bp_rs_call_target_f, valid=1} on push and with entry 1 on pop + // Fetch based (bit 0 is a valid) + assign rets_in[0][31:0] = ( ({32{rs_push}} & {bp_rs_call_target_f[31:1], 1'b1}) | // target[31:1], valid + ({32{rs_pop}} & rets_out[1][31:0]) ); + + assign rsenable[0] = ~rs_hold; + // entries above 0 take entry i-1 on push and entry i+1 on pop; every entry is held in an rvdffe instance enabled by rsenable[i] + for (i=0; i<32'(pt.RET_STACK_SIZE); i++) begin : retstack + // NOTE(review): i==0 falls through both branches only when RET_STACK_SIZE is at least 2; with a size of 1 the first branch would also drive rets_in[0] and rsenable[0] and would index rets_out[i-1] out of range -- confirm the parameter is constrained + // for the last entry in the stack, we don't have a pop position + if(i==pt.RET_STACK_SIZE-1) begin + assign rets_in[i][31:0] = rets_out[i-1][31:0]; + assign rsenable[i] = rs_push; + end + else if(i>0) begin + assign rets_in[i][31:0] = ( ({32{rs_push}} & rets_out[i-1][31:0]) | + ({32{rs_pop}} & rets_out[i+1][31:0]) ); + assign rsenable[i] = rs_push | rs_pop; + end + rvdffe #(32) rets_ff (.*, .en(rsenable[i]), .din(rets_in[i][31:0]), .dout(rets_out[i][31:0])); + + end : retstack + + // ---------------------------------------------------------------------- + // WRITE + // ---------------------------------------------------------------------- + + // a commit-time error of either kind (start error or branch error) is treated the same way by the write logic + assign dec_tlu_error_wb = dec_tlu_br0_start_error_wb | dec_tlu_br0_error_wb; + + assign 
btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + + assign dec_tlu_way_wb = dec_tlu_br0_way_wb; + // valid bit stored with a BTB write: set only for a mispredict update with no commit-time error, so an error write stores a cleared valid bit + assign btb_valid = exu_mp_valid & ~dec_tlu_error_wb; + + assign btb_wr_tag[pt.BTB_BTAG_SIZE-1:0] = exu_mp_btag[pt.BTB_BTAG_SIZE-1:0]; + // read-side tag hashes for ifc_fetch_addr_f and fetch_addr_p1_f; the folded variant is fed 2*BTB_BTAG_SIZE pc bits, the other variant 3*BTB_BTAG_SIZE +if(pt.BTB_BTAG_FOLD) begin : btbfold + el2_btb_tag_hash_fold #(.pt(pt)) rdtagf (.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]), .pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]})); + el2_btb_tag_hash_fold #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]), .pc({fetch_addr_p1_f[ pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]})); +end +else begin + el2_btb_tag_hash #(.pt(pt)) rdtagf(.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]), .pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]})); + el2_btb_tag_hash #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]), .pc({fetch_addr_p1_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]})); +end + // entry layout, msb first: tag, toffset[11:0], pc4, boffset, call, ret, valid; a jump-always sets both the call and the ret bit + assign btb_wr_data[16+pt.BTB_BTAG_SIZE:0] = {btb_wr_tag[pt.BTB_BTAG_SIZE-1:0], exu_mp_tgt[11:0], exu_mp_pc4, exu_mp_boffset, exu_mp_call | exu_mp_ja, exu_mp_ret | exu_mp_ja, btb_valid} ; + // mispredict writes additionally require exu_mp_ataken; a pending commit-time error overrides them and selects the way (and, below, the address) from the error report + assign exu_mp_valid_write = exu_mp_valid & exu_mp_ataken; + assign btb_wr_en_way0 = ( ({{~exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) | + ({{~dec_tlu_way_wb & dec_tlu_error_wb}})); + + assign btb_wr_en_way1 = ( ({{exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) | + ({{dec_tlu_way_wb & dec_tlu_error_wb}})); + assign btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_error_wb ? 
btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] : exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + + logic [1:0] bht_wr_data0, bht_wr_data2; + logic [1:0] bht_wr_en0, bht_wr_en2; + + assign middle_of_bank = exu_mp_pc4 ^ exu_mp_boffset; + assign bht_wr_en0[1:0] = {2{exu_mp_valid & ~exu_mp_call & ~exu_mp_ret & ~exu_mp_ja}} & {middle_of_bank, ~middle_of_bank}; + assign bht_wr_en2[1:0] = {2{dec_tlu_br0_v_wb}} & {dec_tlu_br0_middle_wb, ~dec_tlu_br0_middle_wb} ; + + // Experiments show this is the best priority scheme for same bank/index writes at the same time. + assign bht_wr_data0[1:0] = exu_mp_hist[1:0]; // lowest priority + assign bht_wr_data2[1:0] = dec_tlu_br0_hist_wb[1:0]; // highest priority + + + + logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] bht_rd_addr_f, bht_rd_addr_p1_f, bht_wr_addr0, bht_wr_addr2; + + logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] mp_hashed, br0_hashed_wb, bht_rd_addr_hashed_f, bht_rd_addr_hashed_p1_f; + el2_btb_ghr_hash #(.pt(pt)) mpghrhs (.hashin(exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_mp_eghr[pt.BHT_GHR_SIZE-1:0]), .hash(mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO])); + el2_btb_ghr_hash #(.pt(pt)) br0ghrhs (.hashin(dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0]), .hash(br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO])); + el2_btb_ghr_hash #(.pt(pt)) fghrhs (.hashin(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO])); + el2_btb_ghr_hash #(.pt(pt)) fghrhs_p1 (.hashin(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO])); + + assign bht_wr_addr0[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]; + assign bht_wr_addr2[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]; + assign bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]; 
+ assign bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]; + + + // ---------------------------------------------------------------------- + // Structures. Using FLOPS + // ---------------------------------------------------------------------- + // BTB + // Entry -> tag[pt.BTB_BTAG_SIZE-1:0], toffset[11:0], pc4, boffset, call, ret, valid + + // one rvdffe instance per BTB index and way (LRU_SIZE entries per way); an entry loads btb_wr_data when btb_wr_addr equals its index and that way's write enable is set + for (j=0 ; j<32'(LRU_SIZE) ; j++) begin : BTB_FLOPS + // Way 0 + rvdffe #(17+pt.BTB_BTAG_SIZE) btb_bank0_way0 (.*, + .en(((btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == j) & btb_wr_en_way0)), + .din (btb_wr_data[16+pt.BTB_BTAG_SIZE:0]), + .dout (btb_bank0_rd_data_way0_out[j])); + + // Way 1 + rvdffe #(17+pt.BTB_BTAG_SIZE) btb_bank0_way1 (.*, + .en(((btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == j) & btb_wr_en_way1)), + .din (btb_wr_data[16+pt.BTB_BTAG_SIZE:0]), + .dout (btb_bank0_rd_data_way1_out[j])); + + end + + // read mux: outputs default to zero, then take both ways of the entry indexed by btb_rd_addr_f and, for the _p1 outputs, by btb_rd_addr_p1_f + always_comb begin : BTB_rd_mux + btb_bank0_rd_data_way0_f[16+pt.BTB_BTAG_SIZE:0] = '0 ; + btb_bank0_rd_data_way1_f[16+pt.BTB_BTAG_SIZE:0] = '0 ; + btb_bank0_rd_data_way0_p1_f[16+pt.BTB_BTAG_SIZE:0] = '0 ; + btb_bank0_rd_data_way1_p1_f[16+pt.BTB_BTAG_SIZE:0] = '0 ; + // the loop variable is cast to the read-address width before each comparison + for (int j=0; j< LRU_SIZE; j++) begin + if (btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == (pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1)'(j)) begin + + btb_bank0_rd_data_way0_f[16+pt.BTB_BTAG_SIZE:0] = btb_bank0_rd_data_way0_out[j]; + btb_bank0_rd_data_way1_f[16+pt.BTB_BTAG_SIZE:0] = btb_bank0_rd_data_way1_out[j]; + + end + end + for (int j=0; j< LRU_SIZE; j++) begin + if (btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == (pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1)'(j)) begin + + btb_bank0_rd_data_way0_p1_f[16+pt.BTB_BTAG_SIZE:0] = btb_bank0_rd_data_way0_out[j]; + btb_bank0_rd_data_way1_p1_f[16+pt.BTB_BTAG_SIZE:0] = btb_bank0_rd_data_way1_out[j]; + + end + end + end + + //----------------------------------------------------------------------------- + // BHT + // 2 bit Entry -> direction, strength + // + 
//----------------------------------------------------------------------------- + + logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0][1:0] bht_bank_wr_data ; + logic [1:0] [pt.BHT_ARRAY_DEPTH-1:0] [1:0] bht_bank_rd_data_out ; + logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0] bht_bank_clken ; + logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0] bht_bank_clk ; + logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0] bht_bank_sel ; + + for ( i=0; i<2; i++) begin : BANKS + for (genvar k=0 ; k < 32'((pt.BHT_ARRAY_DEPTH)/NUM_BHT_LOOP) ; k++) begin : BHT_CLK_GROUP + assign bht_bank_clken[i][k] = (bht_wr_en0[i] & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) | + (bht_wr_en2[i] & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)); + + rvclkhdr bht_bank_grp_cgc ( .en(bht_bank_clken[i][k]), .l1clk(bht_bank_clk[i][k]), .* ); + + for (j=0 ; j cdecode.e + +// 2) espresso -Dso -oeqntott cdecode.e | addassign > compress_equations + +// to generate the legal (16b compressed instruction is legal) equation below: + +// 1) coredecode -in cdecode -legal > clegal.e + +// 2) espresso -Dso -oeqntott clegal.e | addassign > clegal_equation + + + + + +// espresso decodes +assign rdrd = (!i[14]&i[6]&i[1]) | (!i[15]&i[14]&i[11]&i[0]) | (!i[14]&i[5]&i[1]) | ( + !i[15]&i[14]&i[10]&i[0]) | (!i[14]&i[4]&i[1]) | (!i[15]&i[14]&i[9] + &i[0]) | (!i[14]&i[3]&i[1]) | (!i[15]&i[14]&!i[8]&i[0]) | (!i[14] + &i[2]&i[1]) | (!i[15]&i[14]&i[7]&i[0]) | (!i[15]&i[1]) | (!i[15] + &!i[13]&i[0]); + +assign rdrs1 = (!i[14]&i[12]&i[11]&i[1]) | (!i[14]&i[12]&i[10]&i[1]) | (!i[14] + &i[12]&i[9]&i[1]) | (!i[14]&i[12]&i[8]&i[1]) | (!i[14]&i[12]&i[7] + &i[1]) | (!i[14]&!i[12]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14] + &i[12]&i[6]&i[1]) | (!i[14]&i[12]&i[5]&i[1]) | (!i[14]&i[12]&i[4] + &i[1]) | (!i[14]&i[12]&i[3]&i[1]) | (!i[14]&i[12]&i[2]&i[1]) | ( + !i[15]&!i[14]&!i[13]&i[0]) | (!i[15]&!i[14]&i[1]); + +assign 
rs2rs2 = (i[15]&i[6]&i[1]) | (i[15]&i[5]&i[1]) | (i[15]&i[4]&i[1]) | ( + i[15]&i[3]&i[1]) | (i[15]&i[2]&i[1]) | (i[15]&i[14]&i[1]); + +assign rdprd = (i[15]&!i[14]&!i[13]&i[0]); + +assign rdprs1 = (i[15]&!i[13]&i[0]) | (i[15]&i[14]&i[0]) | (i[14]&!i[1]&!i[0]); + +assign rs2prs2 = (i[15]&!i[14]&!i[13]&i[11]&i[10]&i[0]) | (i[15]&!i[1]&!i[0]); + +assign rs2prd = (!i[15]&!i[1]&!i[0]); + +assign uimm9_2 = (!i[14]&!i[1]&!i[0]); + +assign ulwimm6_2 = (!i[15]&i[14]&!i[1]&!i[0]); + +assign ulwspimm7_2 = (!i[15]&i[14]&i[1]); + +assign rdeq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]); + +assign rdeq1 = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14] + &i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9] + &!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5] + &!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3] + &!i[2]&i[1]) | (!i[15]&!i[14]&i[13]); + +assign rs1eq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]) | (i[14] + &i[1]) | (!i[14]&!i[1]&!i[0]); + +assign sbroffset8_1 = (i[15]&i[14]&i[0]); + +assign simm9_4 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]); + +assign simm5_0 = (!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (!i[15]&!i[13]&i[0]); + +assign sjaloffset11_1 = (!i[14]&i[13]); + +assign sluimm17_12 = (!i[15]&i[14]&i[13]&i[7]) | (!i[15]&i[14]&i[13]&!i[8]) | ( + !i[15]&i[14]&i[13]&i[9]) | (!i[15]&i[14]&i[13]&i[10]) | (!i[15]&i[14] + &i[13]&i[11]); + +assign uimm5_0 = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (!i[15]&!i[14]&i[1]); + +assign uswimm6_2 = (i[15]&!i[1]&!i[0]); + +assign uswspimm7_2 = (i[15]&i[14]&i[1]); + +assign o[31] = 1'b0; + +assign o[30] = (i[15]&!i[14]&!i[13]&i[10]&!i[6]&!i[5]&i[0]) | (i[15]&!i[14] + &!i[13]&!i[11]&i[10]&i[0]); + +assign o[29] = 1'b0; + +assign o[28] = 1'b0; + +assign o[27] = 1'b0; + +assign o[26] = 1'b0; + +assign o[25] = 1'b0; + +assign o[24] = 1'b0; + +assign o[23] = 1'b0; + +assign o[22] = 1'b0; + +assign o[21] = 1'b0; + +assign o[20] = 
(!i[14]&i[12]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[6]&!i[5]&!i[4] + &!i[3]&!i[2]&i[1]); + +assign o[19] = 1'b0; + +assign o[18] = 1'b0; + +assign o[17] = 1'b0; + +assign o[16] = 1'b0; + +assign o[15] = 1'b0; + +assign o[14] = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (i[15]&!i[14]&!i[13]&!i[10] + &i[0]) | (i[15]&!i[14]&!i[13]&i[6]&i[0]) | (i[15]&!i[14]&!i[13]&i[5] + &i[0]); + +assign o[13] = (i[15]&!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (i[15]&!i[14]&!i[13] + &i[11]&i[6]&i[0]) | (i[14]&!i[0]); + +assign o[12] = (i[15]&!i[14]&!i[13]&i[6]&i[5]&i[0]) | (i[15]&!i[14]&!i[13]&!i[11] + &i[0]) | (i[15]&!i[14]&!i[13]&!i[10]&i[0]) | (!i[15]&!i[14]&i[1]) | ( + i[15]&i[14]&i[13]); + +assign o[11] = 1'b0; + +assign o[10] = 1'b0; + +assign o[9] = 1'b0; + +assign o[8] = 1'b0; + +assign o[7] = 1'b0; + +assign o[6] = (i[15]&!i[14]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&!i[0]) | (!i[14]&i[13]) | ( + i[15]&i[14]&i[0]); + +assign o[5] = (i[15]&!i[0]) | (i[15]&i[11]&i[10]) | (i[13]&!i[8]) | (i[13]&i[7]) | ( + i[13]&i[9]) | (i[13]&i[10]) | (i[13]&i[11]) | (!i[14]&i[13]) | ( + i[15]&i[14]); + +assign o[4] = (!i[14]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[0]) | (!i[15]&!i[14] + &!i[0]) | (!i[14]&i[6]&!i[0]) | (!i[15]&i[14]&i[0]) | (!i[14]&i[5] + &!i[0]) | (!i[14]&i[4]&!i[0]) | (!i[14]&!i[13]&i[0]) | (!i[14]&i[3] + &!i[0]) | (!i[14]&i[2]&!i[0]); + +assign o[3] = (!i[14]&i[13]); + +assign o[2] = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14] + &i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9] + &!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5] + &!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3] + &!i[2]&i[1]) | (i[15]&!i[14]&!i[12]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2] + &!i[0]) | (!i[15]&i[13]&!i[8]) | (!i[15]&i[13]&i[7]) | (!i[15]&i[13] + &i[9]) | (!i[15]&i[13]&i[10]) | (!i[15]&i[13]&i[11]) | (!i[14]&i[13]); + +// 32b instruction has lower two bits 2'b11 + +assign o[1] = 1'b1; + +assign o[0] = 1'b1; + +assign legal = 
(!i[13]&!i[12]&i[11]&i[1]&!i[0]) | (!i[13]&!i[12]&i[6]&i[1]&!i[0]) | ( + !i[15]&!i[13]&i[11]&!i[1]) | (!i[13]&!i[12]&i[5]&i[1]&!i[0]) | ( + !i[13]&!i[12]&i[10]&i[1]&!i[0]) | (!i[15]&!i[13]&i[6]&!i[1]) | ( + i[15]&!i[12]&!i[1]&i[0]) | (!i[13]&!i[12]&i[9]&i[1]&!i[0]) | (!i[12] + &i[6]&!i[1]&i[0]) | (!i[15]&!i[13]&i[5]&!i[1]) | (!i[13]&!i[12]&i[8] + &i[1]&!i[0]) | (!i[12]&i[5]&!i[1]&i[0]) | (!i[15]&!i[13]&i[10]&!i[1]) | ( + !i[13]&!i[12]&i[7]&i[1]&!i[0]) | (i[12]&i[11]&!i[10]&!i[1]&i[0]) | ( + !i[15]&!i[13]&i[9]&!i[1]) | (!i[13]&!i[12]&i[4]&i[1]&!i[0]) | (i[13] + &i[12]&!i[1]&i[0]) | (!i[15]&!i[13]&i[8]&!i[1]) | (!i[13]&!i[12]&i[3] + &i[1]&!i[0]) | (i[13]&i[4]&!i[1]&i[0]) | (!i[13]&!i[12]&i[2]&i[1] + &!i[0]) | (!i[15]&!i[13]&i[7]&!i[1]) | (i[13]&i[3]&!i[1]&i[0]) | ( + i[13]&i[2]&!i[1]&i[0]) | (i[14]&!i[13]&!i[1]) | (!i[14]&!i[12]&!i[1] + &i[0]) | (i[15]&!i[13]&i[12]&i[1]&!i[0]) | (!i[15]&!i[13]&!i[12]&i[1] + &!i[0]) | (!i[15]&!i[13]&i[12]&!i[1]) | (i[14]&!i[13]&!i[0]); + + + + +endmodule diff --git a/design/ifu/el2_ifu_ic_mem.sv b/design/ifu/el2_ifu_ic_mem.sv new file mode 100644 index 0000000..22ac22d --- /dev/null +++ b/design/ifu/el2_ifu_ic_mem.sv @@ -0,0 +1,1802 @@ +//******************************************************************************** +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//******************************************************************************** +//////////////////////////////////////////////////// +// ICACHE DATA & TAG MODULE WRAPPER // +///////////////////////////////////////////////////// +module el2_ifu_ic_mem // Wrapper that instantiates the Icache tag array (EL2_IC_TAG) and the Icache data array (EL2_IC_DATA) +import el2_pkg::*; + #( +`include "el2_param.vh" + ) + ( + input logic clk, + input logic rst_l, + input logic clk_override, + input logic dec_tlu_core_ecc_disable, // Disable the ecc checking and flagging + + input logic [31:1] ic_rw_addr, // Read/Write address to the Icache. + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en , // Which way to write + input logic ic_rd_en , // Read enable + input logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Debug read/write address to the Icache. + input logic ic_debug_rd_en, // Icache debug rd + input logic ic_debug_wr_en, // Icache debug wr + input logic ic_debug_tag_array, // Debug tag array + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. + input logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. + input logic ic_sel_premux_data, // Select the pre_muxed data + + input logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC + output logic [63:0] ic_rd_data , // Data read from Icache, 64 bits wide. NOTE(review): older text said 2x64bits plus parity/ECC, which this port width does not show - confirm. + output logic [70:0] ic_debug_rd_data , // Debug data read from Icache, 71 bits. NOTE(review): presumably 64 data bits plus check bits - confirm. + output logic [25:0] ictag_debug_rd_data,// Debug icache tag. + input logic [70:0] ic_debug_wr_data, // Debug wr cache. + + output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // ecc error per bank + output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, // parity error per bank. NOTE(review): comment previously duplicated the ecc text - confirm. 
+ input logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, // Valid from the I$ tag valid outside (in flops). 
+ + output logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, // Tag compare hit, one bit per way + output logic ic_tag_perr, // Tag Parity error + input logic scan_mode + ) ; + + + EL2_IC_TAG #(.pt(pt)) ic_tag_inst + ( + .*, // every port not listed below connects by matching name + .ic_wr_en (ic_wr_en[pt.ICACHE_NUM_WAYS-1:0]), + .ic_debug_addr(ic_debug_addr[pt.ICACHE_INDEX_HI:3]), + .ic_rw_addr (ic_rw_addr[31:3]) // tag array is addressed with bits [31:3] + ) ; + + EL2_IC_DATA #(.pt(pt)) ic_data_inst + ( + .*, // every port not listed below connects by matching name + .ic_wr_en (ic_wr_en[pt.ICACHE_NUM_WAYS-1:0]), + .ic_debug_addr(ic_debug_addr[pt.ICACHE_INDEX_HI:3]), + .ic_rw_addr (ic_rw_addr[pt.ICACHE_INDEX_HI:1]) // data array is addressed with bits [ICACHE_INDEX_HI:1] + ) ; + + endmodule + + +///////////////////////////////////////////////// +////// ICACHE DATA MODULE //////////////////// +///////////////////////////////////////////////// +module EL2_IC_DATA +import el2_pkg::*; +#( +`include "el2_param.vh" + ) + ( + input logic clk, + input logic rst_l, + input logic clk_override, + + input logic [pt.ICACHE_INDEX_HI:1] ic_rw_addr, + input logic [pt.ICACHE_NUM_WAYS-1:0]ic_wr_en, + input logic ic_rd_en, // Read enable + + input logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC + output logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [70:0] ic_debug_wr_data, // Debug wr cache. + output logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, + output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // ecc error per bank + input logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache. + input logic ic_debug_rd_en, // Icache debug rd + input logic ic_debug_wr_en, // Icache debug wr + input logic ic_debug_tag_array, // Debug tag array + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. + input logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. 
+ input logic ic_sel_premux_data, // Select the pre_muxed data + + input logic [pt.ICACHE_NUM_WAYS-1:0]ic_rd_hit, + input logic scan_mode + + ) ; + + logic [pt.ICACHE_TAG_INDEX_LO-1:1] ic_rw_addr_ff; + logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_WAYS-1:0] ic_b_sb_wren; //bank x ways + logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_WAYS-1:0] ic_b_sb_rden; //bank x ways + logic [pt.ICACHE_BANKS_WAY-1:0] ic_b_rden; //bank + logic [pt.ICACHE_BANKS_WAY-1:0] ic_b_rden_ff; //bank + logic [pt.ICACHE_BANKS_WAY-1:0] ic_debug_sel_sb; + + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][70:0] wb_dout ; // ways x bank + logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_sb_wr_data, ic_bank_wr_data, wb_dout_ecc_bank; + logic [pt.ICACHE_NUM_WAYS-1:0] [141:0] wb_dout_way_pre; + logic [pt.ICACHE_NUM_WAYS-1:0] [63:0] wb_dout_way, wb_dout_way_with_premux; + logic [141:0] wb_dout_ecc; + + logic [pt.ICACHE_BANKS_WAY-1:0] bank_check_en; + + logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_WAYS-1:0] ic_bank_way_clken; // ; + logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_rd_way_en; // debug wr_way + logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_rd_way_en_ff; // debug wr_way + logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_wr_way_en; // debug wr_way + logic [pt.ICACHE_INDEX_HI:1] ic_rw_addr_q; + logic [pt.ICACHE_BANKS_WAY-1:0] [pt.ICACHE_INDEX_HI : pt.ICACHE_DATA_INDEX_LO] ic_rw_addr_bank_q; + logic [pt.ICACHE_TAG_LO-1 : pt.ICACHE_DATA_INDEX_LO] ic_rw_addr_q_inc; + logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit_q; + + logic ic_rd_en_with_debug; + logic ic_rw_addr_wrap, ic_cacheline_wrap_ff; + logic ic_debug_rd_en_ff; + + +//----------------------------------------------------------- +// ----------- Logic section starts here -------------------- +//----------------------------------------------------------- + assign ic_debug_rd_way_en[pt.ICACHE_NUM_WAYS-1:0] = {pt.ICACHE_NUM_WAYS{ic_debug_rd_en & ~ic_debug_tag_array}} & ic_debug_way[pt.ICACHE_NUM_WAYS-1:0] ; + assign ic_debug_wr_way_en[pt.ICACHE_NUM_WAYS-1:0] = 
{pt.ICACHE_NUM_WAYS{ic_debug_wr_en & ~ic_debug_tag_array}} & ic_debug_way[pt.ICACHE_NUM_WAYS-1:0] ; + + always_comb begin : clkens + ic_bank_way_clken = '0; + + for ( int i=0; i> (16*iccm_rd_addr_lo_q[1]))}); + assign iccm_rd_data[63:0] = {iccm_data[63:0]}; + assign iccm_rd_data_ecc[77:0] = {iccm_bank_dout_fn[iccm_rd_addr_hi_q][38:0], iccm_bank_dout_fn[iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:2]][38:0]}; + +endmodule // el2_ifu_iccm_mem
diff --git a/design/ifu/el2_ifu_ifc_ctl.sv b/design/ifu/el2_ifu_ifc_ctl.sv
new file mode 100644
index 0000000..33c1401
--- /dev/null
+++ b/design/ifu/el2_ifu_ifc_ctl.sv
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// el2_ifu_ifc_ctl.sv
+// Function: Fetch pipe control
+//
+// Comments: Chooses the next fetch address (flush path, re-issue of the
+//           current address, BTB target, or sequential), sequences the
+//           IDLE/FETCH/WFM state machine, and keeps a 4-bit shift model of
+//           fetch-buffer occupancy that throttles new requests when full.
+//********************************************************************************
+
+module el2_ifu_ifc_ctl
+import el2_pkg::*;
+#(
+`include "el2_param.vh"
+ )
+  (
+   input logic clk,
+   input logic free_clk,
+   input logic active_clk,
+
+   input logic rst_l, // reset enable, from core pin
+   input logic scan_mode, // scan
+
+   input logic ic_hit_f, // Icache hit
+   input logic ifu_ic_mb_empty, // Miss buffer empty
+
+   input logic ifu_fb_consume1, // Aligner consumed 1 fetch buffer
+   input logic ifu_fb_consume2, // Aligner consumed 2 fetch buffers
+
+   input logic dec_tlu_flush_noredir_wb, // Don't fetch on flush
+   input logic exu_flush_final, // Flush
+   input logic [31:1] exu_flush_path_final, // Flush path
+
+   input logic ifu_bp_hit_taken_f, // btb hit, select the target path
+   input logic [31:1] ifu_bp_btb_target_f, // predicted target PC
+
+   input logic ic_dma_active, // IC DMA active, stop fetching
+   input logic ic_write_stall, // IC is writing, stop fetching
+   input logic dma_iccm_stall_any, // force a stall in the fetch pipe for DMA ICCM access
+
+   input logic [31:0] dec_tlu_mrac_ff , // side_effect and cacheable for each region
+
+   output logic [31:1] ifc_fetch_addr_f, // fetch addr F
+   output logic [31:1] ifc_fetch_addr_bf, // fetch addr BF
+
+   output logic ifc_fetch_req_f, // fetch request valid F
+
+   output logic ifu_pmu_fetch_stall, // pmu event measuring fetch stall
+
+   output logic ifc_fetch_uncacheable_bf, // The fetch request is uncacheable space. BF stage
+   output logic ifc_fetch_req_bf, // Fetch request. Comes with the address. BF stage
+   output logic ifc_fetch_req_bf_raw, // Fetch request without some qualifications. Used for clock-gating. BF stage
+   output logic ifc_iccm_access_bf, // This request is to the ICCM. Do not generate misses to the bus.
+   output logic ifc_region_acc_fault_bf, // Access fault. in ICCM region but offset is outside defined ICCM.
+
+   output logic ifc_dma_access_ok // fetch is not accessing the ICCM, DMA can proceed
+
+
+   );
+
+   logic [31:1] fetch_addr_bf;
+   logic [31:1] fetch_addr_next;
+   logic [3:0] fb_write_f, fb_write_ns;
+
+   logic fb_full_f_ns, fb_full_f;
+   logic fb_right, fb_right2, fb_left, wfm, idle;
+   logic sel_last_addr_bf, sel_btb_addr_bf, sel_next_addr_bf;
+   logic miss_f, miss_a;
+   logic flush_fb, dma_iccm_stall_any_f;
+   logic mb_empty_mod, goto_idle, leave_idle;
+   logic fetch_bf_en;
+   logic line_wrap;
+   logic fetch_addr_next_1;
+
+   // FSM assignment
+   typedef enum logic [1:0] { IDLE = 2'b00 ,
+                              FETCH = 2'b01 ,
+                              STALL = 2'b10 ,
+                              WFM = 2'b11 } state_t ;
+   state_t state ;
+   state_t next_state ;
+
+   logic dma_stall;
+   assign dma_stall = ic_dma_active | dma_iccm_stall_any_f;
+
+   rvdff #(2) ran_ff (.*, .clk(free_clk), .din({dma_iccm_stall_any, miss_f}), .dout({dma_iccm_stall_any_f, miss_a}));
+
+   // Fetch address mux
+   // - flush
+   // - Miss *or* flush during WFM (icache miss buffer is blocking)
+   // - Sequential
+
+
+   assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f);
+   assign sel_btb_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ifu_bp_hit_taken_f & ic_hit_f;
+   assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f;
+
+
+   assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} & exu_flush_path_final[31:1]) | // FLUSH path
+                                  ({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path
+                                  ({31{sel_btb_addr_bf}} & {ifu_bp_btb_target_f[31:1]})| // BTB target
+                                  ({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path
+
+
+   // Sequential path: bump the word address [31:2] by one. The increment is
+   // sized to the 30-bit operand so the concatenation is exactly 31 bits wide
+   // and no carry bit has to be truncated implicitly on assignment.
+   assign fetch_addr_next[31:1] = {({ifc_fetch_addr_f[31:2]} + 30'b1), fetch_addr_next_1 };
+   assign line_wrap = (fetch_addr_next[pt.ICACHE_TAG_INDEX_LO] ^ ifc_fetch_addr_f[pt.ICACHE_TAG_INDEX_LO]);
+
+   // Bit 1 restarts at 0 whenever the increment flips address bit ICACHE_TAG_INDEX_LO.
+   assign fetch_addr_next_1 = line_wrap ? 1'b0 : ifc_fetch_addr_f[1];
+
+   assign ifc_fetch_req_bf_raw = ~idle;
+   assign ifc_fetch_req_bf = ifc_fetch_req_bf_raw &
+
+                             ~(fb_full_f_ns & ~(ifu_fb_consume2 | ifu_fb_consume1)) &
+                             ~dma_stall &
+                             ~ic_write_stall &
+                             ~dec_tlu_flush_noredir_wb;
+
+
+   assign fetch_bf_en = exu_flush_final | ifc_fetch_req_f;
+
+   assign miss_f = ifc_fetch_req_f & ~ic_hit_f & ~exu_flush_final;
+
+   assign mb_empty_mod = (ifu_ic_mb_empty | exu_flush_final) & ~dma_stall & ~miss_f & ~miss_a;
+
+   // Halt flushes and takes us to IDLE
+   assign goto_idle = exu_flush_final & dec_tlu_flush_noredir_wb;
+   // If we're in IDLE, and we get a flush, goto FETCH
+   assign leave_idle = exu_flush_final & ~dec_tlu_flush_noredir_wb & idle;
+
+//.i 7
+//.o 2
+//.ilb state[1] state[0] reset_delayed miss_f mb_empty_mod goto_idle leave_idle
+//.ob next_state[1] next_state[0]
+//.type fr
+//
+//# fetch 01, stall 10, wfm 11, idle 00
+//-- 1---- 01
+//-- 0--1- 00
+//00 0--00 00
+//00 0--01 01
+//
+//01 01-0- 11
+//01 00-0- 01
+//
+//11 0-10- 01
+//11 0-00- 11
+
+   // NOTE: STALL (2'b10) is declared, but the equations below can only produce
+   // it when the current state is already STALL.
+   assign next_state[1] = (~state[1] & state[0] & miss_f & ~goto_idle) |
+                          (state[1] & ~mb_empty_mod & ~goto_idle);
+
+   assign next_state[0] = (~goto_idle & leave_idle) | (state[0] & ~goto_idle);
+
+   assign flush_fb = exu_flush_final;
+
+   // model fb write logic to mass balance the fetch buffers
+   assign fb_right = ( ifu_fb_consume1 & ~ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)) | // Consumed and no new fetch
+                     (ifu_fb_consume2 & ifc_fetch_req_f); // Consumed 2 and new fetch
+
+
+   // NOTE(review): with ifu_fb_consume2 & ifc_fetch_req_f & miss_f, both
+   // fb_right and fb_right2 are asserted, so fb_write_ns ORs the one- and
+   // two-position shifts - confirm this case is unreachable or intended.
+   assign fb_right2 = (ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)); // Consumed 2 and no new fetch
+
+   assign fb_left = ifc_fetch_req_f & ~(ifu_fb_consume1 | ifu_fb_consume2) & ~miss_f;
+
+// CBH
+   // flush -> 4'b0001; fb_right/fb_right2 shift right by 1/2; fb_left shifts
+   // left by 1; otherwise hold. Bit 3 of the next value is the "full" flag.
+   assign fb_write_ns[3:0] = ( ({4{(flush_fb)}} & 4'b0001) |
+                               ({4{~flush_fb & fb_right }} & {1'b0, fb_write_f[3:1]}) |
+                               ({4{~flush_fb & fb_right2}} & {2'b0, fb_write_f[3:2]}) |
+                               ({4{~flush_fb & fb_left }} & {fb_write_f[2:0], 1'b0}) |
+                               ({4{~flush_fb & ~fb_right & ~fb_right2 & ~fb_left}} & fb_write_f[3:0]));
+
+
+   assign fb_full_f_ns = fb_write_ns[3];
+
+   assign idle = state == IDLE ;
+   assign wfm = state == WFM ;
+
+   rvdff #(2) fsm_ff (.*, .clk(active_clk), .din({next_state[1:0]}), .dout({state[1:0]}));
+   rvdff #(5) fbwrite_ff (.*, .clk(active_clk), .din({fb_full_f_ns, fb_write_ns[3:0]}), .dout({fb_full_f, fb_write_f[3:0]}));
+
+   assign ifu_pmu_fetch_stall = wfm |
+                                (ifc_fetch_req_bf_raw &
+                                ( (fb_full_f & ~(ifu_fb_consume2 | ifu_fb_consume1 | exu_flush_final)) |
+                                dma_stall));
+
+
+   rvdff #(1) req_ff (.*, .clk(active_clk), .din(ifc_fetch_req_bf), .dout(ifc_fetch_req_f));
+
+   assign ifc_fetch_addr_bf[31:1] = fetch_addr_bf[31:1];
+
+   rvdffe #(31) faddrf1_ff (.*, .en(fetch_bf_en), .din(fetch_addr_bf[31:1]), .dout(ifc_fetch_addr_f[31:1]));
+
+
+   // ICCM decode exists only when pt.ICCM_ENABLE is set; otherwise the three ICCM-related outputs are tied to 0.
+   if (pt.ICCM_ENABLE) begin
+      logic iccm_acc_in_region_bf;
+      logic iccm_acc_in_range_bf;
+      rvrangecheck #( .CCM_SADR (pt.ICCM_SADR),
+                      .CCM_SIZE (pt.ICCM_SIZE) ) iccm_rangecheck (
+         .addr ({ifc_fetch_addr_bf[31:1],1'b0}) ,
+         .in_range (iccm_acc_in_range_bf) ,
+         .in_region(iccm_acc_in_region_bf)
+         );
+
+      assign ifc_iccm_access_bf = iccm_acc_in_range_bf ;
+
+      assign ifc_dma_access_ok = ( (~ifc_iccm_access_bf |
+                                    (fb_full_f & ~(ifu_fb_consume2 | ifu_fb_consume1)) |
+                                    (wfm & ~ifc_fetch_req_bf) |
+                                    idle ) & ~exu_flush_final) |
+                                 dma_iccm_stall_any_f;
+
+      assign ifc_region_acc_fault_bf = ~iccm_acc_in_range_bf & iccm_acc_in_region_bf ;
+   end
+   else begin
+      assign ifc_iccm_access_bf = 1'b0 ;
+      assign ifc_dma_access_ok = 1'b0 ;
+      assign ifc_region_acc_fault_bf = 1'b0 ;
+   end
+
+   assign ifc_fetch_uncacheable_bf = ~dec_tlu_mrac_ff[{ifc_fetch_addr_bf[31:28] , 1'b0 }] ; // bit 0 of each region description is the cacheable bit
+
+endmodule // el2_ifu_ifc_ctl
+
diff --git a/design/ifu/el2_ifu_mem_ctl.sv b/design/ifu/el2_ifu_mem_ctl.sv new file mode 100644 index 0000000..edb10db --- /dev/null +++ b/design/ifu/el2_ifu_mem_ctl.sv @@ -0,0 +1,1630 @@ +
//******************************************************************************** +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//******************************************************************************** + + +//******************************************************************************** +// Function: Icache , iccm control +// BFF -> F1 -> F2 -> A +//******************************************************************************** + +module el2_ifu_mem_ctl +import el2_pkg::*; +#( +`include "el2_param.vh" + ) + ( + input logic clk, + input logic free_clk, // free clock always except during pause + input logic active_clk, // Active always except during pause + input logic rst_l, + + input logic exu_flush_final, // Flush from the pipeline., includes flush lower + input logic dec_tlu_flush_lower_wb, // Flush lower from the pipeline. + input logic dec_tlu_flush_err_wb, // Flush from the pipeline due to perr. + input logic dec_tlu_i0_commit_cmt, // committed i0 instruction + input logic dec_tlu_force_halt, // force halt. + + input logic [31:1] ifc_fetch_addr_bf, // Fetch Address byte aligned always. F1 stage. + input logic ifc_fetch_uncacheable_bf, // The fetch request is uncacheable space. F1 stage + input logic ifc_fetch_req_bf, // Fetch request. Comes with the address. 
F1 stage + input logic ifc_fetch_req_bf_raw, // Fetch request without some qualifications. Used for clock-gating. F1 stage + input logic ifc_iccm_access_bf, // This request is to the ICCM. Do not generate misses to the bus. + input logic ifc_region_acc_fault_bf, // Access fault. in ICCM region but offset is outside defined ICCM. + input logic ifc_dma_access_ok, // It is OK to give dma access to the ICCM. (ICCM is not busy this cycle). + input logic dec_tlu_fence_i_wb, // Fence.i instruction is committing. Clear all Icache valids. + input logic ifu_bp_hit_taken_f, // Branch is predicted taken. Kill the fetch next cycle. + + input logic ifu_bp_inst_mask_f, // tell ic which valids to kill because of a taken branch, right justified + + output logic ifu_miss_state_idle, // No icache misses are outstanding. + output logic ifu_ic_mb_empty, // Continue with normal fetching. This does not mean that miss is finished. + output logic ic_dma_active , // In the middle of servicing dma request to ICCM. Do not make any new requests. + output logic ic_write_stall, // Stall fetch the cycle we are writing the cache. 
+ +/// PMU signals + output logic ifu_pmu_ic_miss, // IC miss event + output logic ifu_pmu_ic_hit, // IC hit event + output logic ifu_pmu_bus_error, // Bus error event + output logic ifu_pmu_bus_busy, // Bus busy event + output logic ifu_pmu_bus_trxn, // Bus transaction + + //-------------------------- IFU AXI signals-------------------------- + // AXI Write Channels + output logic ifu_axi_awvalid, + output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid, + output logic [31:0] ifu_axi_awaddr, + output logic [3:0] ifu_axi_awregion, + output logic [7:0] ifu_axi_awlen, + output logic [2:0] ifu_axi_awsize, + output logic [1:0] ifu_axi_awburst, + output logic ifu_axi_awlock, + output logic [3:0] ifu_axi_awcache, + output logic [2:0] ifu_axi_awprot, + output logic [3:0] ifu_axi_awqos, + + output logic ifu_axi_wvalid, + output logic [63:0] ifu_axi_wdata, + output logic [7:0] ifu_axi_wstrb, + output logic ifu_axi_wlast, + + output logic ifu_axi_bready, + + // AXI Read Channels + output logic ifu_axi_arvalid, + input logic ifu_axi_arready, + output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid, + output logic [31:0] ifu_axi_araddr, + output logic [3:0] ifu_axi_arregion, + output logic [7:0] ifu_axi_arlen, + output logic [2:0] ifu_axi_arsize, + output logic [1:0] ifu_axi_arburst, + output logic ifu_axi_arlock, + output logic [3:0] ifu_axi_arcache, + output logic [2:0] ifu_axi_arprot, + output logic [3:0] ifu_axi_arqos, + + input logic ifu_axi_rvalid, + output logic ifu_axi_rready, + input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid, + input logic [63:0] ifu_axi_rdata, + input logic [1:0] ifu_axi_rresp, + + input logic ifu_bus_clk_en, + + + input logic dma_iccm_req, // dma iccm command (read or write) + input logic [31:0] dma_mem_addr, // dma address + input logic [2:0] dma_mem_sz, // size + input logic dma_mem_write, // write + input logic [63:0] dma_mem_wdata, // write data + input logic [2:0] dma_mem_tag, // DMA Buffer entry number + + output logic iccm_dma_ecc_error,// Data read from iccm 
has an ecc error + output logic iccm_dma_rvalid, // Data read from iccm is valid + output logic [63:0] iccm_dma_rdata, // dma data read from iccm + output logic [2:0] iccm_dma_rtag, // Tag of the DMA req + output logic iccm_ready, // iccm ready to accept new command. + + +// I$ & ITAG Ports + output logic [31:1] ic_rw_addr, // Read/Write addresss to the Icache. + output logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, // Icache write enable, when filling the Icache. + output logic ic_rd_en, // Icache read enable. + + output logic [pt.ICACHE_BANKS_WAY-1:0] [70:0] ic_wr_data, // Data to fill to the Icache. With ECC + input logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [25:0] ictag_debug_rd_data, // Debug icache tag. + output logic [70:0] ic_debug_wr_data, // Debug wr cache. + output logic [70:0] ifu_ic_debug_rd_data, // debug data read + + + input logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // + input logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, + + output logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache. + output logic ic_debug_rd_en, // Icache debug rd + output logic ic_debug_wr_en, // Icache debug wr + output logic ic_debug_tag_array, // Debug tag array + output logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. + + + output logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, // Valid bits when accessing the Icache. One valid bit per way. F2 stage + + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, // Compare hits from Icache tags. Per way. F2 stage + input logic ic_tag_perr, // Icache Tag parity error + + // ICCM ports + output logic [pt.ICCM_BITS-1:1] iccm_rw_addr, // ICCM read/write address. + output logic iccm_wren, // ICCM write enable (through the DMA) + output logic iccm_rden, // ICCM read enable. + output logic [77:0] iccm_wr_data, // ICCM write data. 
+ output logic [2:0] iccm_wr_size, // ICCM write location within DW. + + input logic [63:0] iccm_rd_data, // Data read from ICCM. + input logic [77:0] iccm_rd_data_ecc, // Data + ECC read from ICCM. + input logic [1:0] ifu_fetch_val, + // IFU control signals + output logic ic_hit_f, // Hit in Icache(if Icache access) or ICCM access( ICCM always has ic_hit_f) + output logic ic_access_fault_f, // Access fault (bus error or ICCM access in region but out of offset range). + output logic [1:0] ic_access_fault_type_f, // Access fault types + output logic iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc error. + output logic iccm_rd_ecc_double_err, // This fetch has a double ICCM ecc error. + output logic ic_error_start, // This has any I$ errors ( data/tag/ecc/parity ) + + output logic ifu_async_error_start, // Or of the sb iccm, and all the icache errors sent to aligner to stop + output logic iccm_dma_sb_error, // Single Bit ECC error from a DMA access + output logic [1:0] ic_fetch_val_f, // valid bytes for fetch. To the Aligner. + output logic [31:0] ic_data_f, // Data read from Icache or ICCM. To the Aligner. + output logic [63:0] ic_premux_data, // Premuxed data to be muxed with Icache data + output logic ic_sel_premux_data, // Select premux data. + +///// Debug + input el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt , // Icache/tag debug read/write packet + input logic dec_tlu_core_ecc_disable, // disable the ecc checking and flagging + output logic ifu_ic_debug_rd_data_valid, // debug data valid. 
+ output logic iccm_buf_correct_ecc, + output logic iccm_correction_state, + + + input logic scan_mode + ); + + +// Create different defines for ICACHE and ICCM enable combinations + + localparam NUM_OF_BEATS = 8 ; + + + + logic [31:3] ifu_ic_req_addr_f; + logic uncacheable_miss_in ; + logic uncacheable_miss_ff; + + + + logic bus_ifu_wr_en ; + logic bus_ifu_wr_en_ff ; + logic bus_ifu_wr_en_ff_q ; + logic bus_ifu_wr_en_ff_wo_err ; + logic [pt.ICACHE_NUM_WAYS-1:0] bus_ic_wr_en ; + + logic reset_tag_valid_for_miss ; + + + logic [pt.ICACHE_STATUS_BITS-1:0] way_status; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_in; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_rep_new; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_ff; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_new; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_hit_new; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_new_w_debug; + logic [pt.ICACHE_NUM_WAYS-1:0] tagv_mb_in; + logic [pt.ICACHE_NUM_WAYS-1:0] tagv_mb_ff; + + + logic ifu_wr_data_comb_err ; + logic ifu_byp_data_err_new; + logic ifu_wr_cumulative_err_data; + logic ifu_wr_cumulative_err; + logic ifu_wr_data_comb_err_ff; + logic scnd_miss_index_match ; + + + logic ifc_dma_access_q_ok; + logic ifc_iccm_access_f ; + logic ifc_region_acc_fault_f; + logic ifc_region_acc_fault_final_f; + logic ifc_bus_acc_fault_f; + logic ic_act_miss_f; + logic ic_miss_under_miss_f; + logic ic_ignore_2nd_miss_f; + logic ic_act_hit_f; + logic miss_pending; + logic [31:1] imb_in , imb_ff ; + logic [31:pt.ICACHE_BEAT_ADDR_HI+1] miss_addr_in , miss_addr ; + logic miss_wrap_f ; + logic flush_final_f; + logic ifc_fetch_req_f; + logic ifc_fetch_req_f_raw; + logic fetch_req_f_qual ; + logic ifc_fetch_req_qual_bf ; + logic [pt.ICACHE_NUM_WAYS-1:0] replace_way_mb_any; + logic last_beat; + logic reset_beat_cnt ; + logic [pt.ICACHE_BEAT_ADDR_HI:3] ic_req_addr_bits_hi_3 ; + logic [pt.ICACHE_BEAT_ADDR_HI:3] ic_wr_addr_bits_hi_3 ; + logic [31:1] ifu_fetch_addr_int_f ; + logic [31:1] 
ifu_ic_rw_int_addr ; + logic crit_wd_byp_ok_ff ; + logic ic_crit_wd_rdy_new_ff; + logic [79:0] ic_byp_data_only_pre_new; + logic [79:0] ic_byp_data_only_new; + logic ic_byp_hit_f ; + logic ic_valid ; + logic ic_valid_ff; + logic reset_all_tags; + logic ic_valid_w_debug; + + logic [pt.ICACHE_NUM_WAYS-1:0] ifu_tag_wren,ifu_tag_wren_ff; + logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_tag_wr_en; + logic [pt.ICACHE_NUM_WAYS-1:0] ifu_tag_wren_w_debug; + logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way_ff; + logic ic_debug_rd_en_ff ; + logic fetch_bf_f_c1_clken ; + logic fetch_bf_f_c1_clk; + logic debug_c1_clken; + logic debug_c1_clk; + + logic reset_ic_in ; + logic reset_ic_ff ; + logic [pt.ICACHE_BEAT_ADDR_HI:1] vaddr_f ; + logic [31:1] ifu_status_wr_addr; + logic sel_mb_addr ; + logic sel_mb_addr_ff ; + logic sel_mb_status_addr ; + logic [63:0] ic_final_data; + + logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_ic_rw_int_addr_ff ; + logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_status_wr_addr_ff ; + logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_ic_rw_int_addr_w_debug ; + logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_status_wr_addr_w_debug ; + + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_new_ff ; + logic way_status_wr_en_ff ; + logic [pt.ICACHE_TAG_DEPTH-1:0][pt.ICACHE_STATUS_BITS-1:0] way_status_out ; + logic [1:0] ic_debug_way_enc; + + logic [pt.IFU_BUS_TAG-1:0] ifu_bus_rid_ff; + + logic fetch_req_icache_f; + logic fetch_req_iccm_f; + logic ic_iccm_hit_f; + logic fetch_uncacheable_ff; + logic way_status_wr_en; + logic sel_byp_data; + logic sel_ic_data; + logic sel_iccm_data; + logic ic_rd_parity_final_err; + logic ic_act_miss_f_delayed; + logic bus_ifu_wr_data_error; + logic bus_ifu_wr_data_error_ff; + logic way_status_wr_en_w_debug; + logic ic_debug_tag_val_rd_out; + logic ifu_pmu_ic_miss_in; + logic ifu_pmu_ic_hit_in; + logic ifu_pmu_bus_error_in; + logic ifu_pmu_bus_trxn_in; + logic ifu_pmu_bus_busy_in; + logic ic_debug_ict_array_sel_in; + logic 
ic_debug_ict_array_sel_ff; + logic debug_data_clk; + logic debug_data_clken; + logic last_data_recieved_in ; + logic last_data_recieved_ff ; + + logic ifu_bus_rvalid ; + logic ifu_bus_rvalid_ff ; + logic ifu_bus_rvalid_unq ; + logic ifu_bus_rvalid_unq_ff ; + logic ifu_bus_arready_unq ; + logic ifu_bus_arready_unq_ff ; + logic ifu_bus_arvalid ; + logic ifu_bus_arvalid_ff ; + logic ifu_bus_arready ; + logic ifu_bus_arready_ff ; + logic [63:0] ifu_bus_rdata_ff ; + logic [1:0] ifu_bus_rresp_ff ; + logic ifu_bus_rsp_valid ; + logic ifu_bus_rsp_ready ; + logic [pt.IFU_BUS_TAG-1:0] ifu_bus_rsp_tag; + logic [63:0] ifu_bus_rsp_rdata; + logic [1:0] ifu_bus_rsp_opc; + + logic [pt.ICACHE_NUM_BEATS-1:0] write_fill_data; + logic [pt.ICACHE_NUM_BEATS-1:0] wr_data_c1_clk; + logic [pt.ICACHE_NUM_BEATS-1:0] ic_miss_buff_data_valid_in; + logic [pt.ICACHE_NUM_BEATS-1:0] ic_miss_buff_data_valid; + logic [pt.ICACHE_NUM_BEATS-1:0] ic_miss_buff_data_error_in; + logic [pt.ICACHE_NUM_BEATS-1:0] ic_miss_buff_data_error; + logic [pt.ICACHE_BEAT_ADDR_HI:1] byp_fetch_index; + logic [pt.ICACHE_BEAT_ADDR_HI:2] byp_fetch_index_0; + logic [pt.ICACHE_BEAT_ADDR_HI:2] byp_fetch_index_1; + logic [pt.ICACHE_BEAT_ADDR_HI:3] byp_fetch_index_inc; + logic [pt.ICACHE_BEAT_ADDR_HI:2] byp_fetch_index_inc_0; + logic [pt.ICACHE_BEAT_ADDR_HI:2] byp_fetch_index_inc_1; + logic miss_buff_hit_unq_f ; + logic stream_hit_f ; + logic stream_miss_f ; + logic stream_eol_f ; + logic crit_byp_hit_f ; + logic [pt.IFU_BUS_TAG-1:0] other_tag ; + logic [(2*pt.ICACHE_NUM_BEATS)-1:0] [31:0] ic_miss_buff_data; + logic [63:0] ic_miss_buff_half; + logic scnd_miss_req, scnd_miss_req_q, scnd_miss_req_ff2; + logic scnd_miss_req_in; + + + logic [pt.ICCM_BITS-1:2] iccm_ecc_corr_index_ff; + logic [pt.ICCM_BITS-1:2] iccm_ecc_corr_index_in; + logic [38:0] iccm_ecc_corr_data_ff; + logic iccm_ecc_write_status ; + logic iccm_rd_ecc_single_err_ff ; + logic iccm_error_start; // start the error fsm + logic perr_state_en; + logic miss_state_en; + 
+   // Declarations only: none of the signals in this group is driven here.
+   logic                                   busclk;
+   logic                                   busclk_force;
+   logic                                   busclk_reset;
+   logic                                   bus_ifu_bus_clk_en_ff;
+   logic                                   bus_ifu_bus_clk_en ;
+
+   logic                                   ifc_bus_ic_req_ff_in;
+   logic                                   ifu_bus_cmd_valid ;
+   logic                                   ifu_bus_cmd_ready ;
+
+   // Increment / reset / hold controls for the data-beat and command-beat counters.
+   logic                                   bus_inc_data_beat_cnt  ;
+   logic                                   bus_reset_data_beat_cnt ;
+   logic                                   bus_hold_data_beat_cnt ;
+
+   logic                                   bus_inc_cmd_beat_cnt  ;
+   logic                                   bus_reset_cmd_beat_cnt_0 ;
+   logic                                   bus_reset_cmd_beat_cnt_secondlast ;
+   logic                                   bus_hold_cmd_beat_cnt ;
+
+   // All beat counters share the same width, pt.ICACHE_BEAT_BITS.
+   logic [pt.ICACHE_BEAT_BITS-1:0]         bus_new_data_beat_count ;
+   logic [pt.ICACHE_BEAT_BITS-1:0]         bus_data_beat_count ;
+
+   logic [pt.ICACHE_BEAT_BITS-1:0]         bus_new_cmd_beat_count ;
+   logic [pt.ICACHE_BEAT_BITS-1:0]         bus_cmd_beat_count ;
+
+
+   logic [pt.ICACHE_BEAT_BITS-1:0]         bus_new_rd_addr_count;
+   logic [pt.ICACHE_BEAT_BITS-1:0]         bus_rd_addr_count;
+
+
+   logic                                   bus_cmd_sent ;
+   logic                                   bus_last_data_beat ;
+
+
+   // One bit per cache way.
+   logic [pt.ICACHE_NUM_WAYS-1:0]          bus_wren ;
+
+   logic [pt.ICACHE_NUM_WAYS-1:0]          bus_wren_last ;
+   logic [pt.ICACHE_NUM_WAYS-1:0]          wren_reset_miss ;
+   logic                                   ifc_dma_access_ok_d;
+   logic                                   ifc_dma_access_ok_prev;
+
+   logic                                   bus_cmd_req_in ;
+   logic                                   bus_cmd_req_hold ;
+
+   logic                                   second_half_available ;
+   logic                                   write_ic_16_bytes ;
+
+   logic                                   ifc_region_acc_fault_final_bf;
+   logic                                   ifc_region_acc_fault_memory_bf;
+   logic                                   ifc_region_acc_fault_memory_f;
+   logic                                   ifc_region_acc_okay;
+
+   logic                                   iccm_correct_ecc;
+   logic                                   dma_sb_err_state, dma_sb_err_state_ff;
+   logic                                   two_byte_instr;
+
+   // State type of the miss-handling FSM. The encoding is 3 bits wide and all eight
+   // codes are assigned. miss_state is the registered state, miss_nxtstate the
+   // combinational next state produced by the MISS_SM always_comb block.
+   typedef enum logic [2:0] {IDLE=3'b000, CRIT_BYP_OK=3'b001, HIT_U_MISS=3'b010, MISS_WAIT=3'b011,CRIT_WRD_RDY=3'b100,SCND_MISS=3'b101,STREAM=3'b110 , STALL_SCND_MISS=3'b111} miss_state_t;
+   miss_state_t miss_state, miss_nxtstate;
+
+   // State type of the error-stop FSM (2 bits, all four codes assigned), together with
+   // its state/next-state pair, its state enable and the err_stop_fetch flag.
+   typedef enum logic [1:0] {ERR_STOP_IDLE=2'b00, ERR_FETCH1=2'b01 , ERR_FETCH2=2'b10 , ERR_STOP_FETCH=2'b11} err_stop_state_t;
+   err_stop_state_t err_stop_state, err_stop_nxtstate;
+   logic                                   err_stop_state_en ;
+   logic                                   err_stop_fetch ;
+
+   logic                                   ic_crit_wd_rdy;  // Critical fetch is ready to be bypassed.
+
+   logic                                   ifu_bp_hit_taken_q_f;
+   logic                                   bus_cmd_beat_en;
+
+
+// ---- Clock gating section -----
+// c1 clock enables
+// fetch_bf_f_c1_clk is enabled by any of: the raw bf fetch request, the f fetch request,
+// a pending miss, a flush, or a second-miss request.
+// debug_c1_clk is enabled only by a debug read or debug write.
+
+
+   assign fetch_bf_f_c1_clken  = ifc_fetch_req_bf_raw | ifc_fetch_req_f | miss_pending | exu_flush_final | scnd_miss_req;
+   assign debug_c1_clken       = ic_debug_rd_en | ic_debug_wr_en ;
+   // C1 - 1 clock pulse for data
+
+   rvclkhdr fetch_bf_f_c1_cgc ( .en(fetch_bf_f_c1_clken), .l1clk(fetch_bf_f_c1_clk), .* );
+   rvclkhdr debug_c1_cgc      ( .en(debug_c1_clken),      .l1clk(debug_c1_clk), .* );
+
+// ------ end clock gating section ------------------------
+
+   logic [1:0]    iccm_single_ecc_error;
+   logic          dma_iccm_req_f ;
+   // A single-bit ECC error on either ICCM half while the access is a DMA request.
+   assign iccm_dma_sb_error      = (|iccm_single_ecc_error[1:0] ) & dma_iccm_req_f ;
+   assign ifu_async_error_start  = iccm_rd_ecc_single_err | ic_error_start;
+
+
+   // State type of the parity/ECC error FSM (ERROR_SM). Only five of the eight 3-bit
+   // codes are assigned.
+   typedef enum logic [2:0] {ERR_IDLE=3'b000, IC_WFF=3'b001 , ECC_WFF=3'b010 , ECC_CORR=3'b011, DMA_SB_ERR=3'b100} perr_state_t;
+   perr_state_t perr_state, perr_nxtstate;
+
+
+   assign ic_dma_active = iccm_correct_ecc | (perr_state == DMA_SB_ERR) | (err_stop_state == ERR_STOP_FETCH) | err_stop_fetch |
+                          dec_tlu_flush_err_wb; // The last term is to give an error-correction a chance to finish before refetch starts
+
+   // Second-miss request: a bus response is accepted on an enabled bus clock, the new
+   // data-beat count is all ones, the pending miss is cacheable, the FSM is in (or is
+   // moving to) SCND_MISS, and no flush is active.
+   assign scnd_miss_req_in     = ifu_bus_rsp_valid & bus_ifu_bus_clk_en & ifu_bus_rsp_ready &
+                                 (&bus_new_data_beat_count[pt.ICACHE_BEAT_BITS-1:0]) &
+                                 ~uncacheable_miss_ff &  ((miss_state == SCND_MISS) | (miss_nxtstate == SCND_MISS)) & ~exu_flush_final;
+
+   // Branch-predictor "hit taken" qualified by an actual fetch hit.
+   assign ifu_bp_hit_taken_q_f = ifu_bp_hit_taken_f & ic_hit_f ;
+
+   //////////////////////////////////// Create Miss State Machine ///////////////////////
+   //                                   Create Miss State Machine                      //
+   //                                   Create Miss State Machine                      //
+   //                                   Create Miss State Machine                      //
+   //////////////////////////////////// Create Miss State Machine ///////////////////////
+   // Miss-handling state machine: next-state and state-enable logic.
+   //  - miss_nxtstate is loaded into miss_state (miss_state_ff below) only when
+   //    miss_state_en is high; a next state computed while the enable is low is ignored.
+   //  - Each nested ?: chain is a priority list: the first true condition wins.
+   //  - The recurring term (bus_ifu_wr_en_ff & last_beat) is the registered bus write
+   //    enable coinciding with last_beat; presumably the final beat of the line fill
+   //    (both signals are produced outside this block -- TODO confirm).
+   //  - dec_tlu_force_halt returns every non-IDLE state to IDLE and keeps IDLE in IDLE.
+   always_comb begin : MISS_SM
+      miss_nxtstate   = IDLE;
+      miss_state_en   = 1'b0;
+      case (miss_state)
+         // Leave IDLE only on an actual miss without force halt. With a flush active in
+         // the same cycle go to HIT_U_MISS, otherwise to CRIT_BYP_OK.
+         IDLE: begin : idle
+                  miss_nxtstate = (ic_act_miss_f & ~exu_flush_final) ? CRIT_BYP_OK : HIT_U_MISS ;
+                  miss_state_en = ic_act_miss_f & ~dec_tlu_force_halt ;
+         end
+         // Uncacheable miss: a bypass hit ends it (IDLE) once the last data was received,
+         // otherwise waits (MISS_WAIT); fill finished without a bypass hit -> CRIT_WRD_RDY.
+         // Cacheable miss: fill finished -> IDLE; bypass hit or a fill write before the
+         // last beat (no flush, no taken branch) -> STREAM.
+         // Flush or taken branch before the fill finishes -> HIT_U_MISS.
+         CRIT_BYP_OK: begin : crit_byp_ok
+                  miss_nxtstate =  (dec_tlu_force_halt ) ?                                                                             IDLE :
+                                   ( ic_byp_hit_f &  (last_data_recieved_ff | (bus_ifu_wr_en_ff & last_beat)) &  uncacheable_miss_ff) ? IDLE :
+                                   ( ic_byp_hit_f &  ~last_data_recieved_ff                                  &  uncacheable_miss_ff) ? MISS_WAIT :
+                                   (~ic_byp_hit_f &  ~exu_flush_final &  (bus_ifu_wr_en_ff & last_beat)      &  uncacheable_miss_ff) ? CRIT_WRD_RDY :
+                                   (                                     (bus_ifu_wr_en_ff & last_beat)      & ~uncacheable_miss_ff) ? IDLE :
+                                   ( ic_byp_hit_f  &  ~exu_flush_final & ~(bus_ifu_wr_en_ff & last_beat) & ~ifu_bp_hit_taken_q_f & ~uncacheable_miss_ff) ? STREAM :
+                                   ( bus_ifu_wr_en_ff &  ~exu_flush_final & ~(bus_ifu_wr_en_ff & last_beat) & ~ifu_bp_hit_taken_q_f & ~uncacheable_miss_ff) ? STREAM :
+                                   (~ic_byp_hit_f  &  ~exu_flush_final &  (bus_ifu_wr_en_ff & last_beat) & ~uncacheable_miss_ff) ? IDLE :
+                                   ( (exu_flush_final | ifu_bp_hit_taken_q_f) & ~(bus_ifu_wr_en_ff & last_beat) ) ? HIT_U_MISS : IDLE;
+                  miss_state_en =  dec_tlu_force_halt | exu_flush_final | ic_byp_hit_f | ifu_bp_hit_taken_q_f | (bus_ifu_wr_en_ff & last_beat) | (bus_ifu_wr_en_ff & ~uncacheable_miss_ff)  ;
+         end
+         // Always returns to IDLE; the exit is triggered by a flush (current or
+         // registered), a bypass hit, or force halt.
+         CRIT_WRD_RDY: begin : crit_wrd_rdy
+                  miss_nxtstate =  IDLE ;
+                  miss_state_en =  exu_flush_final | flush_final_f | ic_byp_hit_f | dec_tlu_force_halt  ;
+         end
+         // Streaming stops on flush, taken branch or stream end-of-line: HIT_U_MISS if the
+         // fill is still in progress, IDLE if the last beat is being written.
+         STREAM: begin : stream
+                  miss_nxtstate =  ((exu_flush_final | ifu_bp_hit_taken_q_f  | stream_eol_f ) & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt) ? HIT_U_MISS  : IDLE ;
+                  miss_state_en =    exu_flush_final | ifu_bp_hit_taken_q_f  | stream_eol_f |  (bus_ifu_wr_en_ff & last_beat) | dec_tlu_force_halt ;
+         end
+         // Wait for the fill to finish (-> IDLE); a flush before that moves to HIT_U_MISS.
+         MISS_WAIT: begin : miss_wait
+                  miss_nxtstate =  (exu_flush_final & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt) ? HIT_U_MISS : IDLE ;
+                  miss_state_en =   exu_flush_final | (bus_ifu_wr_en_ff & last_beat) | dec_tlu_force_halt ;
+         end
+         // Fetching continues under the outstanding miss. A miss to a different line
+         // -> SCND_MISS; a miss that is to be ignored -> STALL_SCND_MISS; fill done -> IDLE.
+         HIT_U_MISS: begin : hit_u_miss
+                  miss_nxtstate =  ic_miss_under_miss_f & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt ? SCND_MISS :
+                                   ic_ignore_2nd_miss_f & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt ? STALL_SCND_MISS : IDLE  ;
+                  miss_state_en = (bus_ifu_wr_en_ff & last_beat) | ic_miss_under_miss_f | ic_ignore_2nd_miss_f | dec_tlu_force_halt;
+         end
+         // When the first fill completes without a flush, restart in CRIT_BYP_OK.
+         // A flush drops back to HIT_U_MISS, or to IDLE if the fill finishes in the same cycle.
+         SCND_MISS: begin : scnd_miss
+                  miss_nxtstate   =  dec_tlu_force_halt ? IDLE  :
+                                     exu_flush_final ?  ((bus_ifu_wr_en_ff & last_beat) ? IDLE : HIT_U_MISS) : CRIT_BYP_OK;
+                  miss_state_en   = (bus_ifu_wr_en_ff & last_beat) | exu_flush_final | dec_tlu_force_halt;
+         end
+         // Same exits as SCND_MISS, except that fill completion without a flush goes to IDLE.
+         STALL_SCND_MISS: begin : stall_scnd_miss
+                  miss_nxtstate   =  dec_tlu_force_halt ? IDLE  :
+                                     exu_flush_final ?  ((bus_ifu_wr_en_ff & last_beat) ? IDLE : HIT_U_MISS) : IDLE;
+                  miss_state_en   = (bus_ifu_wr_en_ff & last_beat) | exu_flush_final | dec_tlu_force_halt;
+         end
+         default: begin : def_case
+                  miss_nxtstate   = IDLE;
+                  miss_state_en   = 1'b0;
+         end
+      endcase
+   end
+   // Miss state register: enabled flop on free_clk, width taken from the enum type.
+   rvdffs #(($bits(miss_state_t))) miss_state_ff (.clk(free_clk), .din(miss_nxtstate), .dout({miss_state}), .en(miss_state_en),   .*);
+
+   logic    sel_hold_imb     ;
+
+   assign miss_pending       =  (miss_state != IDLE) ;
+   assign crit_wd_byp_ok_ff  =  (miss_state == CRIT_BYP_OK) | ((miss_state == CRIT_WRD_RDY) & ~flush_final_f);
+   // Hold the miss-buffer address register (imb_ff): while a miss is pending and not
+   // finishing (with two CRIT_WRD_RDY exceptions), on a new miss, or when entering CRIT_WRD_RDY.
+   assign sel_hold_imb       =  (miss_pending & ~(bus_ifu_wr_en_ff & last_beat) & ~((miss_state == CRIT_WRD_RDY) & exu_flush_final) &
+                              ~((miss_state == CRIT_WRD_RDY) & crit_byp_hit_f) ) | ic_act_miss_f |
+                                (miss_pending & (miss_nxtstate == CRIT_WRD_RDY)) ;
+
+
+   // Holding registers for the second miss: address, uncacheable flag, tag valids and
+   // way status.
+   logic                     sel_hold_imb_scnd;
+   logic  [31:1]             imb_scnd_in;
+   logic  [31:1]             imb_scnd_ff;
+   logic                     uncacheable_miss_scnd_in ;
+   logic                     uncacheable_miss_scnd_ff ;
+
+   logic  [pt.ICACHE_NUM_WAYS-1:0] tagv_mb_scnd_in;
+   logic  [pt.ICACHE_NUM_WAYS-1:0] tagv_mb_scnd_ff;
+
+   logic  [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_scnd_in;
+
 logic  [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_scnd_ff;
+
+   // The address and uncacheable flag hold on sel_hold_imb_scnd; way status and tag
+   // valids hold only while the FSM is in SCND_MISS, otherwise they track the live values.
+   assign sel_hold_imb_scnd =((miss_state == SCND_MISS) | ic_miss_under_miss_f) & ~flush_final_f ;
+   assign way_status_mb_scnd_in[pt.ICACHE_STATUS_BITS-1:0] = (miss_state == SCND_MISS) ? way_status_mb_scnd_ff[pt.ICACHE_STATUS_BITS-1:0] : {way_status[pt.ICACHE_STATUS_BITS-1:0]} ;
+   assign tagv_mb_scnd_in[pt.ICACHE_NUM_WAYS-1:0] = (miss_state == SCND_MISS) ? tagv_mb_scnd_ff[pt.ICACHE_NUM_WAYS-1:0] : ({ic_tag_valid[pt.ICACHE_NUM_WAYS-1:0]} & {pt.ICACHE_NUM_WAYS{~reset_all_tags}});
+   assign uncacheable_miss_scnd_in = sel_hold_imb_scnd ? uncacheable_miss_scnd_ff : ifc_fetch_uncacheable_bf ;
+
+
+   rvdff #(1)  unc_miss_scnd_ff    (.*, .clk(fetch_bf_f_c1_clk), .din (uncacheable_miss_scnd_in), .dout(uncacheable_miss_scnd_ff));
+   rvdff #(31) imb_f_scnd_ff       (.*, .clk(fetch_bf_f_c1_clk), .din ({imb_scnd_in[31:1]}), .dout({imb_scnd_ff[31:1]}));
+   rvdff #(pt.ICACHE_STATUS_BITS)  mb_rep_wayf2_scnd_ff (.*, .clk(fetch_bf_f_c1_clk), .din ({way_status_mb_scnd_in[pt.ICACHE_STATUS_BITS-1:0]}), .dout({way_status_mb_scnd_ff[pt.ICACHE_STATUS_BITS-1:0]}));
+   rvdff #(pt.ICACHE_NUM_WAYS)     mb_tagv_scnd_ff      (.*, .clk(fetch_bf_f_c1_clk), .din ({tagv_mb_scnd_in[pt.ICACHE_NUM_WAYS-1:0]}), .dout({tagv_mb_scnd_ff[pt.ICACHE_NUM_WAYS-1:0]}));
+
+
+
+
+   // Beat-select address bits [ICACHE_BEAT_ADDR_HI:3]: the request side uses the read
+   // address counter, the write side uses the registered bus response id, forced to zero
+   // when the registered write enable is low.
+   assign ic_req_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] = bus_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0] ;
+   assign ic_wr_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3]  = ifu_bus_rid_ff[pt.ICACHE_BEAT_BITS-1:0] & {pt.ICACHE_BEAT_BITS{bus_ifu_wr_en_ff}};
+   // NOTE: Cacheline size is 16 bytes in this example.
+   // Tag               Index                  Bank Offset
+   // [31:16]           [15:5]                 [4]  [3:0]
+
+
+   // Fetch request classification: an I-cache fetch is any fetch that is neither an
+   // ICCM access nor a region access fault.
+   assign fetch_req_icache_f   = ifc_fetch_req_f & ~ifc_iccm_access_f & ~ifc_region_acc_fault_final_f;
+   assign fetch_req_iccm_f     = ifc_fetch_req_f &  ifc_iccm_access_f;
+
+   // Hit / miss terms.
+   //  ic_iccm_hit_f        : ICCM fetch while no miss is pending, or in HIT_U_MISS / STREAM.
+   //  ic_byp_hit_f         : hit in the miss buffer (critical-word or stream) during a miss.
+   //  ic_act_hit_f         : tag hit in some way, tags not being reset, not reading with the
+   //                         miss-buffer address selected in the previous cycle.
+   //  ic_act_miss_f        : new miss (no way hit, or tags being reset) with nothing pending,
+   //                         or a second-miss request; suppressed by a region access fault.
+   //  ic_miss_under_miss_f : miss in HIT_U_MISS whose address bits [31:ICACHE_TAG_INDEX_LO]
+   //                         differ from the pending miss, pending miss cacheable.
+   //  ic_ignore_2nd_miss_f : miss in HIT_U_MISS to the same address bits, or while the
+   //                         pending miss is uncacheable.
+   //  ic_hit_f             : any of the hits; a faulting fetch request also reports a hit.
+   assign ic_iccm_hit_f        = fetch_req_iccm_f  &  (~miss_pending | (miss_state==HIT_U_MISS) | (miss_state==STREAM));
+   assign ic_byp_hit_f         = (crit_byp_hit_f | stream_hit_f)  & fetch_req_icache_f &  miss_pending ;
+   assign ic_act_hit_f         = (|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) & fetch_req_icache_f & ~reset_all_tags & (~miss_pending | (miss_state==HIT_U_MISS)) & ~sel_mb_addr_ff;
+   assign ic_act_miss_f        = (((~(|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) | reset_all_tags) & fetch_req_icache_f & ~miss_pending) | scnd_miss_req) & ~ifc_region_acc_fault_final_f;
+   assign ic_miss_under_miss_f = (~(|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) | reset_all_tags) & fetch_req_icache_f & (miss_state == HIT_U_MISS) &
+                                 (imb_ff[31:pt.ICACHE_TAG_INDEX_LO] != ifu_fetch_addr_int_f[31:pt.ICACHE_TAG_INDEX_LO]) & ~uncacheable_miss_ff & ~sel_mb_addr_ff & ~ifc_region_acc_fault_final_f;
+   assign ic_ignore_2nd_miss_f = (~(|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) | reset_all_tags) & fetch_req_icache_f & (miss_state == HIT_U_MISS) &
+                                 ((imb_ff[31:pt.ICACHE_TAG_INDEX_LO] == ifu_fetch_addr_int_f[31:pt.ICACHE_TAG_INDEX_LO])  |   uncacheable_miss_ff) ;
+   assign ic_hit_f             = ic_act_hit_f | ic_byp_hit_f | ic_iccm_hit_f | (ifc_region_acc_fault_final_f & ifc_fetch_req_f);
+
+   // Miss-buffer inputs, in priority order: a second-miss request loads the second-miss
+   // holding registers, then hold, otherwise take the new bf-stage value.
+   assign uncacheable_miss_in   = scnd_miss_req ? uncacheable_miss_scnd_ff : sel_hold_imb ? uncacheable_miss_ff : ifc_fetch_uncacheable_bf ;
+   assign imb_in[31:1]          = scnd_miss_req ? imb_scnd_ff[31:1]        : sel_hold_imb ? imb_ff[31:1] : {ifc_fetch_addr_bf[31:1]} ;
+
+   assign imb_scnd_in[31:1]     = sel_hold_imb_scnd ? imb_scnd_ff[31:1] : {ifc_fetch_addr_bf[31:1]} ;
+
+   // The second miss maps to the same index as the first one (and the fill saw no error):
+   // use the replacement status / way computed for the first miss instead of the stale
+   // snapshot taken when the second miss was recorded.
+   assign scnd_miss_index_match = (imb_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] == imb_scnd_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]) & scnd_miss_req & ~ifu_wr_cumulative_err_data;
+   assign way_status_mb_in[pt.ICACHE_STATUS_BITS-1:0] = (scnd_miss_req & ~scnd_miss_index_match) ? way_status_mb_scnd_ff[pt.ICACHE_STATUS_BITS-1:0] :
+                                                        (scnd_miss_req &  scnd_miss_index_match) ? way_status_rep_new[pt.ICACHE_STATUS_BITS-1:0] :
+                                                        miss_pending                             ? way_status_mb_ff[pt.ICACHE_STATUS_BITS-1:0] :
+                                                                                                   {way_status[pt.ICACHE_STATUS_BITS-1:0]} ;
+   assign tagv_mb_in[pt.ICACHE_NUM_WAYS-1:0]          = scnd_miss_req ? (tagv_mb_scnd_ff[pt.ICACHE_NUM_WAYS-1:0] | ({pt.ICACHE_NUM_WAYS {scnd_miss_index_match}} & replace_way_mb_any[pt.ICACHE_NUM_WAYS-1:0])) :
+                                                        miss_pending  ? tagv_mb_ff[pt.ICACHE_NUM_WAYS-1:0] : ({ic_tag_valid[pt.ICACHE_NUM_WAYS-1:0]} & {pt.ICACHE_NUM_WAYS{~reset_all_tags}}) ;
+
+   // Sticky while a miss is pending: set by reset_all_tags, held through reset_ic_ff,
+   // cleared when no miss is pending or on scnd_miss_req_q.
+   assign reset_ic_in           = miss_pending & ~scnd_miss_req_q & (reset_all_tags | reset_ic_ff) ;
+
+   rvdff #(1)  reset_ic_f    (.*, .clk(free_clk),   .din (reset_ic_in),              .dout(reset_ic_ff));
+   rvdff #(1)  uncache_ff    (.*, .clk(active_clk), .din (ifc_fetch_uncacheable_bf), .dout(fetch_uncacheable_ff));
+
+
+
+   // Registered copy of the bf-stage fetch address.
+   rvdff #(31) ifu_fetch_addr_f_ff (.*,
+                    .clk (fetch_bf_f_c1_clk),
+                    .din ({ifc_fetch_addr_bf[31:1]}),
+                    .dout({ifu_fetch_addr_int_f[31:1]}));
+
+   assign vaddr_f[pt.ICACHE_BEAT_ADDR_HI:1] = ifu_fetch_addr_int_f[pt.ICACHE_BEAT_ADDR_HI:1] ;
+
+   rvdff #(1)  unc_miss_ff   (.*, .clk(fetch_bf_f_c1_clk), .din (uncacheable_miss_in), .dout(uncacheable_miss_ff));
+   rvdff #(31) imb_f_ff      (.*, .clk(fetch_bf_f_c1_clk), .din ({imb_in[31:1]}),      .dout({imb_ff[31:1]}));
+
+
+   // Line address used for bus requests: follows imb_ff while idle, is reloaded from
+   // the second-miss address on scnd_miss_req_q, and holds otherwise.
+   assign miss_addr_in[31:pt.ICACHE_BEAT_ADDR_HI+1] = (~miss_pending   ) ? imb_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] :
+                                                      ( scnd_miss_req_q ) ? imb_scnd_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] : miss_addr[31:pt.ICACHE_BEAT_ADDR_HI+1] ;
+
+   rvdff #(31-pt.ICACHE_BEAT_ADDR_HI) miss_f_ff (.*, .clk(busclk_reset), .din ({miss_addr_in[31:pt.ICACHE_BEAT_ADDR_HI+1]}), .dout({miss_addr[31:pt.ICACHE_BEAT_ADDR_HI+1]}));
+
+
+
+   rvdff #(pt.ICACHE_STATUS_BITS) mb_rep_wayf2_ff (.*, .clk(fetch_bf_f_c1_clk), .din ({way_status_mb_in[pt.ICACHE_STATUS_BITS-1:0]}), .dout({way_status_mb_ff[pt.ICACHE_STATUS_BITS-1:0]}));
+
+   rvdff #(pt.ICACHE_NUM_WAYS)    mb_tagv_ff      (.*, .clk(fetch_bf_f_c1_clk), .din ({tagv_mb_in[pt.ICACHE_NUM_WAYS-1:0]}), .dout({tagv_mb_ff[pt.ICACHE_NUM_WAYS-1:0]}));
+
+   // The bf-stage request is dropped after a flush in CRIT_WRD_RDY and on a stream miss;
+   // the registered request is additionally killed by a flush in the f stage.
+   assign ifc_fetch_req_qual_bf  = ifc_fetch_req_bf  & ~((miss_state == CRIT_WRD_RDY) & flush_final_f) & ~stream_miss_f ;// & ~exu_flush_final ;
+   rvdff #(1) fetch_req_f_ff  (.*, .clk(active_clk), .din(ifc_fetch_req_qual_bf), .dout(ifc_fetch_req_f_raw));
+
+   assign ifc_fetch_req_f     = ifc_fetch_req_f_raw & ~exu_flush_final ;
+
+   rvdff #(1) ifu_iccm_acc_ff     (.*, .clk(fetch_bf_f_c1_clk), .din(ifc_iccm_access_bf), .dout(ifc_iccm_access_f));
+   rvdff #(1) ifu_iccm_reg_acc_ff (.*, .clk(fetch_bf_f_c1_clk), .din(ifc_region_acc_fault_final_bf), .dout(ifc_region_acc_fault_final_f));
+   rvdff #(1) rgn_acc_ff          (.*, .clk(fetch_bf_f_c1_clk), .din(ifc_region_acc_fault_bf), .dout(ifc_region_acc_fault_f));
+
+
+   // Bus request address: line address from miss_addr, beat select from the read counter.
+   assign ifu_ic_req_addr_f[31:3]  = {miss_addr[31:pt.ICACHE_BEAT_ADDR_HI+1] , ic_req_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] };
+   assign ifu_ic_mb_empty          = (((miss_state == HIT_U_MISS) | (miss_state == STREAM)) & ~(bus_ifu_wr_en_ff & last_beat)) |  ~miss_pending ;
+   assign ifu_miss_state_idle      = (miss_state == IDLE) ;
+
+
+   // I-cache read/write address: the miss-buffer address (with the write beat select)
+   // when a 16-byte fill write or a tag-valid reset is performed, otherwise the bf-stage
+   // fetch address.
+   assign sel_mb_addr  = ((miss_pending & write_ic_16_bytes & ~uncacheable_miss_ff) | reset_tag_valid_for_miss) ;
+   assign ifu_ic_rw_int_addr[31:1] = ({31{ sel_mb_addr}} &  {imb_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] , ic_wr_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] , imb_ff[2:1]})  |
+                                     ({31{~sel_mb_addr}} &  ifc_fetch_addr_bf[31:1] )   ;
+
+   // Status write address: the miss-buffer address only on the last-beat fill write (or
+   // a tag-valid reset), otherwise the registered fetch address.
+   assign sel_mb_status_addr  = ((miss_pending & write_ic_16_bytes & ~uncacheable_miss_ff & last_beat & bus_ifu_wr_en_ff_q) | reset_tag_valid_for_miss) ;
+   assign ifu_status_wr_addr[31:1]  = ({31{ sel_mb_status_addr}} &  {imb_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] , ic_wr_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] , imb_ff[2:1]})  |
+                                      ({31{~sel_mb_status_addr}} &  ifu_fetch_addr_int_f[31:1] )   ;
+
+
+   assign ic_rw_addr[31:1]      = ifu_ic_rw_int_addr[31:1] ;
+
+   rvdff #(1) sel_mb_ff  (.*, .clk(free_clk), .din (sel_mb_addr), .dout(sel_mb_addr_ff));
+
+// ECC variant: each 64-bit half carries 7 check bits, so a half is 71 bits and the
+// 16-byte write word is 142 bits.
+if (pt.ICACHE_ECC == 1) begin: icache_ecc_1
+   logic [6:0]       ic_wr_ecc;
+   logic [6:0]       ic_miss_buff_ecc;
+   logic [141:0]     ic_wr_16bytes_data ;
+   logic [70:0]      ifu_ic_debug_rd_data_in   ;
+
+   // Check bits for the half arriving from the bus and for the half read from the miss buffer.
+   rvecc_encode_64  ic_ecc_encode_64_bus  (
+                    .din    (ifu_bus_rdata_ff[63:0]),
+                    .ecc_out(ic_wr_ecc[6:0]));
+   rvecc_encode_64  ic_ecc_encode_64_buff (
+                    .din    (ic_miss_buff_half[63:0]),
+                    .ecc_out(ic_miss_buff_ecc[6:0]));
+
+   // Slice the write word into one 71-bit field per bank.
+   for (genvar i=0; i < 32'(pt.ICACHE_BANKS_WAY) ; i++) begin : ic_wr_data_loop
+      assign ic_wr_data[i][70:0]  =  ic_wr_16bytes_data[((71*i)+70): (71*i)];
+   end
+
+
+   assign ic_debug_wr_data[70:0]   = {dec_tlu_ic_diag_pkt.icache_wrdata[70:0]} ;
+   assign ic_error_start           = ((|ic_eccerr[pt.ICACHE_BANKS_WAY-1:0]) & ic_act_hit_f)  | ic_rd_parity_final_err;
+
+
+
+   // Debug read data: the tag-array view packs the tag read data, the way status and
+   // the tag-valid bit into 71 bits; otherwise the data-array read data is passed through.
+   assign ifu_ic_debug_rd_data_in[70:0] = ic_debug_ict_array_sel_ff ? {2'b0,ictag_debug_rd_data[25:21],32'b0,ictag_debug_rd_data[20:0],{7-pt.ICACHE_STATUS_BITS{1'b0}}, way_status[pt.ICACHE_STATUS_BITS-1:0],3'b0,ic_debug_tag_val_rd_out} :
+                                                                     ic_debug_rd_data[70:0];
+
+   rvdff #(71) ifu_debug_data_ff (.*, .clk (debug_data_clk),
+                    .din ({
+                           ifu_ic_debug_rd_data_in[70:0]
+                          }),
+                    .dout({
+                           ifu_ic_debug_rd_data[70:0]
+                          }));
+
+   // Bit 0 of the registered bus response id selects which half holds the bus data:
+   // upper half when set, lower half when clear. The other half comes from the miss buffer.
+   assign ic_wr_16bytes_data[141:0] =  ifu_bus_rid_ff[0] ? {ic_wr_ecc[6:0] ,  ifu_bus_rdata_ff[63:0] ,  ic_miss_buff_ecc[6:0] , ic_miss_buff_half[63:0] } :
+                                                           {ic_miss_buff_ecc[6:0] ,  ic_miss_buff_half[63:0] , ic_wr_ecc[6:0] , ifu_bus_rdata_ff[63:0] } ;
+
+
+end
+// Parity variant: each 64-bit half carries 4 parity bits (one per 16 data bits), so a
+// half is 68 bits and the 16-byte write word is 136 bits.
+else begin : icache_parity_1
+   logic [3:0]       ic_wr_parity;
+   logic [3:0]       ic_miss_buff_parity;
+   logic [135:0]     ic_wr_16bytes_data ;
+   logic [70:0]      ifu_ic_debug_rd_data_in   ;
+   for (genvar i=0 ; i < 4 ; i++) begin : DATA_PGEN
+      rveven_paritygen #(16) par_bus  (.data_in   (ifu_bus_rdata_ff[((16*i)+15):(16*i)]),
+                                       .parity_out(ic_wr_parity[i]));
+      rveven_paritygen #(16) par_buff  (.data_in   (ic_miss_buff_half[((16*i)+15):(16*i)]),
+                                        .parity_out(ic_miss_buff_parity[i]));
+   end
+
+
+   // Slice the write word into one 68-bit field per bank.
+   for (genvar i=0; i < pt.ICACHE_BANKS_WAY ; i++) begin : ic_wr_data_loop
+      assign ic_wr_data[i][67:0]  =  ic_wr_16bytes_data[((68*i)+67): (68*i)];
+   end
+
+
+
+
+
+   assign ic_debug_wr_data[70:0]   = {dec_tlu_ic_diag_pkt.icache_wrdata[70:0]} ;
+   assign ic_error_start           = ((|ic_parerr[pt.ICACHE_BANKS_WAY-1:0]) & ic_act_hit_f) | ic_rd_parity_final_err;
+
+   // Same debug packing as the ECC variant, except that only tag read bit 21 is placed
+   // in the upper field.
+   assign ifu_ic_debug_rd_data_in[70:0] = ic_debug_ict_array_sel_ff ? {6'b0,ictag_debug_rd_data[21],32'b0,ictag_debug_rd_data[20:0],{7-pt.ICACHE_STATUS_BITS{1'b0}},way_status[pt.ICACHE_STATUS_BITS-1:0],3'b0,ic_debug_tag_val_rd_out} :
+                                                                     ic_debug_rd_data[70:0] ;
+
+   rvdff #(71) ifu_debug_data_ff (.*, .clk (debug_data_clk),
+                    .din ({
+                           ifu_ic_debug_rd_data_in[70:0]
+                          }),
+                    .dout({
+                           ifu_ic_debug_rd_data[70:0]
+                          }));
+
+   assign ic_wr_16bytes_data[135:0] =  ifu_bus_rid_ff[0] ? {ic_wr_parity[3:0] ,  ifu_bus_rdata_ff[63:0] ,  ic_miss_buff_parity[3:0] , ic_miss_buff_half[63:0] } :
+                                                           {ic_miss_buff_parity[3:0] ,  ic_miss_buff_half[63:0] , ic_wr_parity[3:0] , ifu_bus_rdata_ff[63:0] } ;
+
+end
+
+
+   // Bus write data error accumulation: the cumulative error stays set across beats
+   // until reset_beat_cnt clears the registered copy.
+   assign ifu_wr_data_comb_err       =  bus_ifu_wr_data_error_ff ;
+   assign ifu_wr_cumulative_err      = (ifu_wr_data_comb_err | ifu_wr_data_comb_err_ff) & ~reset_beat_cnt;
+   assign ifu_wr_cumulative_err_data =  ifu_wr_data_comb_err | ifu_wr_data_comb_err_ff ;
+
+   rvdff #(1) cumul_err_ff (.*, .clk(free_clk), .din (ifu_wr_cumulative_err), .dout(ifu_wr_data_comb_err_ff));
+
+
+   // Data source selects. Bypass data is used when the critical word is ready or the FSM
+   // is in STREAM / CRIT_BYP_OK, unless the bypass data carries an error. Cache data is
+   // used in all other non-ICCM cases.
+   assign sel_byp_data     =  (ic_crit_wd_rdy | (miss_state == STREAM) | (miss_state == CRIT_BYP_OK)) & ~ifu_byp_data_err_new;
+   assign sel_ic_data      = ~(ic_crit_wd_rdy | (miss_state == STREAM) | (miss_state == CRIT_BYP_OK)) & ~fetch_req_iccm_f ;
+
+   // The four generate variants below are selected by build parameters; presumably exactly
+   // one of them is enabled in any configuration (TODO confirm against the parameter set).
+   if (pt.ICCM_ICACHE==1) begin: iccm_icache
+      assign sel_iccm_data    =  fetch_req_iccm_f  ;
+
+      assign ic_final_data[63:0]  = ({64{sel_byp_data | sel_iccm_data | sel_ic_data}} & {ic_rd_data[63:0]} ) ;
+
+      assign ic_premux_data[63:0] = ({64{sel_byp_data }} & {ic_byp_data_only_new[63:0]} ) |
+                                    ({64{sel_iccm_data}} & {iccm_rd_data[63:0]});
+
+      assign ic_sel_premux_data = sel_iccm_data | sel_byp_data ;
+   end
+
+if (pt.ICCM_ONLY == 1 ) begin: iccm_only
+   assign sel_iccm_data    =  fetch_req_iccm_f  ;
+   assign ic_final_data[63:0]  = ({64{sel_byp_data }} & {ic_byp_data_only_new[63:0]} ) |
+                                 ({64{sel_iccm_data}} & {iccm_rd_data[63:0]});
+   assign ic_premux_data = '0 ;
+   assign ic_sel_premux_data = '0 ;
+end
+
+if (pt.ICACHE_ONLY == 1 ) begin: icache_only
+   assign ic_final_data[63:0]  = ({64{sel_byp_data | sel_ic_data}} & {ic_rd_data[63:0]} ) ;
+   assign ic_premux_data[63:0] = ({64{sel_byp_data }} & {ic_byp_data_only_new[63:0]} ) ;
+   assign ic_sel_premux_data = sel_byp_data ;
+end
+
+
+if (pt.NO_ICCM_NO_ICACHE == 1 ) begin: no_iccm_no_icache
+   assign ic_final_data[63:0]  = ({64{sel_byp_data }} & {ic_byp_data_only_new[63:0]} ) ;
+   assign ic_premux_data = 0 ;
+   assign ic_sel_premux_data
= '0 ;
+end
+
+
+   // A bypass hit on miss-buffer data that was received with a bus error is reported
+   // as a bus access fault.
+   assign ifc_bus_acc_fault_f   =  ic_byp_hit_f & ifu_byp_data_err_new ;
+   assign ic_data_f[31:0]       =  ic_final_data[31:0];
+
+
+rvdff #(1) flush_final_ff (.*, .clk(free_clk), .din({exu_flush_final}), .dout({flush_final_f}));
+assign fetch_req_f_qual       = ic_hit_f & ~exu_flush_final;
+assign ic_access_fault_f      = (ifc_region_acc_fault_final_f | ifc_bus_acc_fault_f) & ~exu_flush_final;
+// Fault type, in priority order: ICCM double-bit ECC error (01), region access fault
+// (10), region memory access fault (11); 00 otherwise.
+assign ic_access_fault_type_f[1:0] = iccm_rd_ecc_double_err        ? 2'b01 :
+                                     ifc_region_acc_fault_f        ? 2'b10 :
+                                     ifc_region_acc_fault_memory_f ? 2'b11 : 2'b00 ;
+
+  // right justified
+
+// The second fetch-valid bit is dropped when the fetch address is the last halfword of
+// the line (vaddr_f all ones) and while the error-stop FSM is in ERR_FETCH2.
+assign ic_fetch_val_f[1] = fetch_req_f_qual & ifu_bp_inst_mask_f & ~(vaddr_f[pt.ICACHE_BEAT_ADDR_HI:1] == {pt.ICACHE_BEAT_ADDR_HI{1'b1}}) & (err_stop_state != ERR_FETCH2);
+assign ic_fetch_val_f[0] = fetch_req_f_qual ;
+assign two_byte_instr    =  (ic_data_f[1:0] != 2'b11 )  ;
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Create full buffer...                                                           //
+/////////////////////////////////////////////////////////////////////////////////////
+   logic [63:0]       ic_miss_buff_data_in;
+   assign ic_miss_buff_data_in[63:0] = ifu_bus_rsp_rdata[63:0];
+
+   // One miss-buffer entry per bus beat: two 32-bit data flops on a per-entry gated
+   // clock, plus a valid bit and an error bit on free_clk. An entry is written when the
+   // bus write enable is set and the response tag equals the entry number. Valid and
+   // error bits are cleared by a new miss (ic_act_miss_f) unless written in that cycle.
+   for (genvar i=0; i<32'(pt.ICACHE_NUM_BEATS); i++) begin :  wr_flop
+      assign write_fill_data[i]        =   bus_ifu_wr_en & (  (pt.IFU_BUS_TAG)'(i)  == ifu_bus_rsp_tag[pt.IFU_BUS_TAG-1:0]);
+      rvclkhdr data_c1_cgc ( .en(write_fill_data[i]), .l1clk(wr_data_c1_clk[i]), .* );
+      rvdff #(32) byp_data_0_ff (.*,
+                .clk (wr_data_c1_clk[i]),
+                .din (ic_miss_buff_data_in[31:0]),
+                .dout(ic_miss_buff_data[i*2][31:0]));
+
+      rvdff #(32) byp_data_1_ff (.*,
+                .clk (wr_data_c1_clk[i]),
+                .din (ic_miss_buff_data_in[63:32]),
+                .dout(ic_miss_buff_data[i*2+1][31:0]));
+
+      assign ic_miss_buff_data_valid_in[i]  = write_fill_data[i] ? 1'b1  : (ic_miss_buff_data_valid[i]  & ~ic_act_miss_f) ;
+      rvdff #(1) byp_data_valid_ff (.*,
+                .clk (free_clk),
+                .din (ic_miss_buff_data_valid_in[i]),
+                .dout(ic_miss_buff_data_valid[i]));
+
+      assign ic_miss_buff_data_error_in[i]  = write_fill_data[i] ? bus_ifu_wr_data_error  : (ic_miss_buff_data_error[i]  & ~ic_act_miss_f) ;
+      rvdff #(1) byp_data_error_ff (.*,
+                .clk (free_clk),
+                .din (ic_miss_buff_data_error_in[i] ),
+                .dout(ic_miss_buff_data_error[i]));
+   end
+
+/////////////////////////////////////////////////////////////////////////////////////
+// New bypass ready                                                                //
+/////////////////////////////////////////////////////////////////////////////////////
+   logic   [pt.ICACHE_BEAT_ADDR_HI:1]  bypass_index;
+   logic   [pt.ICACHE_BEAT_ADDR_HI:3]  bypass_index_5_3_inc;
+   logic                               bypass_data_ready_in;
+   logic                               ic_crit_wd_rdy_new_in;
+
+   assign bypass_index[pt.ICACHE_BEAT_ADDR_HI:1] = imb_ff[pt.ICACHE_BEAT_ADDR_HI:1] ;
+   assign bypass_index_5_3_inc[pt.ICACHE_BEAT_ADDR_HI:3] = bypass_index[pt.ICACHE_BEAT_ADDR_HI:3] + 1 ;
+
+
+   // Critical-word data is ready when the beat addressed by the miss address is valid.
+   // If the address points at the last halfword of a beat ([2:1] == 2'b11) the next beat
+   // must be valid as well, except for the last beat of the line, which has no next beat.
+   //
+   // FIX: the last-beat term compared bypass_index[ICACHE_BEAT_ADDR_HI:3], which is
+   // pt.ICACHE_BEAT_BITS wide, against {pt.ICACHE_BEAT_ADDR_HI{1'b1}}, which is two bits
+   // wider. The narrower operand is zero-extended for the comparison, so the term could
+   // never be true. It now uses {pt.ICACHE_BEAT_BITS{1'b1}}, matching the equivalent
+   // term of miss_buff_hit_unq_f below.
+   assign bypass_data_ready_in = ((ic_miss_buff_data_valid_in[bypass_index[pt.ICACHE_BEAT_ADDR_HI:3]]                                                     & ~bypass_index[2] & ~bypass_index[1]))   |
+                                 ((ic_miss_buff_data_valid_in[bypass_index[pt.ICACHE_BEAT_ADDR_HI:3]]                                                     & ~bypass_index[2] &  bypass_index[1]))   |
+                                 ((ic_miss_buff_data_valid_in[bypass_index[pt.ICACHE_BEAT_ADDR_HI:3]]                                                     &  bypass_index[2] & ~bypass_index[1]))   |
+                                 ((ic_miss_buff_data_valid_in[bypass_index[pt.ICACHE_BEAT_ADDR_HI:3]] & ic_miss_buff_data_valid_in[bypass_index_5_3_inc[pt.ICACHE_BEAT_ADDR_HI:3]] &  bypass_index[2] &  bypass_index[1]))   |
+                                 ((ic_miss_buff_data_valid_in[bypass_index[pt.ICACHE_BEAT_ADDR_HI:3]] & (bypass_index[pt.ICACHE_BEAT_ADDR_HI:3] == {pt.ICACHE_BEAT_BITS{1'b1}})))   ;
+
+
+
+   // Critical word ready: for an uncacheable miss the bypass data must be ready; for a
+   // cacheable miss the bypass-ok state is sufficient. Once set it is held while no
+   // I-cache fetch consumes it. A flush clears it in every case.
+   assign ic_crit_wd_rdy_new_in = (  bypass_data_ready_in & crit_wd_byp_ok_ff   &  uncacheable_miss_ff &  ~exu_flush_final & ~ifu_bp_hit_taken_q_f) |
+                                  (                         crit_wd_byp_ok_ff   & ~uncacheable_miss_ff &  ~exu_flush_final & ~ifu_bp_hit_taken_q_f) |
+                                  (ic_crit_wd_rdy_new_ff & ~fetch_req_icache_f & crit_wd_byp_ok_ff    &  ~exu_flush_final) ;
+
+   rvdff #(1) crit_wd_new_ff (.*, .clk(free_clk), .din(ic_crit_wd_rdy_new_in), .dout(ic_crit_wd_rdy_new_ff));
+
+   // Miss-buffer read indices derived from the registered fetch address:
+   //   *_index      halfword index, *_index_0 / *_index_1 the two 32-bit words of the
+   //   addressed beat, *_inc* the same for the following beat (the increment wraps).
+   assign byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:1]          =    ifu_fetch_addr_int_f[pt.ICACHE_BEAT_ADDR_HI:1] ;
+   assign byp_fetch_index_0[pt.ICACHE_BEAT_ADDR_HI:2]        =   {ifu_fetch_addr_int_f[pt.ICACHE_BEAT_ADDR_HI:3],1'b0} ;
+   assign byp_fetch_index_1[pt.ICACHE_BEAT_ADDR_HI:2]        =   {ifu_fetch_addr_int_f[pt.ICACHE_BEAT_ADDR_HI:3],1'b1} ;
+   assign byp_fetch_index_inc[pt.ICACHE_BEAT_ADDR_HI:3]      =    ifu_fetch_addr_int_f[pt.ICACHE_BEAT_ADDR_HI:3]+1'b1 ;
+   assign byp_fetch_index_inc_0[pt.ICACHE_BEAT_ADDR_HI:2]    =   {byp_fetch_index_inc[pt.ICACHE_BEAT_ADDR_HI:3], 1'b0} ;
+   assign byp_fetch_index_inc_1[pt.ICACHE_BEAT_ADDR_HI:2]    =   {byp_fetch_index_inc[pt.ICACHE_BEAT_ADDR_HI:3], 1'b1} ;
+
+   // Bypass data error: the error bit of the addressed beat, plus the error bit of the
+   // following beat when the fetch starts at the last halfword of the beat.
+   assign ifu_byp_data_err_new = (~ifu_fetch_addr_int_f[2] & ~ifu_fetch_addr_int_f[1] &  ic_miss_buff_data_error[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]]  ) |
+                                 (~ifu_fetch_addr_int_f[2] &  ifu_fetch_addr_int_f[1] &  ic_miss_buff_data_error[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]]  ) |
+                                 ( ifu_fetch_addr_int_f[2] & ~ifu_fetch_addr_int_f[1] &  ic_miss_buff_data_error[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]]  ) |
+                                 ( ifu_fetch_addr_int_f[2] &  ifu_fetch_addr_int_f[1] & (ic_miss_buff_data_error[byp_fetch_index_inc[pt.ICACHE_BEAT_ADDR_HI:3]] | ic_miss_buff_data_error[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]] )) ;
+
+   // 80-bit bypass window starting at the addressed 32-bit word (address bit 2 selects
+   // the starting word), then shifted right by 16 when address bit 1 is set.
+   assign ic_byp_data_only_pre_new[79:0] =  ({80{~ifu_fetch_addr_int_f[2]}} & {ic_miss_buff_data[byp_fetch_index_inc_0][15:0],ic_miss_buff_data[byp_fetch_index_1][31:0] , ic_miss_buff_data[byp_fetch_index_0][31:0]}) |
+                                            ({80{ ifu_fetch_addr_int_f[2]}} & {ic_miss_buff_data[byp_fetch_index_inc_1][15:0],ic_miss_buff_data[byp_fetch_index_inc_0][31:0] , ic_miss_buff_data[byp_fetch_index_1][31:0]}) ;
+
+   assign ic_byp_data_only_new[79:0]      = ~ifu_fetch_addr_int_f[1] ? {ic_byp_data_only_pre_new[79:0]} :
+                                                                       {16'b0,ic_byp_data_only_pre_new[79:16]} ;
+
+   // The fetch address left the line held in the miss buffer (lowest tag-index bit differs).
+   assign miss_wrap_f      =  (imb_ff[pt.ICACHE_TAG_INDEX_LO] != ifu_fetch_addr_int_f[pt.ICACHE_TAG_INDEX_LO] ) ;
+
+   // Miss-buffer hit for the current fetch address: same rule as bypass_data_ready_in,
+   // evaluated on the registered valid bits and the fetch index.
+   assign miss_buff_hit_unq_f = ((ic_miss_buff_data_valid[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]]                                                  & ~byp_fetch_index[2] & ~byp_fetch_index[1]))   |
+                                ((ic_miss_buff_data_valid[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]]                                                  & ~byp_fetch_index[2] &  byp_fetch_index[1]))   |
+                                ((ic_miss_buff_data_valid[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]]                                                  &  byp_fetch_index[2] & ~byp_fetch_index[1]))   |
+                                ((ic_miss_buff_data_valid[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]] & ic_miss_buff_data_valid[byp_fetch_index_inc[pt.ICACHE_BEAT_ADDR_HI:3]] &  byp_fetch_index[2] &  byp_fetch_index[1]))   |
+                                ((ic_miss_buff_data_valid[byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3]] & (byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:3] == {pt.ICACHE_BEAT_BITS{1'b1}})))   ;
+
+   // Stream hit / miss / end-of-line are only meaningful in the STREAM state. End of
+   // line is a stream hit on the last 32-bit word of the line.
+   assign stream_hit_f     =  (miss_buff_hit_unq_f & ~miss_wrap_f ) & (miss_state==STREAM) ;
+   assign stream_miss_f    = ~(miss_buff_hit_unq_f & ~miss_wrap_f ) & (miss_state==STREAM) & ifc_fetch_req_f;
+   assign stream_eol_f     =  (byp_fetch_index[pt.ICACHE_BEAT_ADDR_HI:2] == {pt.ICACHE_BEAT_BITS+1{1'b1}}) & ifc_fetch_req_f & stream_hit_f;
+
+   assign crit_byp_hit_f   =  (miss_buff_hit_unq_f ) & ((miss_state == CRIT_WRD_RDY) | (miss_state==CRIT_BYP_OK)) ;
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Figure out if you have the data to write.
// +///////////////////////////////////////////////////////////////////////////////////// + +assign other_tag[pt.IFU_BUS_TAG-1:0] = {ifu_bus_rid_ff[pt.IFU_BUS_TAG-1:1] , ~ifu_bus_rid_ff[0] } ; // ifu_bus_rid_ff with bit 0 inverted +assign second_half_available = ic_miss_buff_data_valid[other_tag] ; // valid bit of the miss-buffer entry selected by other_tag +assign write_ic_16_bytes = second_half_available & bus_ifu_wr_en_ff ; +assign ic_miss_buff_half[63:0] = {ic_miss_buff_data[{other_tag,1'b1}],ic_miss_buff_data[{other_tag,1'b0}] } ; // the two miss-buffer words addressed by other_tag + + +///////////////////////////////////////////////////////////////////////////////////// +// Parity checking logic for the Icache. // +///////////////////////////////////////////////////////////////////////////////////// + + +assign ic_rd_parity_final_err = ic_tag_perr & sel_ic_data & ~(ifc_region_acc_fault_final_f | ifc_bus_acc_fault_f) ; + +logic [pt.ICACHE_NUM_WAYS-1:0] perr_err_inv_way; +logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] perr_ic_index_ff; +logic perr_sel_invalidate; +logic perr_sb_write_status ; + + + + rvdffs #(pt.ICACHE_INDEX_HI-pt.ICACHE_TAG_INDEX_LO+1) perr_dat_ff (.clk(active_clk), .din(ifu_ic_rw_int_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]), .dout(perr_ic_index_ff[pt.ICACHE_INDEX_HI : pt.ICACHE_TAG_INDEX_LO]), .en(perr_sb_write_status), .*); // index is captured when the error state machine leaves ERR_IDLE + + assign perr_err_inv_way[pt.ICACHE_NUM_WAYS-1:0] = {pt.ICACHE_NUM_WAYS{perr_sel_invalidate}} ; // every way is selected together when perr_sel_invalidate is set + assign iccm_correct_ecc = (perr_state == ECC_CORR); + assign dma_sb_err_state = (perr_state == DMA_SB_ERR); + assign iccm_buf_correct_ecc = iccm_correct_ecc & ~dma_sb_err_state_ff; + rvdff #((1)) dma_sb_err_ff (.clk(active_clk), .din(dma_sb_err_state), .dout(dma_sb_err_state_ff), .*); + + + + //////////////////////////////////// Create Parity Error State Machine /////////////////////// + // Create Parity Error State Machine // + // Create Parity Error State Machine // + // Create Parity Error State Machine // + //////////////////////////////////// Create Parity Error State Machine /////////////////////// + + + // Parity/ECC error state machine: combinational next-state and output decode for perr_state + always_comb begin : 
ERROR_SM + perr_nxtstate = ERR_IDLE; + perr_state_en = 1'b0; + perr_sb_write_status = 1'b0; + perr_sel_invalidate = 1'b0; + + case (perr_state) + ERR_IDLE: begin : err_idle // priority: DMA single-bit error, then unflushed I$ error, otherwise the ICCM ECC path + perr_nxtstate = iccm_dma_sb_error ? DMA_SB_ERR : (ic_error_start & ~exu_flush_final) ? IC_WFF : ECC_WFF; + perr_state_en = (((iccm_error_start | ic_error_start) & ~exu_flush_final) | iccm_dma_sb_error) & ~dec_tlu_force_halt; + perr_sb_write_status = perr_state_en; + end + IC_WFF: begin : icache_wff // All the I$ data and/or Tag errors ( parity/ECC ) will come to this state; it waits for the lower flush or a force-halt + perr_nxtstate = ERR_IDLE ; + perr_state_en = dec_tlu_flush_lower_wb | dec_tlu_force_halt ; + perr_sel_invalidate = (dec_tlu_flush_err_wb & dec_tlu_flush_lower_wb); + end + ECC_WFF: begin : ecc_wff // waits for the lower flush: an error flush goes on to ECC_CORR, a non-error flush or force-halt returns to idle + perr_nxtstate = ((~dec_tlu_flush_err_wb & dec_tlu_flush_lower_wb ) | dec_tlu_force_halt) ? ERR_IDLE : ECC_CORR ; + perr_state_en = dec_tlu_flush_lower_wb | dec_tlu_force_halt ; + end + DMA_SB_ERR : begin : dma_sb_ecc // always advances: to ECC_CORR, or to idle when force-halted + perr_nxtstate = dec_tlu_force_halt ? 
ERR_IDLE : ECC_CORR; + perr_state_en = 1'b1; + end + ECC_CORR: begin : ecc_corr // always returns to idle on the next enabled clock; iccm_correct_ecc is decoded from this state + perr_nxtstate = ERR_IDLE ; + perr_state_en = 1'b1 ; + end + default: begin : def_case + perr_nxtstate = ERR_IDLE; + perr_state_en = 1'b0; + perr_sb_write_status = 1'b0; + perr_sel_invalidate = 1'b0; + end + endcase + end + rvdffs #(($bits(perr_state_t))) perr_state_ff (.clk(free_clk), .din(perr_nxtstate), .dout({perr_state}), .en(perr_state_en), .*); + + //////////////////////////////////// Create stop fetch State Machine ///////////////////////// + //////////////////////////////////// Create stop fetch State Machine ///////////////////////// + //////////////////////////////////// Create stop fetch State Machine ///////////////////////// + //////////////////////////////////// Create stop fetch State Machine ///////////////////////// + //////////////////////////////////// Create stop fetch State Machine ///////////////////////// + always_comb begin : ERROR_STOP_FETCH + err_stop_nxtstate = ERR_STOP_IDLE; + err_stop_state_en = 1'b0; + err_stop_fetch = 1'b0; + iccm_correction_state = 1'b0; + + case (err_stop_state) + ERR_STOP_IDLE: begin : err_stop_idle // leaves idle on an error flush while perr_state is ECC_WFF + err_stop_nxtstate = ERR_FETCH1; + err_stop_state_en = dec_tlu_flush_err_wb & (perr_state == ECC_WFF) & ~dec_tlu_force_halt; + end + ERR_FETCH1: begin : err_fetch1 // flush, commit or force-halt return to idle; both fetch halves valid (or a valid two_byte_instr) go to ERR_STOP_FETCH; only ifu_fetch_val[0] valid goes to ERR_FETCH2 + err_stop_nxtstate = (dec_tlu_flush_lower_wb | dec_tlu_i0_commit_cmt | dec_tlu_force_halt) ? ERR_STOP_IDLE : ((ifu_fetch_val[1:0] == 2'b11) | (ifu_fetch_val[0] & two_byte_instr)) ? ERR_STOP_FETCH : ifu_fetch_val[0] ? 
ERR_FETCH2 : ERR_FETCH1; + err_stop_state_en = dec_tlu_flush_lower_wb | dec_tlu_i0_commit_cmt | ifu_fetch_val[0] | ifu_bp_hit_taken_q_f | dec_tlu_force_halt; + err_stop_fetch = ((ifu_fetch_val[1:0] == 2'b11) | (ifu_fetch_val[0] & two_byte_instr)) & ~(exu_flush_final | dec_tlu_i0_commit_cmt); + iccm_correction_state = 1'b1; + + end + ERR_FETCH2: begin : err_fetch2 // entered from ERR_FETCH1 when ifu_fetch_val[0] was valid without meeting the stop condition; the next valid ifu_fetch_val[0] moves to ERR_STOP_FETCH, while flush, commit or force-halt return to idle + err_stop_nxtstate = (dec_tlu_flush_lower_wb | dec_tlu_i0_commit_cmt | dec_tlu_force_halt) ? ERR_STOP_IDLE : ifu_fetch_val[0] ? ERR_STOP_FETCH : ERR_FETCH2; + err_stop_state_en = dec_tlu_flush_lower_wb | dec_tlu_i0_commit_cmt | ifu_fetch_val[0] | dec_tlu_force_halt ; + err_stop_fetch = ifu_fetch_val[0] & ~exu_flush_final & ~dec_tlu_i0_commit_cmt ; + iccm_correction_state = 1'b1; + + end + ERR_STOP_FETCH: begin : ecc_wff // NOTE: the block label reads ecc_wff but this arm is ERR_STOP_FETCH; err_stop_fetch is held high until a flush, a commit or a force-halt + err_stop_nxtstate = ( (dec_tlu_flush_lower_wb & ~dec_tlu_flush_err_wb) | dec_tlu_i0_commit_cmt | dec_tlu_force_halt) ? ERR_STOP_IDLE : dec_tlu_flush_err_wb ? 
ERR_FETCH1 : ERR_STOP_FETCH ; + err_stop_state_en = dec_tlu_flush_lower_wb | dec_tlu_i0_commit_cmt | dec_tlu_force_halt ; + err_stop_fetch = 1'b1; + iccm_correction_state = 1'b1; + + end + default: begin : def_case // NOTE(review): iccm_correction_state is driven high here although the pre-case default is low - confirm this is intended + err_stop_nxtstate = ERR_STOP_IDLE; + err_stop_state_en = 1'b0; + err_stop_fetch = 1'b0 ; + iccm_correction_state = 1'b1; + + end + endcase + end + rvdffs #(($bits(err_stop_state_t))) err_stop_state_ff (.clk(free_clk), .din(err_stop_nxtstate), .dout({err_stop_state}), .en(err_stop_state_en), .*); + + + + assign bus_ifu_bus_clk_en = ifu_bus_clk_en ; // enable for the gated bus clocks below + + rvclkhdr bus_clk_f(.en(bus_ifu_bus_clk_en), + .l1clk(busclk), .*); + + rvclkhdr bus_clk(.en(bus_ifu_bus_clk_en | dec_tlu_force_halt), // busclk_force is also enabled by dec_tlu_force_halt + .l1clk(busclk_force), .*); + + rvdff #(1) bus_clken_ff (.*, .clk(free_clk), .din(bus_ifu_bus_clk_en), .dout(bus_ifu_bus_clk_en_ff)); + rvdff #(1) scnd_mss_req_ff (.*, .clk(free_clk), .din(scnd_miss_req_in), .dout(scnd_miss_req_q)); + rvdff #(1) scnd_mss_req_ff2 (.*, .clk(free_clk), .din(scnd_miss_req), .dout(scnd_miss_req_ff2)); + assign scnd_miss_req = scnd_miss_req_q & ~exu_flush_final; // a registered second-miss request is dropped by exu_flush_final + + assign ifc_bus_ic_req_ff_in = (ic_act_miss_f | bus_cmd_req_hold | ifu_bus_cmd_valid) & ~dec_tlu_force_halt & ~((bus_cmd_beat_count== {pt.ICACHE_BEAT_BITS{1'b1}}) & ifu_bus_cmd_valid & ifu_bus_cmd_ready & miss_pending); // valid is held until the command with an all-ones beat count is accepted + rvdff #(1) bus_ic_req_ff2(.*, .clk(busclk_force), .din(ifc_bus_ic_req_ff_in), .dout(ifu_bus_cmd_valid)); + + assign bus_cmd_req_in = (ic_act_miss_f | bus_cmd_req_hold) & ~bus_cmd_sent & ~dec_tlu_force_halt ; // hold until first command sent + // changes for making the bus blocking + rvdff #(1) bus_cmd_req_ff (.*, .clk(free_clk), .din(bus_cmd_req_in), .dout(bus_cmd_req_hold)); + + + // AXI command signals + // Read Channel + assign ifu_axi_arvalid = ifu_bus_cmd_valid ; + assign ifu_axi_arid[pt.IFU_BUS_TAG-1:0] = ((pt.IFU_BUS_TAG)'(bus_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0])) & {pt.IFU_BUS_TAG{ifu_bus_cmd_valid}}; // read id is the beat address count, forced to zero while no command is valid + assign ifu_axi_araddr[31:0] = 
{ifu_ic_req_addr_f[31:3],3'b0} & {32{ifu_bus_cmd_valid}}; + assign ifu_axi_arsize[2:0] = 3'b011; + assign ifu_axi_arprot[2:0] = '0; + assign ifu_axi_arcache[3:0] = 4'b1111; + assign ifu_axi_arregion[3:0] = ifu_ic_req_addr_f[31:28]; + assign ifu_axi_arlen[7:0] = '0; + assign ifu_axi_arburst[1:0] = 2'b01; + assign ifu_axi_arqos[3:0] = '0; + assign ifu_axi_arlock = '0; + assign ifu_axi_rready = 1'b1; + + // Write Channel + assign ifu_axi_awvalid = '0 ; + assign ifu_axi_awid[pt.IFU_BUS_TAG-1:0] = '0 ; + assign ifu_axi_awaddr[31:0] = '0 ; + assign ifu_axi_awsize[2:0] = '0 ; + assign ifu_axi_awprot[2:0] = '0; + assign ifu_axi_awcache[3:0] = '0 ; + assign ifu_axi_awregion[3:0] = '0 ; + assign ifu_axi_awlen[7:0] = '0; + assign ifu_axi_awburst[1:0] = '0 ; + assign ifu_axi_awqos[3:0] = '0; + assign ifu_axi_awlock = '0; + + assign ifu_axi_wvalid = '0; + assign ifu_axi_wstrb[7:0] = '0; + assign ifu_axi_wdata[63:0] = '0; + assign ifu_axi_wlast = '0; + assign ifu_axi_bready = '0; + + + assign ifu_bus_arready_unq = ifu_axi_arready ; + assign ifu_bus_rvalid_unq = ifu_axi_rvalid ; + assign ifu_bus_arvalid = ifu_axi_arvalid ; + + rvdff #(1) bus_rdy_ff (.*, .clk(busclk), .din(ifu_bus_arready_unq), .dout(ifu_bus_arready_unq_ff)); + rvdff #(1) bus_rsp_vld_ff (.*, .clk(busclk), .din(ifu_bus_rvalid_unq), .dout(ifu_bus_rvalid_unq_ff)); + rvdff #(1) bus_cmd_ff (.*, .clk(busclk), .din(ifu_bus_arvalid), .dout(ifu_bus_arvalid_ff)); + rvdff #(2) bus_rsp_cmd_ff (.*, .clk(busclk), .din(ifu_axi_rresp[1:0]), .dout(ifu_bus_rresp_ff[1:0])); + rvdff #(64) bus_data_ff (.*, .clk(busclk), .din(ifu_axi_rdata[63:0]), .dout(ifu_bus_rdata_ff[63:0])); + rvdff #(pt.IFU_BUS_TAG) bus_rsp_tag_ff (.*, .clk(busclk), .din(ifu_axi_rid[pt.IFU_BUS_TAG-1:0]),.dout(ifu_bus_rid_ff[pt.IFU_BUS_TAG-1:0])); + + assign ifu_bus_cmd_ready = ifu_axi_arready ; + assign ifu_bus_rsp_valid = ifu_axi_rvalid ; + assign ifu_bus_rsp_ready = ifu_axi_rready ; + assign ifu_bus_rsp_tag[pt.IFU_BUS_TAG-1:0] = ifu_axi_rid[pt.IFU_BUS_TAG-1:0] ; 
+ assign ifu_bus_rsp_rdata[63:0] = ifu_axi_rdata[63:0] ; + assign ifu_bus_rsp_opc[1:0] = {ifu_axi_rresp[1:0]} ; + + + + + + + + + + + + + // Create write signals so we can write to the miss-buffer directly from + // the bus. + + assign ifu_bus_rvalid = ifu_bus_rsp_valid & bus_ifu_bus_clk_en ; // bus handshakes are only observed while the bus clock enable is high + + + + assign ifu_bus_arready = ifu_bus_arready_unq & bus_ifu_bus_clk_en ; + assign ifu_bus_arready_ff = ifu_bus_arready_unq_ff & bus_ifu_bus_clk_en_ff ; + + assign ifu_bus_rvalid_ff = ifu_bus_rvalid_unq_ff & bus_ifu_bus_clk_en_ff ; + assign bus_cmd_sent = ifu_bus_arvalid & ifu_bus_arready & miss_pending & ~dec_tlu_force_halt; + assign bus_inc_data_beat_cnt = (bus_ifu_wr_en_ff & ~bus_last_data_beat & ~dec_tlu_force_halt) ; + assign bus_reset_data_beat_cnt = ic_act_miss_f | (bus_ifu_wr_en_ff & bus_last_data_beat) | dec_tlu_force_halt; + assign bus_hold_data_beat_cnt = ~bus_inc_data_beat_cnt & ~bus_reset_data_beat_cnt ; + + assign bus_new_data_beat_count[pt.ICACHE_BEAT_BITS-1:0] = ({pt.ICACHE_BEAT_BITS{bus_reset_data_beat_cnt}} & (pt.ICACHE_BEAT_BITS)'(0)) | + ({pt.ICACHE_BEAT_BITS{bus_inc_data_beat_cnt}} & (bus_data_beat_count[pt.ICACHE_BEAT_BITS-1:0] + {{pt.ICACHE_BEAT_BITS-1{1'b0}},1'b1})) | + ({pt.ICACHE_BEAT_BITS{bus_hold_data_beat_cnt}} & bus_data_beat_count[pt.ICACHE_BEAT_BITS-1:0]); + + rvdff #(pt.ICACHE_BEAT_BITS) bus_mb_beat_count_ff (.*, .clk(free_clk), .din ({bus_new_data_beat_count[pt.ICACHE_BEAT_BITS-1:0]}), .dout({bus_data_beat_count[pt.ICACHE_BEAT_BITS-1:0]})); + + assign last_data_recieved_in = (bus_ifu_wr_en_ff & bus_last_data_beat & ~scnd_miss_req) | (last_data_recieved_ff & ~ic_act_miss_f) ; // set on the last data beat, held until the next ic_act_miss_f + rvdff #(1) last_beat_ff (.*, .clk(free_clk), .din (last_data_recieved_in), .dout(last_data_recieved_ff)); + + +// Request Address Count: loaded from imb_ff while no miss is pending, from imb_scnd_ff on scnd_miss_req_q, incremented on every bus_cmd_sent, otherwise held + assign bus_new_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0] = (~miss_pending ) ? imb_ff[pt.ICACHE_BEAT_ADDR_HI:3] : + ( scnd_miss_req_q ) ? imb_scnd_ff[pt.ICACHE_BEAT_ADDR_HI:3] : + ( bus_cmd_sent ) ? 
(bus_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0] + {{pt.ICACHE_BEAT_BITS-1{1'b0}},1'b1}) : // increment is sized to the counter width (was a fixed 3-bit literal), matching the beat counters + bus_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0]; + + rvdff #(pt.ICACHE_BEAT_BITS) bus_rd_addr_ff (.*, .clk(busclk_reset), .din ({bus_new_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0]}), .dout({bus_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0]})); + + + +// command beat Count + assign bus_inc_cmd_beat_cnt = ifu_bus_cmd_valid & ifu_bus_cmd_ready & miss_pending & ~dec_tlu_force_halt; + assign bus_reset_cmd_beat_cnt_0 = (ic_act_miss_f & ~uncacheable_miss_in) | dec_tlu_force_halt ; // cacheable miss or force-halt restarts the count at zero + assign bus_reset_cmd_beat_cnt_secondlast = ic_act_miss_f & uncacheable_miss_in ; // uncacheable miss starts the count at pt.ICACHE_SCND_LAST + assign bus_hold_cmd_beat_cnt = ~bus_inc_cmd_beat_cnt & ~(ic_act_miss_f | scnd_miss_req | dec_tlu_force_halt) ; + assign bus_cmd_beat_en = bus_inc_cmd_beat_cnt | ic_act_miss_f | dec_tlu_force_halt; + + assign bus_new_cmd_beat_count[pt.ICACHE_BEAT_BITS-1:0] = ({pt.ICACHE_BEAT_BITS{bus_reset_cmd_beat_cnt_0}} & (pt.ICACHE_BEAT_BITS)'(0) ) | + ({pt.ICACHE_BEAT_BITS{bus_reset_cmd_beat_cnt_secondlast}} & (pt.ICACHE_BEAT_BITS)'(pt.ICACHE_SCND_LAST)) | + ({pt.ICACHE_BEAT_BITS{bus_inc_cmd_beat_cnt}} & (bus_cmd_beat_count[pt.ICACHE_BEAT_BITS-1:0] + {{pt.ICACHE_BEAT_BITS-1{1'b0}}, 1'b1})) | + ({pt.ICACHE_BEAT_BITS{bus_hold_cmd_beat_cnt}} & bus_cmd_beat_count[pt.ICACHE_BEAT_BITS-1:0]) ; + + rvclkhdr bus_clk_reset(.en(bus_ifu_bus_clk_en | ic_act_miss_f | dec_tlu_force_halt), // busclk_reset is also enabled by ic_act_miss_f and dec_tlu_force_halt + .l1clk(busclk_reset), .*); + + + rvdffs #(pt.ICACHE_BEAT_BITS) bus_cmd_beat_ff (.*, .clk(busclk_reset), .en (bus_cmd_beat_en), .din ({bus_new_cmd_beat_count[pt.ICACHE_BEAT_BITS-1:0]}), + .dout({bus_cmd_beat_count[pt.ICACHE_BEAT_BITS-1:0]})); + + + assign bus_last_data_beat = uncacheable_miss_ff ? 
(bus_data_beat_count[pt.ICACHE_BEAT_BITS-1:0] == {{pt.ICACHE_BEAT_BITS-1{1'b0}},1'b1}) : (&bus_data_beat_count[pt.ICACHE_BEAT_BITS-1:0]); + + assign bus_ifu_wr_en = ifu_bus_rvalid & miss_pending ; + assign bus_ifu_wr_en_ff = ifu_bus_rvalid_ff & miss_pending ; + assign bus_ifu_wr_en_ff_q = ifu_bus_rvalid_ff & miss_pending & ~uncacheable_miss_ff & ~(|ifu_bus_rresp_ff[1:0]) & write_ic_16_bytes; // qualify with no-error conditions ; + assign bus_ifu_wr_en_ff_wo_err = ifu_bus_rvalid_ff & miss_pending & ~uncacheable_miss_ff; + + + rvdff #(1) act_miss_ff (.*, .clk(free_clk), .din (ic_act_miss_f), .dout(ic_act_miss_f_delayed)); + assign reset_tag_valid_for_miss = ic_act_miss_f_delayed & (miss_state == CRIT_BYP_OK) & ~uncacheable_miss_ff; + assign bus_ifu_wr_data_error = |ifu_bus_rsp_opc[1:0] & ifu_bus_rvalid & miss_pending; + assign bus_ifu_wr_data_error_ff = |ifu_bus_rresp_ff[1:0] & ifu_bus_rvalid_ff & miss_pending; + + rvdff #(1) dma_ok_prev_ff (.*, .clk(free_clk), .din(ifc_dma_access_ok_d), .dout(ifc_dma_access_ok_prev)); + + assign ic_crit_wd_rdy = ic_crit_wd_rdy_new_in | ic_crit_wd_rdy_new_ff ; + assign last_beat = bus_last_data_beat & bus_ifu_wr_en_ff; + assign reset_beat_cnt = bus_reset_data_beat_cnt ; + +// DMA + // Making sure that the dma_access is allowed when we have 2 back to back dma_access_ok. 
Also gating with current state == idle + assign ifc_dma_access_ok_d = ifc_dma_access_ok & ~iccm_correct_ecc & ~iccm_dma_sb_error; + assign ifc_dma_access_q_ok = ifc_dma_access_ok & ~iccm_correct_ecc & ifc_dma_access_ok_prev & (perr_state == ERR_IDLE) & ~iccm_dma_sb_error; + assign iccm_ready = ifc_dma_access_q_ok ; + rvdff #(1) dma_req_ff (.*, .clk(free_clk), .din (dma_iccm_req), .dout(dma_iccm_req_f)); + + logic [1:0] iccm_ecc_word_enable; + + if (pt.ICCM_ENABLE == 1 ) begin: iccm_enabled + logic [3:2] dma_mem_addr_ff ; + logic iccm_dma_rden ; + + logic iccm_dma_ecc_error_in; + logic [13:0] dma_mem_ecc; + logic [63:0] iccm_dma_rdata_in; + logic [31:0] iccm_dma_rdata_1_muxed; + logic [1:0] [31:0] iccm_corrected_data; + logic [1:0] [06:0] iccm_corrected_ecc; + + + logic [1:0] iccm_double_ecc_error; + + + logic [pt.ICCM_BITS-1:2] iccm_rw_addr_f; + + logic [31:0] iccm_corrected_data_f_mux; + logic [06:0] iccm_corrected_ecc_f_mux; + logic iccm_dma_rvalid_in; + logic [77:0] iccm_rdmux_data; + logic iccm_rd_ecc_single_err_hold_in ; + logic [2:0] dma_mem_tag_ff; + + + + + assign iccm_wren = (ifc_dma_access_q_ok & dma_iccm_req & dma_mem_write) | iccm_correct_ecc; + assign iccm_rden = (ifc_dma_access_q_ok & dma_iccm_req & ~dma_mem_write) | (ifc_iccm_access_bf & ifc_fetch_req_bf); + assign iccm_dma_rden = (ifc_dma_access_q_ok & dma_iccm_req & ~dma_mem_write) ; + assign iccm_wr_size[2:0] = {3{dma_iccm_req}} & dma_mem_sz[2:0] ; + + rvecc_encode iccm_ecc_encode0 ( + .din(dma_mem_wdata[31:0]), + .ecc_out(dma_mem_ecc[6:0])); + + rvecc_encode iccm_ecc_encode1 ( + .din(dma_mem_wdata[63:32]), + .ecc_out(dma_mem_ecc[13:7])); + + assign iccm_wr_data[77:0] = (iccm_correct_ecc & ~(ifc_dma_access_q_ok & dma_iccm_req)) ? {iccm_ecc_corr_data_ff[38:0], iccm_ecc_corr_data_ff[38:0]} : + {dma_mem_ecc[13:7],dma_mem_wdata[63:32], dma_mem_ecc[6:0],dma_mem_wdata[31:0]}; + + assign iccm_dma_rdata_1_muxed[31:0] = dma_mem_addr_ff[2] ? 
iccm_corrected_data[0][31:0] : iccm_corrected_data[1][31:0] ; + assign iccm_dma_rdata_in[63:0] = iccm_dma_ecc_error_in ? {2{dma_mem_addr[31:0]}} : {iccm_dma_rdata_1_muxed[31:0], iccm_corrected_data[0]}; + assign iccm_dma_ecc_error_in = |(iccm_double_ecc_error[1:0]); + + rvdff #(3) dma_tag_ff1 (.*, .clk(free_clk), .din(dma_mem_tag[2:0]), .dout(dma_mem_tag_ff[2:0])); + rvdff #(3) dma_tag_ff2 (.*, .clk(free_clk), .din(dma_mem_tag_ff[2:0]), .dout(iccm_dma_rtag[2:0])); + rvdff #(2) dma_addr_bt3_ff (.*, .clk(free_clk), .din(dma_mem_addr[3:2]), .dout(dma_mem_addr_ff[3:2])); + rvdff #(1) ccm_rdy_in_ff (.*, .clk(free_clk), .din(iccm_dma_rden), .dout(iccm_dma_rvalid_in)); + rvdff #(1) ccm_rdy_ff (.*, .clk(free_clk), .din(iccm_dma_rvalid_in), .dout(iccm_dma_rvalid)); + rvdff #(1) ccm_err_ff (.*, .clk(free_clk), .din(iccm_dma_ecc_error_in), .dout(iccm_dma_ecc_error)); + rvdff #(64)dma_data_ff (.*, .clk(free_clk), .din(iccm_dma_rdata_in[63:0]), .dout(iccm_dma_rdata[63:0])); + + assign iccm_rw_addr[pt.ICCM_BITS-1:1] = ( ifc_dma_access_q_ok & dma_iccm_req & ~iccm_correct_ecc) ? dma_mem_addr[pt.ICCM_BITS-1:1] : + (~(ifc_dma_access_q_ok & dma_iccm_req) & iccm_correct_ecc) ? {iccm_ecc_corr_index_ff[pt.ICCM_BITS-1:2],1'b0} : ifc_fetch_addr_bf[pt.ICCM_BITS-1:1] ; + + + + +///////////////////////////////////////////////////////////////////////////////////// +// ECC checking logic for ICCM data. 
// +///////////////////////////////////////////////////////////////////////////////////// + + logic [3:0] ic_fetch_val_int_f; + logic [3:0] ic_fetch_val_shift_right; + assign ic_fetch_val_int_f[3:0] = {2'b00 , ic_fetch_val_f[1:0] } ; + assign ic_fetch_val_shift_right[3:0] = {ic_fetch_val_int_f << ifu_fetch_addr_int_f[1] } ; + + assign iccm_rdmux_data[77:0] = iccm_rd_data_ecc[77:0]; + for (genvar i=0; i < 2 ; i++) begin : ICCM_ECC_CHECK + assign iccm_ecc_word_enable[i] = ((|ic_fetch_val_shift_right[(2*i+1):(2*i)] & ~exu_flush_final & sel_iccm_data) | iccm_dma_rvalid_in) & ~dec_tlu_core_ecc_disable; + rvecc_decode ecc_decode ( + .en(iccm_ecc_word_enable[i]), + .sed_ded ( 1'b0 ), // 1 : means only detection + .din(iccm_rdmux_data[(39*i+31):(39*i)]), + .ecc_in(iccm_rdmux_data[(39*i+38):(39*i+32)]), + .dout(iccm_corrected_data[i][31:0]), + .ecc_out(iccm_corrected_ecc[i][6:0]), + .single_ecc_error(iccm_single_ecc_error[i]), + .double_ecc_error(iccm_double_ecc_error[i])); +end + + assign iccm_rd_ecc_single_err = (|iccm_single_ecc_error[1:0] ) & ifc_iccm_access_f & ifc_fetch_req_f; + assign iccm_rd_ecc_double_err = (|iccm_double_ecc_error[1:0] ) & ifc_iccm_access_f; + + assign iccm_corrected_data_f_mux[31:0] = iccm_single_ecc_error[0] ? iccm_corrected_data[0] : iccm_corrected_data[1]; + assign iccm_corrected_ecc_f_mux[6:0] = iccm_single_ecc_error[0] ? iccm_corrected_ecc[0] : iccm_corrected_ecc[1]; + + assign iccm_ecc_write_status = ((iccm_rd_ecc_single_err & ~iccm_rd_ecc_single_err_ff) & ~exu_flush_final) | iccm_dma_sb_error; + assign iccm_rd_ecc_single_err_hold_in = (iccm_rd_ecc_single_err | iccm_rd_ecc_single_err_ff) & ~exu_flush_final ; // & ~(perr_state == ERR_IDLE); + assign iccm_error_start = iccm_rd_ecc_single_err; + assign iccm_ecc_corr_index_in[pt.ICCM_BITS-1:2] = iccm_single_ecc_error[0] ? 
iccm_rw_addr_f[pt.ICCM_BITS-1:2] : iccm_rw_addr_f[pt.ICCM_BITS-1:2] + 1'b1 ; + + rvdff #(pt.ICCM_BITS-2) iccm_index_f (.*, .clk(free_clk), .din(iccm_rw_addr[pt.ICCM_BITS-1:2]), .dout(iccm_rw_addr_f[pt.ICCM_BITS-1:2])); + rvdff #((1)) ecc_rr_ff (.clk(free_clk), .din(iccm_rd_ecc_single_err_hold_in), .dout(iccm_rd_ecc_single_err_ff), .*); + rvdffs #((32)) ecc_dat0_ff (.clk(free_clk), .din(iccm_corrected_data_f_mux[31:0]), .dout(iccm_ecc_corr_data_ff[31:0]), .en(iccm_ecc_write_status), .*); + rvdffs #((7)) ecc_dat1_ff (.clk(free_clk), .din(iccm_corrected_ecc_f_mux[6:0]), .dout(iccm_ecc_corr_data_ff[38:32]), .en(iccm_ecc_write_status), .*); + rvdffs #((pt.ICCM_BITS-2))ecc_ind0_ff (.clk(free_clk), .din(iccm_ecc_corr_index_in[pt.ICCM_BITS-1:2]), .dout(iccm_ecc_corr_index_ff[pt.ICCM_BITS-1:2]),.en(iccm_ecc_write_status), .*); + + end else begin : iccm_disabled + assign iccm_dma_rvalid = 1'b0 ; + assign iccm_dma_ecc_error = 1'b0 ; + assign iccm_dma_rdata[63:0] = '0 ; + assign iccm_single_ecc_error = '0 ; + assign iccm_dma_rtag = '0 ; + + + + + + + assign iccm_rd_ecc_single_err = 1'b0 ; + assign iccm_rd_ecc_double_err = 1'b0 ; + assign iccm_rd_ecc_single_err_ff = 1'b0 ; + assign iccm_error_start = 1'b0; + assign iccm_ecc_corr_index_ff[pt.ICCM_BITS-1:2] = '0; + assign iccm_ecc_corr_data_ff[38:0] = '0; + assign iccm_ecc_write_status = '0; + + + + + + + end + + +////// ICCM signals + + +// Use the equation below for more power savings. 
+ assign ic_rd_en = (ifc_fetch_req_bf & ~ifc_fetch_uncacheable_bf & ~ifc_iccm_access_bf & + ~(((miss_state == STREAM) & ~miss_state_en) | + ((miss_state == CRIT_BYP_OK) & ~miss_state_en) | + ((miss_state == STALL_SCND_MISS) & ~miss_state_en) | + ((miss_state == MISS_WAIT) & ~miss_state_en) | + ((miss_state == CRIT_WRD_RDY) & ~miss_state_en) | + ((miss_state == CRIT_BYP_OK) & miss_state_en & (miss_nxtstate == MISS_WAIT)) )) | + ( ifc_fetch_req_bf & exu_flush_final & ~ifc_fetch_uncacheable_bf & ~ifc_iccm_access_bf ) ; + + +logic ic_real_rd_wp_unused; +assign ic_real_rd_wp_unused = (ifc_fetch_req_bf & ~ifc_iccm_access_bf & ~ifc_region_acc_fault_final_bf & ~dec_tlu_fence_i_wb & ~stream_miss_f & ~ic_act_miss_f & + ~(((miss_state == STREAM) & ~miss_state_en) | + ((miss_state == CRIT_BYP_OK) & ~miss_state_en & ~(miss_nxtstate == MISS_WAIT)) | + ((miss_state == CRIT_BYP_OK) & miss_state_en & (miss_nxtstate == MISS_WAIT)) | + ((miss_state == MISS_WAIT) & ~miss_state_en) | + ((miss_state == STALL_SCND_MISS) & ~miss_state_en) | + ((miss_state == CRIT_WRD_RDY) & ~miss_state_en) | + ((miss_nxtstate == STREAM) & miss_state_en) | + ((miss_state == SCND_MISS) & ~miss_state_en))) | + (ifc_fetch_req_bf & ~ifc_iccm_access_bf & ~ifc_region_acc_fault_final_bf & ~dec_tlu_fence_i_wb & ~stream_miss_f & exu_flush_final) ; + + +assign ic_wr_en[pt.ICACHE_NUM_WAYS-1:0] = bus_ic_wr_en[pt.ICACHE_NUM_WAYS-1:0] & {pt.ICACHE_NUM_WAYS{write_ic_16_bytes}}; +assign ic_write_stall = write_ic_16_bytes & ~((((miss_state== CRIT_BYP_OK) | (miss_state==STREAM)) & ~(bus_ifu_wr_en_ff & last_beat & ~uncacheable_miss_ff))); + + rvdff #(1) reset_all_tag_ff (.*, .clk(active_clk), .din(dec_tlu_fence_i_wb), .dout(reset_all_tags)); + + + +/////////////////////////////////////////////////////////////// +// Icache status and LRU +/////////////////////////////////////////////////////////////// +logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid_unq; +if (pt.ICACHE_ENABLE == 1 ) begin: icache_enabled + assign ic_valid = 
~ifu_wr_cumulative_err_data & ~(reset_ic_in | reset_ic_ff) & ~reset_tag_valid_for_miss; + + assign ifu_status_wr_addr_w_debug[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] = ((ic_debug_rd_en | ic_debug_wr_en ) & ic_debug_tag_array) ? + ic_debug_addr[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] : + ifu_status_wr_addr[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]; + + // status + rvdff #(pt.ICACHE_TAG_LO-pt.ICACHE_TAG_INDEX_LO) status_wr_addr_ff (.*, .clk(free_clk), .din(ifu_status_wr_addr_w_debug[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]), + .dout(ifu_status_wr_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO])); + + assign way_status_wr_en_w_debug = way_status_wr_en | (ic_debug_wr_en & ic_debug_tag_array); + rvdff #(1) status_wren_ff (.*, .clk(free_clk), .din(way_status_wr_en_w_debug), .dout(way_status_wr_en_ff)); + + assign way_status_new_w_debug[pt.ICACHE_STATUS_BITS-1:0] = (ic_debug_wr_en & ic_debug_tag_array) ? (pt.ICACHE_STATUS_BITS == 1) ? ic_debug_wr_data[4] : ic_debug_wr_data[6:4] : + way_status_new[pt.ICACHE_STATUS_BITS-1:0] ; + rvdff #(pt.ICACHE_STATUS_BITS) status_data_ff (.*, .clk(free_clk), .din(way_status_new_w_debug[pt.ICACHE_STATUS_BITS-1:0]), .dout(way_status_new_ff[pt.ICACHE_STATUS_BITS-1:0])); + + logic [(pt.ICACHE_TAG_DEPTH/8)-1 : 0] way_status_clken; + logic [(pt.ICACHE_TAG_DEPTH/8)-1 : 0] way_status_clk; + + for (genvar i=0 ; i<32'(pt.ICACHE_TAG_DEPTH)/8 ; i++) begin : CLK_GRP_WAY_STATUS + assign way_status_clken[i] = (ifu_status_wr_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO+3] == i ); + rvclkhdr way_status_cgc ( .en(way_status_clken[i]), .l1clk(way_status_clk[i]), .* ); + + for (genvar j=0 ; j<8 ; j++) begin : WAY_STATUS + rvdffs #(pt.ICACHE_STATUS_BITS) ic_way_status (.*, + .clk(way_status_clk[i]), + .en(((ifu_status_wr_addr_ff[pt.ICACHE_TAG_INDEX_LO+2:pt.ICACHE_TAG_INDEX_LO] == j) & way_status_wr_en_ff)), + .din(way_status_new_ff[pt.ICACHE_STATUS_BITS-1:0]), + .dout(way_status_out[8*i+j])); + end // WAY_STATUS + end // CLK_GRP_WAY_STATUS + 
+ always_comb begin : way_status_out_mux + way_status[pt.ICACHE_STATUS_BITS-1:0] = '0 ; + for (int j=0; j< 32'(pt.ICACHE_TAG_DEPTH); j++) begin : status_mux_loop + if (ifu_ic_rw_int_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] == (pt.ICACHE_TAG_LO-pt.ICACHE_TAG_INDEX_LO)'(j)) begin : mux_out + way_status[pt.ICACHE_STATUS_BITS-1:0] = way_status_out[j]; + end + end + end + +assign ifu_ic_rw_int_addr_w_debug[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] = ((ic_debug_rd_en | ic_debug_wr_en ) & ic_debug_tag_array) ? + ic_debug_addr[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] : + ifu_ic_rw_int_addr[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]; + + rvdff #(pt.ICACHE_TAG_LO-pt.ICACHE_TAG_INDEX_LO) tag_addr_ff (.*, .clk(free_clk), + .din(ifu_ic_rw_int_addr_w_debug[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]), + .dout(ifu_ic_rw_int_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO])); + + assign ifu_tag_wren_w_debug[pt.ICACHE_NUM_WAYS-1:0] = ifu_tag_wren[pt.ICACHE_NUM_WAYS-1:0] | ic_debug_tag_wr_en[pt.ICACHE_NUM_WAYS-1:0] ; + rvdff #(pt.ICACHE_NUM_WAYS) tag_v_we_ff (.*, .clk(free_clk), + .din(ifu_tag_wren_w_debug[pt.ICACHE_NUM_WAYS-1:0]), + .dout(ifu_tag_wren_ff[pt.ICACHE_NUM_WAYS-1:0])); + + assign ic_valid_w_debug = (ic_debug_wr_en & ic_debug_tag_array) ? 
ic_debug_wr_data[0] : ic_valid; + rvdff #(1) tag_v_ff (.*, .clk(free_clk), + .din(ic_valid_w_debug), + .dout(ic_valid_ff)); + + logic [pt.ICACHE_NUM_WAYS-1:0] [pt.ICACHE_TAG_DEPTH-1:0] ic_tag_valid_out ; + + logic [(pt.ICACHE_TAG_DEPTH/32)-1:0] [pt.ICACHE_NUM_WAYS-1:0] tag_valid_clken ; + logic [(pt.ICACHE_TAG_DEPTH/32)-1:0] [pt.ICACHE_NUM_WAYS-1:0] tag_valid_clk ; + + for (genvar i=0 ; i<32'(pt.ICACHE_TAG_DEPTH)/32 ; i++) begin : CLK_GRP_TAG_VALID + for (genvar j=0; j<32'(pt.ICACHE_NUM_WAYS); j++) begin : way_clken + if (pt.ICACHE_TAG_DEPTH == 32 ) begin + assign tag_valid_clken[i][j] = ifu_tag_wren_ff[j] | perr_err_inv_way[j] | reset_all_tags; + end else begin + assign tag_valid_clken[i][j] = (((ifu_ic_rw_int_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO+5] == i ) & ifu_tag_wren_ff[j] ) | + ((perr_ic_index_ff [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO+5] == i ) & perr_err_inv_way[j]) | reset_all_tags); + end + + rvclkhdr way_status_cgc ( .en(tag_valid_clken[i][j]), .l1clk(tag_valid_clk[i][j]), .* ); + + for (genvar k=0 ; k<32 ; k++) begin : TAG_VALID + rvdffs #(1) ic_way_tagvalid_dup (.*, + .clk(tag_valid_clk[i][j]), + .en(((ifu_ic_rw_int_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] == (k + 32*i)) & ifu_tag_wren_ff[j] ) | + ((perr_ic_index_ff [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] == (k + 32*i)) & perr_err_inv_way[j]) | reset_all_tags), + .din(ic_valid_ff & ~reset_all_tags & ~perr_sel_invalidate), + .dout(ic_tag_valid_out[j][32*i+k])); + end + end + end + + + always_comb begin : tag_valid_out_mux + ic_tag_valid_unq[pt.ICACHE_NUM_WAYS-1:0] = '0; + for (int j=0; j< pt.ICACHE_TAG_DEPTH; j++) begin : tag_valid_loop + if (ifu_ic_rw_int_addr_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] == (pt.ICACHE_TAG_LO-pt.ICACHE_TAG_INDEX_LO)'(j)) begin : valid_out + for ( int k=0; k=1 & clk_count<=3) rst_l <= 1'b0; + else rst_l <= 1'b1; + + if (clk_count > 3) begin + + compressed_din[15:0] <= compressed[clk_count-3]; // c.mv + expected_val[31:0] <= 
expected[clk_count-3]; + + end + + if (clk_count == 65000) begin + $dumpoff; + $finish; + end + end // always @ (posedge clk) + + always @(negedge clk) begin + if (clk_count > 3 & error) begin + $display("clock: %d compressed %h error actual %h expected %h",clk_count,compressed_din,actual,expected_val); + end + end + + + el2_ifu_compress_ctl align (.*,.din(compressed_din[15:0]),.dout(actual[31:0])); + + assign error = actual[31:0] != expected_val[31:0]; + + + +endmodule // el2_ifu_tb_memread + + diff --git a/design/include/el2_def.sv b/design/include/el2_def.sv new file mode 100644 index 0000000..ae7e2bd --- /dev/null +++ b/design/include/el2_def.sv @@ -0,0 +1,325 @@
+// performance monitor stuff
+//`ifndef EL2_DEF_SV
+//`define EL2_DEF_SV
+// el2_pkg: shared packed-struct and enum typedefs. Every struct is `packed`,
+// so the first field listed occupies the most significant bits; reordering or
+// resizing a field changes the bit layout seen by every user of the type.
+package el2_pkg;
+
+// Per-instruction trace record (presumably the rv_i_* trace port bundle -- confirm against the core top level).
+typedef struct packed {
+                       logic [1:0]  rv_i_valid_ip;
+                       logic [31:0] rv_i_insn_ip;
+                       logic [31:0] rv_i_address_ip;
+                       logic [1:0]  rv_i_exception_ip;
+                       logic [4:0]  rv_i_ecause_ip;
+                       logic [2:0]  rv_i_interrupt_ip;
+                       logic [31:0] rv_i_tval_ip;
+                       } el2_trace_pkt_t;
+
+
+// 4-bit instruction-type code; all 16 encodings are assigned.
+// Used below as the pmu_i0_itype field of el2_trap_pkt_t.
+typedef enum logic [3:0] {
+                          NULL      = 4'b0000,
+                          MUL       = 4'b0001,
+                          LOAD      = 4'b0010,
+                          STORE     = 4'b0011,
+                          ALU       = 4'b0100,
+                          CSRREAD   = 4'b0101,
+                          CSRWRITE  = 4'b0110,
+                          CSRRW     = 4'b0111,
+                          EBREAK    = 4'b1000,
+                          ECALL     = 4'b1001,
+                          FENCE     = 4'b1010,
+                          FENCEI    = 4'b1011,
+                          MRET      = 4'b1100,
+                          CONDBR    = 4'b1101,
+                          JAL       = 4'b1110,
+                          BITMANIPU = 4'b1111
+                          } el2_inst_pkt_t;
+
+// Load CAM entry: valid/writeback flags, 3-bit tag and 5-bit destination register index.
+typedef struct packed {
+                       logic valid;
+                       logic wb;
+                       logic [2:0] tag;
+                       logic [4:0] rd;
+                       } el2_load_cam_pkt_t;
+
+// Three one-bit flags for pc0 (call / ret / pc4); presumably return-stack control -- confirm at the use site.
+typedef struct packed {
+                       logic pc0_call;
+                       logic pc0_ret;
+                       logic pc0_pc4;
+                       } el2_rets_pkt_t;
+// Branch packet: 12-bit target offset, 2-bit history, error flags and a 31-bit target (bit 0 is not stored).
+typedef struct packed {
+                       logic valid;
+                       logic [11:0] toffset;
+                       logic [1:0] hist;
+                       logic br_error;
+                       logic br_start_error;
+                       logic bank;
+                       logic [31:1] prett; // predicted ret target
+                       logic way;
+                       logic ret;
+                       } el2_br_pkt_t;
+
+// Reduced branch packet: same valid/hist/error/way fields as el2_br_pkt_t, plus `middle`, without the target fields.
+typedef struct packed {
+                       logic valid;
+                       logic [1:0] hist;
+                       logic br_error;
+                       logic br_start_error;
+                       logic way;
+                       logic middle;
+                       } el2_br_tlu_pkt_t;
+
+// Prediction packet: misprediction / actual-taken flags together with the hist, toffset, prett and way fields
+// that also appear in el2_br_pkt_t.
+typedef struct packed {
+                       logic misp;
+                       logic ataken;
+                       logic boffset;
+                       logic pc4;
+                       logic [1:0] hist;
+                       logic [11:0] toffset;
+                       logic valid;
+                       logic br_error;
+                       logic br_start_error;
+                       logic [31:1] prett;
+                       logic pcall;
+                       logic pret;
+                       logic pja;
+                       logic way;
+                       } el2_predict_pkt_t;
+
+// Trap packet: legality / instruction-access-fault flags, trigger bits and the PMU classification of instruction i0.
+typedef struct packed {
+                       logic legal;
+                       logic icaf;
+                       logic icaf_f1;
+                       logic [1:0] icaf_type;
+                       logic fence_i;
+                       logic [3:0] i0trigger;
+                       el2_inst_pkt_t pmu_i0_itype; // pmu - instruction type
+                       logic pmu_i0_br_unpred; // pmu
+                       logic pmu_divide;
+                       logic pmu_lsu_misaligned;
+                       } el2_trap_pkt_t;
+
+// Destination packet for i0: 5-bit rd index, load/store/div flags and CSR write controls with a 12-bit CSR address.
+typedef struct packed {
+                       logic [4:0] i0rd;
+                       logic i0load;
+                       logic i0store;
+                       logic i0div;
+                       logic i0v;
+                       logic i0valid;
+                       logic csrwen;
+                       logic csrwonly;
+                       logic [11:0] csrwaddr;
+                       } el2_dest_pkt_t;
+
+// One flag per instruction class (mul / load / alu).
+typedef struct packed {
+                       logic mul;
+                       logic load;
+                       logic alu;
+                       } el2_class_pkt_t;
+
+// Three 5-bit register indices: two sources and one destination.
+typedef struct packed {
+                       logic [4:0] rs1;
+                       logic [4:0] rs2;
+                       logic [4:0] rd;
+                       } el2_reg_pkt_t;
+
+
+// ALU control packet: one flag per logical/shift/compare/arithmetic operation plus prediction and CSR controls.
+typedef struct packed {
+                       logic land;
+                       logic lor;
+                       logic lxor;
+                       logic sll;
+                       logic srl;
+                       logic sra;
+                       logic beq;
+                       logic bne;
+                       logic blt;
+                       logic bge;
+                       logic add;
+                       logic sub;
+                       logic slt;
+                       logic unsign;
+                       logic jal;
+                       logic predict_t;
+                       logic predict_nt;
+                       logic csr_write;
+                       logic csr_imm;
+                       } el2_alu_pkt_t;
+
+// LSU control packet: access-size flags (by/half/word/dword), load/store direction, DMA marker and bypass controls.
+typedef struct packed {
+                       logic fast_int;
+                       logic by;
+                       logic half;
+                       logic word;
+                       logic dword; // for dma
+                       logic load;
+                       logic store;
+                       logic unsign;
+                       logic dma; // dma pkt
+                       logic store_data_bypass_d;
+                       logic load_ldst_bypass_d;
+                       logic store_data_bypass_m;
+                       logic valid;
+                       } el2_lsu_pkt_t;
+
+// LSU error report: exception flags, a 3-bit mscause code and the 32-bit address.
+typedef struct packed {
+                       logic exc_valid;
+                       logic single_ecc_error;
+                       logic inst_type; //0: Load, 1: Store
+                       logic exc_type; //0: MisAligned, 1: Access Fault
+                       logic [2:0] mscause;
+                       logic [31:0] addr;
+                       } el2_lsu_error_pkt_t;
+
+// Decode packet: one flag per decoded operand source, operation and instruction property.
+typedef struct packed {
+                       logic alu;
+                       logic rs1;
+                       logic rs2;
+                       logic imm12;
+                       logic rd;
+                       logic shimm5;
+                       logic imm20;
+                       logic pc;
+                       logic load;
+                       logic store;
+                       logic lsu;
+                       logic add;
+                       logic sub;
+                       logic land;
+                       logic lor;
+                       logic lxor;
+                       logic sll;
+                       logic sra;
+                       logic srl;
+                       logic slt;
+                       logic unsign;
+                       logic condbr;
+                       logic beq;
+                       logic bne;
+                       logic bge;
+                       logic blt;
+                       logic jal;
+                       logic by;
+                       logic half;
+                       logic word;
+                       logic csr_read;
+                       logic csr_clr;
+                       logic csr_set;
+                       logic csr_write;
+                       logic csr_imm;
+                       logic presync;
+                       logic postsync;
+                       logic ebreak;
+                       logic ecall;
+                       logic mret;
+                       logic mul;
+                       logic rs1_sign;
+                       logic rs2_sign;
+                       logic low;
+                       logic div;
+                       logic rem;
+                       logic fence;
+                       logic fence_i;
+                       logic pm_alu;
+                       logic legal;
+                       } el2_dec_pkt_t;
+
+
+// Multiplier packet: operand-sign and low-half selects, followed by one flag per additional operation
+// (bext, bdep, clmul*, grev, shfl/unshfl, crc32*, bfp).
+typedef struct packed {
+                       logic valid;
+                       logic rs1_sign;
+                       logic rs2_sign;
+                       logic low;
+                       logic bext;
+                       logic bdep;
+                       logic clmul;
+                       logic clmulh;
+                       logic clmulr;
+                       logic grev;
+                       logic shfl;
+                       logic unshfl;
+                       logic crc32_b;
+                       logic crc32_h;
+                       logic crc32_w;
+                       logic crc32c_b;
+                       logic crc32c_h;
+                       logic crc32c_w;
+                       logic bfp;
+                       } el2_mul_pkt_t;
+
+// Divider packet: valid, unsigned select and remainder select.
+typedef struct packed {
+                       logic valid;
+                       logic unsign;
+                       logic rem;
+                       } el2_div_pkt_t;
+
+// The next four *_ext_in_pkt_t types declare the same field list (TEST1, RME, RM[3:0], LS, DS, SD, TEST_RNM, BC1, BC2)
+// under four different names. Presumably they are external memory-macro control pins -- confirm with the memory wrappers.
+typedef struct packed {
+                       logic TEST1;
+                       logic RME;
+                       logic [3:0] RM;
+
+                       logic LS;
+                       logic DS;
+                       logic SD;
+                       logic TEST_RNM;
+                       logic BC1;
+                       logic BC2;
+                       } el2_ccm_ext_in_pkt_t;
+
+typedef struct packed {
+                       logic TEST1;
+                       logic RME;
+                       logic [3:0] RM;
+                       logic LS;
+                       logic DS;
+                       logic SD;
+                       logic TEST_RNM;
+                       logic BC1;
+                       logic BC2;
+                       } el2_dccm_ext_in_pkt_t;
+
+
+typedef struct packed {
+                       logic TEST1;
+                       logic RME;
+                       logic [3:0] RM;
+                       logic LS;
+                       logic DS;
+                       logic SD;
+                       logic TEST_RNM;
+                       logic BC1;
+                       logic BC2;
+                       } el2_ic_data_ext_in_pkt_t;
+
+
+typedef struct packed {
+                       logic TEST1;
+                       logic RME;
+                       logic [3:0] RM;
+                       logic LS;
+                       logic DS;
+                       logic SD;
+                       logic TEST_RNM;
+                       logic BC1;
+                       logic BC2;
+                       } el2_ic_tag_ext_in_pkt_t;
+
+
+
+// Trigger packet: six one-bit control flags and a 32-bit tdata2 value.
+typedef struct packed {
+                       logic select;
+                       logic match;
+                       logic store;
+                       logic load;
+                       logic execute;
+                       logic m;
+                       logic [31:0] tdata2;
+                       } el2_trigger_pkt_t;
+
+
+// I-cache debug access packet.
+// NOTE(review): icache_wrdata is 71 bits wide, but the fields named in its comment add up to 66 bits
+// (2 + 32 + 32) -- confirm what the remaining upper bits carry.
+typedef struct packed {
+                       logic [70:0] icache_wrdata; // {dicad1[1:0], dicad0h[31:0], dicad0[31:0]}
+                       logic [16:0] icache_dicawics; // Arraysel:24, Waysel:21:20, Index:16:3
+                       logic icache_rd_valid;
+                       logic icache_wr_valid;
+                       } el2_cache_debug_pkt_t;
+
+
+endpackage // el2_pkg
diff --git a/design/lib/ahb_to_axi4.sv b/design/lib/ahb_to_axi4.sv new file mode 100644 index 0000000..82d125e --- /dev/null +++ b/design/lib/ahb_to_axi4.sv @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//********************************************************************************
+// $Id$
+//
+// Owner:
+// Function: AHB to AXI4 Bridge
+// Comments:
+//   The module is an AHB-Lite slave (haddr/htrans/hsel are inputs, hrdata/hreadyout/hresp are outputs)
+//   and an AXI4 master (aw/w/ar valid are outputs). An accepted AHB command is registered, checked,
+//   and then held in a one-entry command buffer that drives the AXI channels.
+//   Every AXI transfer is a single beat: axi_awlen/axi_arlen are tied to 0 and axi_wlast to 1.
+//   ahb_hresp is raised for an address outside DCCM/ICCM, for an ICCM access or DCCM write that is
+//   not word or double-word sized, for an unaligned address, or for a read that returned an AXI error.
+//
+//********************************************************************************
+module ahb_to_axi4
+import el2_pkg::*;
+#(
+   TAG = 1,
+   `include "el2_param.vh"
+)
+//   ,TAG = 1)
+(
+   input                   clk,
+   input                   rst_l,
+   input                   scan_mode,
+   input                   bus_clk_en,
+   input                   clk_override,
+
+   // AXI signals
+   // AXI Write Channels
+   output logic            axi_awvalid,
+   input  logic            axi_awready,
+   output logic [TAG-1:0]  axi_awid,
+   output logic [31:0]     axi_awaddr,
+   output logic [2:0]      axi_awsize,
+   output logic [2:0]      axi_awprot,
+   output logic [7:0]      axi_awlen,
+   output logic [1:0]      axi_awburst,
+
+   output logic            axi_wvalid,
+   input  logic            axi_wready,
+   output logic [63:0]     axi_wdata,
+   output logic [7:0]      axi_wstrb,
+   output logic            axi_wlast,
+
+   input  logic            axi_bvalid,
+   output logic            axi_bready,
+   input  logic [1:0]      axi_bresp,
+   input  logic [TAG-1:0]  axi_bid,
+
+   // AXI Read Channels
+   output logic            axi_arvalid,
+   input  logic            axi_arready,
+   output logic [TAG-1:0]  axi_arid,
+   output logic [31:0]     axi_araddr,
+   output logic [2:0]      axi_arsize,
+   output logic [2:0]      axi_arprot,
+   output logic [7:0]      axi_arlen,
+   output logic [1:0]      axi_arburst,
+
+   input  logic            axi_rvalid,
+   output logic            axi_rready,
+   input  logic [TAG-1:0]  axi_rid,
+   input  logic [63:0]     axi_rdata,
+   input  logic [1:0]      axi_rresp,
+
+   // AHB-Lite signals
+   input logic [31:0]      ahb_haddr, // ahb bus address
+   input logic [2:0]       ahb_hburst, // tied to 0
+   input logic             ahb_hmastlock, // tied to 0
+   input logic [3:0]       ahb_hprot, // tied to 4'b0011
+   input logic [2:0]       ahb_hsize, // size of bus transaction (possible values 0,1,2,3)
+   input logic [1:0]       ahb_htrans, // Transaction type (possible values 0,2 only right now)
+   input logic             ahb_hwrite, // ahb bus write
+   input logic [63:0]      ahb_hwdata, // ahb bus write data
+   input logic             ahb_hsel, // this slave was selected
+   input logic             ahb_hreadyin, // previous hready was accepted or not
+
+   output logic [63:0]     ahb_hrdata, // ahb bus read data
+   output logic            ahb_hreadyout, // slave ready to accept transaction
+   output logic            ahb_hresp // slave response (high indicates error)
+
+);
+
+   logic [7:0]       master_wstrb;
+
+   typedef enum logic [1:0] { IDLE = 2'b00, // Nothing in the buffer. No commands yet received
+                              WR   = 2'b01, // Write Command received
+                              RD   = 2'b10, // Read Command received
+                              PEND = 2'b11 // Waiting on Read Data from core
+                            } state_t;
+   state_t      buf_state, buf_nxtstate;
+   logic        buf_state_en;
+
+   // Buffer signals (one entry buffer)
+   logic        buf_read_error_in, buf_read_error;
+   logic [63:0] buf_rdata;
+
+   logic        ahb_hready;
+   logic        ahb_hready_q;
+   logic [1:0]  ahb_htrans_in, ahb_htrans_q;
+   logic [2:0]  ahb_hsize_q;
+   logic        ahb_hwrite_q;
+   logic [31:0] ahb_haddr_q;
+   logic [63:0] ahb_hwdata_q; // NOTE(review): declared but neither driven nor read anywhere in this module
+   logic        ahb_hresp_q;
+
+   //Miscellaneous signals
+   logic        ahb_addr_in_dccm, ahb_addr_in_iccm, ahb_addr_in_pic;
+   logic        ahb_addr_in_dccm_region_nc, ahb_addr_in_iccm_region_nc, ahb_addr_in_pic_region_nc;
+   // signals needed for the read data coming back from the core and to block any further commands as AHB is a blocking bus
+   logic        buf_rdata_en;
+
+   logic        ahb_bus_addr_clk_en, buf_rdata_clk_en;
+   logic        ahb_clk, ahb_addr_clk, buf_rdata_clk;
+   // Command buffer is the holding station where we convert to AXI and send to core
+   logic        cmdbuf_wr_en, cmdbuf_rst;
+   logic        cmdbuf_full;
+   logic        cmdbuf_vld, cmdbuf_write;
+   logic [1:0]  cmdbuf_size;
+   logic [7:0]  cmdbuf_wstrb;
+   logic [31:0] cmdbuf_addr;
+   logic [63:0] cmdbuf_wdata;
+
+   logic        bus_clk;
+
+// FSM to control the bus states and when to block the hready and load the command buffer
+// Defaults are assigned first; each state then overrides only the signals it needs.
+   always_comb begin
+      buf_nxtstate      = IDLE;
+      buf_state_en      = 1'b0;
+      buf_rdata_en      = 1'b0; // signal to load the buffer when the core sends read data back
+      buf_read_error_in = 1'b0; // signal indicating that an error came back with the read from the core
+      cmdbuf_wr_en      = 1'b0; // all clear from the gasket to load the buffer with the command for reads, command/data for writes
+      case (buf_state)
+         IDLE: begin // No commands received
+                  buf_nxtstate = ahb_hwrite ? WR : RD;
+                  buf_state_en = ahb_hready & ahb_htrans[1] & ahb_hsel; // only transition on a valid htrans
+          end
+         WR: begin // Write command received last cycle
+                  buf_nxtstate = (ahb_hresp | (ahb_htrans[1:0] == 2'b0) | ~ahb_hsel) ? IDLE : ahb_hwrite ? WR : RD;
+                  buf_state_en = (~cmdbuf_full | ahb_hresp) ;
+                  cmdbuf_wr_en = ~cmdbuf_full & ~(ahb_hresp | ((ahb_htrans[1:0] == 2'b01) & ahb_hsel)); // Don't send command to the buffer in case of an error or when the master is not ready with the data now.
+         end
+         RD: begin // Read command received last cycle.
+                 buf_nxtstate = ahb_hresp ? IDLE :PEND; // If error go to idle, else wait for read data
+                 buf_state_en = (~cmdbuf_full | ahb_hresp); // only when command can go, or if it's an error
+                 cmdbuf_wr_en = ~ahb_hresp & ~cmdbuf_full; // send command only when no error
+         end
+         PEND: begin // Read Command has been sent. Waiting on Data.
+                 buf_nxtstate = IDLE; // go back for next command and present data next cycle
+                 buf_state_en = axi_rvalid & ~cmdbuf_write; // read data is back
+                 buf_rdata_en = buf_state_en; // buffer the read data coming back from core
+                 buf_read_error_in = buf_state_en & |axi_rresp[1:0]; // buffer error flag if return has Error ( ECC )
+         end
+     endcase
+   end // always_comb begin
+
+   rvdffs #($bits(state_t)) state_reg (.*, .din(buf_nxtstate), .dout({buf_state}), .en(buf_state_en), .clk(ahb_clk));
+
+   // Byte strobes for the registered command: a 1/2/4-byte mask shifted to the byte lane given by
+   // ahb_haddr_q[2:0], or all eight lanes for a double-word access.
+   assign master_wstrb[7:0]   = ({8{ahb_hsize_q[2:0] == 3'b0}}  & (8'b1    << ahb_haddr_q[2:0])) |
+                                ({8{ahb_hsize_q[2:0] == 3'b1}}  & (8'b11   << ahb_haddr_q[2:0])) |
+                                ({8{ahb_hsize_q[2:0] == 3'b10}} & (8'b1111 << ahb_haddr_q[2:0])) |
+                                ({8{ahb_hsize_q[2:0] == 3'b11}} & 8'b1111_1111);
+
+   // AHB signals
+   // While an error is being reported, hreadyout is high only when the previous cycle had hresp high with hready low.
+   assign ahb_hreadyout       = ahb_hresp ? (ahb_hresp_q & ~ahb_hready_q) :
+                                         ((~cmdbuf_full | (buf_state == IDLE)) & ~(buf_state == RD | buf_state == PEND)  & ~buf_read_error);
+
+   assign ahb_hready          = ahb_hreadyout & ahb_hreadyin;
+   assign ahb_htrans_in[1:0]  = {2{ahb_hsel}} & ahb_htrans[1:0];
+   assign ahb_hrdata[63:0]    = buf_rdata[63:0];
+   assign ahb_hresp        = ((ahb_htrans_q[1:0] != 2'b0) & (buf_state != IDLE)        &
+
+                             ((~(ahb_addr_in_dccm | ahb_addr_in_iccm)) |                                                                                   // request not for ICCM or DCCM
+                             ((ahb_addr_in_iccm | (ahb_addr_in_dccm & ahb_hwrite_q)) & ~((ahb_hsize_q[1:0] == 2'b10) | (ahb_hsize_q[1:0] == 2'b11))) |    // ICCM Rd/Wr OR DCCM Wr not the right size
+                             ((ahb_hsize_q[2:0] == 3'h1) & ahb_haddr_q[0])   |                                                                             // HW size but unaligned
+                             ((ahb_hsize_q[2:0] == 3'h2) & (|ahb_haddr_q[1:0])) |                                                                          // W size but unaligned
+                             ((ahb_hsize_q[2:0] == 3'h3) & (|ahb_haddr_q[2:0])))) |                                                                        // DW size but unaligned
+                             buf_read_error |                                                                                                              // Read ECC error
+                             (ahb_hresp_q & ~ahb_hready_q);
+
+   // Buffer signals - needed for the read data and ECC error response
+   rvdff  #(.WIDTH(64)) buf_rdata_ff     (.din(axi_rdata[63:0]),   .dout(buf_rdata[63:0]), .clk(buf_rdata_clk), .*);
+   rvdff  #(.WIDTH(1))  buf_read_error_ff(.din(buf_read_error_in), .dout(buf_read_error),  .clk(ahb_clk), .*);          // buf_read_error will be high only one cycle
+
+   // All the Master signals are captured before presenting it to the command buffer. We check for Hresp before sending it to the cmd buffer.
+   rvdff  #(.WIDTH(1))    hresp_ff  (.din(ahb_hresp),          .dout(ahb_hresp_q),       .clk(ahb_clk), .*);
+   rvdff  #(.WIDTH(1))    hready_ff (.din(ahb_hready),         .dout(ahb_hready_q),      .clk(ahb_clk), .*);
+   rvdff  #(.WIDTH(2))    htrans_ff (.din(ahb_htrans_in[1:0]), .dout(ahb_htrans_q[1:0]), .clk(ahb_clk), .*);
+   rvdff  #(.WIDTH(3))    hsize_ff  (.din(ahb_hsize[2:0]),     .dout(ahb_hsize_q[2:0]),  .clk(ahb_addr_clk), .*);
+   rvdff  #(.WIDTH(1))    hwrite_ff (.din(ahb_hwrite),         .dout(ahb_hwrite_q),      .clk(ahb_addr_clk), .*);
+   rvdff  #(.WIDTH(32))   haddr_ff  (.din(ahb_haddr[31:0]),    .dout(ahb_haddr_q[31:0]), .clk(ahb_addr_clk), .*);
+
+   // Clock header logic
+   // The address-phase flops are clocked only when a valid transfer is accepted; the read-data flop only when data returns.
+   assign ahb_bus_addr_clk_en = bus_clk_en & (ahb_hready & ahb_htrans[1]);
+   assign buf_rdata_clk_en    = bus_clk_en & buf_rdata_en;
+
+   rvclkhdr ahb_cgc       (.en(bus_clk_en),          .l1clk(ahb_clk),       .*);
+   rvclkhdr ahb_addr_cgc  (.en(ahb_bus_addr_clk_en), .l1clk(ahb_addr_clk),  .*);
+   rvclkhdr buf_rdata_cgc (.en(buf_rdata_clk_en),    .l1clk(buf_rdata_clk), .*);
+
+   // Address check  dccm
+   // `pt` is not declared in this file; presumably it is supplied by the el2_param.vh include above -- confirm.
+   rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
+                  .CCM_SIZE(pt.DCCM_SIZE)) addr_dccm_rangecheck (
+      .addr(ahb_haddr_q[31:0]),
+      .in_range(ahb_addr_in_dccm),
+      .in_region(ahb_addr_in_dccm_region_nc)
+   );
+
+   // Address check  iccm
+   if (pt.ICCM_ENABLE == 1) begin: GenICCM
+      rvrangecheck #(.CCM_SADR(pt.ICCM_SADR),
+                     .CCM_SIZE(pt.ICCM_SIZE)) addr_iccm_rangecheck (
+         .addr(ahb_haddr_q[31:0]),
+         .in_range(ahb_addr_in_iccm),
+         .in_region(ahb_addr_in_iccm_region_nc)
+      );
+   end else begin: GenNoICCM
+      assign ahb_addr_in_iccm = '0;
+      assign ahb_addr_in_iccm_region_nc = '0;
+   end
+
+   // PIC memory address check
+   // NOTE(review): ahb_addr_in_pic is computed here but is not used by ahb_hresp or any other logic in this module.
+   rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
+                  .CCM_SIZE(pt.PIC_SIZE)) addr_pic_rangecheck (
+      .addr(ahb_haddr_q[31:0]),
+      .in_range(ahb_addr_in_pic),
+      .in_region(ahb_addr_in_pic_region_nc)
+   );
+
+   // Command Buffer - Holding for the commands to be sent for the AXI. It will be converted to the AXI signals.
+   // The entry is cleared when AXI accepts it (unless a new command is written in the same cycle) or on an AHB error for a read.
+   assign cmdbuf_rst         = (((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)) & ~cmdbuf_wr_en) | (ahb_hresp & ~cmdbuf_write);
+   assign cmdbuf_full        = (cmdbuf_vld & ~((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)));
+
+   rvdffsc #(.WIDTH(1))   cmdbuf_vldff      (.din(1'b1),                .dout(cmdbuf_vld),         .en(cmdbuf_wr_en), .clear(cmdbuf_rst), .clk(bus_clk), .*);
+   rvdffs  #(.WIDTH(1))   cmdbuf_writeff    (.din(ahb_hwrite_q),        .dout(cmdbuf_write),       .en(cmdbuf_wr_en), .clk(bus_clk), .*);
+   rvdffs  #(.WIDTH(2))   cmdbuf_sizeff     (.din(ahb_hsize_q[1:0]),    .dout(cmdbuf_size[1:0]),   .en(cmdbuf_wr_en), .clk(bus_clk), .*);
+   rvdffs  #(.WIDTH(8))   cmdbuf_wstrbff    (.din(master_wstrb[7:0]),   .dout(cmdbuf_wstrb[7:0]),  .en(cmdbuf_wr_en), .clk(bus_clk), .*);
+   rvdffe  #(.WIDTH(32))  cmdbuf_addrff     (.din(ahb_haddr_q[31:0]),   .dout(cmdbuf_addr[31:0]),  .en(cmdbuf_wr_en), .clk(bus_clk), .*);
+   rvdffe  #(.WIDTH(64))  cmdbuf_wdataff    (.din(ahb_hwdata[63:0]),    .dout(cmdbuf_wdata[63:0]), .en(cmdbuf_wr_en), .clk(bus_clk), .*);
+
+   // AXI Write Command Channel
+   assign axi_awvalid           = cmdbuf_vld & cmdbuf_write;
+   assign axi_awid[TAG-1:0]     = '0;
+   assign axi_awaddr[31:0]      = cmdbuf_addr[31:0];
+   assign axi_awsize[2:0]       = {1'b0, cmdbuf_size[1:0]};
+   assign axi_awprot[2:0]       = 3'b0;
+   assign axi_awlen[7:0]        = '0;
+   assign axi_awburst[1:0]      = 2'b01;
+   // AXI Write Data Channel - This is tied to the command channel as we only write the command buffer once we have the data.
+   assign axi_wvalid            = cmdbuf_vld & cmdbuf_write;
+   assign axi_wdata[63:0]       = cmdbuf_wdata[63:0];
+   assign axi_wstrb[7:0]        = cmdbuf_wstrb[7:0];
+   assign axi_wlast             = 1'b1;
+   // AXI Write Response - Always ready. AHB does not require a write response.
+   assign axi_bready            = 1'b1;
+   // AXI Read Channels
+   assign axi_arvalid           = cmdbuf_vld & ~cmdbuf_write;
+   assign axi_arid[TAG-1:0]     = '0;
+   assign axi_araddr[31:0]      = cmdbuf_addr[31:0];
+   assign axi_arsize[2:0]       = {1'b0, cmdbuf_size[1:0]};
+   assign axi_arprot            = 3'b0;
+   assign axi_arlen[7:0]        = '0;
+   assign axi_arburst[1:0]      = 2'b01;
+   // AXI Read Response Channel - Always ready as AHB reads are blocking and the buffer is available for the read coming back always.
+   assign axi_rready            = 1'b1;
+
+   // Clock header logic
+   rvclkhdr bus_cgc       (.en(bus_clk_en),       .l1clk(bus_clk),       .*);
+
+`ifdef ASSERT_ON
+   // An error response with hready high must follow a cycle that had the error response with hready low.
+   property ahb_error_protocol;
+      @(posedge ahb_clk) (ahb_hready & ahb_hresp) |-> (~$past(ahb_hready) & $past(ahb_hresp));
+   endproperty
+   assert_ahb_error_protocol: assert property (ahb_error_protocol) else
+      $display("Bus Error with hReady isn't preceded with Bus Error without hready");
+
+`endif
+
+endmodule // ahb_to_axi4
\ No newline at end of file diff --git a/design/lib/axi4_to_ahb.sv b/design/lib/axi4_to_ahb.sv new file mode 100644 index 0000000..481ad88 --- /dev/null +++ b/design/lib/axi4_to_ahb.sv @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//******************************************************************************** +// $Id$ +// +// Owner: +// Function: AXI4 -> AHB Bridge +// Comments: +// +//******************************************************************************** +module axi4_to_ahb +import el2_pkg::*; +#( +`include "el2_param.vh" +,parameter TAG = 1) ( + + input clk, + input rst_l, + input scan_mode, + input bus_clk_en, + input clk_override, + + // AXI signals + // AXI Write Channels + input logic axi_awvalid, + output logic axi_awready, + input logic [TAG-1:0] axi_awid, + input logic [31:0] axi_awaddr, + input logic [2:0] axi_awsize, + input logic [2:0] axi_awprot, + + input logic axi_wvalid, + output logic axi_wready, + input logic [63:0] axi_wdata, + input logic [7:0] axi_wstrb, + input logic axi_wlast, + + output logic axi_bvalid, + input logic axi_bready, + output logic [1:0] axi_bresp, + output logic [TAG-1:0] axi_bid, + + // AXI Read Channels + input logic axi_arvalid, + output logic axi_arready, + input logic [TAG-1:0] axi_arid, + input logic [31:0] axi_araddr, + input logic [2:0] axi_arsize, + input logic [2:0] axi_arprot, + + output logic axi_rvalid, + input logic axi_rready, + output logic [TAG-1:0] axi_rid, + output logic [63:0] axi_rdata, + output logic [1:0] axi_rresp, + output logic axi_rlast, + + // AHB-Lite signals + output logic [31:0] ahb_haddr, // ahb bus address + output logic [2:0] ahb_hburst, // tied to 0 + output logic ahb_hmastlock, // tied to 0 + output logic [3:0] ahb_hprot, // tied to 4'b0011 + output logic [2:0] ahb_hsize, // size of bus transaction (possible values 0,1,2,3) + output logic [1:0] ahb_htrans, // Transaction type (possible values 0,2 only right now) + output logic ahb_hwrite, // ahb bus write + output logic [63:0] ahb_hwdata, // ahb bus write data + + input logic [63:0] ahb_hrdata, // ahb bus read data + input logic ahb_hready, // slave ready to accept transaction + input logic ahb_hresp // slave response (high indicates erro) + +); + + 
localparam ID = 1; + localparam PRTY = 1; + typedef enum logic [2:0] {IDLE=3'b000, CMD_RD=3'b001, CMD_WR=3'b010, DATA_RD=3'b011, DATA_WR=3'b100, DONE=3'b101, STREAM_RD=3'b110, STREAM_ERR_RD=3'b111} state_t; + state_t buf_state, buf_nxtstate; + + logic slave_valid; + logic slave_ready; + logic [TAG-1:0] slave_tag; + logic [63:0] slave_rdata; + logic [3:0] slave_opc; + + logic wrbuf_en, wrbuf_data_en; + logic wrbuf_cmd_sent, wrbuf_rst; + logic wrbuf_vld; + logic wrbuf_data_vld; + logic [TAG-1:0] wrbuf_tag; + logic [2:0] wrbuf_size; + logic [31:0] wrbuf_addr; + logic [63:0] wrbuf_data; + logic [7:0] wrbuf_byteen; + + logic bus_write_clk_en; + logic bus_clk, bus_write_clk; + + logic master_valid; + logic master_ready; + logic [TAG-1:0] master_tag; + logic [31:0] master_addr; + logic [63:0] master_wdata; + logic [2:0] master_size; + logic [2:0] master_opc; + logic [7:0] master_byteen; + + // Buffer signals (one entry buffer) + logic [31:0] buf_addr; + logic [1:0] buf_size; + logic buf_write; + logic [7:0] buf_byteen; + logic buf_aligned; + logic [63:0] buf_data; + logic [TAG-1:0] buf_tag; + + //Miscellaneous signals + logic buf_rst; + logic [TAG-1:0] buf_tag_in; + logic [31:0] buf_addr_in; + logic [7:0] buf_byteen_in; + logic [63:0] buf_data_in; + logic buf_write_in; + logic buf_aligned_in; + logic [2:0] buf_size_in; + + logic buf_state_en; + logic buf_wr_en; + logic buf_data_wr_en; + logic slvbuf_error_en; + logic wr_cmd_vld; + + logic cmd_done_rst, cmd_done, cmd_doneQ; + logic trxn_done; + logic [2:0] buf_cmd_byte_ptr, buf_cmd_byte_ptrQ, buf_cmd_nxtbyte_ptr; + logic buf_cmd_byte_ptr_en; + logic found; + + logic slave_valid_pre; + logic ahb_hready_q; + logic ahb_hresp_q; + logic [1:0] ahb_htrans_q; + logic ahb_hwrite_q; + logic [63:0] ahb_hrdata_q; + + + logic slvbuf_write; + logic slvbuf_error; + logic [TAG-1:0] slvbuf_tag; + + logic slvbuf_error_in; + logic slvbuf_wr_en; + logic bypass_en; + logic rd_bypass_idle; + + logic last_addr_en; + logic [31:0] last_bus_addr; 
+
+   // Clocks
+   logic                       buf_clken, slvbuf_clken;
+   logic                       ahbm_addr_clken;
+   logic                       ahbm_data_clken;
+
+   logic                       buf_clk, slvbuf_clk;
+   logic                       ahbm_clk;
+   logic                       ahbm_addr_clk;
+   logic                       ahbm_data_clk;
+
+   // Function to get the transfer size code from the byte enables.
+   //   byteen 8'hff                   -> 2'b11 (8 bytes)
+   //   byteen 8'hf0 / 8'h0f           -> 2'b10 (4 bytes)
+   //   byteen 8'hc0 / 30 / 0c / 03    -> 2'b01 (2 bytes)
+   //   any other pattern              -> 2'b00
+   function automatic logic [1:0] get_write_size;
+      input logic [7:0] byteen;
+
+      logic [1:0]       size;
+
+      size[1:0] = (2'b11 & {2{(byteen[7:0] == 8'hff)}}) |
+                  (2'b10 & {2{((byteen[7:0] == 8'hf0) | (byteen[7:0] == 8'h0f))}}) |
+                  (2'b01 & {2{((byteen[7:0] == 8'hc0) | (byteen[7:0] == 8'h30) | (byteen[7:0] == 8'h0c) | (byteen[7:0] == 8'h03))}});
+
+      return size[1:0];
+   endfunction // get_write_size
+
+   // Function to get the starting byte address (addr[2:0]) from the byte enables.
+   // The result is the index of the lowest enabled byte lane of an aligned pattern:
+   //   byteen 8'hff / 8'h0f / 8'h03   -> 3'h0
+   //   byteen 8'h0c                   -> 3'h2
+   //   byteen 8'hf0 / 8'h30           -> 3'h4
+   //   byteen 8'hc0                   -> 3'h6
+   //   any other pattern              -> 3'h0
+   // The 3'h4 term must test 8'h30 (byte lanes 5:4). Testing 8'h03 there would return 4 for a
+   // halfword in lanes 1:0 and 0 for a halfword in lanes 5:4.
+   function automatic logic [2:0] get_write_addr;
+      input logic [7:0] byteen;
+
+      logic [2:0]       addr;
+
+      addr[2:0] = (3'h0 & {3{((byteen[7:0] == 8'hff) | (byteen[7:0] == 8'h0f) | (byteen[7:0] == 8'h03))}}) |
+                  (3'h2 & {3{(byteen[7:0] == 8'h0c)}})                                                     |
+                  (3'h4 & {3{((byteen[7:0] == 8'hf0) | (byteen[7:0] == 8'h30))}})                          |
+                  (3'h6 & {3{(byteen[7:0] == 8'hc0)}});
+
+      return addr[2:0];
+   endfunction // get_write_addr
+
+   // Function to get the next byte pointer
+   // Returns the index of the first enabled byte lane at or above the start position, where the start
+   // is current_byte_ptr (get_next = 0) or current_byte_ptr + 1 (get_next = 1). The increment is 3 bits
+   // wide, so a start of 7 + 1 wraps to 0. If no enabled lane is found the loop leaves the result at 7.
+   function automatic logic [2:0] get_nxtbyte_ptr (logic [2:0] current_byte_ptr, logic [7:0] byteen, logic get_next);
+      logic [2:0] start_ptr;
+      logic       found;
+      found = '0;
+      start_ptr[2:0] = get_next ? (current_byte_ptr[2:0] + 3'b1) : current_byte_ptr[2:0];
+      for (int j=0; j<8; j++) begin
+         if (~found) begin
+            get_nxtbyte_ptr[2:0] = 3'(j);
+            found |= (byteen[j] & (3'(j) >= start_ptr[2:0])) ;
+         end
+      end
+   endfunction // get_nxtbyte_ptr
+
+
+   // Write buffer
+   assign wrbuf_en       = axi_awvalid & axi_awready & master_ready;
+   assign wrbuf_data_en  = axi_wvalid & axi_wready & master_ready;
+   assign wrbuf_cmd_sent = master_valid & master_ready & (master_opc[2:1] == 2'b01);
+   assign wrbuf_rst      = wrbuf_cmd_sent & ~wrbuf_en;
+
+   assign axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent) & master_ready;
+   assign axi_wready  = ~(wrbuf_data_vld & ~wrbuf_cmd_sent) & master_ready;
+   assign axi_arready = ~(wrbuf_vld & wrbuf_data_vld) & master_ready;
+   assign axi_rlast   = 1'b1;
+
+   assign wr_cmd_vld          = (wrbuf_vld & wrbuf_data_vld);
+   assign master_valid        = wr_cmd_vld | axi_arvalid;
+   assign master_tag[TAG-1:0] = wr_cmd_vld ? wrbuf_tag[TAG-1:0] : axi_arid[TAG-1:0];
+   assign master_opc[2:0]     = wr_cmd_vld ? 3'b011 : 3'b0;
+   assign master_addr[31:0]   = wr_cmd_vld ? wrbuf_addr[31:0] : axi_araddr[31:0];
+   assign master_size[2:0]    = wr_cmd_vld ? wrbuf_size[2:0] : axi_arsize[2:0];
+   assign master_byteen[7:0]  = wrbuf_byteen[7:0];
+   assign master_wdata[63:0]  = wrbuf_data[63:0];
+
+   // AXI response channel signals
+   assign axi_bvalid       = slave_valid & slave_ready & slave_opc[3];
+   assign axi_bresp[1:0]   = slave_opc[0] ? 2'b10 : (slave_opc[1] ? 2'b11 : 2'b0);
+   assign axi_bid[TAG-1:0] = slave_tag[TAG-1:0];
+
+   assign axi_rvalid       = slave_valid & slave_ready & (slave_opc[3:2] == 2'b0);
+   assign axi_rresp[1:0]   = slave_opc[0] ? 2'b10 : (slave_opc[1] ? 
2'b11 : 2'b0); + assign axi_rid[TAG-1:0] = slave_tag[TAG-1:0]; + assign axi_rdata[63:0] = slave_rdata[63:0]; + assign slave_ready = axi_bready & axi_rready; + + // Clock header logic + assign bus_write_clk_en = bus_clk_en & ((axi_awvalid & axi_awready) | (axi_wvalid & axi_wready)); + + rvclkhdr bus_cgc (.en(bus_clk_en), .l1clk(bus_clk), .*); + rvclkhdr bus_write_cgc (.en(bus_write_clk_en), .l1clk(bus_write_clk), .*); + + + // FIFO state machine + always_comb begin + buf_nxtstate = IDLE; + buf_state_en = 1'b0; + buf_wr_en = 1'b0; + buf_data_wr_en = 1'b0; + slvbuf_error_in = 1'b0; + slvbuf_error_en = 1'b0; + buf_write_in = 1'b0; + cmd_done = 1'b0; + trxn_done = 1'b0; + buf_cmd_byte_ptr_en = 1'b0; + buf_cmd_byte_ptr[2:0] = '0; + slave_valid_pre = 1'b0; + master_ready = 1'b0; + ahb_htrans[1:0] = 2'b0; + slvbuf_wr_en = 1'b0; + bypass_en = 1'b0; + rd_bypass_idle = 1'b0; + + case (buf_state) + IDLE: begin + master_ready = 1'b1; + buf_write_in = (master_opc[2:1] == 2'b01); + buf_nxtstate = buf_write_in ? CMD_WR : CMD_RD; + buf_state_en = master_valid & master_ready; + buf_wr_en = buf_state_en; + buf_data_wr_en = buf_state_en & (buf_nxtstate == CMD_WR); + buf_cmd_byte_ptr_en = buf_state_en; + buf_cmd_byte_ptr[2:0] = buf_write_in ? get_nxtbyte_ptr(3'b0,buf_byteen_in[7:0],1'b0) : master_addr[2:0]; + bypass_en = buf_state_en; + rd_bypass_idle = bypass_en & (buf_nxtstate == CMD_RD); + ahb_htrans[1:0] = {2{bypass_en}} & 2'b10; + end + CMD_RD: begin + buf_nxtstate = (master_valid & (master_opc[2:0] == 3'b000))? STREAM_RD : DATA_RD; + buf_state_en = ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & ~ahb_hwrite_q; + cmd_done = buf_state_en & ~master_valid; + slvbuf_wr_en = buf_state_en; + master_ready = buf_state_en & (buf_nxtstate == STREAM_RD); + buf_wr_en = master_ready; + bypass_en = master_ready & master_valid; + buf_cmd_byte_ptr[2:0] = bypass_en ? 
master_addr[2:0] : buf_addr[2:0]; + ahb_htrans[1:0] = 2'b10 & {2{~buf_state_en | bypass_en}}; + end + STREAM_RD: begin + master_ready = (ahb_hready_q & ~ahb_hresp_q) & ~(master_valid & master_opc[2:1] == 2'b01); + buf_wr_en = (master_valid & master_ready & (master_opc[2:0] == 3'b000)); // update the fifo if we are streaming the read commands + buf_nxtstate = ahb_hresp_q ? STREAM_ERR_RD : (buf_wr_en ? STREAM_RD : DATA_RD); // assuming that the master accpets the slave response right away. + buf_state_en = (ahb_hready_q | ahb_hresp_q); + buf_data_wr_en = buf_state_en; + slvbuf_error_in = ahb_hresp_q; + slvbuf_error_en = buf_state_en; + slave_valid_pre = buf_state_en & ~ahb_hresp_q; // send a response right away if we are not going through an error response. + cmd_done = buf_state_en & ~master_valid; // last one of the stream should not send a htrans + bypass_en = master_ready & master_valid & (buf_nxtstate == STREAM_RD) & buf_state_en; + buf_cmd_byte_ptr[2:0] = bypass_en ? master_addr[2:0] : buf_addr[2:0]; + ahb_htrans[1:0] = 2'b10 & {2{~((buf_nxtstate != STREAM_RD) & buf_state_en)}}; + slvbuf_wr_en = buf_wr_en; // shifting the contents from the buf to slv_buf for streaming cases + end // case: STREAM_RD + STREAM_ERR_RD: begin + buf_nxtstate = DATA_RD; + buf_state_en = ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & ~ahb_hwrite_q; + slave_valid_pre = buf_state_en; + slvbuf_wr_en = buf_state_en; // Overwrite slvbuf with buffer + buf_cmd_byte_ptr[2:0] = buf_addr[2:0]; + ahb_htrans[1:0] = 2'b10 & {2{~buf_state_en}}; + end + DATA_RD: begin + buf_nxtstate = DONE; + buf_state_en = (ahb_hready_q | ahb_hresp_q); + buf_data_wr_en = buf_state_en; + slvbuf_error_in= ahb_hresp_q; + slvbuf_error_en= buf_state_en; + slvbuf_wr_en = buf_state_en; + + end + CMD_WR: begin + buf_nxtstate = DATA_WR; + trxn_done = ahb_hready_q & ahb_hwrite_q & (ahb_htrans_q[1:0] != 2'b0); + buf_state_en = trxn_done; + buf_cmd_byte_ptr_en = buf_state_en; + slvbuf_wr_en = buf_state_en; + buf_cmd_byte_ptr = 
trxn_done ? get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1) : buf_cmd_byte_ptrQ; + cmd_done = trxn_done & (buf_aligned | (buf_cmd_byte_ptrQ == 3'b111) | + (buf_byteen[get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1)] == 1'b0)); + ahb_htrans[1:0] = {2{~(cmd_done | cmd_doneQ)}} & 2'b10; + end + DATA_WR: begin + buf_state_en = (cmd_doneQ & ahb_hready_q) | ahb_hresp_q; + master_ready = buf_state_en & ~ahb_hresp_q & slave_ready; // Ready to accept new command if current command done and no error + buf_nxtstate = (ahb_hresp_q | ~slave_ready) ? DONE : + ((master_valid & master_ready) ? ((master_opc[2:1] == 2'b01) ? CMD_WR : CMD_RD) : IDLE); + slvbuf_error_in = ahb_hresp_q; + slvbuf_error_en = buf_state_en; + + buf_write_in = (master_opc[2:1] == 2'b01); + buf_wr_en = buf_state_en & ((buf_nxtstate == CMD_WR) | (buf_nxtstate == CMD_RD)); + buf_data_wr_en = buf_wr_en; + + cmd_done = (ahb_hresp_q | (ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & + ((buf_cmd_byte_ptrQ == 3'b111) | (buf_byteen[get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1)] == 1'b0)))); + bypass_en = buf_state_en & buf_write_in & (buf_nxtstate == CMD_WR); // Only bypass for writes for the time being + ahb_htrans[1:0] = {2{(~(cmd_done | cmd_doneQ) | bypass_en)}} & 2'b10; + slave_valid_pre = buf_state_en & (buf_nxtstate != DONE); + + trxn_done = ahb_hready_q & ahb_hwrite_q & (ahb_htrans_q[1:0] != 2'b0); + buf_cmd_byte_ptr_en = trxn_done | bypass_en; + buf_cmd_byte_ptr = bypass_en ? get_nxtbyte_ptr(3'b0,buf_byteen_in[7:0],1'b0) : + trxn_done ? get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1) : buf_cmd_byte_ptrQ; + end + DONE: begin + buf_nxtstate = IDLE; + buf_state_en = slave_ready; + slvbuf_error_en = 1'b1; + slave_valid_pre = 1'b1; + end + endcase + end + + assign buf_rst = 1'b0; + assign cmd_done_rst = slave_valid_pre; + assign buf_addr_in[31:3] = master_addr[31:3]; + assign buf_addr_in[2:0] = (buf_aligned_in & (master_opc[2:1] == 2'b01)) ? 
get_write_addr(master_byteen[7:0]) : master_addr[2:0]; + assign buf_tag_in[TAG-1:0] = master_tag[TAG-1:0]; + assign buf_byteen_in[7:0] = wrbuf_byteen[7:0]; + assign buf_data_in[63:0] = (buf_state == DATA_RD) ? ahb_hrdata_q[63:0] : master_wdata[63:0]; + assign buf_size_in[1:0] = (buf_aligned_in & (master_size[1:0] == 2'b11) & (master_opc[2:1] == 2'b01)) ? get_write_size(master_byteen[7:0]) : master_size[1:0]; + assign buf_aligned_in = (master_opc[2:0] == 3'b0) | // reads are always aligned since they are either DW or sideeffects + (master_size[1:0] == 2'b0) | (master_size[1:0] == 2'b01) | (master_size[1:0] == 2'b10) | // Always aligned for Byte/HW/Word since they can be only for non-idempotent. IFU/SB are always aligned + ((master_size[1:0] == 2'b11) & + ((master_byteen[7:0] == 8'h3) | (master_byteen[7:0] == 8'hc) | (master_byteen[7:0] == 8'h30) | (master_byteen[7:0] == 8'hc0) | + (master_byteen[7:0] == 8'hf) | (master_byteen[7:0] == 8'hf0) | (master_byteen[7:0] == 8'hff))); + + // Generate the ahb signals + assign ahb_haddr[31:0] = bypass_en ? {master_addr[31:3],buf_cmd_byte_ptr[2:0]} : {buf_addr[31:3],buf_cmd_byte_ptr[2:0]}; + assign ahb_hsize[2:0] = bypass_en ? {1'b0, ({2{buf_aligned_in}} & buf_size_in[1:0])} : + {1'b0, ({2{buf_aligned}} & buf_size[1:0])}; // Send the full size for aligned trxn + assign ahb_hburst[2:0] = 3'b0; + assign ahb_hmastlock = 1'b0; + assign ahb_hprot[3:0] = {3'b001,~axi_arprot[2]}; + assign ahb_hwrite = bypass_en ? (master_opc[2:1] == 2'b01) : buf_write; + assign ahb_hwdata[63:0] = buf_data[63:0]; + + assign slave_valid = slave_valid_pre; + assign slave_opc[3:2] = slvbuf_write ? 2'b11 : 2'b00; + assign slave_opc[1:0] = {2{slvbuf_error}} & 2'b10; + assign slave_rdata[63:0] = slvbuf_error ? {2{last_bus_addr[31:0]}} : ((buf_state == DONE) ? 
buf_data[63:0] : ahb_hrdata_q[63:0]); + assign slave_tag[TAG-1:0] = slvbuf_tag[TAG-1:0]; + + assign last_addr_en = (ahb_htrans[1:0] != 2'b0) & ahb_hready & ahb_hwrite ; + + + rvdffsc #(.WIDTH(1)) wrbuf_vldff (.din(1'b1), .dout(wrbuf_vld), .en(wrbuf_en), .clear(wrbuf_rst), .clk(bus_clk), .*); + rvdffsc #(.WIDTH(1)) wrbuf_data_vldff(.din(1'b1), .dout(wrbuf_data_vld), .en(wrbuf_data_en), .clear(wrbuf_rst), .clk(bus_clk), .*); + rvdffs #(.WIDTH(TAG)) wrbuf_tagff (.din(axi_awid[TAG-1:0]), .dout(wrbuf_tag[TAG-1:0]), .en(wrbuf_en), .clk(bus_clk), .*); + rvdffs #(.WIDTH(3)) wrbuf_sizeff (.din(axi_awsize[2:0]), .dout(wrbuf_size[2:0]), .en(wrbuf_en), .clk(bus_clk), .*); + rvdffe #(.WIDTH(32)) wrbuf_addrff (.din(axi_awaddr[31:0]), .dout(wrbuf_addr[31:0]), .en(wrbuf_en), .clk(bus_clk), .*); + rvdffe #(.WIDTH(64)) wrbuf_dataff (.din(axi_wdata[63:0]), .dout(wrbuf_data[63:0]), .en(wrbuf_data_en), .clk(bus_clk), .*); + rvdffs #(.WIDTH(8)) wrbuf_byteenff (.din(axi_wstrb[7:0]), .dout(wrbuf_byteen[7:0]), .en(wrbuf_data_en), .clk(bus_clk), .*); + + rvdffs #(.WIDTH(32)) last_bus_addrff (.din(ahb_haddr[31:0]), .dout(last_bus_addr[31:0]), .en(last_addr_en), .clk(ahbm_clk), .*); + + rvdffsc #(.WIDTH($bits(state_t))) buf_state_ff (.din(buf_nxtstate), .dout({buf_state}), .en(buf_state_en), .clear(buf_rst), .clk(ahbm_clk), .*); + rvdffs #(.WIDTH(1)) buf_writeff (.din(buf_write_in), .dout(buf_write), .en(buf_wr_en), .clk(buf_clk), .*); + rvdffs #(.WIDTH(TAG)) buf_tagff (.din(buf_tag_in[TAG-1:0]), .dout(buf_tag[TAG-1:0]), .en(buf_wr_en), .clk(buf_clk), .*); + rvdffe #(.WIDTH(32)) buf_addrff (.din(buf_addr_in[31:0]), .dout(buf_addr[31:0]), .en(buf_wr_en & bus_clk_en), .*); + rvdffs #(.WIDTH(2)) buf_sizeff (.din(buf_size_in[1:0]), .dout(buf_size[1:0]), .en(buf_wr_en), .clk(buf_clk), .*); + rvdffs #(.WIDTH(1)) buf_alignedff (.din(buf_aligned_in), .dout(buf_aligned), .en(buf_wr_en), .clk(buf_clk), .*); + rvdffs #(.WIDTH(8)) buf_byteenff (.din(buf_byteen_in[7:0]), .dout(buf_byteen[7:0]), 
.en(buf_wr_en), .clk(buf_clk), .*); + rvdffe #(.WIDTH(64)) buf_dataff (.din(buf_data_in[63:0]), .dout(buf_data[63:0]), .en(buf_data_wr_en & bus_clk_en), .*); + + + rvdffs #(.WIDTH(1)) slvbuf_writeff (.din(buf_write), .dout(slvbuf_write), .en(slvbuf_wr_en), .clk(buf_clk), .*); + rvdffs #(.WIDTH(TAG)) slvbuf_tagff (.din(buf_tag[TAG-1:0]), .dout(slvbuf_tag[TAG-1:0]), .en(slvbuf_wr_en), .clk(buf_clk), .*); + rvdffs #(.WIDTH(1)) slvbuf_errorff (.din(slvbuf_error_in), .dout(slvbuf_error), .en(slvbuf_error_en), .clk(ahbm_clk), .*); + + rvdffsc #(.WIDTH(1)) buf_cmd_doneff (.din(1'b1), .en(cmd_done), .dout(cmd_doneQ), .clear(cmd_done_rst), .clk(ahbm_clk), .*); + rvdffs #(.WIDTH(3)) buf_cmd_byte_ptrff (.din(buf_cmd_byte_ptr[2:0]), .dout(buf_cmd_byte_ptrQ[2:0]), .en(buf_cmd_byte_ptr_en), .clk(ahbm_clk), .*); + + rvdff #(.WIDTH(1)) hready_ff (.din(ahb_hready), .dout(ahb_hready_q), .clk(ahbm_clk), .*); + rvdff #(.WIDTH(2)) htrans_ff (.din(ahb_htrans[1:0]), .dout(ahb_htrans_q[1:0]), .clk(ahbm_clk), .*); + rvdff #(.WIDTH(1)) hwrite_ff (.din(ahb_hwrite), .dout(ahb_hwrite_q), .clk(ahbm_addr_clk), .*); + rvdff #(.WIDTH(1)) hresp_ff (.din(ahb_hresp), .dout(ahb_hresp_q), .clk(ahbm_clk), .*); + rvdff #(.WIDTH(64)) hrdata_ff (.din(ahb_hrdata[63:0]), .dout(ahb_hrdata_q[63:0]), .clk(ahbm_data_clk), .*); + + // Clock headers + // clock enables for ahbm addr/data + assign buf_clken = bus_clk_en & (buf_wr_en | slvbuf_wr_en | clk_override); + assign ahbm_addr_clken = bus_clk_en & ((ahb_hready & ahb_htrans[1]) | clk_override); + assign ahbm_data_clken = bus_clk_en & ((buf_state != IDLE) | clk_override); + + rvclkhdr buf_cgc (.en(buf_clken), .l1clk(buf_clk), .*); + rvclkhdr ahbm_cgc (.en(bus_clk_en), .l1clk(ahbm_clk), .*); + rvclkhdr ahbm_addr_cgc (.en(ahbm_addr_clken), .l1clk(ahbm_addr_clk), .*); + rvclkhdr ahbm_data_cgc (.en(ahbm_data_clken), .l1clk(ahbm_data_clk), .*); + +`ifdef ASSERT_ON + property ahb_trxn_aligned; + @(posedge ahbm_clk) ahb_htrans[1] |-> ((ahb_hsize[2:0] == 3'h0) | + 
((ahb_hsize[2:0] == 3'h1) & (ahb_haddr[0] == 1'b0)) | + ((ahb_hsize[2:0] == 3'h2) & (ahb_haddr[1:0] == 2'b0)) | + ((ahb_hsize[2:0] == 3'h3) & (ahb_haddr[2:0] == 3'b0))); + endproperty + assert_ahb_trxn_aligned: assert property (ahb_trxn_aligned) else + $display("Assertion ahb_trxn_aligned failed: ahb_htrans=2'h%h, ahb_hsize=3'h%h, ahb_haddr=32'h%h",ahb_htrans[1:0], ahb_hsize[2:0], ahb_haddr[31:0]); + + property ahb_error_protocol; + @(posedge ahbm_clk) (ahb_hready & ahb_hresp) |-> (~$past(ahb_hready) & $past(ahb_hresp)); + endproperty + assert_ahb_error_protocol: assert property (ahb_error_protocol) else + $display("Bus Error with hReady isn't preceded with Bus Error without hready"); +`endif + +endmodule // axi4_to_ahb diff --git a/design/lib/beh_lib.sv b/design/lib/beh_lib.sv new file mode 100644 index 0000000..96612b0 --- /dev/null +++ b/design/lib/beh_lib.sv @@ -0,0 +1,506 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+// all flops call the rvdff flop
+//
+// rvdff: WIDTH-bit D flop, posedge clk, asynchronous active-low reset
+// (rst_l == 0 clears dout to 0). SHORT == 1 removes the flop: dout = din.
+
+
+module rvdff #( parameter WIDTH=1, SHORT=0 )
+   (
+     input logic [WIDTH-1:0] din,    // next-state data
+     input logic clk,
+     input logic rst_l,              // asynchronous reset, active low
+
+     output logic [WIDTH-1:0] dout   // registered data
+     );
+
+if (SHORT == 1) begin
+   assign dout = din;
+end
+else begin
+`ifdef CLOCKGATE
+   // Debug trace only: prints this instance's din/dout/clk on every tb_top.clk edge
+   always @(posedge tb_top.clk) begin
+      #0 $strobe("CG: %0t %m din %x dout %x clk %b width %d",$time,din,dout,clk,WIDTH);
+   end
+`endif
+
+   always_ff @(posedge clk or negedge rst_l) begin
+      if (rst_l == 0)
+        dout[WIDTH-1:0] <= 0;
+      else
+        dout[WIDTH-1:0] <= din[WIDTH-1:0];
+   end
+
+end
+endmodule
+
+// rvdff with 2:1 input mux: loads din when en==1, otherwise recirculates dout
+// (SHORT == 1 removes the flop: dout = din)
+module rvdffs #( parameter WIDTH=1, SHORT=0 )
+   (
+     input logic [WIDTH-1:0] din,
+     input logic en,                 // 1: capture din, 0: hold
+     input logic clk,
+     input logic rst_l,
+     output logic [WIDTH-1:0] dout
+     );
+
+if (SHORT == 1) begin : genblock
+   assign dout = din;
+end
+else begin : genblock
+   rvdff #(WIDTH) dffs (.din((en) ? din[WIDTH-1:0] : dout[WIDTH-1:0]), .*);
+end
+
+endmodule
+
+// rvdff with en and clear
+// clear has priority over en: next value = clear ? 0 : (en ? din : dout)
+module rvdffsc #( parameter WIDTH=1, SHORT=0 )
+   (
+     input logic [WIDTH-1:0] din,
+     input logic en,
+     input logic clear,              // synchronous clear, wins over en
+     input logic clk,
+     input logic rst_l,
+     output logic [WIDTH-1:0] dout
+     );
+
+   logic [WIDTH-1:0] din_new;
+if (SHORT == 1) begin
+   assign dout = din;
+end
+else begin
+   assign din_new = {WIDTH{~clear}} & (en ?
din[WIDTH-1:0] : dout[WIDTH-1:0]);
+   rvdff #(WIDTH) dffsc (.din(din_new[WIDTH-1:0]), .*);
+end
+endmodule
+
+// rvdffe: flop with enable.
+//   RV_FPGA_OPTIMIZE defined : plain rvdffs (data-mux enable on the ungated clk)
+//   otherwise                : rvclkhdr derives l1clk from clk/en/scan_mode and an rvdff runs on l1clk
+//   Unless PHYSICAL is defined, WIDTH < 8 is rejected with an elaboration-time $error.
+//   SHORT == 1 removes the flop: dout = din.
+module rvdffe #( parameter WIDTH=1, SHORT=0 )
+   (
+     input logic [WIDTH-1:0] din,
+     input logic en,
+     input logic clk,
+     input logic rst_l,
+     input logic scan_mode,
+     output logic [WIDTH-1:0] dout
+     );
+
+   logic l1clk;                      // clock out of rvclkhdr (unused in the RV_FPGA_OPTIMIZE / SHORT variants)
+
+if (SHORT == 1) begin : genblock
+   if (1) begin : genblock
+      assign dout = din;
+   end
+end
+else begin : genblock
+
+`ifndef PHYSICAL
+   if (WIDTH >= 8) begin: genblock
+`endif
+
+`ifdef RV_FPGA_OPTIMIZE
+      rvdffs #(WIDTH) dff ( .* );
+`else
+      rvclkhdr clkhdr ( .* );
+      rvdff #(WIDTH) dff (.*, .clk(l1clk));
+`endif
+
+`ifndef PHYSICAL
+   end
+   else
+      $error("%m: rvdffe width must be >= 8");
+`endif
+end // else: !if(SHORT == 1)
+
+endmodule // rvdffe
+
+// rvsyncss: two rvdff stages in series on clk (din -> din_ff1 -> dout)
+// NOTE(review): presumably used as a two-flop synchronizer - confirm against callers
+module rvsyncss #(parameter WIDTH = 251)
+   (
+     input logic clk,
+     input logic rst_l,
+     input logic [WIDTH-1:0] din,
+     output logic [WIDTH-1:0] dout
+     );
+
+   logic [WIDTH-1:0] din_ff1;
+
+   rvdff #(WIDTH) sync_ff1 (.*, .din (din[WIDTH-1:0]), .dout(din_ff1[WIDTH-1:0]));
+   rvdff #(WIDTH) sync_ff2 (.*, .din (din_ff1[WIDTH-1:0]), .dout(dout[WIDTH-1:0]));
+
+endmodule // rvsyncss
+
+// rvlsadder: dout = rs1 + offset, with offset[11] treated as the sign bit.
+// Only the low 12 bits are added; the upper 20 bits are picked from
+// rs1 / rs1+1 / rs1-1 according to the offset sign and the carry out of bit 11.
+module rvlsadder
+   (
+     input logic [31:0] rs1,
+     input logic [11:0] offset,
+
+     output logic [31:0] dout
+     );
+
+   logic cout;
+   logic sign;
+
+   logic [31:12] rs1_inc;
+   logic [31:12] rs1_dec;
+
+   assign {cout,dout[11:0]} = {1'b0,rs1[11:0]} + {1'b0,offset[11:0]};
+
+   assign rs1_inc[31:12] = rs1[31:12] + 1;
+
+   assign rs1_dec[31:12] = rs1[31:12] - 1;
+
+   assign sign = offset[11];
+
+   // sign == cout : upper bits unchanged; positive offset with carry : +1; negative offset without carry : -1
+   assign dout[31:12] = ({20{ sign ^~ cout}} & rs1[31:12]) |
+                        ({20{ ~sign & cout}} & rs1_inc[31:12]) |
+                        ({20{ sign & ~cout}} & rs1_dec[31:12]);
+
+endmodule // rvlsadder
+
+// assume we only maintain pc[31:1] in the pipe
+
+module rvbradder
+   (
+     input [31:1] pc,
+     input [12:1] offset,
+
+     output [31:1] dout
+     );
+
+   logic cout;
+   logic sign;
+
+   logic [31:13] pc_inc;
+   logic [31:13] pc_dec;
+
+   assign {cout,dout[12:1]} = {1'b0,pc[12:1]} +
{1'b0,offset[12:1]};
+
+   assign pc_inc[31:13] = pc[31:13] + 1;
+
+   assign pc_dec[31:13] = pc[31:13] - 1;
+
+   assign sign = offset[12];
+
+
+   // sign == cout : upper bits unchanged; positive offset with carry : +1; negative offset without carry : -1
+   assign dout[31:13] = ({19{ sign ^~ cout}} & pc[31:13]) |
+                        ({19{ ~sign & cout}} & pc_inc[31:13]) |
+                        ({19{ sign & ~cout}} & pc_dec[31:13]);
+
+
+endmodule // rvbradder
+
+
+// 2s complement circuit
+// Bits up to and including the lowest set bit pass through; every bit above it is inverted.
+module rvtwoscomp #( parameter WIDTH=32 )
+   (
+     input logic [WIDTH-1:0] din,
+
+     output logic [WIDTH-1:0] dout
+     );
+
+   logic [WIDTH-1:1] dout_temp; // holding for all other bits except for the lsb. LSB is always din
+
+   genvar i;
+
+   for ( i = 1; i < WIDTH; i++ ) begin : flip_after_first_one
+      assign dout_temp[i] = (|din[i-1:0]) ? ~din[i] : din[i];
+   end : flip_after_first_one
+
+   assign dout[WIDTH-1:0] = { dout_temp[WIDTH-1:1], din[0] };
+
+endmodule // rvtwoscomp
+
+// find first
+// dout = number of zero bits above the highest set bit, scanning din[WIDTH-1] down to din[1].
+// din[0] is not examined, so din == 0 and din == 1 both give WIDTH-1.
+module rvfindfirst1 #( parameter WIDTH=32, SHIFT=$clog2(WIDTH) )
+   (
+     input logic [WIDTH-1:0] din,
+
+     output logic [SHIFT-1:0] dout
+     );
+   logic done;
+
+   always_comb begin
+      dout[SHIFT-1:0] = {SHIFT{1'b0}};
+      done = 1'b0;
+
+      for ( int i = WIDTH-1; i > 0; i-- ) begin : find_first_one
+         done |= din[i];                         // sticky once a 1 has been seen
+         dout[SHIFT-1:0] += done ? 1'b0 : 1'b1;  // count positions before the first 1
+      end : find_first_one
+   end
+endmodule // rvfindfirst1
+
+// rvfindfirst1hot: dout is one-hot at the lowest set bit of din (all zero when din == 0)
+module rvfindfirst1hot #( parameter WIDTH=32 )
+   (
+     input logic [WIDTH-1:0] din,
+
+     output logic [WIDTH-1:0] dout
+     );
+   logic done;
+
+   always_comb begin
+      dout[WIDTH-1:0] = {WIDTH{1'b0}};
+      done = 1'b0;
+      for ( int i = 0; i < WIDTH; i++ ) begin : find_first_one
+         dout[i] = ~done & din[i];
+         done |= din[i];
+      end : find_first_one
+   end
+endmodule // rvfindfirst1hot
+
+// mask and match function matches bits after finding the first 0 position
+// find first starting from LSB.
Skip that location and match the rest of the bits
+module rvmaskandmatch #( parameter WIDTH=32 )
+   (
+     input logic [WIDTH-1:0] mask,   // this will have the mask in the lower bit positions
+     input logic [WIDTH-1:0] data,   // this is what needs to be matched on the upper bits with the mask's upper bits
+     input logic masken,             // when 1 : do mask. 0 : full match
+     output logic match
+     );
+
+   logic [WIDTH-1:0] matchvec;
+   logic masken_or_fullmask;
+
+   // Masking is applied only when masken is set AND mask is not all ones;
+   // an all-ones mask falls back to a full compare.
+   assign masken_or_fullmask = masken & ~(&mask[WIDTH-1:0]);
+
+   assign matchvec[0] = masken_or_fullmask | (mask[0] == data[0]);
+   genvar i;
+
+   // Bit i is ignored while every lower mask bit is 1 (this includes the first-zero position itself);
+   // all bits above the first zero must compare equal.
+   for ( i = 1; i < WIDTH; i++ ) begin : match_after_first_zero
+      assign matchvec[i] = (&mask[i-1:0] & masken_or_fullmask) ? 1'b1 : (mask[i] == data[i]);
+   end : match_after_first_zero
+
+   assign match = &matchvec[WIDTH-1:0]; // all bits either matched or were masked off
+
+endmodule // rvmaskandmatch
+
+
+
+
+// Check if the S_ADDR <= addr < E_ADDR
+// The window is selected by comparing addr[31:MASK_BITS] with CCM_SADR[31:MASK_BITS].
+// NOTE(review): MASK_BITS = 10 + $clog2(CCM_SIZE) suggests CCM_SIZE is in KB - confirm.
+module rvrangecheck #(CCM_SADR = 32'h0,
+                      CCM_SIZE = 128) (
+   input logic [31:0] addr,     // Address to be checked for range
+   output logic in_range,       // addr lies inside the window that starts at CCM_SADR
+   output logic in_region       // addr[31:28] equals CCM_SADR[31:28]
+);
+
+   localparam REGION_BITS = 4;
+   localparam MASK_BITS = 10 + $clog2(CCM_SIZE);
+
+   logic [31:0] start_addr;
+   logic [3:0] region;
+
+   assign start_addr[31:0] = CCM_SADR;
+   assign region[REGION_BITS-1:0] = start_addr[31:(32-REGION_BITS)];
+
+   assign in_region = (addr[31:(32-REGION_BITS)] == region[REGION_BITS-1:0]);
+   // CCM_SIZE == 48 is not a power of two: the window is rounded up by $clog2 and
+   // the top quarter (both bits just below MASK_BITS set) is excluded.
+   if (CCM_SIZE == 48)
+    assign in_range = (addr[31:MASK_BITS] == start_addr[31:MASK_BITS]) & ~(&addr[MASK_BITS-1 : MASK_BITS-2]);
+   else
+    assign in_range = (addr[31:MASK_BITS] == start_addr[31:MASK_BITS]);
+
+endmodule // rvrangecheck
+
+// WIDTH-bit even parity generator (default WIDTH is 16): parity_out is the XOR of all data bits
+module rveven_paritygen #(WIDTH = 16) (
+   input logic [WIDTH-1:0] data_in,   // Data
+   output logic parity_out            // generated even parity
+   );
+
+   assign parity_out = ^(data_in[WIDTH-1:0]) ;
+
+endmodule // rveven_paritygen
+
+module rveven_paritycheck #(WIDTH =
16) (
+   input logic [WIDTH-1:0] data_in,   // Data
+   input logic parity_in,             // parity bit received with the data
+   output logic parity_err            // Parity error
+   );
+
+   // Error when the XOR of the data bits and the received parity bit is 1
+   assign parity_err = ^(data_in[WIDTH-1:0]) ^ parity_in ;
+
+endmodule // rveven_paritycheck
+
+// rvecc_encode: 7 ECC bits for a 32-bit word.
+//   ecc_out[5:0] : check bits, each the XOR of a fixed subset of din
+//   ecc_out[6]   : XOR of all 32 data bits and the 6 check bits (overall parity)
+module rvecc_encode (
+   input [31:0] din,
+   output [6:0] ecc_out
+   );
+logic [5:0] ecc_out_temp;
+
+   assign ecc_out_temp[0] = din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30];
+   assign ecc_out_temp[1] = din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31];
+   assign ecc_out_temp[2] = din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31];
+   assign ecc_out_temp[3] = din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25];
+   assign ecc_out_temp[4] = din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25];
+   assign ecc_out_temp[5] = din[26]^din[27]^din[28]^din[29]^din[30]^din[31];
+
+   assign ecc_out[6:0] = {(^din[31:0])^(^ecc_out_temp[5:0]),ecc_out_temp[5:0]};
+
+endmodule // rvecc_encode
+
+// rvecc_decode: checks a 32-bit word against its 7 ECC bits (same bit subsets as rvecc_encode),
+// corrects single-bit errors and reports double-bit errors.
+module rvecc_decode (
+   input en,
+   input [31:0] din,
+   input [6:0] ecc_in,
+   input sed_ded, // only do detection and no correction.
Used for the I$
+   output [31:0] dout,
+   output [6:0] ecc_out,
+   output single_ecc_error,
+   output double_ecc_error
+
+   );
+
+   logic [6:0] ecc_check;
+   logic [38:0] error_mask;
+   logic [38:0] din_plus_parity, dout_plus_parity;
+
+   // Generate the ecc bits
+   // ecc_check[5:0] is the syndrome: received check bit XOR the check bit recomputed from din
+   assign ecc_check[0] = ecc_in[0]^din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30];
+   assign ecc_check[1] = ecc_in[1]^din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31];
+   assign ecc_check[2] = ecc_in[2]^din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31];
+   assign ecc_check[3] = ecc_in[3]^din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25];
+   assign ecc_check[4] = ecc_in[4]^din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25];
+   assign ecc_check[5] = ecc_in[5]^din[26]^din[27]^din[28]^din[29]^din[30]^din[31];
+
+   // This is the parity bit
+   // (overall parity across data and all 7 ECC bits; forced to 0 when sed_ded is set)
+   assign ecc_check[6] = ((^din[31:0])^(^ecc_in[6:0])) & ~sed_ded;
+
+   assign single_ecc_error = en & (ecc_check[6:0] != 0) & ecc_check[6]; // this will never be on for sed_ded
+   assign double_ecc_error = en & (ecc_check[6:0] != 0) & ~ecc_check[6]; // all errors in the sed_ded case will be recorded as DE
+
+   // Generate the mask for error correction
+   // error_mask[i-1] is set when the syndrome equals i, i.e. it marks the bit of din_plus_parity to flip
+   for (genvar i=1; i<40; i++) begin
+      assign error_mask[i-1] = (ecc_check[5:0] == i);
+   end
+
+   // Generate the corrected data
+   // din_plus_parity interleaves the ECC bits with the data bits: ecc_in[0..5] sit at indices 0,1,3,7,15,31 and ecc_in[6] at 38
+   assign din_plus_parity[38:0] = {ecc_in[6], din[31:26], ecc_in[5], din[25:11], ecc_in[4], din[10:4], ecc_in[3], din[3:1], ecc_in[2], din[0], ecc_in[1:0]};
+
+   assign dout_plus_parity[38:0] = single_ecc_error ?
(error_mask[38:0] ^ din_plus_parity[38:0]) : din_plus_parity[38:0];
+   // Pull the 32 data bits back out of the interleaved word
+   assign dout[31:0] = {dout_plus_parity[37:32], dout_plus_parity[30:16], dout_plus_parity[14:8], dout_plus_parity[6:4], dout_plus_parity[2]};
+   // Pull the ECC bits back out; the top bit is flipped when only the overall parity bit is in error
+   assign ecc_out[6:0] = {(dout_plus_parity[38] ^ (ecc_check[6:0] == 7'b1000000)), dout_plus_parity[31], dout_plus_parity[15], dout_plus_parity[7], dout_plus_parity[3], dout_plus_parity[1:0]};
+
+endmodule // rvecc_decode
+
+// rvecc_encode_64: 7 check bits for a 64-bit word.
+// Each ecc_out[k] is the XOR of a fixed subset of din; unlike rvecc_encode there is no
+// separate overall-parity bit.
+module rvecc_encode_64 (
+   input [63:0] din,
+   output [6:0] ecc_out
+   );
+   assign ecc_out[0] = din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]^din[32]^din[34]^din[36]^din[38]^din[40]^din[42]^din[44]^din[46]^din[48]^din[50]^din[52]^din[54]^din[56]^din[57]^din[59]^din[61]^din[63];
+
+   assign ecc_out[1] = din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]^din[32]^din[35]^din[36]^din[39]^din[40]^din[43]^din[44]^din[47]^din[48]^din[51]^din[52]^din[55]^din[56]^din[58]^din[59]^din[62]^din[63];
+
+   assign ecc_out[2] = din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]^din[32]^din[37]^din[38]^din[39]^din[40]^din[45]^din[46]^din[47]^din[48]^din[53]^din[54]^din[55]^din[56]^din[60]^din[61]^din[62]^din[63];
+
+   assign ecc_out[3] = din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
+
+   assign ecc_out[4] = din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
+
+   assign ecc_out[5] =
din[26]^din[27]^din[28]^din[29]^din[30]^din[31]^din[32]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
+
+   assign ecc_out[6] = din[57]^din[58]^din[59]^din[60]^din[61]^din[62]^din[63];
+
+endmodule // rvecc_encode_64
+
+
+// rvecc_decode_64: detection only. Recomputes the 7 check bits of rvecc_encode_64 from din,
+// XORs them with ecc_in and raises ecc_error on any mismatch while en is set.
+module rvecc_decode_64 (
+   input en,
+   input [63:0] din,
+   input [6:0] ecc_in,
+   output ecc_error
+   );
+
+   logic [6:0] ecc_check;
+
+   // Generate the ecc bits
+   assign ecc_check[0] = ecc_in[0]^din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]^din[32]^din[34]^din[36]^din[38]^din[40]^din[42]^din[44]^din[46]^din[48]^din[50]^din[52]^din[54]^din[56]^din[57]^din[59]^din[61]^din[63];
+
+   assign ecc_check[1] = ecc_in[1]^din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]^din[32]^din[35]^din[36]^din[39]^din[40]^din[43]^din[44]^din[47]^din[48]^din[51]^din[52]^din[55]^din[56]^din[58]^din[59]^din[62]^din[63];
+
+   assign ecc_check[2] = ecc_in[2]^din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]^din[32]^din[37]^din[38]^din[39]^din[40]^din[45]^din[46]^din[47]^din[48]^din[53]^din[54]^din[55]^din[56]^din[60]^din[61]^din[62]^din[63];
+
+   assign ecc_check[3] = ecc_in[3]^din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
+
+   assign ecc_check[4] =
ecc_in[4]^din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
+
+   assign ecc_check[5] = ecc_in[5]^din[26]^din[27]^din[28]^din[29]^din[30]^din[31]^din[32]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56];
+
+   assign ecc_check[6] = ecc_in[6]^din[57]^din[58]^din[59]^din[60]^din[61]^din[62]^din[63];
+
+   assign ecc_error = en & (ecc_check[6:0] != 0); // any non-zero check bit is reported; this module only detects, it does not correct
+
+ endmodule // rvecc_decode_64
+
+
+// Clock-gate cell model (module name comes from the TEC_RV_ICG macro).
+// enable = EN | SE is captured while CK is low and Q = CK & en_ff.
+// Under VERILATOR the level-sensitive latch is replaced by a negedge-CK flop.
+module `TEC_RV_ICG
+   (
+     input logic SE, EN, CK,
+     output Q
+     );
+
+   logic en_ff;
+   logic enable;
+
+   assign enable = EN | SE;
+
+`ifdef VERILATOR
+   always @(negedge CK) begin
+      en_ff <= enable;
+   end
+`else
+   always @(CK, enable) begin
+      if(!CK)
+        en_ff = enable;
+   end
+`endif
+   assign Q = CK & en_ff;
+
+endmodule
+
+// rvclkhdr: clock header. Wraps the TEC_RV_ICG cell: EN = en, SE = scan_mode, CK = clk, Q = l1clk.
+module rvclkhdr
+   (
+     input logic en,
+     input logic clk,
+     input logic scan_mode,
+     output logic l1clk
+     );
+
+   logic SE;
+   assign SE = scan_mode;
+
+   `TEC_RV_ICG clkhdr ( .*, .EN(en), .CK(clk), .Q(l1clk));
+
+endmodule // rvclkhdr
+
+// rvoclkhdr: same as rvclkhdr, except that with RV_FPGA_OPTIMIZE defined
+// no gate is instantiated and l1clk is clk.
+module rvoclkhdr
+   (
+     input logic en,
+     input logic clk,
+     input logic scan_mode,
+     output logic l1clk
+     );
+
+   logic SE;
+   assign SE = scan_mode;
+
+`ifdef RV_FPGA_OPTIMIZE
+   assign l1clk = clk;
+`else
+   `TEC_RV_ICG clkhdr ( .*, .EN(en), .CK(clk), .Q(l1clk));
+`endif
+
+endmodule
+
+
+
diff --git a/design/lib/el2_lib.sv b/design/lib/el2_lib.sv
new file mode 100644
index 0000000..6f71a3c
--- /dev/null
+++ b/design/lib/el2_lib.sv
@@ -0,0 +1,64 @@
+module el2_btb_tag_hash #(
+`include "el2_param.vh"
+ ) (
+   input logic [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] pc,
+   output logic [pt.BTB_BTAG_SIZE-1:0] hash
+ );
+
+   // hash = XOR of the three consecutive BTB_BTAG_SIZE-bit slices of pc that start at bit BTB_ADDR_HI+1
+   assign hash = {(pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+1] ^
+                   pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+1] ^
+                   pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1])};
+endmodule
+
+// el2_btb_tag_hash_fold: two-slice variant of el2_btb_tag_hash (XOR of the two
+// BTB_BTAG_SIZE-bit slices of pc that start at bit BTB_ADDR_HI+1)
+module el2_btb_tag_hash_fold #(
+`include "el2_param.vh"
+ )(
+   input logic [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] pc,
+   output logic [pt.BTB_BTAG_SIZE-1:0] hash
+ );
+
+   assign hash = {(
+                   pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+1] ^
+                   pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1])};
+
+endmodule
+
+// el2_btb_addr_hash: BTB index = XOR of pc index fields.
+//   BTB_FOLD2_INDEX_HASH set : INDEX1 ^ INDEX3
+//   otherwise                : INDEX1 ^ INDEX2 ^ INDEX3
+module el2_btb_addr_hash #(
+`include "el2_param.vh"
+ )(
+   input logic [pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO] pc,
+   output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hash
+ );
+
+
+if(pt.BTB_FOLD2_INDEX_HASH) begin : fold2
+   assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^
+                                                pc[pt.BTB_INDEX3_HI:pt.BTB_INDEX3_LO];
+end
+   else begin
+   assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^
+                                                pc[pt.BTB_INDEX2_HI:pt.BTB_INDEX2_LO] ^
+                                                pc[pt.BTB_INDEX3_HI:pt.BTB_INDEX3_LO];
+end
+
+endmodule
+
+// el2_btb_ghr_hash: combines the hashed BTB address (hashin) with the ghr input to form the BHT index
+module el2_btb_ghr_hash #(
+`include "el2_param.vh"
+ )(
+   input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hashin,
+   input logic [pt.BHT_GHR_SIZE-1:0] ghr,
+   output logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] hash
+ );
+
+   // The hash function is too complex to write in verilog for all cases.
+   // The config script generates the logic string based on the bp config.
+   // cfg1: upper ghr bits are concatenated above (hashin XOR lower ghr bits)
+   if(pt.BHT_GHR_HASH_1) begin : ghrhash_cfg1
+     assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = { ghr[pt.BHT_GHR_SIZE-1:pt.BTB_INDEX1_HI-1], hashin[pt.BTB_INDEX1_HI:2]^ghr[pt.BTB_INDEX1_HI-2:0]};
+   end
+   // cfg2: the whole ghr is XORed with hashin[BHT_GHR_SIZE+1:2]
+   else begin : ghrhash_cfg2
+     assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = { hashin[pt.BHT_GHR_SIZE+1:2]^ghr[pt.BHT_GHR_SIZE-1:0]};
+   end
+
+
+endmodule
diff --git a/design/lib/mem_lib.sv b/design/lib/mem_lib.sv
new file mode 100644
index 0000000..e741c61
--- /dev/null
+++ b/design/lib/mem_lib.sv
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Control ports shared by every behavioural RAM model below.
+// ME gates every access; WE selects write (1) or read (0); CLK is the clock.
+`define EL2_LOCAL_RAM_TEST_IO \
+input logic WE, \
+input logic ME, \
+input logic CLK
+
+// EL2_RAM(depth, width): declares module ram_<depth>x<width>, a single-port RAM model.
+// On posedge CLK with ME=1: WE=1 writes D to ram_core[ADR], WE=0 loads Q from ram_core[ADR].
+// Q is not updated on a write or when ME=0.
+// NOTE(review): the write is a blocking assignment inside a clocked block (lint tools such as
+// Verilator report this style); the write and read branches are mutually exclusive, so Q is
+// not affected, but confirm with the owners whether '<=' is preferred.
+`define EL2_RAM(depth, width) \
+module ram_``depth``x``width( \
+   input logic [$clog2(depth)-1:0] ADR, \
+   input logic [(width-1):0] D, \
+   output logic [(width-1):0] Q, \
+   `EL2_LOCAL_RAM_TEST_IO \
+); \
+reg [(width-1):0] ram_core [(depth-1):0]; \
+ \
+always @(posedge CLK) begin \
+   if (ME && WE) ram_core[ADR] = D; \
+   if (ME && ~WE) Q <= ram_core[ADR]; \
+end \
+ \
+endmodule
+
+// EL2_RAM_BE(depth, width): declares module ram_be_<depth>x<width>; same as EL2_RAM plus a
+// per-bit write mask WEM: bits with WEM=1 take D, bits with WEM=0 keep the stored value.
+`define EL2_RAM_BE(depth, width) \
+module ram_be_``depth``x``width( \
+   input logic [$clog2(depth)-1:0] ADR, \
+   input logic [(width-1):0] D, WEM, \
+   output logic [(width-1):0] Q, \
+   `EL2_LOCAL_RAM_TEST_IO \
+); \
+reg [(width-1):0] ram_core [(depth-1):0]; \
+ \
+always @(posedge CLK) begin \
+   if (ME && WE) ram_core[ADR] = D & WEM | ~WEM & ram_core[ADR];\
+   if (ME && ~WE) Q <= ram_core[ADR]; \
+end \
+ \
+ \
+endmodule
+
+// parameterizable RAM for verilator sims
+// (same behaviour as the EL2_RAM models, with depth/width as module parameters)
+module el2_ram #(depth=4096, width=39) (
+input logic [$clog2(depth)-1:0] ADR,
+input logic [(width-1):0] D,
+output logic [(width-1):0] Q,
+ `EL2_LOCAL_RAM_TEST_IO
+);
+reg [(width-1):0] ram_core [(depth-1):0];
+
+always @(posedge CLK) begin
+   if (ME && WE) ram_core[ADR] = D;       // write: ME=1, WE=1
+   if (ME && ~WE) Q <= ram_core[ADR];     // read : ME=1, WE=0
+end
+endmodule
+
+//=========================================================================================================================
+//=================================== START OF CCM =======================================================================
+//============= Possible sram sizes for a 39 bit wide memory ( 4 bytes + 7 bits ECC ) =====================================
+//-------------------------------------------------------------------------------------------------------------------------
+`EL2_RAM(32768, 39)
+`EL2_RAM(16384, 39)
+`EL2_RAM(8192, 39)
+`EL2_RAM(4096, 39)
+`EL2_RAM(3072, 39)
+`EL2_RAM(2048, 39)
+`EL2_RAM(1536, 39) // needed for the 48KB DCCM option
+`EL2_RAM(1024, 39)
+`EL2_RAM(768, 39)
+`EL2_RAM(512, 39)
+`EL2_RAM(256, 39)
+`EL2_RAM(128, 39)
+// Each EL2_RAM(depth, width) line declares module ram_<depth>x<width>.
+// 20-bit wide
+`EL2_RAM(1024, 20)
+`EL2_RAM(512, 20)
+`EL2_RAM(256, 20)
+`EL2_RAM(128, 20)
+`EL2_RAM(64, 20)
+// 34-bit wide
+`EL2_RAM(4096, 34)
+`EL2_RAM(2048, 34)
+`EL2_RAM(1024, 34)
+`EL2_RAM(512, 34)
+`EL2_RAM(256, 34)
+`EL2_RAM(128, 34)
+`EL2_RAM(64, 34)
+// 68-bit wide
+`EL2_RAM(8192, 68)
+`EL2_RAM(4096, 68)
+`EL2_RAM(2048, 68)
+`EL2_RAM(1024, 68)
+`EL2_RAM(512, 68)
+`EL2_RAM(256, 68)
+`EL2_RAM(128, 68)
+`EL2_RAM(64, 68)
+// 71-bit wide
+`EL2_RAM(8192, 71)
+`EL2_RAM(4096, 71)
+`EL2_RAM(2048, 71)
+`EL2_RAM(1024, 71)
+`EL2_RAM(512, 71)
+`EL2_RAM(256, 71)
+`EL2_RAM(128, 71)
+`EL2_RAM(64, 71)
+// 42-bit wide
+`EL2_RAM(4096, 42)
+`EL2_RAM(2048, 42)
+`EL2_RAM(1024, 42)
+`EL2_RAM(512, 42)
+`EL2_RAM(256, 42)
+`EL2_RAM(128, 42)
+`EL2_RAM(64, 42)
+// 22-bit wide
+`EL2_RAM(4096, 22)
+`EL2_RAM(2048, 22)
+`EL2_RAM(1024, 22)
+`EL2_RAM(512, 22)
+`EL2_RAM(256, 22)
+`EL2_RAM(128, 22)
+`EL2_RAM(64, 22)
+// 26-bit wide
+`EL2_RAM(1024, 26)
+`EL2_RAM(4096, 26)
+`EL2_RAM(2048, 26)
+`EL2_RAM(512, 26)
+`EL2_RAM(256, 26)
+`EL2_RAM(128, 26)
+`EL2_RAM(64, 26)
+// 32-entry models
+`EL2_RAM(32, 26)
+`EL2_RAM(32, 22)
+// Each EL2_RAM_BE(depth, width) line declares module ram_be_<depth>x<width> (per-bit write mask).
+// 142-bit wide
+`EL2_RAM_BE(8192, 142)
+`EL2_RAM_BE(4096, 142)
+`EL2_RAM_BE(2048, 142)
+`EL2_RAM_BE(1024, 142)
+`EL2_RAM_BE(512, 142)
+`EL2_RAM_BE(256, 142)
+`EL2_RAM_BE(128, 142)
+`EL2_RAM_BE(64, 142)
+// 284-bit wide
+`EL2_RAM_BE(8192, 284)
+`EL2_RAM_BE(4096, 284)
+`EL2_RAM_BE(2048, 284)
+`EL2_RAM_BE(1024, 284)
+`EL2_RAM_BE(512, 284)
+`EL2_RAM_BE(256, 284)
+`EL2_RAM_BE(128, 284)
+`EL2_RAM_BE(64, 284)
+// 136-bit wide
+`EL2_RAM_BE(8192, 136)
+`EL2_RAM_BE(4096, 136)
+`EL2_RAM_BE(2048, 136)
+`EL2_RAM_BE(1024, 136)
+`EL2_RAM_BE(512, 136)
+`EL2_RAM_BE(256, 136)
+`EL2_RAM_BE(128, 136)
+`EL2_RAM_BE(64, 136)
+// 272-bit wide
+`EL2_RAM_BE(8192, 272)
+`EL2_RAM_BE(4096, 272)
+`EL2_RAM_BE(2048, 272)
+`EL2_RAM_BE(1024, 272)
+`EL2_RAM_BE(512, 272)
+`EL2_RAM_BE(256, 272)
+`EL2_RAM_BE(128, 272)
+`EL2_RAM_BE(64, 272)
+// 52-bit wide
+`EL2_RAM_BE(4096, 52)
+`EL2_RAM_BE(2048, 52)
+`EL2_RAM_BE(1024, 52)
+`EL2_RAM_BE(512, 52)
+`EL2_RAM_BE(256, 52)
+`EL2_RAM_BE(128, 52)
+`EL2_RAM_BE(64, 52)
+// 104-bit wide
+`EL2_RAM_BE(4096, 104)
+`EL2_RAM_BE(2048, 104) +`EL2_RAM_BE(1024, 104) +`EL2_RAM_BE(512, 104) +`EL2_RAM_BE(256, 104) +`EL2_RAM_BE(128, 104) +`EL2_RAM_BE(64, 104) +`EL2_RAM_BE(4096, 44) +`EL2_RAM_BE(2048, 44) +`EL2_RAM_BE(1024, 44) +`EL2_RAM_BE(512, 44) +`EL2_RAM_BE(256, 44) +`EL2_RAM_BE(128, 44) +`EL2_RAM_BE(64, 44) +`EL2_RAM_BE(4096, 88) +`EL2_RAM_BE(2048, 88) +`EL2_RAM_BE(1024, 88) +`EL2_RAM_BE(512, 88) +`EL2_RAM_BE(256, 88) +`EL2_RAM_BE(128, 88) +`EL2_RAM_BE(64, 88) + + +`undef EL2_RAM +`undef EL2_RAM_BE +`undef EL2_LOCAL_RAM_TEST_IO + + diff --git a/design/lsu/el2_lsu.sv b/design/lsu/el2_lsu.sv new file mode 100644 index 0000000..cd2a097 --- /dev/null +++ b/design/lsu/el2_lsu.sv @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// +// Function: Top level file for load store unit +// Comments: +// +// +// DC1 -> DC2 -> DC3 -> DC4 (Commit) +// +//******************************************************************************** + +module el2_lsu +import el2_pkg::*; +#( +`include "el2_param.vh" + ) +( + + input logic clk_override, // Override non-functional clock gating + input logic dec_tlu_flush_lower_r, // I0/I1 writeback flush. 
This is used to flush the old packets only + input logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state + input logic dec_tlu_force_halt, // This will be high till TLU goes to debug halt + + // chicken signals + input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals + input logic dec_tlu_wb_coalescing_disable, // disable the write buffer coalesce + input logic dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus + input logic dec_tlu_core_ecc_disable, // disable the generation of the ecc + + + input logic [31:0] exu_lsu_rs1_d, // address rs operand + input logic [31:0] exu_lsu_rs2_d, // store data + input logic [11:0] dec_lsu_offset_d, // address offset operand + + input el2_lsu_pkt_t lsu_p, // lsu control packet + input logic dec_lsu_valid_raw_d, // Raw valid for address computation + input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control + + output logic [31:0] lsu_result_m, // lsu load data + output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF + output logic lsu_load_stall_any, // This is for blocking loads in the decode + output logic lsu_store_stall_any, // This is for blocking stores in the decode + output logic lsu_fastint_stall_any, // Stall the fastint in decode-1 stage + output logic lsu_idle_any, // lsu buffers are empty and no instruction in the pipeline. 
Doesn't include DMA + + output logic [31:1] lsu_fir_addr, // fast interrupt address + output logic [1:0] lsu_fir_error, // Error during fast interrupt lookup + + output logic lsu_single_ecc_error_incr, // Increment the ecc counter + output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet + output logic lsu_imprecise_error_load_any, // bus load imprecise error + output logic lsu_imprecise_error_store_any, // bus store imprecise error + output logic [31:0] lsu_imprecise_error_addr_any, // bus store imprecise error address + + // Non-blocking loads + output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load + output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the enrty which needs to be invalidated + output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam + output logic lsu_nonblock_load_data_error, // non block load has an error + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error + output logic [31:0] lsu_nonblock_load_data, // Data of the non block load + + + output logic lsu_pmu_load_external_m, // PMU : Bus loads + output logic lsu_pmu_store_external_m, // PMU : Bus loads + output logic lsu_pmu_misaligned_m, // PMU : misaligned + output logic lsu_pmu_bus_trxn, // PMU : bus transaction + output logic lsu_pmu_bus_misaligned, // PMU : misaligned access going to the bus + output logic lsu_pmu_bus_error, // PMU : bus sending error back + output logic lsu_pmu_bus_busy, // PMU : bus is not ready + + // Trigger signals + input el2_trigger_pkt_t [3:0] trigger_pkt_any, // Trigger info from the decode + output logic [3:0] lsu_trigger_match_m, // lsu trigger hit (one bit 
per trigger) + + // DCCM ports + output logic dccm_wren, // DCCM write enable + output logic dccm_rden, // DCCM read enable + output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // DCCM write address low bank + output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // DCCM write address hi bank + output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // DCCM read address low bank + output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // DCCM read address hi bank (hi and low same if aligned read) + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // DCCM write data for lo bank + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // DCCM write data for hi bank + + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // DCCM read data low bank + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // DCCM read data hi bank + + // PIC ports + output logic picm_wren, // PIC memory write enable + output logic picm_rden, // PIC memory read enable + output logic picm_mken, // Need to read the mask for stores to determine which bits to write/forward + output logic [31:0] picm_rdaddr, // address for pic read access + output logic [31:0] picm_wraddr, // address for pic write access + output logic [31:0] picm_wr_data, // PIC memory write data + input logic [31:0] picm_rd_data, // PIC memory read/mask data + + // AXI Write Channels + output logic lsu_axi_awvalid, + input logic lsu_axi_awready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid, + output logic [31:0] lsu_axi_awaddr, + output logic [3:0] lsu_axi_awregion, + output logic [7:0] lsu_axi_awlen, + output logic [2:0] lsu_axi_awsize, + output logic [1:0] lsu_axi_awburst, + output logic lsu_axi_awlock, + output logic [3:0] lsu_axi_awcache, + output logic [2:0] lsu_axi_awprot, + output logic [3:0] lsu_axi_awqos, + + output logic lsu_axi_wvalid, + input logic lsu_axi_wready, + output logic [63:0] lsu_axi_wdata, + output logic [7:0] lsu_axi_wstrb, + output logic lsu_axi_wlast, + + input logic lsu_axi_bvalid, + output logic 
lsu_axi_bready, + input logic [1:0] lsu_axi_bresp, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid, + + // AXI Read Channels + output logic lsu_axi_arvalid, + input logic lsu_axi_arready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid, + output logic [31:0] lsu_axi_araddr, + output logic [3:0] lsu_axi_arregion, + output logic [7:0] lsu_axi_arlen, + output logic [2:0] lsu_axi_arsize, + output logic [1:0] lsu_axi_arburst, + output logic lsu_axi_arlock, + output logic [3:0] lsu_axi_arcache, + output logic [2:0] lsu_axi_arprot, + output logic [3:0] lsu_axi_arqos, + + input logic lsu_axi_rvalid, + output logic lsu_axi_rready, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid, + input logic [63:0] lsu_axi_rdata, + input logic [1:0] lsu_axi_rresp, + input logic lsu_axi_rlast, + + input logic lsu_bus_clk_en, // external drives a clock_en to control bus ratio + + // DMA slave + input logic dma_dccm_req, // DMA read/write to dccm + input logic [2:0] dma_mem_tag, // DMA request tag + input logic [31:0] dma_mem_addr, // DMA address + input logic [2:0] dma_mem_sz, // DMA access size + input logic dma_mem_write, // DMA access is a write + input logic [63:0] dma_mem_wdata, // DMA write data + + output logic dccm_dma_rvalid, // lsu data valid for DMA dccm read + output logic dccm_dma_ecc_error, // DMA load had ecc error + output logic [2:0] dccm_dma_rtag, // DMA request tag + output logic [63:0] dccm_dma_rdata, // lsu data for DMA dccm read + output logic dccm_ready, // lsu ready for DMA access + + input logic scan_mode, // scan + input logic clk, + input logic free_clk, + input logic rst_l + + ); + + + logic lsu_dccm_rden_m; + logic lsu_dccm_rden_r; + logic [31:0] store_data_m; + logic [31:0] store_data_r; + logic [31:0] store_data_hi_r, store_data_lo_r; + logic [31:0] store_datafn_hi_r, store_datafn_lo_r; + logic [31:0] sec_data_lo_m, sec_data_hi_m; + logic [31:0] sec_data_lo_r, sec_data_hi_r; + + logic [31:0] lsu_ld_data_m; + logic [31:0] dccm_rdata_hi_m, dccm_rdata_lo_m; + logic 
[6:0] dccm_data_ecc_hi_m, dccm_data_ecc_lo_m; + logic lsu_single_ecc_error_m; + logic lsu_double_ecc_error_m; + + logic [31:0] lsu_ld_data_r; + logic [31:0] lsu_ld_data_corr_r; + logic [31:0] dccm_rdata_hi_r, dccm_rdata_lo_r; + logic [6:0] dccm_data_ecc_hi_r, dccm_data_ecc_lo_r; + logic single_ecc_error_hi_r, single_ecc_error_lo_r; + logic lsu_single_ecc_error_r; + logic lsu_double_ecc_error_r; + logic ld_single_ecc_error_r, ld_single_ecc_error_r_ff; + + logic [31:0] picm_mask_data_m; + + logic [31:0] lsu_addr_d, lsu_addr_m, lsu_addr_r; + logic [31:0] end_addr_d, end_addr_m, end_addr_r; + + el2_lsu_pkt_t lsu_pkt_d, lsu_pkt_m, lsu_pkt_r; + logic lsu_i0_valid_d, lsu_i0_valid_m, lsu_i0_valid_r; + + // Store Buffer signals + logic store_stbuf_reqvld_r; + logic ldst_stbuf_reqvld_r; + + logic lsu_commit_r; + logic lsu_exc_m; + + logic addr_in_dccm_d, addr_in_dccm_m, addr_in_dccm_r; + logic addr_in_pic_d, addr_in_pic_m, addr_in_pic_r; + logic addr_external_m; + + logic stbuf_reqvld_any; + logic stbuf_reqvld_flushed_any; + logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any; + logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any; + logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any; + logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, sec_data_hi_r_ff; + logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, sec_data_ecc_lo_r_ff; + + logic lsu_cmpen_m; + logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m; + logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m; + logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m; + logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m; + + logic lsu_stbuf_commit_any; + logic lsu_stbuf_empty_any; // This is for blocking loads + logic lsu_stbuf_full_any; + + // Bus signals + logic lsu_busreq_r; + logic lsu_bus_buffer_pend_any; + logic lsu_bus_buffer_empty_any; + logic lsu_bus_buffer_full_any; + logic lsu_bus_idle_any; + logic lsu_busreq_m; + logic [31:0] bus_read_data_m; + + logic flush_m_up, flush_r; + logic is_sideeffects_m; + logic [2:0] dma_mem_tag_d, dma_mem_tag_m; + 
logic ldst_nodma_mtor; + logic dma_dccm_wen; + logic [31:0] dma_dccm_wdata_lo, dma_dccm_wdata_hi; + logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, dma_dccm_wdata_ecc_hi; + + // Clocks + logic lsu_c1_m_clk, lsu_c1_r_clk; + logic lsu_c2_m_clk, lsu_c2_r_clk; + logic lsu_store_c1_m_clk, lsu_store_c1_r_clk; + + logic lsu_stbuf_c1_clk; + logic lsu_bus_ibuf_c1_clk, lsu_bus_obuf_c1_clk, lsu_bus_buf_c1_clk; + logic lsu_busm_clk; + logic lsu_free_c2_clk; + + logic lsu_raw_fwd_lo_m, lsu_raw_fwd_hi_m; + logic lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r; + + assign lsu_raw_fwd_lo_m = (|stbuf_fwdbyteen_lo_m[pt.DCCM_BYTE_WIDTH-1:0]); + assign lsu_raw_fwd_hi_m = (|stbuf_fwdbyteen_hi_m[pt.DCCM_BYTE_WIDTH-1:0]); + + el2_lsu_lsc_ctl #(.pt(pt)) lsu_lsc_ctl (.*); + + // block stores in decode - for either bus or stbuf reasons + assign lsu_store_stall_any = lsu_stbuf_full_any | lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff; + assign lsu_load_stall_any = lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff; + assign lsu_fastint_stall_any = ld_single_ecc_error_r; // Stall the fastint in decode-1 stage + + // Ready to accept dma trxns + // There can't be any inpipe forwarding from non-dma packet to dma packet since they can be flushed so we can't have st in r when dma is in m + assign dma_mem_tag_d[2:0] = dma_mem_tag[2:0]; + assign ldst_nodma_mtor = (lsu_pkt_m.valid & ~lsu_pkt_m.dma & (addr_in_dccm_m | addr_in_pic_m) & lsu_pkt_m.store); + assign dccm_ready = ~(dec_lsu_valid_raw_d | ldst_nodma_mtor | ld_single_ecc_error_r_ff); + + assign dma_dccm_wen = dma_dccm_req & dma_mem_write & addr_in_dccm_d; + assign {dma_dccm_wdata_hi[31:0], dma_dccm_wdata_lo[31:0]} = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores + + + // Generate per cycle flush signals + assign flush_m_up = dec_tlu_flush_lower_r; + assign flush_r = dec_tlu_i0_kill_writeb_r; + + // lsu halt idle. This is used for entering the halt mode. 
Also, DMA accesses are allowed during fence. + // Indicates non-idle if there is a instruction valid in d-r or read/write buffers are non-empty since they can come with error + // Store buffer now have only non-dma dccm stores + // stbuf_empty not needed since it has only dccm stores + assign lsu_idle_any = ~((lsu_pkt_m.valid & ~lsu_pkt_m.dma) | + (lsu_pkt_r.valid & ~lsu_pkt_r.dma)) & + lsu_bus_buffer_empty_any & lsu_bus_idle_any; + + // Instantiate the store buffer + assign store_stbuf_reqvld_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~flush_r & ~lsu_pkt_r.dma; + + // Disable Forwarding for now + assign lsu_cmpen_m = lsu_pkt_m.valid & (lsu_pkt_m.load | lsu_pkt_m.store) & (addr_in_dccm_m | addr_in_pic_m); + + // Bus signals + assign lsu_busreq_m = lsu_pkt_m.valid & ((lsu_pkt_m.load | lsu_pkt_m.store) & addr_external_m) & ~flush_m_up & ~lsu_exc_m & ~lsu_pkt_m.fast_int; + + // PMU signals + assign lsu_pmu_misaligned_m = lsu_pkt_m.valid & ((lsu_pkt_m.half & lsu_addr_m[0]) | (lsu_pkt_m.word & (|lsu_addr_m[1:0]))); + assign lsu_pmu_load_external_m = lsu_pkt_m.valid & lsu_pkt_m.load & addr_external_m; + assign lsu_pmu_store_external_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_external_m; + + el2_lsu_dccm_ctl #(.pt(pt)) dccm_ctl ( + .lsu_addr_d(lsu_addr_d[31:0]), + .end_addr_d(end_addr_d[pt.DCCM_BITS-1:0]), + .lsu_addr_m(lsu_addr_m[pt.DCCM_BITS-1:0]), + .lsu_addr_r(lsu_addr_r[31:0]), + + .end_addr_m(end_addr_m[pt.DCCM_BITS-1:0]), + .end_addr_r(end_addr_r[pt.DCCM_BITS-1:0]), + .* + ); + + el2_lsu_stbuf #(.pt(pt)) stbuf ( + .lsu_addr_d(lsu_addr_d[pt.LSU_SB_BITS-1:0]), + .end_addr_d(end_addr_d[pt.LSU_SB_BITS-1:0]), + + .* + + ); + + el2_lsu_ecc #(.pt(pt)) ecc ( + .lsu_addr_r(lsu_addr_r[pt.DCCM_BITS-1:0]), + .end_addr_r(end_addr_r[pt.DCCM_BITS-1:0]), + .lsu_addr_m(lsu_addr_m[pt.DCCM_BITS-1:0]), + .end_addr_m(end_addr_m[pt.DCCM_BITS-1:0]), + .* + ); + + el2_lsu_trigger #(.pt(pt)) trigger ( + .store_data_m(store_data_m[31:0]), + .* + ); + + // Clk domain + 
el2_lsu_clkdomain #(.pt(pt)) clkdomain (.*); + + // Bus interface + el2_lsu_bus_intf #(.pt(pt)) bus_intf ( + .* + ); + + //Flops + rvdff #(3) dma_mem_tag_mff (.*, .din(dma_mem_tag_d[2:0]), .dout(dma_mem_tag_m[2:0]), .clk(lsu_c1_m_clk)); + + rvdff #(2) lsu_raw_fwd_r_ff (.*, .din({lsu_raw_fwd_hi_m, lsu_raw_fwd_lo_m}), .dout({lsu_raw_fwd_hi_r, lsu_raw_fwd_lo_r}), .clk(lsu_c2_r_clk)); + + +`ifdef ASSERT_ON + logic [1:0] store_data_bypass_sel; + assign store_data_bypass_sel[1:0] = {lsu_p.store_data_bypass_d, lsu_p.store_data_bypass_m}; + property exception_no_lsu_flush; + @(posedge clk) disable iff(~rst_l) lsu_lsc_ctl.lsu_error_pkt_m.exc_valid |-> ##[1:2] (flush_r ); + endproperty + assert_exception_no_lsu_flush: assert property (exception_no_lsu_flush) else + $display("No flush within 2 cycles of exception"); + + // offset should be zero for fast interrupt + property offset_0_fastint; + @(posedge clk) disable iff(~rst_l) (lsu_p.valid & lsu_p.fast_int) |-> (dec_lsu_offset_d[11:0] == 12'b0); + endproperty + assert_offset_0_fastint: assert property (offset_0_fastint) else + $display("dec_tlu_offset_d not zero for fast interrupt redirect"); + + // DMA req should assert dccm rden/wren + property dmareq_dccm_wren_or_rden; + @(posedge clk) disable iff(~rst_l) dma_dccm_req |-> (dccm_rden | dccm_wren | addr_in_pic_d); + endproperty + assert_dmareq_dccm_wren_or_rden: assert property(dmareq_dccm_wren_or_rden) else + $display("dccm rden or wren not asserted during DMA request"); + + // fastint_stall should cause load/store stall next cycle + property fastint_stall_imply_loadstore_stall; + @(posedge clk) disable iff(~rst_l) (lsu_fastint_stall_any & (lsu_commit_r | lsu_pkt_r.dma)) |-> ##1 ((lsu_load_stall_any | lsu_store_stall_any) | ~ld_single_ecc_error_r_ff); + endproperty + assert_fastint_stall_imply_loadstore_stall: assert property (fastint_stall_imply_loadstore_stall) else + $display("fastint_stall should be followed by lsu_load/store_stall_any"); + + // Single ECC error 
implies rfnpc flush + property single_ecc_error_rfnpc_flush; + @(posedge clk) disable iff(~rst_l) (lsu_error_pkt_r.single_ecc_error & lsu_pkt_r.load) |=> ~lsu_commit_r; + endproperty + assert_single_ecc_error_rfnpc_flush: assert property (single_ecc_error_rfnpc_flush) else + $display("LSU commit next cycle after single ecc error"); + +`endif + +endmodule // el2_lsu diff --git a/design/lsu/el2_lsu_addrcheck.sv b/design/lsu/el2_lsu_addrcheck.sv new file mode 100644 index 0000000..77329fc --- /dev/null +++ b/design/lsu/el2_lsu_addrcheck.sv @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: Checks the memory map for the address
+// Comments: Combinational classification of the d-stage start/end address (DCCM / PIC / external) plus access-fault, misaligned-fault and fast-interrupt error flags; only is_sideeffects is staged to m through rvdff.
+//
+//********************************************************************************
+module el2_lsu_addrcheck
+import el2_pkg::*;
+#(
+`include "el2_param.vh"
+ )(
+   input logic          lsu_c2_m_clk,              // clock for the is_sideeffects d -> m stage register
+   input logic          rst_l,                     // reset; not referenced directly here, presumably picked up by rvdff through .* - confirm
+
+   input logic [31:0]   start_addr_d,              // start address for lsu
+   input logic [31:0]   end_addr_d,                // end address for lsu
+   input el2_lsu_pkt_t  lsu_pkt_d,                 // lsu packet in d (valid/load/store/word/half/by/dma/fast_int are read here)
+   input logic [31:0]   dec_tlu_mrac_ff,           // mrac CSR value; bit {region,1'b1} is read as the side-effect attribute of a region
+   input logic [3:0]    rs1_region_d,              // region of the base register; compared against DCCM_REGION/PIC_REGION for fault 5 below
+
+   input logic [31:0]   rs1_d,                     // not referenced inside this module
+
+   output logic         is_sideeffects_m,          // access is to sideeffects space (m stage copy of is_sideeffects_d)
+   output logic         addr_in_dccm_d,            // both start and end address in dccm
+   output logic         addr_in_pic_d,             // both start and end address in pic
+   output logic         addr_external_d,           // start address is in neither the dccm region nor the pic region
+
+   output logic         access_fault_d,            // access fault
+   output logic         misaligned_fault_d,        // misaligned
+   output logic [2:0]   exc_mscause_d,             // mscause for access/misaligned faults
+
+   output logic         fir_dccm_access_error_d,   // Fast interrupt: start or end address in dccm region but outside the dccm range
+   output logic         fir_nondccm_access_error_d,// Fast interrupt: start or end address not in the dccm region
+
+   input  logic         scan_mode                  // not referenced directly here, presumably picked up by rvdff through .* - confirm
+);
+
+
+   logic           non_dccm_access_ok;
+   logic           is_sideeffects_d, is_aligned_d;
+   logic           start_addr_in_dccm_d, end_addr_in_dccm_d;
+   logic           start_addr_in_dccm_region_d, end_addr_in_dccm_region_d;
+   logic           start_addr_in_pic_d, end_addr_in_pic_d;
+   logic           start_addr_in_pic_region_d, end_addr_in_pic_region_d;
+   logic [4:0]     csr_idx;
+   logic           addr_in_iccm;
+   logic           start_addr_dccm_or_pic;
+   logic           base_reg_dccm_or_pic;
+   logic           unmapped_access_fault_d, mpu_access_fault_d, picm_access_fault_d, regpred_access_fault_d;
+   logic           regcross_misaligned_fault_d, sideeffect_misaligned_fault_d;
+   logic [2:0]     access_fault_mscause_d;
+   logic           misaligned_fault_mscause_d;
+
+   if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
+      // Start address check
+      rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
+                     .CCM_SIZE(pt.DCCM_SIZE)) start_addr_dccm_rangecheck (
+         .addr(start_addr_d[31:0]),
+         .in_range(start_addr_in_dccm_d),
+         .in_region(start_addr_in_dccm_region_d)
+      );
+
+      // End address check
+      rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
+                     .CCM_SIZE(pt.DCCM_SIZE)) end_addr_dccm_rangecheck (
+         .addr(end_addr_d[31:0]),
+         .in_range(end_addr_in_dccm_d),
+         .in_region(end_addr_in_dccm_region_d)
+      );
+   end else begin: Gen_dccm_disable // end of Gen_dccm_enable; without a DCCM all four range-check results are tied to 0
+      assign start_addr_in_dccm_d = '0;
+      assign start_addr_in_dccm_region_d = '0;
+      assign end_addr_in_dccm_d = '0;
+      assign end_addr_in_dccm_region_d = '0;
+   end
+
+   if (pt.ICCM_ENABLE == 1) begin : check_iccm // only the region nibble of the start address is compared
+      assign addr_in_iccm = (start_addr_d[31:28] == pt.ICCM_REGION);
+   end else begin // ICCM not enabled: never in iccm
+      assign addr_in_iccm = 1'b0;
+   end
+
+   // PIC memory check
+   // Start address check
+   rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
+                  .CCM_SIZE(pt.PIC_SIZE)) start_addr_pic_rangecheck (
+      .addr(start_addr_d[31:0]),
+      .in_range(start_addr_in_pic_d),
+      .in_region(start_addr_in_pic_region_d)
+   );
+
+   // End address check
+   rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
+                  .CCM_SIZE(pt.PIC_SIZE)) end_addr_pic_rangecheck (
+      .addr(end_addr_d[31:0]),
+      .in_range(end_addr_in_pic_d),
+      .in_region(end_addr_in_pic_region_d)
+   );
+
+   assign start_addr_dccm_or_pic = start_addr_in_dccm_region_d | start_addr_in_pic_region_d; // computed address falls in the dccm or pic region
+   assign base_reg_dccm_or_pic = (rs1_region_d[3:0] == pt.DCCM_REGION) | (rs1_region_d[3:0] == pt.PIC_REGION); // base register region is the dccm or pic region
+   assign addr_in_dccm_d = (start_addr_in_dccm_d & end_addr_in_dccm_d);
+   assign addr_in_pic_d = (start_addr_in_pic_d & end_addr_in_pic_d);
+
+   assign addr_external_d = ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d);
+   assign csr_idx[4:0] = {start_addr_d[31:28], 1'b1}; // index of bit 1 of the 2-bit mrac field for the start address region
+   assign is_sideeffects_d = dec_tlu_mrac_ff[csr_idx] & ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d | addr_in_iccm) & lsu_pkt_d.valid & (lsu_pkt_d.store | lsu_pkt_d.load); // every region has 2 bits in mrac ( bit 1: sideeffects/no sideeffects, bit 0: cacheable ). Ignored in internal (dccm/pic/iccm) regions
+   assign is_aligned_d = (lsu_pkt_d.word & (start_addr_d[1:0] == 2'b0)) |
+                         (lsu_pkt_d.half & (start_addr_d[0] == 1'b0)) |
+                         lsu_pkt_d.by;
+
+   assign non_dccm_access_ok = (~(|{pt.DATA_ACCESS_ENABLE0,pt.DATA_ACCESS_ENABLE1,pt.DATA_ACCESS_ENABLE2,pt.DATA_ACCESS_ENABLE3,pt.DATA_ACCESS_ENABLE4,pt.DATA_ACCESS_ENABLE5,pt.DATA_ACCESS_ENABLE6,pt.DATA_ACCESS_ENABLE7})) | // no window enabled: every address is ok
+                               (((pt.DATA_ACCESS_ENABLE0 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK0)) == (pt.DATA_ACCESS_ADDR0 | pt.DATA_ACCESS_MASK0)) | // else start address must hit an enabled window (bits set in MASKn are don't-care)
+                                 (pt.DATA_ACCESS_ENABLE1 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK1)) == (pt.DATA_ACCESS_ADDR1 | pt.DATA_ACCESS_MASK1)) |
+                                 (pt.DATA_ACCESS_ENABLE2 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK2)) == (pt.DATA_ACCESS_ADDR2 | pt.DATA_ACCESS_MASK2)) |
+                                 (pt.DATA_ACCESS_ENABLE3 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK3)) == (pt.DATA_ACCESS_ADDR3 | pt.DATA_ACCESS_MASK3)) |
+                                 (pt.DATA_ACCESS_ENABLE4 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK4)) == (pt.DATA_ACCESS_ADDR4 | pt.DATA_ACCESS_MASK4)) |
+                                 (pt.DATA_ACCESS_ENABLE5 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK5)) == (pt.DATA_ACCESS_ADDR5 | pt.DATA_ACCESS_MASK5)) |
+                                 (pt.DATA_ACCESS_ENABLE6 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK6)) == (pt.DATA_ACCESS_ADDR6 | pt.DATA_ACCESS_MASK6)) |
+                                 (pt.DATA_ACCESS_ENABLE7 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK7)) == (pt.DATA_ACCESS_ADDR7 | pt.DATA_ACCESS_MASK7))) &
+                                ((pt.DATA_ACCESS_ENABLE0 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK0)) == (pt.DATA_ACCESS_ADDR0 | pt.DATA_ACCESS_MASK0)) | // and end address must hit an enabled window too (not necessarily the same one as the start address)
+                                 (pt.DATA_ACCESS_ENABLE1 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK1)) == (pt.DATA_ACCESS_ADDR1 | pt.DATA_ACCESS_MASK1)) |
+                                 (pt.DATA_ACCESS_ENABLE2 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK2)) == (pt.DATA_ACCESS_ADDR2 | pt.DATA_ACCESS_MASK2)) |
+                                 (pt.DATA_ACCESS_ENABLE3 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK3)) == (pt.DATA_ACCESS_ADDR3 | pt.DATA_ACCESS_MASK3)) |
+                                 (pt.DATA_ACCESS_ENABLE4 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK4)) == (pt.DATA_ACCESS_ADDR4 | pt.DATA_ACCESS_MASK4)) |
+                                 (pt.DATA_ACCESS_ENABLE5 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK5)) == (pt.DATA_ACCESS_ADDR5 | pt.DATA_ACCESS_MASK5)) |
+                                 (pt.DATA_ACCESS_ENABLE6 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK6)) == (pt.DATA_ACCESS_ADDR6 | pt.DATA_ACCESS_MASK6)) |
+                                 (pt.DATA_ACCESS_ENABLE7 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK7)) == (pt.DATA_ACCESS_ADDR7 | pt.DATA_ACCESS_MASK7))));
+
+   // Access fault logic (the leading number is the mscause value reported for that fault)
+   // 0. Unmapped local memory : Addr in dccm region but not in dccm offset OR Addr in picm region but not in picm offset OR DCCM -> PIC cross when DCCM/PIC in same region
+   // 1. Uncorrectable (double bit) ECC error (not generated in this module)
+   // 3. Address is not in a populated non-dccm region
+   // 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa
+   // 6. Ld/St access to picm are not word aligned or word size
+   assign regpred_access_fault_d = (start_addr_dccm_or_pic ^ base_reg_dccm_or_pic); // 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa
+   assign picm_access_fault_d = (addr_in_pic_d & ((start_addr_d[1:0] != 2'b0) | ~lsu_pkt_d.word)); // 6. Ld/St access to picm are not word aligned or word size
+
+   if (pt.DCCM_REGION == pt.PIC_REGION) begin
+      assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~(start_addr_in_dccm_d | start_addr_in_pic_d)) | // 0. Addr in dccm/pic region but not in dccm/pic offset
+                                        (end_addr_in_dccm_region_d & ~(end_addr_in_dccm_d | end_addr_in_pic_d)) | // 0. Addr in dccm/pic region but not in dccm/pic offset
+                                        (start_addr_in_dccm_d & end_addr_in_pic_d) | // 0. DCCM -> PIC cross when DCCM/PIC in same region
+                                        (start_addr_in_pic_d & end_addr_in_dccm_d)); // 0. PIC -> DCCM cross when DCCM/PIC in same region
+      assign mpu_access_fault_d = (~start_addr_in_dccm_region_d & ~non_dccm_access_ok); // 3. Address is not in a populated non-dccm region
+   end else begin
+      assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) | // 0. Addr in dccm region but not in dccm offset
+                                        (end_addr_in_dccm_region_d & ~end_addr_in_dccm_d) | // 0. Addr in dccm region but not in dccm offset
+                                        (start_addr_in_pic_region_d & ~start_addr_in_pic_d) | // 0. Addr in picm region but not in picm offset
+                                        (end_addr_in_pic_region_d & ~end_addr_in_pic_d)); // 0. Addr in picm region but not in picm offset
+      assign mpu_access_fault_d = (~start_addr_in_pic_region_d & ~start_addr_in_dccm_region_d & ~non_dccm_access_ok); // 3. Address is not in a populated non-dccm region
+   end
+
+   assign access_fault_d = (unmapped_access_fault_d | mpu_access_fault_d | picm_access_fault_d | regpred_access_fault_d) & lsu_pkt_d.valid & ~lsu_pkt_d.dma; // never raised for dma packets
+   assign access_fault_mscause_d[2:0] = unmapped_access_fault_d ? 3'h0 : mpu_access_fault_d ? 3'h3 : regpred_access_fault_d ? 3'h5 : picm_access_fault_d ? 3'h6 : 3'h7; // priority: unmapped > mpu > regpred > picm; 3'h7 when none is set
+
+   // Misaligned happens due to 2 reasons (the leading number is the mscause value)
+   // 0. Region cross
+   // 1. sideeffects access which are not aligned
+   assign regcross_misaligned_fault_d = (start_addr_d[31:28] != end_addr_d[31:28]);
+   assign sideeffect_misaligned_fault_d = (is_sideeffects_d & ~is_aligned_d);
+   assign misaligned_fault_d = (regcross_misaligned_fault_d | (sideeffect_misaligned_fault_d & addr_external_d)) & lsu_pkt_d.valid & ~lsu_pkt_d.dma; // never raised for dma packets
+   assign misaligned_fault_mscause_d = regcross_misaligned_fault_d ? 1'b0 : 1'b1; // region cross wins over sideeffect misaligned
+
+   assign exc_mscause_d[2:0] = misaligned_fault_d ? {2'b0,misaligned_fault_mscause_d} : access_fault_mscause_d[2:0]; // misaligned cause is reported in preference to the access fault cause
+
+   // Fast interrupt error logic
+   assign fir_dccm_access_error_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) |
+                                     (end_addr_in_dccm_region_d & ~end_addr_in_dccm_d)) & lsu_pkt_d.valid & lsu_pkt_d.fast_int;
+   assign fir_nondccm_access_error_d = ~(start_addr_in_dccm_region_d & end_addr_in_dccm_region_d) & lsu_pkt_d.valid & lsu_pkt_d.fast_int;
+
+   rvdff #(.WIDTH(1)) is_sideeffects_mff (.din(is_sideeffects_d), .dout(is_sideeffects_m), .clk(lsu_c2_m_clk), .*); // stage is_sideeffects from d to m
+
+endmodule // el2_lsu_addrcheck
diff --git a/design/lsu/el2_lsu_bus_buffer.sv b/design/lsu/el2_lsu_bus_buffer.sv
new file mode 100644
index 0000000..33f6379
--- /dev/null
+++ b/design/lsu/el2_lsu_bus_buffer.sv
@@ -0,0 +1,946 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +//******************************************************************************** +// $Id$ +// +// +// Owner: +// Function: lsu interface with interface queue +// Comments: +// +//******************************************************************************** + +module el2_lsu_bus_buffer +import el2_pkg::*; +#( +`include "el2_param.vh" + )( + input logic clk, + input logic rst_l, + input logic scan_mode, + input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals + input logic dec_tlu_wb_coalescing_disable, // disable write buffer coalescing + input logic dec_tlu_sideeffect_posted_disable, // Don't block the sideeffect load store to the bus + input logic dec_tlu_force_halt, + + // various clocks needed for the bus reads and writes + input logic lsu_c2_r_clk, + input logic lsu_bus_ibuf_c1_clk, + input logic lsu_bus_obuf_c1_clk, + input logic lsu_bus_buf_c1_clk, + input logic lsu_free_c2_clk, + input logic lsu_busm_clk, + + + input logic dec_lsu_valid_raw_d, // Raw valid for address computation + input el2_lsu_pkt_t lsu_pkt_m, // lsu packet flowing down the pipe + input el2_lsu_pkt_t lsu_pkt_r, // lsu packet flowing down the pipe + + input logic [31:0] lsu_addr_m, // lsu address flowing down the pipe + input logic [31:0] end_addr_m, // lsu address flowing down the pipe + input logic [31:0] lsu_addr_r, // lsu address flowing down the pipe + input logic [31:0] end_addr_r, // lsu address flowing down the pipe + input logic [31:0] store_data_r, // store data flowing down the pipe + + input logic no_word_merge_r, // r store doesn't need to wait in ibuf since it will not coalesce + input logic no_dword_merge_r, // r store doesn't need to wait in ibuf since it will not coalesce + input logic lsu_busreq_m, // bus request is in m + output logic lsu_busreq_r, // bus request is in r + input logic ld_full_hit_m, // load can get all its byte from a write buffer entry + input logic flush_m_up, // flush + input logic flush_r, // flush + input 
logic lsu_commit_r, // lsu instruction in r commits + input logic is_sideeffects_r, // lsu attribute is side_effects + input logic ldst_dual_d, // load/store is unaligned at 32 bit boundary + input logic ldst_dual_m, // load/store is unaligned at 32 bit boundary + input logic ldst_dual_r, // load/store is unaligned at 32 bit boundary + + input logic [7:0] ldst_byteen_ext_m, + + output logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry + output logic lsu_bus_buffer_full_any, // bus buffer is full + output logic lsu_bus_buffer_empty_any, // bus buffer is empty + output logic lsu_bus_idle_any, // No pending responses from the bus + + output logic [3:0] ld_byte_hit_buf_lo, ld_byte_hit_buf_hi, // Byte enables for forwarding data + output logic [31:0] ld_fwddata_buf_lo, ld_fwddata_buf_hi, // load forwarding data + + output logic lsu_imprecise_error_load_any, // imprecise load bus error + output logic lsu_imprecise_error_store_any, // imprecise store bus error + output logic [31:0] lsu_imprecise_error_addr_any, // address of the imprecise error + + // Non-blocking loads + output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load + output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the enrty which needs to be invalidated + output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam + output logic lsu_nonblock_load_data_error, // non block load has an error + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error + output logic [31:0] lsu_nonblock_load_data, // Data of the non block load + + + // PMU events + output logic lsu_pmu_bus_trxn, + output logic 
lsu_pmu_bus_misaligned, + output logic lsu_pmu_bus_error, + output logic lsu_pmu_bus_busy, + + // AXI Write Channels + output logic lsu_axi_awvalid, + input logic lsu_axi_awready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid, + output logic [31:0] lsu_axi_awaddr, + output logic [3:0] lsu_axi_awregion, + output logic [7:0] lsu_axi_awlen, + output logic [2:0] lsu_axi_awsize, + output logic [1:0] lsu_axi_awburst, + output logic lsu_axi_awlock, + output logic [3:0] lsu_axi_awcache, + output logic [2:0] lsu_axi_awprot, + output logic [3:0] lsu_axi_awqos, + + output logic lsu_axi_wvalid, + input logic lsu_axi_wready, + output logic [63:0] lsu_axi_wdata, + output logic [7:0] lsu_axi_wstrb, + output logic lsu_axi_wlast, + + input logic lsu_axi_bvalid, + output logic lsu_axi_bready, + input logic [1:0] lsu_axi_bresp, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid, + + // AXI Read Channels + output logic lsu_axi_arvalid, + input logic lsu_axi_arready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid, + output logic [31:0] lsu_axi_araddr, + output logic [3:0] lsu_axi_arregion, + output logic [7:0] lsu_axi_arlen, + output logic [2:0] lsu_axi_arsize, + output logic [1:0] lsu_axi_arburst, + output logic lsu_axi_arlock, + output logic [3:0] lsu_axi_arcache, + output logic [2:0] lsu_axi_arprot, + output logic [3:0] lsu_axi_arqos, + + input logic lsu_axi_rvalid, + output logic lsu_axi_rready, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid, + input logic [63:0] lsu_axi_rdata, + input logic [1:0] lsu_axi_rresp, + + input logic lsu_bus_clk_en, + input logic lsu_bus_clk_en_q + +); + + + // For Ld: IDLE -> WAIT -> CMD -> RESP -> DONE_PARTIAL(?) -> DONE_WAIT(?) -> DONE -> IDLE + // For St: IDLE -> WAIT -> CMD -> RESP(?) 
-> IDLE + typedef enum logic [2:0] {IDLE=3'b000, WAIT=3'b001, CMD=3'b010, RESP=3'b011, DONE_PARTIAL=3'b100, DONE_WAIT=3'b101, DONE=3'b110} state_t; + + localparam DEPTH = pt.LSU_NUM_NBLOAD; + localparam DEPTH_LOG2 = pt.LSU_NUM_NBLOAD_WIDTH; + localparam TIMER = 8; // This can be only power of 2 + localparam TIMER_MAX = TIMER - 1; // Maximum value of timer + localparam TIMER_LOG2 = (TIMER < 2) ? 1 : $clog2(TIMER); + + logic [3:0] ldst_byteen_hi_m, ldst_byteen_lo_m; + logic [DEPTH-1:0] ld_addr_hitvec_lo, ld_addr_hitvec_hi; + logic [3:0][DEPTH-1:0] ld_byte_hitvec_lo, ld_byte_hitvec_hi; + logic [3:0][DEPTH-1:0] ld_byte_hitvecfn_lo, ld_byte_hitvecfn_hi; + + logic ld_addr_ibuf_hit_lo, ld_addr_ibuf_hit_hi; + logic [3:0] ld_byte_ibuf_hit_lo, ld_byte_ibuf_hit_hi; + + logic [3:0] ldst_byteen_r; + logic [3:0] ldst_byteen_hi_r, ldst_byteen_lo_r; + logic [31:0] store_data_hi_r, store_data_lo_r; + logic is_aligned_r; // Aligned load/store + logic ldst_samedw_r; + + logic lsu_nonblock_load_valid_r; + logic [31:0] lsu_nonblock_load_data_hi, lsu_nonblock_load_data_lo, lsu_nonblock_data_unalgn; + logic [1:0] lsu_nonblock_addr_offset; + logic [1:0] lsu_nonblock_sz; + logic lsu_nonblock_unsign, lsu_nonblock_dual; + logic lsu_nonblock_load_data_ready; + + logic [DEPTH-1:0] CmdPtr0Dec, CmdPtr1Dec; + logic [DEPTH-1:0] RspPtrDec; + logic [DEPTH_LOG2-1:0] CmdPtr0, CmdPtr1; + logic [DEPTH_LOG2-1:0] RspPtr; + logic [DEPTH_LOG2-1:0] WrPtr0_m, WrPtr0_r; + logic [DEPTH_LOG2-1:0] WrPtr1_m, WrPtr1_r; + logic found_wrptr0, found_wrptr1, found_cmdptr0, found_cmdptr1; + logic [3:0] buf_numvld_any, buf_numvld_wrcmd_any, buf_numvld_cmd_any, buf_numvld_pend_any; + logic any_done_wait_state; + logic bus_sideeffect_pend; + logic [7:0] bus_pend_trxn, bus_pend_trxnQ, bus_pend_trxn_ns; + logic lsu_bus_cntr_overflow; + logic bus_coalescing_disable; + logic mdbhd_en; + + logic bus_addr_match_pending; + logic bus_cmd_sent, bus_cmd_ready; + logic bus_wcmd_sent, bus_wdata_sent; + logic bus_rsp_read, 
bus_rsp_write; + logic [pt.LSU_BUS_TAG-1:0] bus_rsp_read_tag, bus_rsp_write_tag; + logic bus_rsp_read_error, bus_rsp_write_error; + logic [63:0] bus_rsp_rdata; + + // Bus buffer signals + state_t [DEPTH-1:0] buf_state; + logic [DEPTH-1:0][1:0] buf_sz; + logic [DEPTH-1:0][31:0] buf_addr; + logic [DEPTH-1:0][3:0] buf_byteen; + logic [DEPTH-1:0] buf_sideeffect; + logic [DEPTH-1:0] buf_write; + logic [DEPTH-1:0] buf_unsign; + logic [DEPTH-1:0] buf_dual; + logic [DEPTH-1:0] buf_samedw; + logic [DEPTH-1:0] buf_nomerge; + logic [DEPTH-1:0] buf_dualhi; + logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_dualtag; + logic [DEPTH-1:0] buf_ldfwd; + logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_ldfwdtag; + //logic [DEPTH-1:0] buf_nb; + logic [DEPTH-1:0] buf_error; + logic [DEPTH-1:0][31:0] buf_data; + logic [DEPTH-1:0][DEPTH-1:0] buf_age, buf_age_younger; + logic [DEPTH-1:0][DEPTH-1:0] buf_rspage, buf_rsp_pickage; + + state_t [DEPTH-1:0] buf_nxtstate; + logic [DEPTH-1:0] buf_rst; + logic [DEPTH-1:0] buf_state_en; + logic [DEPTH-1:0] buf_cmd_state_bus_en; + logic [DEPTH-1:0] buf_resp_state_bus_en; + logic [DEPTH-1:0] buf_state_bus_en; + logic [DEPTH-1:0] buf_dual_in; + logic [DEPTH-1:0] buf_samedw_in; + logic [DEPTH-1:0] buf_nomerge_in; + //logic [DEPTH-1:0] buf_nb_in; + logic [DEPTH-1:0] buf_sideeffect_in; + logic [DEPTH-1:0] buf_unsign_in; + logic [DEPTH-1:0][1:0] buf_sz_in; + logic [DEPTH-1:0] buf_write_in; + logic [DEPTH-1:0] buf_wr_en; + logic [DEPTH-1:0] buf_dualhi_in; + logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_dualtag_in; + logic [DEPTH-1:0] buf_ldfwd_en; + logic [DEPTH-1:0] buf_ldfwd_in; + logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_ldfwdtag_in; + logic [DEPTH-1:0][3:0] buf_byteen_in; + logic [DEPTH-1:0][31:0] buf_addr_in; + logic [DEPTH-1:0][31:0] buf_data_in; + logic [DEPTH-1:0] buf_error_en; + logic [DEPTH-1:0] buf_data_en; + logic [DEPTH-1:0][DEPTH-1:0] buf_age_in; + logic [DEPTH-1:0][DEPTH-1:0] buf_ageQ; + logic [DEPTH-1:0][DEPTH-1:0] buf_rspage_set; + logic [DEPTH-1:0][DEPTH-1:0] 
buf_rspage_in; + logic [DEPTH-1:0][DEPTH-1:0] buf_rspageQ; + + // Input buffer signals + logic ibuf_valid; + logic ibuf_dual; + logic ibuf_samedw; + logic ibuf_nomerge; + logic [DEPTH_LOG2-1:0] ibuf_tag; + logic [DEPTH_LOG2-1:0] ibuf_dualtag; + //logic ibuf_nb; + logic ibuf_sideeffect; + logic ibuf_unsign; + logic ibuf_write; + logic [1:0] ibuf_sz; + logic [3:0] ibuf_byteen; + logic [31:0] ibuf_addr; + logic [31:0] ibuf_data; + logic [TIMER_LOG2-1:0] ibuf_timer; + + logic ibuf_byp; + logic ibuf_wr_en; + logic ibuf_rst; + logic ibuf_force_drain; + logic ibuf_drain_vld; + logic [DEPTH-1:0] ibuf_drainvec_vld; + logic [DEPTH_LOG2-1:0] ibuf_tag_in; + logic [DEPTH_LOG2-1:0] ibuf_dualtag_in; + logic [1:0] ibuf_sz_in; + logic [31:0] ibuf_addr_in; + logic [3:0] ibuf_byteen_in; + logic [31:0] ibuf_data_in; + logic [TIMER_LOG2-1:0] ibuf_timer_in; + logic [3:0] ibuf_byteen_out; + logic [31:0] ibuf_data_out; + logic ibuf_merge_en, ibuf_merge_in; + + // Output buffer signals + logic obuf_valid; + logic obuf_write; + logic obuf_nosend; + logic obuf_rdrsp_pend; + logic obuf_sideeffect; + logic [31:0] obuf_addr; + logic [63:0] obuf_data; + logic [1:0] obuf_sz; + logic [7:0] obuf_byteen; + logic obuf_merge; + logic obuf_cmd_done, obuf_data_done; + logic [pt.LSU_BUS_TAG-1:0] obuf_tag0; + logic [pt.LSU_BUS_TAG-1:0] obuf_tag1; + logic [pt.LSU_BUS_TAG-1:0] obuf_rdrsp_tag; + + logic ibuf_buf_byp; + logic obuf_force_wr_en; + logic obuf_wr_wait; + logic obuf_wr_en, obuf_wr_enQ; + logic obuf_rst; + logic obuf_write_in; + logic obuf_nosend_in; + logic obuf_rdrsp_pend_in; + logic obuf_sideeffect_in; + logic obuf_aligned_in; + logic [31:0] obuf_addr_in; + logic [63:0] obuf_data_in; + logic [1:0] obuf_sz_in; + logic [7:0] obuf_byteen_in; + logic obuf_merge_in; + logic obuf_cmd_done_in, obuf_data_done_in; + logic [pt.LSU_BUS_TAG-1:0] obuf_tag0_in; + logic [pt.LSU_BUS_TAG-1:0] obuf_tag1_in; + logic [pt.LSU_BUS_TAG-1:0] obuf_rdrsp_tag_in; + + logic obuf_merge_en; + logic [TIMER_LOG2-1:0] 
obuf_wr_timer, obuf_wr_timer_in; + logic [7:0] obuf_byteen0_in, obuf_byteen1_in; + logic [63:0] obuf_data0_in, obuf_data1_in; + + logic lsu_axi_awvalid_q, lsu_axi_awready_q; + logic lsu_axi_wvalid_q, lsu_axi_wready_q; + logic lsu_axi_arvalid_q, lsu_axi_arready_q; + logic lsu_axi_bvalid_q, lsu_axi_bready_q; + logic lsu_axi_rvalid_q, lsu_axi_rready_q; + logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid_q, lsu_axi_rid_q; + logic [1:0] lsu_axi_bresp_q, lsu_axi_rresp_q; + logic [DEPTH_LOG2-1:0] lsu_imprecise_error_store_tag; + logic [63:0] lsu_axi_rdata_q; + + //------------------------------------------------------------------------------ + // Load forwarding logic start + //------------------------------------------------------------------------------ + + // Function to do 8 to 3 bit encoding: output bit k ORs every input bit whose index has bit k set, so a one-hot input yields its index, an all-zero input yields 0, and a multi-hot input yields the OR of the set indices + function automatic logic [2:0] f_Enc8to3; + input logic [7:0] Dec_value; + + logic [2:0] Enc_value; + Enc_value[0] = Dec_value[1] | Dec_value[3] | Dec_value[5] | Dec_value[7]; + Enc_value[1] = Dec_value[2] | Dec_value[3] | Dec_value[6] | Dec_value[7]; + Enc_value[2] = Dec_value[4] | Dec_value[5] | Dec_value[6] | Dec_value[7]; + + return Enc_value[2:0]; + endfunction // f_Enc8to3 + + // Buffer hit logic for bus load forwarding: an entry hits when its word address (bits 31:2) equals the m-stage lo/hi address, it holds a write, it is not IDLE, and lsu_busreq_m is set + assign ldst_byteen_hi_m[3:0] = ldst_byteen_ext_m[7:4]; + assign ldst_byteen_lo_m[3:0] = ldst_byteen_ext_m[3:0]; + for (genvar i=0; i<32'(DEPTH); i++) begin + assign ld_addr_hitvec_lo[i] = (lsu_addr_m[31:2] == buf_addr[i][31:2]) & buf_write[i] & (buf_state[i] != IDLE) & lsu_busreq_m; + assign ld_addr_hitvec_hi[i] = (end_addr_m[31:2] == buf_addr[i][31:2]) & buf_write[i] & (buf_state[i] != IDLE) & lsu_busreq_m; + end + + for (genvar j=0; j<4; j++) begin + assign ld_byte_hit_buf_lo[j] = |(ld_byte_hitvecfn_lo[j]) | ld_byte_ibuf_hit_lo[j]; + assign ld_byte_hit_buf_hi[j] = |(ld_byte_hitvecfn_hi[j]) | ld_byte_ibuf_hit_hi[j]; + for (genvar i=0; i<32'(DEPTH); i++) begin + assign ld_byte_hitvec_lo[j][i] = ld_addr_hitvec_lo[i] & buf_byteen[i][j] & ldst_byteen_lo_m[j]; + assign
ld_byte_hitvec_hi[j][i] = ld_addr_hitvec_hi[i] & buf_byteen[i][j] & ldst_byteen_hi_m[j]; + + assign ld_byte_hitvecfn_lo[j][i] = ld_byte_hitvec_lo[j][i] & ~(|(ld_byte_hitvec_lo[j] & buf_age_younger[i])) & ~ld_byte_ibuf_hit_lo[j]; // Kill the byte enable if younger entry exists or byte exists in ibuf + assign ld_byte_hitvecfn_hi[j][i] = ld_byte_hitvec_hi[j][i] & ~(|(ld_byte_hitvec_hi[j] & buf_age_younger[i])) & ~ld_byte_ibuf_hit_hi[j]; // Kill the byte enable if younger entry exists or byte exists in ibuf + end + end + + // Hit in the ibuf + assign ld_addr_ibuf_hit_lo = (lsu_addr_m[31:2] == ibuf_addr[31:2]) & ibuf_write & ibuf_valid & lsu_busreq_m; + assign ld_addr_ibuf_hit_hi = (end_addr_m[31:2] == ibuf_addr[31:2]) & ibuf_write & ibuf_valid & lsu_busreq_m; + + for (genvar i=0; i<4; i++) begin + assign ld_byte_ibuf_hit_lo[i] = ld_addr_ibuf_hit_lo & ibuf_byteen[i] & ldst_byteen_lo_m[i]; + assign ld_byte_ibuf_hit_hi[i] = ld_addr_ibuf_hit_hi & ibuf_byteen[i] & ldst_byteen_hi_m[i]; + end + + always_comb begin + ld_fwddata_buf_lo[31:0] = {{8{ld_byte_ibuf_hit_lo[3]}},{8{ld_byte_ibuf_hit_lo[2]}},{8{ld_byte_ibuf_hit_lo[1]}},{8{ld_byte_ibuf_hit_lo[0]}}} & ibuf_data[31:0]; + ld_fwddata_buf_hi[31:0] = {{8{ld_byte_ibuf_hit_hi[3]}},{8{ld_byte_ibuf_hit_hi[2]}},{8{ld_byte_ibuf_hit_hi[1]}},{8{ld_byte_ibuf_hit_hi[0]}}} & ibuf_data[31:0]; + for (int i=0; i<32'(DEPTH); i++) begin + ld_fwddata_buf_lo[7:0] |= {8{ld_byte_hitvecfn_lo[0][i]}} & buf_data[i][7:0]; + ld_fwddata_buf_lo[15:8] |= {8{ld_byte_hitvecfn_lo[1][i]}} & buf_data[i][15:8]; + ld_fwddata_buf_lo[23:16] |= {8{ld_byte_hitvecfn_lo[2][i]}} & buf_data[i][23:16]; + ld_fwddata_buf_lo[31:24] |= {8{ld_byte_hitvecfn_lo[3][i]}} & buf_data[i][31:24]; + + ld_fwddata_buf_hi[7:0] |= {8{ld_byte_hitvecfn_hi[0][i]}} & buf_data[i][7:0]; + ld_fwddata_buf_hi[15:8] |= {8{ld_byte_hitvecfn_hi[1][i]}} & buf_data[i][15:8]; + ld_fwddata_buf_hi[23:16] |= {8{ld_byte_hitvecfn_hi[2][i]}} & buf_data[i][23:16]; + ld_fwddata_buf_hi[31:24] |= 
{8{ld_byte_hitvecfn_hi[3][i]}} & buf_data[i][31:24]; + end + end + + //------------------------------------------------------------------------------ + // Load forwarding logic end + //------------------------------------------------------------------------------ + + assign bus_coalescing_disable = dec_tlu_wb_coalescing_disable | pt.BUILD_AHB_LITE; + + // Get the hi/lo byte enable: the by/half/word size mask is shifted left by lsu_addr_r[1:0]; bits shifted past bit 3 form the hi nibble (store data is split the same way, one byte per address step) + assign ldst_byteen_r[3:0] = ({4{lsu_pkt_r.by}} & 4'b0001) | + ({4{lsu_pkt_r.half}} & 4'b0011) | + ({4{lsu_pkt_r.word}} & 4'b1111); + + assign {ldst_byteen_hi_r[3:0], ldst_byteen_lo_r[3:0]} = {4'b0,ldst_byteen_r[3:0]} << lsu_addr_r[1:0]; + assign {store_data_hi_r[31:0], store_data_lo_r[31:0]} = {32'b0,store_data_r[31:0]} << 8*lsu_addr_r[1:0]; + assign ldst_samedw_r = (lsu_addr_r[3] == end_addr_r[3]); + assign is_aligned_r = (lsu_pkt_r.word & (lsu_addr_r[1:0] == 2'b0)) | + (lsu_pkt_r.half & (lsu_addr_r[0] == 1'b0)) | + lsu_pkt_r.by; + + //------------------------------------------------------------------------------ + // Input buffer logic starts here + //------------------------------------------------------------------------------ + + assign ibuf_byp = lsu_busreq_r & (lsu_pkt_r.load | no_word_merge_r) & ~ibuf_valid; + assign ibuf_wr_en = lsu_busreq_r & lsu_commit_r & ~ibuf_byp; + assign ibuf_rst = (ibuf_drain_vld & ~ibuf_wr_en) | dec_tlu_force_halt; + assign ibuf_force_drain = lsu_busreq_m & ~lsu_busreq_r & ibuf_valid & (lsu_pkt_m.load | (ibuf_addr[31:2] != lsu_addr_m[31:2])); // Move the ibuf to buf if there is a non-coalescable ld/st in m (a load, or a different word address) but nothing in r + assign ibuf_drain_vld = ibuf_valid & (((ibuf_wr_en | (ibuf_timer == TIMER_LOG2'(TIMER_MAX))) & ~(ibuf_merge_en & ibuf_merge_in)) | ibuf_byp | ibuf_force_drain | ibuf_sideeffect | ~ibuf_write | bus_coalescing_disable); + assign ibuf_tag_in[DEPTH_LOG2-1:0] = (ibuf_merge_en & ibuf_merge_in) ? ibuf_tag[DEPTH_LOG2-1:0] : (ldst_dual_r ?
WrPtr1_r : WrPtr0_r); + assign ibuf_dualtag_in[DEPTH_LOG2-1:0] = WrPtr0_r; + assign ibuf_sz_in[1:0] = {lsu_pkt_r.word, lsu_pkt_r.half}; + assign ibuf_addr_in[31:0] = ldst_dual_r ? end_addr_r[31:0] : lsu_addr_r[31:0]; + assign ibuf_byteen_in[3:0] = (ibuf_merge_en & ibuf_merge_in) ? (ibuf_byteen[3:0] | ldst_byteen_lo_r[3:0]) : (ldst_dual_r ? ldst_byteen_hi_r[3:0] : ldst_byteen_lo_r[3:0]); + for (genvar i=0; i<4; i++) begin + assign ibuf_data_in[(8*i)+7:(8*i)] = (ibuf_merge_en & ibuf_merge_in) ? (ldst_byteen_lo_r[i] ? store_data_lo_r[(8*i)+7:(8*i)] : ibuf_data[(8*i)+7:(8*i)]) : + (ldst_dual_r ? store_data_hi_r[(8*i)+7:(8*i)] : store_data_lo_r[(8*i)+7:(8*i)]); + end + assign ibuf_timer_in = ibuf_wr_en ? '0 : (ibuf_timer < TIMER_LOG2'(TIMER_MAX)) ? (ibuf_timer + 1'b1) : ibuf_timer; + + + assign ibuf_merge_en = lsu_busreq_r & lsu_commit_r & lsu_pkt_r.store & ibuf_valid & ibuf_write & (lsu_addr_r[31:2] == ibuf_addr[31:2]) & ~is_sideeffects_r & ~bus_coalescing_disable; + assign ibuf_merge_in = ~ldst_dual_r; // If it's a unaligned store, merge needs to happen on the way out of ibuf + + // ibuf signals going to bus buffer after merging + for (genvar i=0; i<4; i++) begin + assign ibuf_byteen_out[i] = (ibuf_merge_en & ~ibuf_merge_in) ? (ibuf_byteen[i] | ldst_byteen_lo_r[i]) : ibuf_byteen[i]; + assign ibuf_data_out[(8*i)+7:(8*i)] = (ibuf_merge_en & ~ibuf_merge_in) ? (ldst_byteen_lo_r[i] ? 
store_data_lo_r[(8*i)+7:(8*i)] : ibuf_data[(8*i)+7:(8*i)]) : + ibuf_data[(8*i)+7:(8*i)]; + end + + rvdffsc #(.WIDTH(1)) ibuf_valid_ff (.din(1'b1), .dout(ibuf_valid), .en(ibuf_wr_en), .clear(ibuf_rst), .clk(lsu_free_c2_clk), .*); + rvdffs #(.WIDTH(DEPTH_LOG2)) ibuf_tagff (.din(ibuf_tag_in), .dout(ibuf_tag), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*); + rvdffs #(.WIDTH(DEPTH_LOG2)) ibuf_dualtagff (.din(ibuf_dualtag_in), .dout(ibuf_dualtag), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*); + rvdffs #(.WIDTH(1)) ibuf_dualff (.din(ldst_dual_r), .dout(ibuf_dual), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*); + rvdffs #(.WIDTH(1)) ibuf_samedwff (.din(ldst_samedw_r), .dout(ibuf_samedw), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*); + rvdffs #(.WIDTH(1)) ibuf_nomergeff (.din(no_dword_merge_r), .dout(ibuf_nomerge), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*); + //rvdffs #(.WIDTH(1)) ibuf_nbff (.din(lsu_nonblock_load_valid_r), .dout(ibuf_nb), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*); + rvdffs #(.WIDTH(1)) ibuf_sideeffectff (.din(is_sideeffects_r), .dout(ibuf_sideeffect), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*); + rvdffs #(.WIDTH(1)) ibuf_unsignff (.din(lsu_pkt_r.unsign), .dout(ibuf_unsign), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*); + rvdffs #(.WIDTH(1)) ibuf_writeff (.din(lsu_pkt_r.store), .dout(ibuf_write), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*); + rvdffs #(.WIDTH(2)) ibuf_szff (.din(ibuf_sz_in[1:0]), .dout(ibuf_sz), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*); + rvdffe #(.WIDTH(32)) ibuf_addrff (.din(ibuf_addr_in[31:0]), .dout(ibuf_addr), .en(ibuf_wr_en), .*); + rvdffs #(.WIDTH(4)) ibuf_byteenff (.din(ibuf_byteen_in[3:0]), .dout(ibuf_byteen), .en(ibuf_wr_en), .clk(lsu_bus_ibuf_c1_clk), .*); + rvdffe #(.WIDTH(32)) ibuf_dataff (.din(ibuf_data_in[31:0]), .dout(ibuf_data), .en(ibuf_wr_en), .*); + rvdff #(.WIDTH(TIMER_LOG2)) ibuf_timerff (.din(ibuf_timer_in), .dout(ibuf_timer), .clk(lsu_free_c2_clk), .*); + + + 
//------------------------------------------------------------------------------ + // Input buffer logic ends here + //------------------------------------------------------------------------------ + + + //------------------------------------------------------------------------------ + // Output buffer logic starts here + //------------------------------------------------------------------------------ + + assign obuf_wr_wait = (buf_numvld_wrcmd_any[3:0] == 4'b1) & (buf_numvld_cmd_any[3:0] == 4'b1) & (obuf_wr_timer != TIMER_LOG2'(TIMER_MAX)) & + ~bus_coalescing_disable & ~buf_nomerge[CmdPtr0] & ~buf_sideeffect[CmdPtr0] & ~obuf_force_wr_en; + // Write-wait timer: cleared on every obuf write, otherwise counts up to TIMER_MAX while a command is pending. The clear value is '0 so its width follows TIMER_LOG2 (same form as ibuf_timer_in) + assign obuf_wr_timer_in = obuf_wr_en ? '0 : (((buf_numvld_cmd_any > 4'b0) & (obuf_wr_timer < TIMER_LOG2'(TIMER_MAX))) ? (obuf_wr_timer + 1'b1) : obuf_wr_timer); + assign obuf_force_wr_en = lsu_busreq_m & ~lsu_busreq_r & ~ibuf_valid & (buf_numvld_cmd_any[3:0] == 4'b1) & (lsu_addr_m[31:2] != buf_addr[CmdPtr0][31:2]); // Entry in m can't merge with entry going to obuf and there is no entry in between + assign ibuf_buf_byp = ibuf_byp & (buf_numvld_pend_any[3:0] == 4'b0) & (~lsu_pkt_r.store | no_dword_merge_r); + + assign obuf_wr_en = ((ibuf_buf_byp & lsu_commit_r & ~(is_sideeffects_r & bus_sideeffect_pend)) | + ((buf_state[CmdPtr0] == CMD) & found_cmdptr0 & ~buf_cmd_state_bus_en[CmdPtr0] & ~(buf_sideeffect[CmdPtr0] & bus_sideeffect_pend) & + (~(buf_dual[CmdPtr0] & buf_samedw[CmdPtr0] & ~buf_write[CmdPtr0]) | found_cmdptr1 | buf_nomerge[CmdPtr0] | obuf_force_wr_en))) & + (bus_cmd_ready | ~obuf_valid | obuf_nosend) & ~obuf_wr_wait & ~lsu_bus_cntr_overflow & ~bus_addr_match_pending & lsu_bus_clk_en; + + assign obuf_rst = ((bus_cmd_sent | (obuf_valid & obuf_nosend)) & ~obuf_wr_en & lsu_bus_clk_en) | dec_tlu_force_halt; + + assign obuf_write_in = ibuf_buf_byp ? lsu_pkt_r.store : buf_write[CmdPtr0]; + assign obuf_sideeffect_in = ibuf_buf_byp ? is_sideeffects_r : buf_sideeffect[CmdPtr0]; + assign obuf_addr_in[31:0] = ibuf_buf_byp ?
lsu_addr_r[31:0] : buf_addr[CmdPtr0]; + assign obuf_sz_in[1:0] = ibuf_buf_byp ? {lsu_pkt_r.word, lsu_pkt_r.half} : buf_sz[CmdPtr0]; + assign obuf_merge_in = obuf_merge_en; + assign obuf_tag0_in[pt.LSU_BUS_TAG-1:0] = ibuf_buf_byp ? (pt.LSU_BUS_TAG)'(WrPtr0_r) : (pt.LSU_BUS_TAG)'(CmdPtr0); + assign obuf_tag1_in[pt.LSU_BUS_TAG-1:0] = ibuf_buf_byp ? (pt.LSU_BUS_TAG)'(WrPtr1_r) : (pt.LSU_BUS_TAG)'(CmdPtr1); + + assign obuf_cmd_done_in = ~(obuf_wr_en | obuf_rst) & (obuf_cmd_done | bus_wcmd_sent); + assign obuf_data_done_in = ~(obuf_wr_en | obuf_rst) & (obuf_data_done | bus_wdata_sent); + + assign obuf_aligned_in = ibuf_buf_byp ? is_aligned_r : ((obuf_sz_in[1:0] == 2'b0) | + (obuf_sz_in[0] & ~obuf_addr_in[0]) | + (obuf_sz_in[1] & ~(|obuf_addr_in[1:0]))); + + assign obuf_rdrsp_pend_in = (~(obuf_wr_en & ~obuf_nosend_in) & obuf_rdrsp_pend & ~(bus_rsp_read & (bus_rsp_read_tag == obuf_rdrsp_tag))) | + ((bus_cmd_sent & ~obuf_write) & ~dec_tlu_force_halt) ; + assign obuf_rdrsp_tag_in[pt.LSU_BUS_TAG-1:0] = (bus_cmd_sent & ~obuf_write) ? obuf_tag0[pt.LSU_BUS_TAG-1:0] : obuf_rdrsp_tag[pt.LSU_BUS_TAG-1:0]; + // No ld to ld fwd for aligned & atomic64 + assign obuf_nosend_in = (obuf_addr_in[31:3] == obuf_addr[31:3]) & obuf_aligned_in & ~obuf_sideeffect & ~obuf_write & ~obuf_write_in & ~dec_tlu_external_ldfwd_disable & + ((obuf_valid & ~obuf_nosend) | (obuf_rdrsp_pend & ~(bus_rsp_read & (bus_rsp_read_tag == obuf_rdrsp_tag)))); + + assign obuf_byteen0_in[7:0] = ibuf_buf_byp ? (lsu_addr_r[2] ? {ldst_byteen_lo_r[3:0],4'b0} : {4'b0,ldst_byteen_lo_r[3:0]}) : + (buf_addr[CmdPtr0][2] ? {buf_byteen[CmdPtr0],4'b0} : {4'b0,buf_byteen[CmdPtr0]}); + assign obuf_byteen1_in[7:0] = ibuf_buf_byp ? (end_addr_r[2] ? {ldst_byteen_hi_r[3:0],4'b0} : {4'b0,ldst_byteen_hi_r[3:0]}) : + (buf_addr[CmdPtr1][2] ? {buf_byteen[CmdPtr1],4'b0} : {4'b0,buf_byteen[CmdPtr1]}); + assign obuf_data0_in[63:0] = ibuf_buf_byp ? (lsu_addr_r[2] ? 
{store_data_lo_r[31:0],32'b0} : {32'b0,store_data_lo_r[31:0]}) : + (buf_addr[CmdPtr0][2] ? {buf_data[CmdPtr0],32'b0} : {32'b0,buf_data[CmdPtr0]}); + // Hi-half data: on the bypass path its 32-bit lane is chosen by end_addr_r[2], the same bit obuf_byteen1_in uses, so byte enables and data land in the same lane + assign obuf_data1_in[63:0] = ibuf_buf_byp ? (end_addr_r[2] ? {store_data_hi_r[31:0],32'b0} :{32'b0,store_data_hi_r[31:0]}) : + (buf_addr[CmdPtr1][2] ? {buf_data[CmdPtr1],32'b0} : {32'b0,buf_data[CmdPtr1]}); + + // Per byte: the second source (byteen1/data1) overrides the first only when a merge is enabled and that byte is enabled in the second source + for (genvar i=0 ;i<8; i++) begin + assign obuf_byteen_in[i] = obuf_byteen0_in[i] | (obuf_merge_en & obuf_byteen1_in[i]); + assign obuf_data_in[(8*i)+7:(8*i)] = (obuf_merge_en & obuf_byteen1_in[i]) ? obuf_data1_in[(8*i)+7:(8*i)] : obuf_data0_in[(8*i)+7:(8*i)]; + end + + // No store obuf merging for AXI since all stores are sent non-posted. Can't track the second id right now + assign obuf_merge_en = ((CmdPtr0 != CmdPtr1) & found_cmdptr0 & found_cmdptr1 & (buf_state[CmdPtr0] == CMD) & (buf_state[CmdPtr1] == CMD) & + ~buf_cmd_state_bus_en[CmdPtr0] & ~buf_sideeffect[CmdPtr0] & + ((buf_write[CmdPtr0] & buf_write[CmdPtr1] & (buf_addr[CmdPtr0][31:3] == buf_addr[CmdPtr1][31:3]) & ~bus_coalescing_disable & ~pt.BUILD_AXI_NATIVE) | + (~buf_write[CmdPtr0] & buf_dual[CmdPtr0] & ~buf_dualhi[CmdPtr0] & buf_samedw[CmdPtr0]))) | // CmdPtr0/CmdPtr1 are for same load which is within a DW + (ibuf_buf_byp & ldst_samedw_r & ldst_dual_r); + + + rvdff #(.WIDTH(1)) obuf_wren_ff (.din(obuf_wr_en), .dout(obuf_wr_enQ), .clk(lsu_busm_clk), .*); + rvdffsc #(.WIDTH(1)) obuf_valid_ff (.din(1'b1), .dout(obuf_valid), .en(obuf_wr_en), .clear(obuf_rst), .clk(lsu_free_c2_clk), .*); + rvdffs #(.WIDTH(1)) obuf_nosend_ff (.din(obuf_nosend_in), .dout(obuf_nosend), .en(obuf_wr_en), .clk(lsu_free_c2_clk), .*); + rvdff #(.WIDTH(1)) obuf_cmd_done_ff (.din(obuf_cmd_done_in), .dout(obuf_cmd_done), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(1)) obuf_data_done_ff (.din(obuf_data_done_in), .dout(obuf_data_done), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(1)) obuf_rdrsp_pend_ff(.din(obuf_rdrsp_pend_in), .dout(obuf_rdrsp_pend), .clk(lsu_busm_clk), .*); + rvdff
#(.WIDTH(pt.LSU_BUS_TAG)) obuf_rdrsp_tagff (.din(obuf_rdrsp_tag_in), .dout(obuf_rdrsp_tag), .clk(lsu_busm_clk), .*); + rvdffs #(.WIDTH(pt.LSU_BUS_TAG)) obuf_tag0ff (.din(obuf_tag0_in), .dout(obuf_tag0), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffs #(.WIDTH(pt.LSU_BUS_TAG)) obuf_tag1ff (.din(obuf_tag1_in), .dout(obuf_tag1), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffs #(.WIDTH(1)) obuf_mergeff (.din(obuf_merge_in), .dout(obuf_merge), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffs #(.WIDTH(1)) obuf_writeff (.din(obuf_write_in), .dout(obuf_write), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffs #(.WIDTH(1)) obuf_sideeffectff (.din(obuf_sideeffect_in), .dout(obuf_sideeffect), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffs #(.WIDTH(2)) obuf_szff (.din(obuf_sz_in[1:0]), .dout(obuf_sz), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffe #(.WIDTH(32)) obuf_addrff (.din(obuf_addr_in[31:0]), .dout(obuf_addr), .en(obuf_wr_en), .*); + rvdffs #(.WIDTH(8)) obuf_byteenff (.din(obuf_byteen_in[7:0]), .dout(obuf_byteen), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); + rvdffe #(.WIDTH(64)) obuf_dataff (.din(obuf_data_in[63:0]), .dout(obuf_data), .en(obuf_wr_en), .*); + rvdff #(.WIDTH(TIMER_LOG2)) obuf_timerff (.din(obuf_wr_timer_in), .dout(obuf_wr_timer), .clk(lsu_busm_clk), .*); + + + //------------------------------------------------------------------------------ + // Output buffer logic ends here + //------------------------------------------------------------------------------ + + // Find the entry to allocate and entry to send + always_comb begin + WrPtr0_m[DEPTH_LOG2-1:0] = '0; + WrPtr1_m[DEPTH_LOG2-1:0] = '0; + found_wrptr0 = '0; + found_wrptr1 = '0; + + // Find first write pointer + for (int i=0; i<32'(DEPTH); i++) begin + if (~found_wrptr0) begin + WrPtr0_m[DEPTH_LOG2-1:0] = DEPTH_LOG2'(i); + found_wrptr0 = (buf_state[i] == IDLE) & ~((ibuf_valid & (32'(ibuf_tag) == i)) | + (lsu_busreq_r & ((32'(WrPtr0_r) == 
i) | (ldst_dual_r & (32'(WrPtr1_r) == i))))); + end + end + + // Find second write pointer + for (int i=0; i<32'(DEPTH); i++) begin + if (~found_wrptr1) begin + WrPtr1_m[DEPTH_LOG2-1:0] = DEPTH_LOG2'(i); + found_wrptr1 = (buf_state[i] == IDLE) & ~((ibuf_valid & (32'(ibuf_tag) == i)) | + (lsu_busreq_m & (32'(WrPtr0_m) == i)) | + (lsu_busreq_r & ((32'(WrPtr0_r) == i) | (ldst_dual_r & (32'(WrPtr1_r) == i))))); + end + end + end + + // Get the command ptr + for (genvar i=0; i<32'(DEPTH); i++) begin + // These should be one-hot + assign CmdPtr0Dec[i] = ~(|buf_age[i]) & (buf_state[i] == CMD) & ~buf_cmd_state_bus_en[i]; + assign CmdPtr1Dec[i] = ~(|(buf_age[i] & ~CmdPtr0Dec)) & ~CmdPtr0Dec[i] & (buf_state[i] == CMD) & ~buf_cmd_state_bus_en[i]; + assign RspPtrDec[i] = ~(|buf_rsp_pickage[i]) & (buf_state[i] == DONE_WAIT); + end + + assign found_cmdptr0 = |CmdPtr0Dec; + assign found_cmdptr1 = |CmdPtr1Dec; + + assign CmdPtr0 = f_Enc8to3(8'(CmdPtr0Dec[DEPTH-1:0])); + assign CmdPtr1 = f_Enc8to3(8'(CmdPtr1Dec[DEPTH-1:0])); + assign RspPtr = f_Enc8to3(8'(RspPtrDec[DEPTH-1:0])); + + // Age vector + for (genvar i=0; i<32'(DEPTH); i++) begin: GenAgeVec + for (genvar j=0; j<32'(DEPTH); j++) begin + assign buf_age_in[i][j] = (((buf_state[i] == IDLE) & buf_state_en[i]) & + (((buf_state[j] == WAIT) | ((buf_state[j] == CMD) & ~buf_cmd_state_bus_en[j])) | // Set age bit for older entries + (ibuf_drain_vld & lsu_busreq_r & (ibuf_byp | ldst_dual_r) & (i == WrPtr0_r) & (j == ibuf_tag)) | // Set case for dual lo + (ibuf_byp & lsu_busreq_r & ldst_dual_r & (i == WrPtr1_r) & (j == WrPtr0_r)))) | // ibuf bypass case + buf_age[i][j]; + + + assign buf_age[i][j] = buf_ageQ[i][j] & ~((buf_state[j] == CMD) & buf_cmd_state_bus_en[j]); // Reset case + + assign buf_age_younger[i][j] = (i == j) ? 
1'b0: (~buf_age[i][j] & (buf_state[j] != IDLE)); // Younger entries + end + end + + // Age vector for responses + for (genvar i=0; i= (DEPTH-1)) : (buf_numvld_any[3:0] == 4'(DEPTH)); + assign lsu_bus_buffer_empty_any = ~(|buf_state[DEPTH-1:0]) & ~ibuf_valid & ~obuf_valid; + + + // Non blocking ports + assign lsu_nonblock_load_valid_m = lsu_busreq_m & lsu_pkt_m.valid & lsu_pkt_m.load & ~flush_m_up & ~ld_full_hit_m; + assign lsu_nonblock_load_tag_m[DEPTH_LOG2-1:0] = WrPtr0_m[DEPTH_LOG2-1:0]; + assign lsu_nonblock_load_inv_r = lsu_nonblock_load_valid_r & ~lsu_commit_r; + assign lsu_nonblock_load_inv_tag_r[DEPTH_LOG2-1:0] = WrPtr0_r[DEPTH_LOG2-1:0]; // r tag needs to be accurate even if there is no invalidate + + always_comb begin + lsu_nonblock_load_data_ready = '0; + lsu_nonblock_load_data_error = '0; + lsu_nonblock_load_data_tag[DEPTH_LOG2-1:0] = '0; + lsu_nonblock_load_data_lo[31:0] = '0; + lsu_nonblock_load_data_hi[31:0] = '0; + for (int i=0; i<32'(DEPTH); i++) begin + // Use buf_rst[i] instead of buf_state_en[i] for timing + lsu_nonblock_load_data_ready |= (buf_state[i] == DONE) & ~(pt.BUILD_AXI_NATIVE & buf_write[i]); + lsu_nonblock_load_data_error |= (buf_state[i] == DONE) & buf_error[i] & ~buf_write[i]; + lsu_nonblock_load_data_tag[DEPTH_LOG2-1:0] |= DEPTH_LOG2'(i) & {DEPTH_LOG2{((buf_state[i] == DONE) & ~buf_write[i] & (~buf_dual[i] | ~buf_dualhi[i]))}}; + lsu_nonblock_load_data_lo[31:0] |= buf_data[i][31:0] & {32{((buf_state[i] == DONE) & ~buf_write[i] & (~buf_dual[i] | ~buf_dualhi[i]))}}; + lsu_nonblock_load_data_hi[31:0] |= buf_data[i][31:0] & {32{((buf_state[i] == DONE) & ~buf_write[i] & (buf_dual[i] & buf_dualhi[i]))}}; + end + end + + assign lsu_nonblock_addr_offset[1:0] = buf_addr[lsu_nonblock_load_data_tag][1:0]; + assign lsu_nonblock_sz[1:0] = buf_sz[lsu_nonblock_load_data_tag][1:0]; + assign lsu_nonblock_unsign = buf_unsign[lsu_nonblock_load_data_tag]; + assign lsu_nonblock_dual = buf_dual[lsu_nonblock_load_data_tag]; + assign 
lsu_nonblock_data_unalgn[31:0] = 32'({lsu_nonblock_load_data_hi[31:0], lsu_nonblock_load_data_lo[31:0]} >> 8*lsu_nonblock_addr_offset[1:0]); + + assign lsu_nonblock_load_data_valid = lsu_nonblock_load_data_ready & ~lsu_nonblock_load_data_error; + assign lsu_nonblock_load_data[31:0] = ({32{ lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b00)}} & {24'b0,lsu_nonblock_data_unalgn[7:0]}) | + ({32{ lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b01)}} & {16'b0,lsu_nonblock_data_unalgn[15:0]}) | + ({32{~lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b00)}} & {{24{lsu_nonblock_data_unalgn[7]}}, lsu_nonblock_data_unalgn[7:0]}) | + ({32{~lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b01)}} & {{16{lsu_nonblock_data_unalgn[15]}},lsu_nonblock_data_unalgn[15:0]}) | + ({32{(lsu_nonblock_sz[1:0] == 2'b10)}} & lsu_nonblock_data_unalgn[31:0]); + + // Determine if there is a pending return to sideeffect load/store + always_comb begin + bus_sideeffect_pend = obuf_valid & obuf_sideeffect & dec_tlu_sideeffect_posted_disable; + for (int i=0; i<32'(DEPTH); i++) begin + bus_sideeffect_pend |= ((buf_state[i] == RESP) & buf_sideeffect[i] & dec_tlu_sideeffect_posted_disable); + end + end + + // We have no ordering rules for AXI. Need to check outstanding trxns to same address for AXI + always_comb begin + bus_addr_match_pending = '0; + for (int i=0; i<32'(DEPTH); i++) begin + bus_addr_match_pending |= (pt.BUILD_AXI_NATIVE & obuf_valid & (obuf_addr[31:3] == buf_addr[i][31:3]) & (buf_state[i] == RESP) & ~((obuf_tag0 == (pt.LSU_BUS_TAG)'(i)) | (obuf_merge & (obuf_tag1 == (pt.LSU_BUS_TAG)'(i))))); + end + end + + // Generic bus signals + assign bus_cmd_ready = obuf_write ? ((obuf_cmd_done | obuf_data_done) ? (obuf_cmd_done ? 
lsu_axi_wready : lsu_axi_awready) : (lsu_axi_awready & lsu_axi_wready)) : lsu_axi_arready; + assign bus_wcmd_sent = lsu_axi_awvalid & lsu_axi_awready; + assign bus_wdata_sent = lsu_axi_wvalid & lsu_axi_wready; + assign bus_cmd_sent = ((obuf_cmd_done | bus_wcmd_sent) & (obuf_data_done | bus_wdata_sent)) | (lsu_axi_arvalid & lsu_axi_arready); + + assign bus_rsp_read = lsu_axi_rvalid & lsu_axi_rready; + assign bus_rsp_write = lsu_axi_bvalid & lsu_axi_bready; + assign bus_rsp_read_tag[pt.LSU_BUS_TAG-1:0] = lsu_axi_rid[pt.LSU_BUS_TAG-1:0]; + assign bus_rsp_write_tag[pt.LSU_BUS_TAG-1:0] = lsu_axi_bid[pt.LSU_BUS_TAG-1:0]; + assign bus_rsp_write_error = bus_rsp_write & (lsu_axi_bresp[1:0] != 2'b0); + assign bus_rsp_read_error = bus_rsp_read & (lsu_axi_rresp[1:0] != 2'b0); + assign bus_rsp_rdata[63:0] = lsu_axi_rdata[63:0]; + + // AXI command signals + assign lsu_axi_awvalid = obuf_valid & obuf_write & ~obuf_cmd_done & ~bus_addr_match_pending; + assign lsu_axi_awid[pt.LSU_BUS_TAG-1:0] = (pt.LSU_BUS_TAG)'(obuf_tag0); + assign lsu_axi_awaddr[31:0] = obuf_sideeffect ? obuf_addr[31:0] : {obuf_addr[31:3],3'b0}; + assign lsu_axi_awsize[2:0] = obuf_sideeffect ? {1'b0, obuf_sz[1:0]} : 3'b011; + assign lsu_axi_awprot[2:0] = '0; + assign lsu_axi_awcache[3:0] = obuf_sideeffect ? 4'b0 : 4'b1111; + assign lsu_axi_awregion[3:0] = obuf_addr[31:28]; + assign lsu_axi_awlen[7:0] = '0; + assign lsu_axi_awburst[1:0] = 2'b01; + assign lsu_axi_awqos[3:0] = '0; + assign lsu_axi_awlock = '0; + + assign lsu_axi_wvalid = obuf_valid & obuf_write & ~obuf_data_done & ~bus_addr_match_pending; + assign lsu_axi_wstrb[7:0] = obuf_byteen[7:0] & {8{obuf_write}}; + assign lsu_axi_wdata[63:0] = obuf_data[63:0]; + assign lsu_axi_wlast = '1; + + assign lsu_axi_arvalid = obuf_valid & ~obuf_write & ~obuf_nosend & ~bus_addr_match_pending; + assign lsu_axi_arid[pt.LSU_BUS_TAG-1:0] = (pt.LSU_BUS_TAG)'(obuf_tag0); + assign lsu_axi_araddr[31:0] = obuf_sideeffect ? 
obuf_addr[31:0] : {obuf_addr[31:3],3'b0}; + assign lsu_axi_arsize[2:0] = obuf_sideeffect ? {1'b0, obuf_sz[1:0]} : 3'b011; + assign lsu_axi_arprot[2:0] = '0; + assign lsu_axi_arcache[3:0] = obuf_sideeffect ? 4'b0 : 4'b1111; + assign lsu_axi_arregion[3:0] = obuf_addr[31:28]; + assign lsu_axi_arlen[7:0] = '0; + assign lsu_axi_arburst[1:0] = 2'b01; + assign lsu_axi_arqos[3:0] = '0; + assign lsu_axi_arlock = '0; + + assign lsu_axi_bready = 1; + assign lsu_axi_rready = 1; + + always_comb begin + lsu_imprecise_error_store_any = '0; + lsu_imprecise_error_store_tag = '0; + for (int i=0; i<32'(DEPTH); i++) begin + lsu_imprecise_error_store_any |= lsu_bus_clk_en_q & (buf_state[i] == DONE) & buf_error[i] & buf_write[i]; + lsu_imprecise_error_store_tag |= DEPTH_LOG2'(i) & {DEPTH_LOG2{((buf_state[i] == DONE) & buf_error[i] & buf_write[i])}}; + end + end + assign lsu_imprecise_error_load_any = lsu_nonblock_load_data_error & ~lsu_imprecise_error_store_any; // This is to make sure we send only one imprecise error for load/store + assign lsu_imprecise_error_addr_any[31:0] = lsu_imprecise_error_store_any ? 
buf_addr[lsu_imprecise_error_store_tag] : buf_addr[lsu_nonblock_load_data_tag]; + + // Count the number of pending trxns for fence (doesn't apply to AXI) + assign bus_pend_trxnQ[7:0] = 8'b0; + assign bus_pend_trxn[7:0] = 8'b0; + assign bus_pend_trxn_ns[7:0] = 8'b0; + assign lsu_bus_cntr_overflow = 1'b0; + assign lsu_bus_idle_any = 1'b1; + + // PMU signals + assign lsu_pmu_bus_trxn = (lsu_axi_awvalid & lsu_axi_awready) | (lsu_axi_wvalid & lsu_axi_wready) | (lsu_axi_arvalid & lsu_axi_arready); + assign lsu_pmu_bus_misaligned = lsu_busreq_r & ldst_dual_r & lsu_commit_r; + assign lsu_pmu_bus_error = lsu_imprecise_error_load_any | lsu_imprecise_error_store_any; + assign lsu_pmu_bus_busy = (lsu_axi_awvalid & ~lsu_axi_awready) | (lsu_axi_wvalid & ~lsu_axi_wready) | (lsu_axi_arvalid & ~lsu_axi_arready); + + rvdff #(.WIDTH(1)) lsu_axi_awvalid_ff (.din(lsu_axi_awvalid), .dout(lsu_axi_awvalid_q), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(1)) lsu_axi_awready_ff (.din(lsu_axi_awready), .dout(lsu_axi_awready_q), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(1)) lsu_axi_wvalid_ff (.din(lsu_axi_wvalid), .dout(lsu_axi_wvalid_q), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(1)) lsu_axi_wready_ff (.din(lsu_axi_wready), .dout(lsu_axi_wready_q), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(1)) lsu_axi_arvalid_ff (.din(lsu_axi_arvalid), .dout(lsu_axi_arvalid_q), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(1)) lsu_axi_arready_ff (.din(lsu_axi_arready), .dout(lsu_axi_arready_q), .clk(lsu_busm_clk), .*); + + rvdff #(.WIDTH(1)) lsu_axi_bvalid_ff (.din(lsu_axi_bvalid), .dout(lsu_axi_bvalid_q), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(1)) lsu_axi_bready_ff (.din(lsu_axi_bready), .dout(lsu_axi_bready_q), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(2)) lsu_axi_bresp_ff (.din(lsu_axi_bresp[1:0]), .dout(lsu_axi_bresp_q[1:0]), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(pt.LSU_BUS_TAG)) lsu_axi_bid_ff (.din(lsu_axi_bid[pt.LSU_BUS_TAG-1:0]),.dout(lsu_axi_bid_q[pt.LSU_BUS_TAG-1:0]),.clk(lsu_busm_clk), .*); + 
rvdffe #(.WIDTH(64)) lsu_axi_rdata_ff (.din(lsu_axi_rdata[63:0]), .dout(lsu_axi_rdata_q[63:0]), .en(lsu_axi_rvalid & lsu_bus_clk_en), .*); + + rvdff #(.WIDTH(1)) lsu_axi_rvalid_ff (.din(lsu_axi_rvalid), .dout(lsu_axi_rvalid_q), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(1)) lsu_axi_rready_ff (.din(lsu_axi_rready), .dout(lsu_axi_rready_q), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(2)) lsu_axi_rresp_ff (.din(lsu_axi_rresp[1:0]), .dout(lsu_axi_rresp_q[1:0]), .clk(lsu_busm_clk), .*); + rvdff #(.WIDTH(pt.LSU_BUS_TAG)) lsu_axi_rid_ff (.din(lsu_axi_rid[pt.LSU_BUS_TAG-1:0]),.dout(lsu_axi_rid_q[pt.LSU_BUS_TAG-1:0]),.clk(lsu_busm_clk), .*); + + rvdff #(.WIDTH(DEPTH_LOG2)) lsu_WrPtr0_rff (.din(WrPtr0_m), .dout(WrPtr0_r), .clk(lsu_c2_r_clk), .*); + rvdff #(.WIDTH(DEPTH_LOG2)) lsu_WrPtr1_rff (.din(WrPtr1_m), .dout(WrPtr1_r), .clk(lsu_c2_r_clk), .*); + + rvdff #(.WIDTH(1)) lsu_busreq_rff (.din(lsu_busreq_m & ~flush_r & ~ld_full_hit_m), .dout(lsu_busreq_r), .clk(lsu_c2_r_clk), .*); + rvdff #(.WIDTH(1)) lsu_nonblock_load_valid_rff (.din(lsu_nonblock_load_valid_m), .dout(lsu_nonblock_load_valid_r), .clk(lsu_c2_r_clk), .*); + +`ifdef ASSERT_ON + + for (genvar i=0; i<4; i++) begin: GenByte + assert_ld_byte_hitvecfn_lo_onehot: assert #0 ($onehot0(ld_byte_hitvecfn_lo[i][DEPTH-1:0])); + assert_ld_byte_hitvecfn_hi_onehot: assert #0 ($onehot0(ld_byte_hitvecfn_hi[i][DEPTH-1:0])); + end + + assert_CmdPtr0Dec_onehot: assert #0 ($onehot0(CmdPtr0Dec[DEPTH-1:0])); + assert_CmdPtr1Dec_onehot: assert #0 ($onehot0(CmdPtr1Dec[DEPTH-1:0])); + +`endif + +endmodule // el2_lsu_bus_buffer diff --git a/design/lsu/el2_lsu_bus_intf.sv b/design/lsu/el2_lsu_bus_intf.sv new file mode 100644 index 0000000..d056dde --- /dev/null +++ b/design/lsu/el2_lsu_bus_intf.sv @@ -0,0 +1,369 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// +// Owner: +// Function: lsu interface with interface queue +// Comments: Instantiates el2_lsu_bus_buffer and forwards r-stage store data / bus-buffer data to loads in m +// +//******************************************************************************** +module el2_lsu_bus_intf +import el2_pkg::*; +#( +`include "el2_param.vh" + )( + input logic clk, + input logic rst_l, + input logic scan_mode, + input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals + input logic dec_tlu_wb_coalescing_disable, // disable write buffer coalescing + input logic dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus + + // various clocks needed for the bus reads and writes + input logic lsu_c1_m_clk, + input logic lsu_c1_r_clk, + input logic lsu_c2_r_clk, + input logic lsu_bus_ibuf_c1_clk, + input logic lsu_bus_obuf_c1_clk, + input logic lsu_bus_buf_c1_clk, + input logic lsu_free_c2_clk, + input logic free_clk, + input logic lsu_busm_clk, + + input logic dec_lsu_valid_raw_d, // Raw valid for address computation + input logic lsu_busreq_m, // bus request is in m + + input el2_lsu_pkt_t lsu_pkt_m, // lsu packet flowing down the pipe + input el2_lsu_pkt_t lsu_pkt_r, // lsu packet flowing down the pipe + + input logic [31:0] lsu_addr_d, // lsu address flowing down the pipe + input logic [31:0] lsu_addr_m, // lsu address flowing down the pipe + input
logic [31:0] lsu_addr_r, // lsu address flowing down the pipe + + input logic [31:0] end_addr_d, // lsu address flowing down the pipe + input logic [31:0] end_addr_m, // lsu address flowing down the pipe + input logic [31:0] end_addr_r, // lsu address flowing down the pipe + + input logic [31:0] store_data_r, // store data flowing down the pipe + input logic dec_tlu_force_halt, + + input logic lsu_commit_r, // lsu instruction in r commits + input logic is_sideeffects_m, // lsu attribute is side_effects + input logic flush_m_up, // flush + input logic flush_r, // flush + + output logic lsu_busreq_r, // bus request is in r + output logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry + output logic lsu_bus_buffer_full_any, // write buffer is full + output logic lsu_bus_buffer_empty_any, // write buffer is empty + output logic lsu_bus_idle_any, // NO pending responses from the bus + output logic [31:0] bus_read_data_m, // load data forwarded from the r-stage store / bus buffer (shifted by lsu_addr_m[1:0]) + + + output logic lsu_imprecise_error_load_any, // imprecise load bus error + output logic lsu_imprecise_error_store_any, // imprecise store bus error + output logic [31:0] lsu_imprecise_error_addr_any, // address of the imprecise error + + // Non-blocking loads + output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load + output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the entry which needs to be invalidated + output logic lsu_nonblock_load_data_valid,// the non block is valid - sending information back to the cam + output logic lsu_nonblock_load_data_error,// non block load has an error + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error + output logic [31:0]
lsu_nonblock_load_data, // Data of the non block load + + // PMU events + output logic lsu_pmu_bus_trxn, + output logic lsu_pmu_bus_misaligned, + output logic lsu_pmu_bus_error, + output logic lsu_pmu_bus_busy, + + // AXI Write Channels + output logic lsu_axi_awvalid, + input logic lsu_axi_awready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid, + output logic [31:0] lsu_axi_awaddr, + output logic [3:0] lsu_axi_awregion, + output logic [7:0] lsu_axi_awlen, + output logic [2:0] lsu_axi_awsize, + output logic [1:0] lsu_axi_awburst, + output logic lsu_axi_awlock, + output logic [3:0] lsu_axi_awcache, + output logic [2:0] lsu_axi_awprot, + output logic [3:0] lsu_axi_awqos, + + output logic lsu_axi_wvalid, + input logic lsu_axi_wready, + output logic [63:0] lsu_axi_wdata, + output logic [7:0] lsu_axi_wstrb, + output logic lsu_axi_wlast, + + input logic lsu_axi_bvalid, + output logic lsu_axi_bready, + input logic [1:0] lsu_axi_bresp, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid, + + // AXI Read Channels + output logic lsu_axi_arvalid, + input logic lsu_axi_arready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid, + output logic [31:0] lsu_axi_araddr, + output logic [3:0] lsu_axi_arregion, + output logic [7:0] lsu_axi_arlen, + output logic [2:0] lsu_axi_arsize, + output logic [1:0] lsu_axi_arburst, + output logic lsu_axi_arlock, + output logic [3:0] lsu_axi_arcache, + output logic [2:0] lsu_axi_arprot, + output logic [3:0] lsu_axi_arqos, + + input logic lsu_axi_rvalid, + output logic lsu_axi_rready, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid, + input logic [63:0] lsu_axi_rdata, + input logic [1:0] lsu_axi_rresp, + input logic lsu_axi_rlast, + + input logic lsu_bus_clk_en + +); + + + + logic lsu_bus_clk_en_q; + logic ldst_dual_d, ldst_dual_m, ldst_dual_r; + + logic [3:0] ldst_byteen_m, ldst_byteen_r; + logic [7:0] ldst_byteen_ext_m, ldst_byteen_ext_r; + logic [3:0] ldst_byteen_hi_m, ldst_byteen_hi_r; + logic [3:0] ldst_byteen_lo_m, ldst_byteen_lo_r; + logic
is_sideeffects_r; + + logic [63:0] store_data_ext_r; + logic [31:0] store_data_hi_r; + logic [31:0] store_data_lo_r; + + logic addr_match_dw_lo_r_m; + logic addr_match_word_lo_r_m; + logic no_word_merge_r, no_dword_merge_r; + + logic ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi; + logic [3:0] ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi; + + logic [3:0] ld_byte_hit_lo, ld_byte_rhit_lo; + logic [3:0] ld_byte_hit_hi, ld_byte_rhit_hi; + + logic [31:0] ld_fwddata_rpipe_lo; + logic [31:0] ld_fwddata_rpipe_hi; + + logic [3:0] ld_byte_hit_buf_lo, ld_byte_hit_buf_hi; + logic [31:0] ld_fwddata_buf_lo, ld_fwddata_buf_hi; + + logic [63:0] ld_fwddata_lo, ld_fwddata_hi; // only bits [31:0] are driven and read in this module + logic [63:0] ld_fwddata_m; + + logic ld_full_hit_hi_m, ld_full_hit_lo_m; + logic ld_full_hit_m; + + assign ldst_byteen_m[3:0] = ({4{lsu_pkt_m.by}} & 4'b0001) | + ({4{lsu_pkt_m.half}} & 4'b0011) | + ({4{lsu_pkt_m.word}} & 4'b1111); + assign ldst_dual_d = (lsu_addr_d[2] != end_addr_d[2]); // start and end address fall in different words + + // Read/Write Buffer + el2_lsu_bus_buffer #(.pt(pt)) bus_buffer ( + .* + ); + + // Logic to determine if dc5 store can be coalesced or not with younger stores.
Bypass ibuf if it cannot be coalesced + assign addr_match_dw_lo_r_m = (lsu_addr_r[31:3] == lsu_addr_m[31:3]); + assign addr_match_word_lo_r_m = addr_match_dw_lo_r_m & ~(lsu_addr_r[2]^lsu_addr_m[2]); + + assign no_word_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_word_lo_r_m); + assign no_dword_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_dw_lo_r_m); + + // Create Hi/Lo signals + assign ldst_byteen_ext_m[7:0] = {4'b0,ldst_byteen_m[3:0]} << lsu_addr_m[1:0]; + assign ldst_byteen_ext_r[7:0] = {4'b0,ldst_byteen_r[3:0]} << lsu_addr_r[1:0]; + + assign store_data_ext_r[63:0] = {32'b0,store_data_r[31:0]} << {lsu_addr_r[1:0],3'b0}; + + assign ldst_byteen_hi_m[3:0] = ldst_byteen_ext_m[7:4]; + assign ldst_byteen_lo_m[3:0] = ldst_byteen_ext_m[3:0]; + assign ldst_byteen_hi_r[3:0] = ldst_byteen_ext_r[7:4]; + assign ldst_byteen_lo_r[3:0] = ldst_byteen_ext_r[3:0]; + + assign store_data_hi_r[31:0] = store_data_ext_r[63:32]; + assign store_data_lo_r[31:0] = store_data_ext_r[31:0]; + + assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m; + assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m; + assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m; + assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m; + + for (genvar i=0; i<4; i++) begin: GenBusBufFwd + assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo & ldst_byteen_lo_r[i] & ldst_byteen_lo_m[i]; + assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i] & ldst_byteen_hi_m[i]; + assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i] & ldst_byteen_lo_m[i]; + assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i] & ldst_byteen_hi_m[i]; + + assign
ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i] | + ld_byte_hit_buf_lo[i]; + + assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i] | + ld_byte_hit_buf_hi[i]; + + assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i]; + assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i]; + + assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) | + ({8{ld_byte_rhit_hi_lo[i]}} & store_data_hi_r[(8*i)+7:(8*i)]); + + assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) | + ({8{ld_byte_rhit_hi_hi[i]}} & store_data_hi_r[(8*i)+7:(8*i)]); + + // Final muxing: per byte, r-stage store data wins over bus-buffer data + assign ld_fwddata_lo[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i] ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : ld_fwddata_buf_lo[(8*i)+7:(8*i)]; + + assign ld_fwddata_hi[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i] ? ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : ld_fwddata_buf_hi[(8*i)+7:(8*i)]; + + end + + always_comb begin + ld_full_hit_lo_m = 1'b1; + ld_full_hit_hi_m = 1'b1; + for (int i=0; i<4; i++) begin + ld_full_hit_lo_m &= (ld_byte_hit_lo[i] | ~ldst_byteen_lo_m[i]); + ld_full_hit_hi_m &= (ld_byte_hit_hi[i] | ~ldst_byteen_hi_m[i]); + end + end + + // This will be high if all the bytes of load hit the stores in pipe/write buffer (m/r/wrbuf) + assign ld_full_hit_m = ld_full_hit_lo_m & ld_full_hit_hi_m & lsu_busreq_m & lsu_pkt_m.load & ~is_sideeffects_m; + + assign ld_fwddata_m[63:0] = {ld_fwddata_hi[31:0], ld_fwddata_lo[31:0]} >> (8*lsu_addr_m[1:0]); + assign bus_read_data_m[31:0] = ld_fwddata_m[31:0]; + + // Fifo flops + + rvdff #(.WIDTH(1)) clken_ff (.din(lsu_bus_clk_en), .dout(lsu_bus_clk_en_q), .clk(free_clk), .*); + + rvdff #(.WIDTH(1)) ldst_dual_mff (.din(ldst_dual_d), .dout(ldst_dual_m), .clk(lsu_c1_m_clk), .*); + rvdff #(.WIDTH(1)) ldst_dual_rff (.din(ldst_dual_m), .dout(ldst_dual_r), .clk(lsu_c1_r_clk), .*); + rvdff #(.WIDTH(1)) is_sideeffects_rff
(.din(is_sideeffects_m), .dout(is_sideeffects_r), .clk(lsu_c1_r_clk), .*); + + rvdff #(4) lsu_byten_rff (.*, .din(ldst_byteen_m[3:0]), .dout(ldst_byteen_r[3:0]), .clk(lsu_c1_r_clk)); + +`ifdef ASSERT_ON + + // Assertion to check AXI write address is aligned to size + property lsu_axi_awaddr_aligned; + @(posedge lsu_busm_clk) disable iff(~rst_l) lsu_axi_awvalid |-> ((lsu_axi_awsize[2:0] == 3'h0) | + ((lsu_axi_awsize[2:0] == 3'h1) & (lsu_axi_awaddr[0] == 1'b0)) | + ((lsu_axi_awsize[2:0] == 3'h2) & (lsu_axi_awaddr[1:0] == 2'b0)) | + ((lsu_axi_awsize[2:0] == 3'h3) & (lsu_axi_awaddr[2:0] == 3'b0))); + endproperty + assert_lsu_axi_awaddr_aligned: assert property (lsu_axi_awaddr_aligned) else + $display("Assertion lsu_axi_awaddr_aligned failed: lsu_axi_awvalid=1'b%b, lsu_axi_awsize=3'h%h, lsu_axi_awaddr=32'h%h",lsu_axi_awvalid, lsu_axi_awsize[2:0], lsu_axi_awaddr[31:0]); + // (awvalid stability is checked by lsu_axi_awvalid_stable further down) + + // Assertion to check AXI read address is aligned to size + property lsu_axi_araddr_aligned; + @(posedge lsu_busm_clk) disable iff(~rst_l) lsu_axi_arvalid |-> ((lsu_axi_arsize[2:0] == 3'h0) | + ((lsu_axi_arsize[2:0] == 3'h1) & (lsu_axi_araddr[0] == 1'b0)) | + ((lsu_axi_arsize[2:0] == 3'h2) & (lsu_axi_araddr[1:0] == 2'b0)) | + ((lsu_axi_arsize[2:0] == 3'h3) & (lsu_axi_araddr[2:0] == 3'b0))); + endproperty + assert_lsu_axi_araddr_aligned: assert property (lsu_axi_araddr_aligned) else + $display("Assertion lsu_axi_araddr_aligned failed: lsu_axi_arvalid=1'b%b, lsu_axi_arsize=3'h%h, lsu_axi_araddr=32'h%h",lsu_axi_arvalid, lsu_axi_arsize[2:0], lsu_axi_araddr[31:0]); + + // Assertion to check awvalid stays stable during entire bus clock + property lsu_axi_awvalid_stable; + @(posedge clk) disable iff(~rst_l) (lsu_axi_awvalid != $past(lsu_axi_awvalid)) |-> $past(lsu_bus_clk_en); + endproperty + assert_lsu_axi_awvalid_stable: assert property (lsu_axi_awvalid_stable) else + $display("LSU AXI awvalid changed in middle of bus clock"); + + //
Assertion to check awid stays stable during entire bus clock + property lsu_axi_awid_stable; + @(posedge clk) disable iff(~rst_l) (lsu_axi_awvalid & (lsu_axi_awid[pt.LSU_BUS_TAG-1:0] != $past(lsu_axi_awid[pt.LSU_BUS_TAG-1:0]))) |-> $past(lsu_bus_clk_en); + endproperty + assert_lsu_axi_awid_stable: assert property (lsu_axi_awid_stable) else + $display("LSU AXI awid changed in middle of bus clock"); + + // Assertion to check awaddr stays stable during entire bus clock + property lsu_axi_awaddr_stable; + @(posedge clk) disable iff(~rst_l) (lsu_axi_awvalid & (lsu_axi_awaddr[31:0] != $past(lsu_axi_awaddr[31:0]))) |-> $past(lsu_bus_clk_en); + endproperty + assert_lsu_axi_awaddr_stable: assert property (lsu_axi_awaddr_stable) else + $display("LSU AXI awaddr changed in middle of bus clock"); + + // Assertion to check awsize stays stable during entire bus clock + property lsu_axi_awsize_stable; + @(posedge clk) disable iff(~rst_l) (lsu_axi_awvalid & (lsu_axi_awsize[2:0] != $past(lsu_axi_awsize[2:0]))) |-> $past(lsu_bus_clk_en); + endproperty + assert_lsu_axi_awsize_stable: assert property (lsu_axi_awsize_stable) else + $display("LSU AXI awsize changed in middle of bus clock"); + + // Assertion to check wstrb stays stable during entire bus clock + property lsu_axi_wstrb_stable; + @(posedge clk) disable iff(~rst_l) (lsu_axi_wvalid & (lsu_axi_wstrb[7:0] != $past(lsu_axi_wstrb[7:0]))) |-> $past(lsu_bus_clk_en); + endproperty + assert_lsu_axi_wstrb_stable: assert property (lsu_axi_wstrb_stable) else + $display("LSU AXI wstrb changed in middle of bus clock"); + + // Assertion to check wdata stays stable during entire bus clock + property lsu_axi_wdata_stable; + @(posedge clk) disable iff(~rst_l) (lsu_axi_wvalid & (lsu_axi_wdata[63:0] != $past(lsu_axi_wdata[63:0]))) |-> $past(lsu_bus_clk_en); + endproperty + assert_lsu_axi_wdata_stable: assert property (lsu_axi_wdata_stable) else + $display("LSU AXI wdata changed in middle of bus clock"); + + // Assertion to check arvalid stays
stable during entire bus clock + property lsu_axi_arvalid_stable; + @(posedge clk) disable iff(~rst_l) (lsu_axi_arvalid != $past(lsu_axi_arvalid)) |-> $past(lsu_bus_clk_en); + endproperty + assert_lsu_axi_arvalid_stable: assert property (lsu_axi_arvalid_stable) else + $display("LSU AXI arvalid changed in middle of bus clock"); + + // Assertion to check arid stays stable during entire bus clock + property lsu_axi_arid_stable; + @(posedge clk) disable iff(~rst_l) (lsu_axi_arvalid & (lsu_axi_arid[pt.LSU_BUS_TAG-1:0] != $past(lsu_axi_arid[pt.LSU_BUS_TAG-1:0]))) |-> $past(lsu_bus_clk_en); + endproperty + assert_lsu_axi_arid_stable: assert property (lsu_axi_arid_stable) else + $display("LSU AXI arid changed in middle of bus clock"); + + // Assertion to check araddr stays stable during entire bus clock + property lsu_axi_araddr_stable; + @(posedge clk) disable iff(~rst_l) (lsu_axi_arvalid & (lsu_axi_araddr[31:0] != $past(lsu_axi_araddr[31:0]))) |-> $past(lsu_bus_clk_en); + endproperty + assert_lsu_axi_araddr_stable: assert property (lsu_axi_araddr_stable) else + $display("LSU AXI araddr changed in middle of bus clock"); + + // Assertion to check arsize stays stable during entire bus clock (qualified by arvalid, the read-address valid) + property lsu_axi_arsize_stable; + @(posedge clk) disable iff(~rst_l) (lsu_axi_arvalid & (lsu_axi_arsize[2:0] != $past(lsu_axi_arsize[2:0]))) |-> $past(lsu_bus_clk_en); + endproperty + assert_lsu_axi_arsize_stable: assert property (lsu_axi_arsize_stable) else + $display("LSU AXI arsize changed in middle of bus clock"); + +`endif + +endmodule // el2_lsu_bus_intf diff --git a/design/lsu/el2_lsu_clkdomain.sv b/design/lsu/el2_lsu_clkdomain.sv new file mode 100644 index 0000000..bc27f5c --- /dev/null +++ b/design/lsu/el2_lsu_clkdomain.sv @@ -0,0 +1,137 @@ +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// +// Owner: +// Function: Clock Generation Block +// Comments: All the LSU gated clocks are generated here from per-stage clock enables +// +// //******************************************************************************** + + +module el2_lsu_clkdomain +import el2_pkg::*; +#( +`include "el2_param.vh" +)( + input logic clk, // clock + input logic free_clk, // clock + input logic rst_l, // reset + + // Inputs + input logic clk_override, // chicken bit to turn off clock gating + input logic addr_in_dccm_m, // address in dccm (not referenced inside this module) + input logic dma_dccm_req, // dma is active + input logic ldst_stbuf_reqvld_r, // allocating in to the store queue + + input logic stbuf_reqvld_any, // stbuf is draining + input logic stbuf_reqvld_flushed_any, // instruction going to stbuf is flushed + input logic lsu_busreq_r, // busreq in r + input logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry + input logic lsu_bus_buffer_empty_any, // external bus buffer is empty + input logic lsu_stbuf_empty_any, // stbuf is empty + + input logic lsu_bus_clk_en, // bus clock enable + + input el2_lsu_pkt_t lsu_p, // lsu packet in decode + input el2_lsu_pkt_t lsu_pkt_d, // lsu packet in d + input el2_lsu_pkt_t lsu_pkt_m, // lsu packet in m + input el2_lsu_pkt_t lsu_pkt_r, // lsu packet in r + + // Outputs + output logic lsu_c1_m_clk, // m pipe single pulse clock + output logic lsu_c1_r_clk, // m pipe single pulse clock + + output logic lsu_c2_m_clk, // m pipe double pulse clock + output logic lsu_c2_r_clk,
// r pipe double pulse clock + + output logic lsu_store_c1_m_clk, // store in m + output logic lsu_store_c1_r_clk, // store in r + + output logic lsu_stbuf_c1_clk, // stbuf clock + output logic lsu_bus_obuf_c1_clk, // obuf clock + output logic lsu_bus_ibuf_c1_clk, // ibuf clock + output logic lsu_bus_buf_c1_clk, // bus buffer clock + output logic lsu_busm_clk, // bus clock + + output logic lsu_free_c2_clk, // lsu free clock; also clocks the lsu_c1_*_clken_q flops + + input logic scan_mode +); + + logic lsu_c1_d_clken, lsu_c1_m_clken, lsu_c1_r_clken; + logic lsu_c2_m_clken, lsu_c2_r_clken; + logic lsu_c1_d_clken_q, lsu_c1_m_clken_q, lsu_c1_r_clken_q; + logic lsu_store_c1_m_clken, lsu_store_c1_r_clken; + + + logic lsu_stbuf_c1_clken; + logic lsu_bus_ibuf_c1_clken, lsu_bus_obuf_c1_clken, lsu_bus_buf_c1_clken; + + logic lsu_free_c1_clken, lsu_free_c1_clken_q, lsu_free_c2_clken; + + //------------------------------------------------------------------------------------------- + // Clock Enable logic (every enable is forced on by clk_override) + //------------------------------------------------------------------------------------------- + + assign lsu_c1_d_clken = lsu_p.valid | dma_dccm_req | clk_override; + assign lsu_c1_m_clken = lsu_pkt_d.valid | lsu_c1_d_clken_q | clk_override; + assign lsu_c1_r_clken = lsu_pkt_m.valid | lsu_c1_m_clken_q | clk_override; + + assign lsu_c2_m_clken = lsu_c1_m_clken | lsu_c1_m_clken_q | clk_override; + assign lsu_c2_r_clken = lsu_c1_r_clken | lsu_c1_r_clken_q | clk_override; + + assign lsu_store_c1_m_clken = ((lsu_c1_m_clken & lsu_pkt_d.store) | clk_override) ; + assign lsu_store_c1_r_clken = ((lsu_c1_r_clken & lsu_pkt_m.store) | clk_override) ; + + assign lsu_stbuf_c1_clken = ldst_stbuf_reqvld_r | stbuf_reqvld_any | stbuf_reqvld_flushed_any | clk_override; + assign lsu_bus_ibuf_c1_clken = lsu_busreq_r | clk_override; + assign lsu_bus_obuf_c1_clken = (lsu_bus_buffer_pend_any | lsu_busreq_r | clk_override) & lsu_bus_clk_en; // additionally qualified by the bus clock enable + assign lsu_bus_buf_c1_clken = ~lsu_bus_buffer_empty_any | lsu_busreq_r | clk_override; + + assign lsu_free_c1_clken = (lsu_p.valid |
lsu_pkt_d.valid | lsu_pkt_m.valid | lsu_pkt_r.valid) | + ~lsu_bus_buffer_empty_any | ~lsu_stbuf_empty_any | clk_override; + assign lsu_free_c2_clken = lsu_free_c1_clken | lsu_free_c1_clken_q | clk_override; + + // Flops holding the registered copies of the clock enables + rvdff #(1) lsu_free_c1_clkenff (.din(lsu_free_c1_clken), .dout(lsu_free_c1_clken_q), .clk(free_clk), .*); + + rvdff #(1) lsu_c1_d_clkenff (.din(lsu_c1_d_clken), .dout(lsu_c1_d_clken_q), .clk(lsu_free_c2_clk), .*); + rvdff #(1) lsu_c1_m_clkenff (.din(lsu_c1_m_clken), .dout(lsu_c1_m_clken_q), .clk(lsu_free_c2_clk), .*); + rvdff #(1) lsu_c1_r_clkenff (.din(lsu_c1_r_clken), .dout(lsu_c1_r_clken_q), .clk(lsu_free_c2_clk), .*); + + // Clock Headers: one gated clock per enable computed in the Clock Enable logic section + rvoclkhdr lsu_c1m_cgc ( .en(lsu_c1_m_clken), .l1clk(lsu_c1_m_clk), .* ); + rvoclkhdr lsu_c1r_cgc ( .en(lsu_c1_r_clken), .l1clk(lsu_c1_r_clk), .* ); + + rvoclkhdr lsu_c2m_cgc ( .en(lsu_c2_m_clken), .l1clk(lsu_c2_m_clk), .* ); + rvoclkhdr lsu_c2r_cgc ( .en(lsu_c2_r_clken), .l1clk(lsu_c2_r_clk), .* ); + + rvoclkhdr lsu_store_c1m_cgc (.en(lsu_store_c1_m_clken), .l1clk(lsu_store_c1_m_clk), .*); + rvoclkhdr lsu_store_c1r_cgc (.en(lsu_store_c1_r_clken), .l1clk(lsu_store_c1_r_clk), .*); + + rvoclkhdr lsu_stbuf_c1_cgc ( .en(lsu_stbuf_c1_clken), .l1clk(lsu_stbuf_c1_clk), .* ); + rvoclkhdr lsu_bus_ibuf_c1_cgc ( .en(lsu_bus_ibuf_c1_clken), .l1clk(lsu_bus_ibuf_c1_clk), .* ); + rvclkhdr lsu_bus_obuf_c1_cgc ( .en(lsu_bus_obuf_c1_clken), .l1clk(lsu_bus_obuf_c1_clk), .* ); + rvoclkhdr lsu_bus_buf_c1_cgc ( .en(lsu_bus_buf_c1_clken), .l1clk(lsu_bus_buf_c1_clk), .* ); + + rvclkhdr lsu_busm_cgc (.en(lsu_bus_clk_en), .l1clk(lsu_busm_clk), .*); + + rvoclkhdr lsu_free_cgc (.en(lsu_free_c2_clken), .l1clk(lsu_free_c2_clk), .*); + +endmodule + + diff --git a/design/lsu/el2_lsu_dccm_ctl.sv b/design/lsu/el2_lsu_dccm_ctl.sv new file mode 100644 index 0000000..0857631 --- /dev/null +++ b/design/lsu/el2_lsu_dccm_ctl.sv @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's
affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// +// Owner: +// Function: DCCM for LSU pipe +// Comments: Single ported memory +// +// +// DC1 -> DC2 -> DC3 -> DC4 (Commit) +// +// //******************************************************************************** + +module el2_lsu_dccm_ctl +import el2_pkg::*; +#( +`include "el2_param.vh" + ) + ( + input logic lsu_c2_m_clk, // clocks + input logic lsu_c2_r_clk, // clocks + input logic lsu_c1_r_clk, + input logic lsu_store_c1_r_clk, + input logic lsu_free_c2_clk, + input logic clk, + + input logic rst_l, + + input el2_lsu_pkt_t lsu_pkt_r, // lsu packets + input el2_lsu_pkt_t lsu_pkt_m, // lsu packets + input el2_lsu_pkt_t lsu_pkt_d, + input logic addr_in_dccm_d, // address maps to dccm + input logic addr_in_pic_d, // address maps to pic + input logic addr_in_pic_m, // address maps to pic + input logic addr_in_dccm_m, addr_in_dccm_r, + input logic addr_in_pic_r, + input logic lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r, + input logic lsu_commit_r, + + input logic [31:0] lsu_addr_d, // starting byte address for loads + input logic [pt.DCCM_BITS-1:0] lsu_addr_m, // starting byte address for loads + input logic [31:0] lsu_addr_r, // starting byte address for loads + + input logic [pt.DCCM_BITS-1:0] end_addr_d, + input logic [pt.DCCM_BITS-1:0] end_addr_m, + input logic [pt.DCCM_BITS-1:0] end_addr_r, + + + input 
logic stbuf_reqvld_any, // write enable + input logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any, // stbuf address (aligned) + + input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, // the read out from stbuf + input logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, // the encoded data with ECC bits + input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m, // stbuf fowarding to load + input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m, // stbuf fowarding to load + input logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m, // stbuf fowarding to load + input logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m, // stbuf fowarding to load + + output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r, // data from the dccm + output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r, // data from the dccm + output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_r, // data from the dccm + ecc + output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_r, + output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_r, // right justified, ie load byte will have data at 7:0 + output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_corr_r, // right justified & ECC corrected, ie load byte will have data at 7:0 + + input logic lsu_double_ecc_error_r, // lsu has a DED + input logic single_ecc_error_hi_r, // sec detected on hi dccm bank + input logic single_ecc_error_lo_r, // sec detected on lower dccm bank + input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r, // corrected dccm data + input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r, // corrected dccm data + input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff, // corrected dccm data + input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, // corrected dccm data + input logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, // the encoded data with ECC bits + input logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_lo_r_ff, // the encoded data with ECC bits + + output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m, // data from the dccm + output logic 
[pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m, // data from the dccm + output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_m, // data from the dccm + ecc + output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_m, + output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_m, // right justified, ie load byte will have data at 7:0 + + input logic lsu_double_ecc_error_m, // lsu has a DED + input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m, // corrected dccm data + input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m, // corrected dccm data + + input logic [31:0] store_data_m, + input logic dma_dccm_wen, + input logic [2:0] dma_mem_tag_m, + input logic [31:0] dma_dccm_wdata_lo, + input logic [31:0] dma_dccm_wdata_hi, + input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata + input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, // ECC bits for the DMA wdata + + output logic [pt.DCCM_DATA_WIDTH-1:0] store_data_hi_r, + output logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r, + output logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_hi_r, // data from the dccm + output logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_lo_r, // data from the dccm + output logic [31:0] store_data_r, // raw store data to be sent to bus + output logic ld_single_ecc_error_r, + output logic ld_single_ecc_error_r_ff, + + output logic [31:0] picm_mask_data_m, // pic data to stbuf + output logic lsu_stbuf_commit_any, // stbuf wins the dccm port or is to pic + output logic lsu_dccm_rden_m, // dccm read + output logic lsu_dccm_rden_r, // dccm read + + output logic dccm_dma_rvalid, // dccm serviving the dma load + output logic dccm_dma_ecc_error, // DMA load had ecc error + output logic [2:0] dccm_dma_rtag, // DMA return tag + output logic [63:0] dccm_dma_rdata, // dccm data to dma request + + // DCCM ports + output logic dccm_wren, // dccm interface -- write + output logic dccm_rden, // dccm interface -- write + output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // dccm interface -- wr 
addr for lo bank + output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // dccm interface -- wr addr for hi bank + output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // dccm interface -- read address for lo bank + output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // dccm interface -- read address for hi bank + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // dccm write data for lo bank + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // dccm write data for hi bank + + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // dccm read data back from the dccm + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // dccm read data back from the dccm + + // PIC ports + output logic picm_wren, // write to pic + output logic picm_rden, // read to pick + output logic picm_mken, // write to pic need a mask + output logic [31:0] picm_rdaddr, // address for pic read access + output logic [31:0] picm_wraddr, // address for pic write access + output logic [31:0] picm_wr_data, // write data + input logic [31:0] picm_rd_data, // read data + + input logic scan_mode // scan mode +); + + + localparam DCCM_WIDTH_BITS = $clog2(pt.DCCM_BYTE_WIDTH); + + logic lsu_dccm_rden_d, lsu_dccm_wren_d; + logic ld_single_ecc_error_lo_r, ld_single_ecc_error_hi_r; + logic ld_single_ecc_error_lo_r_ns, ld_single_ecc_error_hi_r_ns; + logic ld_single_ecc_error_lo_r_ff, ld_single_ecc_error_hi_r_ff; + logic lsu_double_ecc_error_r_ff; + logic [pt.DCCM_BITS-1:0] ld_sec_addr_lo_r_ff, ld_sec_addr_hi_r_ff; + logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r_in, store_data_hi_r_in ; + logic [63:0] picm_rd_data_m; + + logic dccm_wr_bypass_d_m_hi, dccm_wr_bypass_d_r_hi; + logic dccm_wr_bypass_d_m_lo, dccm_wr_bypass_d_r_lo; + logic kill_ecc_corr_lo_r, kill_ecc_corr_hi_r; + + // byte_en flowing down + logic [3:0] store_byteen_m ,store_byteen_r; + logic [7:0] store_byteen_ext_m, store_byteen_ext_r; + + if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1 + logic [63:0] lsu_rdata_r, lsu_rdata_corr_r; + 
logic [63:0] dccm_rdata_r, dccm_rdata_corr_r; + logic [63:0] stbuf_fwddata_r; + logic [7:0] stbuf_fwdbyteen_r; + logic [31:0] stbuf_fwddata_lo_r, stbuf_fwddata_hi_r; + logic [3:0] stbuf_fwdbyteen_lo_r, stbuf_fwdbyteen_hi_r; + logic [31:0] lsu_rdata_lo_r, lsu_rdata_hi_r; + logic [63:0] picm_rd_data_r; + logic [63:32] lsu_ld_data_r_nc, lsu_ld_data_corr_r_nc; + logic [2:0] dma_mem_tag_r; + + assign dccm_dma_rvalid = lsu_pkt_r.valid & lsu_pkt_r.load & lsu_pkt_r.dma; + assign dccm_dma_ecc_error = lsu_double_ecc_error_r; + assign dccm_dma_rtag[2:0] = dma_mem_tag_r[2:0]; + assign dccm_dma_rdata[63:0] = lsu_rdata_corr_r; + assign {lsu_ld_data_r_nc[63:32], lsu_ld_data_r[31:0]} = lsu_rdata_r[63:0] >> 8*lsu_addr_r[1:0]; + assign {lsu_ld_data_corr_r_nc[63:32], lsu_ld_data_corr_r[31:0]} = lsu_rdata_corr_r[63:0] >> 8*lsu_addr_r[1:0]; + + assign picm_rd_data_r[63:32] = picm_rd_data_r[31:0]; + assign dccm_rdata_r[63:0] = {dccm_rdata_hi_r[31:0],dccm_rdata_lo_r[31:0]}; + assign dccm_rdata_corr_r[63:0] = {sec_data_hi_r[31:0],sec_data_lo_r[31:0]}; + assign stbuf_fwddata_r[63:0] = {stbuf_fwddata_hi_r[31:0], stbuf_fwddata_lo_r[31:0]}; + assign stbuf_fwdbyteen_r[7:0] = {stbuf_fwdbyteen_hi_r[3:0], stbuf_fwdbyteen_lo_r[3:0]}; + + for (genvar i=0; i<8; i++) begin: GenDMAData + assign lsu_rdata_corr_r[(8*i)+7:8*i] = stbuf_fwdbyteen_r[i] ? stbuf_fwddata_r[(8*i)+7:8*i] : + (addr_in_pic_r ? picm_rd_data_r[(8*i)+7:8*i] : dccm_rdata_corr_r[(8*i)+7:8*i]); + + assign lsu_rdata_r[(8*i)+7:8*i] = stbuf_fwdbyteen_r[i] ? stbuf_fwddata_r[(8*i)+7:8*i] : + (addr_in_pic_r ? 
picm_rd_data_r[(8*i)+7:8*i] : dccm_rdata_r[(8*i)+7:8*i]); + end + rvdffe #(pt.DCCM_DATA_WIDTH) dccm_rdata_hi_r_ff (.*, .din(dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_dccm_rden_m)); + rvdffe #(pt.DCCM_DATA_WIDTH) dccm_rdata_lo_r_ff (.*, .din(dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_dccm_rden_m)); + rvdffe #(2*pt.DCCM_ECC_WIDTH) dccm_data_ecc_r_ff (.*, .din({dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0], dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0]}), + .dout({dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0], dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0]}), .en(lsu_dccm_rden_m)); + rvdff #(8) stbuf_fwdbyteen_ff (.*, .din({stbuf_fwdbyteen_hi_m[3:0], stbuf_fwdbyteen_lo_m[3:0]}), .dout({stbuf_fwdbyteen_hi_r[3:0], stbuf_fwdbyteen_lo_r[3:0]}), .clk(lsu_c2_r_clk)); + rvdff #(64) stbuf_fwddata_ff (.*, .din({stbuf_fwddata_hi_m[31:0], stbuf_fwddata_lo_m[31:0]}), .dout({stbuf_fwddata_hi_r[31:0], stbuf_fwddata_lo_r[31:0]}), .clk(lsu_c2_r_clk)); + rvdff #(32) picm_rddata_rff (.*, .din(picm_rd_data_m[31:0]), .dout(picm_rd_data_r[31:0]), .clk(lsu_c2_r_clk)); + rvdff #(3) dma_mem_tag_rff (.*, .din(dma_mem_tag_m[2:0]), .dout(dma_mem_tag_r[2:0]), .clk(lsu_c1_r_clk)); + + end else begin: L2U_Plus1_0 + + logic [63:0] lsu_rdata_m, lsu_rdata_corr_m; + logic [63:0] dccm_rdata_m, dccm_rdata_corr_m; + logic [63:0] stbuf_fwddata_m; + logic [7:0] stbuf_fwdbyteen_m; + logic [63:32] lsu_ld_data_m_nc, lsu_ld_data_corr_m_nc; + logic [31:0] lsu_ld_data_corr_m; + + assign dccm_dma_rvalid = lsu_pkt_m.valid & lsu_pkt_m.load & lsu_pkt_m.dma; + assign dccm_dma_ecc_error = lsu_double_ecc_error_m; + assign dccm_dma_rtag[2:0] = dma_mem_tag_m[2:0]; + assign dccm_dma_rdata[63:0] = lsu_rdata_corr_m; + assign {lsu_ld_data_m_nc[63:32], lsu_ld_data_m[31:0]} = lsu_rdata_m[63:0] >> 8*lsu_addr_m[1:0]; + assign {lsu_ld_data_corr_m_nc[63:32], lsu_ld_data_corr_m[31:0]} = lsu_rdata_corr_m[63:0] >> 8*lsu_addr_m[1:0]; + + 
assign dccm_rdata_m[63:0] = {dccm_rdata_hi_m[31:0],dccm_rdata_lo_m[31:0]}; + assign dccm_rdata_corr_m[63:0] = {sec_data_hi_m[31:0],sec_data_lo_m[31:0]}; + assign stbuf_fwddata_m[63:0] = {stbuf_fwddata_hi_m[31:0], stbuf_fwddata_lo_m[31:0]}; + assign stbuf_fwdbyteen_m[7:0] = {stbuf_fwdbyteen_hi_m[3:0], stbuf_fwdbyteen_lo_m[3:0]}; + + for (genvar i=0; i<8; i++) begin: GenLoop + assign lsu_rdata_corr_m[(8*i)+7:8*i] = stbuf_fwdbyteen_m[i] ? stbuf_fwddata_m[(8*i)+7:8*i] : + (addr_in_pic_m ? picm_rd_data_m[(8*i)+7:8*i] : dccm_rdata_corr_m[(8*i)+7:8*i]); + + assign lsu_rdata_m[(8*i)+7:8*i] = stbuf_fwdbyteen_m[i] ? stbuf_fwddata_m[(8*i)+7:8*i] : + (addr_in_pic_m ? picm_rd_data_m[(8*i)+7:8*i] : dccm_rdata_m[(8*i)+7:8*i]); + end + + rvdff #(32) lsu_ld_data_corr_rff(.*, .din(lsu_ld_data_corr_m[31:0]), .dout(lsu_ld_data_corr_r[31:0]), .clk(lsu_c2_r_clk)); + end + + assign kill_ecc_corr_lo_r = (((lsu_addr_d[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_d[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_d.valid & lsu_pkt_d.store & lsu_pkt_d.dma & addr_in_dccm_d) | + (((lsu_addr_m[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_m[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_m.valid & lsu_pkt_m.store & lsu_pkt_m.dma & addr_in_dccm_m); + + assign kill_ecc_corr_hi_r = (((lsu_addr_d[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_d[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_d.valid & lsu_pkt_d.store & lsu_pkt_d.dma & addr_in_dccm_d) | + (((lsu_addr_m[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_m[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_m.valid & lsu_pkt_m.store & lsu_pkt_m.dma & addr_in_dccm_m); + + assign ld_single_ecc_error_lo_r = lsu_pkt_r.load & single_ecc_error_lo_r & ~lsu_raw_fwd_lo_r; + assign ld_single_ecc_error_hi_r = lsu_pkt_r.load & single_ecc_error_hi_r & ~lsu_raw_fwd_hi_r; + assign ld_single_ecc_error_r = (ld_single_ecc_error_lo_r | 
ld_single_ecc_error_hi_r) & ~lsu_double_ecc_error_r; + + assign ld_single_ecc_error_lo_r_ns = ld_single_ecc_error_lo_r & (lsu_commit_r | lsu_pkt_r.dma) & ~kill_ecc_corr_lo_r; + assign ld_single_ecc_error_hi_r_ns = ld_single_ecc_error_hi_r & (lsu_commit_r | lsu_pkt_r.dma) & ~kill_ecc_corr_hi_r; + assign ld_single_ecc_error_r_ff = (ld_single_ecc_error_lo_r_ff | ld_single_ecc_error_hi_r_ff) & ~lsu_double_ecc_error_r_ff; + + assign lsu_stbuf_commit_any = stbuf_reqvld_any & + (~(lsu_dccm_rden_d | lsu_dccm_wren_d | ld_single_ecc_error_r_ff) | + (lsu_dccm_rden_d & ~((stbuf_addr_any[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == lsu_addr_d[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]) | + (stbuf_addr_any[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == end_addr_d[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS])))); + + // No need to read for aligned word/dword stores since ECC will come by new data completely + assign lsu_dccm_rden_d = lsu_pkt_d.valid & (lsu_pkt_d.load | (lsu_pkt_d.store & (~(lsu_pkt_d.word | lsu_pkt_d.dword) | (lsu_addr_d[1:0] != 2'b0)))) & addr_in_dccm_d; + + // DMA will read/write in decode stage + assign lsu_dccm_wren_d = dma_dccm_wen; + + // DCCM inputs + assign dccm_wren = lsu_dccm_wren_d | lsu_stbuf_commit_any | ld_single_ecc_error_r_ff; + assign dccm_rden = lsu_dccm_rden_d & addr_in_dccm_d; + assign dccm_wr_addr_lo[pt.DCCM_BITS-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_lo_r_ff ? ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0] : ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0]) : + lsu_dccm_wren_d ? lsu_addr_d[pt.DCCM_BITS-1:0] : stbuf_addr_any[pt.DCCM_BITS-1:0]; + assign dccm_wr_addr_hi[pt.DCCM_BITS-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_hi_r_ff ? ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0] : ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0]) : + lsu_dccm_wren_d ? 
end_addr_d[pt.DCCM_BITS-1:0] : stbuf_addr_any[pt.DCCM_BITS-1:0]; + assign dccm_rd_addr_lo[pt.DCCM_BITS-1:0] = lsu_addr_d[pt.DCCM_BITS-1:0]; + assign dccm_rd_addr_hi[pt.DCCM_BITS-1:0] = end_addr_d[pt.DCCM_BITS-1:0]; + assign dccm_wr_data_lo[pt.DCCM_FDATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_lo_r_ff ? {sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]} : + {sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]}) : + (dma_dccm_wen ? {dma_dccm_wdata_ecc_lo[pt.DCCM_ECC_WIDTH-1:0],dma_dccm_wdata_lo[pt.DCCM_DATA_WIDTH-1:0]} : + {stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0],stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]}); + assign dccm_wr_data_hi[pt.DCCM_FDATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_hi_r_ff ? {sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]} : + {sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]}) : + (dma_dccm_wen ? 
{dma_dccm_wdata_ecc_hi[pt.DCCM_ECC_WIDTH-1:0],dma_dccm_wdata_hi[pt.DCCM_DATA_WIDTH-1:0]} : + {stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0],stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]}); + + // DCCM outputs + assign store_byteen_m[3:0] = {4{lsu_pkt_m.store}} & + (({4{lsu_pkt_m.by}} & 4'b0001) | + ({4{lsu_pkt_m.half}} & 4'b0011) | + ({4{lsu_pkt_m.word}} & 4'b1111)); + + assign store_byteen_r[3:0] = {4{lsu_pkt_r.store}} & + (({4{lsu_pkt_r.by}} & 4'b0001) | + ({4{lsu_pkt_r.half}} & 4'b0011) | + ({4{lsu_pkt_r.word}} & 4'b1111)); + + assign store_byteen_ext_m[7:0] = {4'b0,store_byteen_m[3:0]} << lsu_addr_m[1:0]; // The packet in m + assign store_byteen_ext_r[7:0] = {4'b0,store_byteen_r[3:0]} << lsu_addr_r[1:0]; + + + + assign dccm_wr_bypass_d_m_lo = (stbuf_addr_any[pt.DCCM_BITS-1:2] == lsu_addr_m[pt.DCCM_BITS-1:2]) & addr_in_dccm_m; + assign dccm_wr_bypass_d_m_hi = (stbuf_addr_any[pt.DCCM_BITS-1:2] == end_addr_m[pt.DCCM_BITS-1:2]) & addr_in_dccm_m; + + assign dccm_wr_bypass_d_r_lo = (stbuf_addr_any[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) & addr_in_dccm_r; + assign dccm_wr_bypass_d_r_hi = (stbuf_addr_any[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) & addr_in_dccm_r; + + + if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U1_Plus1_1 + logic dccm_wren_Q; + logic [31:0] dccm_wr_data_Q; + logic dccm_wr_bypass_d_m_lo_Q, dccm_wr_bypass_d_m_hi_Q; + logic [31:0] store_data_pre_hi_r, store_data_pre_lo_r; + + assign {store_data_pre_hi_r[31:0], store_data_pre_lo_r[31:0]} = {32'b0,store_data_r[31:0]} << 8*lsu_addr_r[1:0]; + + for (genvar i=0; i<4; i++) begin + assign store_data_lo_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i] ? store_data_pre_lo_r[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_lo_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_lo_r[(8*i)+7:(8*i)]); + assign store_data_hi_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i+4] ? store_data_pre_hi_r[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_hi_Q) ? 
dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_hi_r[(8*i)+7:(8*i)]); + + assign store_datafn_lo_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i] ? store_data_pre_lo_r[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_r_lo) ? stbuf_data_any[(8*i)+7:(8*i)] : + ((dccm_wren_Q & dccm_wr_bypass_d_m_lo_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_lo_r[(8*i)+7:(8*i)])); + assign store_datafn_hi_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i+4] ? store_data_pre_hi_r[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_r_hi) ? stbuf_data_any[(8*i)+7:(8*i)] : + ((dccm_wren_Q & dccm_wr_bypass_d_m_hi_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_hi_r[(8*i)+7:(8*i)])); + end + + rvdff #(1) dccm_wren_ff (.*, .din(lsu_stbuf_commit_any), .dout(dccm_wren_Q), .clk(lsu_free_c2_clk)); // ECC load errors writing to dccm shouldn't fwd to stores in pipe + rvdffe #(32) dccm_wrdata_ff (.*, .din(stbuf_data_any[31:0]), .dout(dccm_wr_data_Q[31:0]), .en(lsu_stbuf_commit_any), .clk(clk)); + rvdff #(1) dccm_wrbyp_dm_loff (.*, .din(dccm_wr_bypass_d_m_lo), .dout(dccm_wr_bypass_d_m_lo_Q), .clk(lsu_free_c2_clk)); + rvdff #(1) dccm_wrbyp_dm_hiff (.*, .din(dccm_wr_bypass_d_m_hi), .dout(dccm_wr_bypass_d_m_hi_Q), .clk(lsu_free_c2_clk)); + rvdff #(32) store_data_rff (.*, .din(store_data_m[31:0]), .dout(store_data_r[31:0]), .clk(lsu_store_c1_r_clk)); + + end else begin: L2U1_Plus1_0 + + logic [31:0] store_data_hi_m, store_data_lo_m; + logic [63:0] store_data_mask; + assign {store_data_hi_m[31:0] , store_data_lo_m[31:0]} = {32'b0,store_data_m[31:0]} << 8*lsu_addr_m[1:0]; + + for (genvar i=0; i<4; i++) begin + assign store_data_hi_r_in[(8*i)+7:(8*i)] = store_byteen_ext_m[i+4] ? store_data_hi_m[(8*i)+7:(8*i)] : + ((lsu_stbuf_commit_any & dccm_wr_bypass_d_m_hi) ? stbuf_data_any[(8*i)+7:(8*i)] : sec_data_hi_m[(8*i)+7:(8*i)]); + assign store_data_lo_r_in[(8*i)+7:(8*i)] = store_byteen_ext_m[i] ? store_data_lo_m[(8*i)+7:(8*i)] : + ((lsu_stbuf_commit_any & dccm_wr_bypass_d_m_lo) ? 
stbuf_data_any[(8*i)+7:(8*i)] : sec_data_lo_m[(8*i)+7:(8*i)]); + + assign store_datafn_lo_r[(8*i)+7:(8*i)] = (lsu_stbuf_commit_any & dccm_wr_bypass_d_r_lo & ~store_byteen_ext_r[i]) ? stbuf_data_any[(8*i)+7:(8*i)] : store_data_lo_r[(8*i)+7:(8*i)]; + assign store_datafn_hi_r[(8*i)+7:(8*i)] = (lsu_stbuf_commit_any & dccm_wr_bypass_d_r_hi & ~store_byteen_ext_r[i+4]) ? stbuf_data_any[(8*i)+7:(8*i)] : store_data_hi_r[(8*i)+7:(8*i)]; + end // for (genvar i=0; i> 8*lsu_addr_r[1:0]) & store_data_mask[31:0]; + + rvdff #(pt.DCCM_DATA_WIDTH) store_data_hi_rff (.*, .din(store_data_hi_r_in[pt.DCCM_DATA_WIDTH-1:0]), .dout(store_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .clk(lsu_store_c1_r_clk)); + rvdff #(pt.DCCM_DATA_WIDTH) store_data_lo_rff (.*, .din(store_data_lo_r_in[pt.DCCM_DATA_WIDTH-1:0]), .dout(store_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .clk(lsu_store_c1_r_clk)); + + end + + assign dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0] = dccm_rd_data_lo[pt.DCCM_DATA_WIDTH-1:0]; // for ld choose dccm_out + assign dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0] = dccm_rd_data_hi[pt.DCCM_DATA_WIDTH-1:0]; // for ld this is used for ecc + + assign dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_lo[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH]; + assign dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_hi[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH]; + + // PIC signals. 
PIC ignores the lower 2 bits of address since PIC memory registers are 32-bits + assign picm_wren = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_pic_r & lsu_commit_r; + assign picm_rden = lsu_pkt_d.valid & lsu_pkt_d.load & addr_in_pic_d; + assign picm_mken = lsu_pkt_d.valid & lsu_pkt_d.store & addr_in_pic_d; // Get the mask for stores + assign picm_rdaddr[31:0] = pt.PIC_BASE_ADDR | {17'b0,lsu_addr_d[14:0]}; + + assign picm_wraddr[31:0] = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},lsu_addr_r[pt.PIC_BITS-1:0]}; + + assign picm_wr_data[31:0] = store_datafn_lo_r[31:0]; + + assign picm_mask_data_m[31:0] = picm_rd_data_m[31:0]; + assign picm_rd_data_m[63:0] = {picm_rd_data[31:0],picm_rd_data[31:0]}; + + if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable + rvdff #(1) dccm_rden_mff (.*, .din(lsu_dccm_rden_d), .dout(lsu_dccm_rden_m), .clk(lsu_c2_m_clk)); + rvdff #(1) dccm_rden_rff (.*, .din(lsu_dccm_rden_m), .dout(lsu_dccm_rden_r), .clk(lsu_c2_r_clk)); + end else begin: Gen_dccm_disable + assign lsu_dccm_rden_m = '0; + assign lsu_dccm_rden_r = '0; + end + + // ECC correction flops since dccm write happens next cycle + // We are writing to dccm in r+1 for ecc correction since fast_int needs to be blocked in decode - 1. We can probably write in r for plus0 configuration since we know ecc error in M. 
+ // In that case these (_ff) flops are needed only in plus1 configuration + rvdff #(1) ld_double_ecc_error_rff (.*, .din(lsu_double_ecc_error_r), .dout(lsu_double_ecc_error_r_ff), .clk(lsu_free_c2_clk)); + rvdff #(1) ld_single_ecc_error_hi_rff (.*, .din(ld_single_ecc_error_hi_r_ns), .dout(ld_single_ecc_error_hi_r_ff), .clk(lsu_free_c2_clk)); + rvdff #(1) ld_single_ecc_error_lo_rff (.*, .din(ld_single_ecc_error_lo_r_ns), .dout(ld_single_ecc_error_lo_r_ff), .clk(lsu_free_c2_clk)); + rvdffe #(pt.DCCM_BITS) ld_sec_addr_hi_rff (.*, .din(end_addr_r[pt.DCCM_BITS-1:0]), .dout(ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0]), .en(ld_single_ecc_error_r), .clk(clk)); + rvdffe #(pt.DCCM_BITS) ld_sec_addr_lo_rff (.*, .din(lsu_addr_r[pt.DCCM_BITS-1:0]), .dout(ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0]), .en(ld_single_ecc_error_r), .clk(clk)); + +`ifdef LSU_ASSERT_ON + assert_ecc_kill_lo: assert #0 (~(ld_single_ecc_error_lo_r & kill_ecc_corr_lo_r)); + assert_ecc_kill_hi: assert #0 (~(ld_single_ecc_error_hi_r & kill_ecc_corr_hi_r)); + + // Load single ECC error correction implies commit/dma + property ld_single_ecc_error_commit; + @(posedge clk) disable iff(~rst_l) (ld_single_ecc_error_r_ff & dccm_wren) |-> ($past(lsu_commit_r | lsu_pkt_r.dma)); + endproperty + assert_ld_single_ecc_error_commit: assert property (ld_single_ecc_error_commit) else + $display("No commit or DMA but ECC correction happened"); + + +`endif + +endmodule diff --git a/design/lsu/el2_lsu_dccm_mem.sv b/design/lsu/el2_lsu_dccm_mem.sv new file mode 100644 index 0000000..3f8a963 --- /dev/null +++ b/design/lsu/el2_lsu_dccm_mem.sv @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// +// Owner: +// Function: DCCM for LSU pipe +// Comments: Single ported memory. One RAM instance per bank; a bank uses the write address when it is being written, otherwise the read address. +// +// +// DC1 -> DC2 -> DC3 -> DC4 (Commit) +// +// //******************************************************************************** + + +module el2_lsu_dccm_mem +import el2_pkg::*; +#( +`include "el2_param.vh" + )( + input logic clk, // clock + input logic rst_l, + input logic clk_override, // clock override + + input logic dccm_wren, // write enable + input logic dccm_rden, // read enable + input logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // write address (lo) + input logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // write address (hi); enables a second bank when its bank bits differ from the lo address + input logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // read address + input logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // read address for the upper bank in case of a misaligned access + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // write data for the bank selected by the lo address + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // write data for the bank selected by the hi address when the lo/hi banks differ + + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // read data from the lo bank + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // read data from the hi bank + + input logic scan_mode +); + + + localparam DCCM_WIDTH_BITS = $clog2(pt.DCCM_BYTE_WIDTH); + localparam DCCM_INDEX_BITS = (pt.DCCM_BITS - pt.DCCM_BANK_BITS - pt.DCCM_WIDTH_BITS); + localparam DCCM_INDEX_DEPTH = ((pt.DCCM_SIZE)*1024)/((pt.DCCM_BYTE_WIDTH)*(pt.DCCM_NUM_BANKS)); // Depth of memory bank + + logic [pt.DCCM_NUM_BANKS-1:0] wren_bank; + logic 
[pt.DCCM_NUM_BANKS-1:0] rden_bank; + logic [pt.DCCM_NUM_BANKS-1:0] [pt.DCCM_BITS-1:(pt.DCCM_BANK_BITS+2)] addr_bank; + logic [pt.DCCM_BITS-1:(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)] rd_addr_even, rd_addr_odd; + logic rd_unaligned, wr_unaligned; + logic [pt.DCCM_NUM_BANKS-1:0] [pt.DCCM_FDATA_WIDTH-1:0] dccm_bank_dout; + logic [pt.DCCM_FDATA_WIDTH-1:0] wrdata; + + logic [pt.DCCM_NUM_BANKS-1:0][pt.DCCM_FDATA_WIDTH-1:0] wr_data_bank; + + logic [(DCCM_WIDTH_BITS+pt.DCCM_BANK_BITS-1):DCCM_WIDTH_BITS] dccm_rd_addr_lo_q; + logic [(DCCM_WIDTH_BITS+pt.DCCM_BANK_BITS-1):DCCM_WIDTH_BITS] dccm_rd_addr_hi_q; + + logic [pt.DCCM_NUM_BANKS-1:0] dccm_clken; + + assign rd_unaligned = (dccm_rd_addr_lo[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] != dccm_rd_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]); + assign wr_unaligned = (dccm_wr_addr_lo[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] != dccm_wr_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]); + + // Select the read data from the bank addressed by the flopped bank-select bits. NOTE(review): lo indexes with pt.DCCM_WIDTH_BITS, hi with the local DCCM_WIDTH_BITS - presumably equal, confirm + assign dccm_rd_data_lo[pt.DCCM_FDATA_WIDTH-1:0] = dccm_bank_dout[dccm_rd_addr_lo_q[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]][pt.DCCM_FDATA_WIDTH-1:0]; + assign dccm_rd_data_hi[pt.DCCM_FDATA_WIDTH-1:0] = dccm_bank_dout[dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]][pt.DCCM_FDATA_WIDTH-1:0]; + + // Per-bank write/read enables, address mux and write-data mux + + // One RAM per bank: pt.DCCM_NUM_BANKS banks of DCCM_INDEX_DEPTH entries (the RAM instances below are 39 bits wide) + for (genvar i=0; i<32'(pt.DCCM_NUM_BANKS); i++) begin: mem_bank + assign wren_bank[i] = dccm_wren & ((dccm_wr_addr_hi[2+:pt.DCCM_BANK_BITS] == i) | (dccm_wr_addr_lo[2+:pt.DCCM_BANK_BITS] == i)); + assign rden_bank[i] = dccm_rden & ((dccm_rd_addr_hi[2+:pt.DCCM_BANK_BITS] == i) | (dccm_rd_addr_lo[2+:pt.DCCM_BANK_BITS] == i)); + assign addr_bank[i][(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] = wren_bank[i] ? (((dccm_wr_addr_hi[2+:pt.DCCM_BANK_BITS] == i) & wr_unaligned) ? 
+ dccm_wr_addr_hi[(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] : + dccm_wr_addr_lo[(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS]) : + (((dccm_rd_addr_hi[2+:pt.DCCM_BANK_BITS] == i) & rd_unaligned) ? + dccm_rd_addr_hi[(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] : + dccm_rd_addr_lo[(pt.DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS]); + + assign wr_data_bank[i] = ((dccm_wr_addr_hi[2+:pt.DCCM_BANK_BITS] == i) & wr_unaligned) ? dccm_wr_data_hi[pt.DCCM_FDATA_WIDTH-1:0] : dccm_wr_data_lo[pt.DCCM_FDATA_WIDTH-1:0]; + + // clock gating section: a bank's RAM is enabled only when it is written or read, or when clk_override is set + assign dccm_clken[i] = (wren_bank[i] | rden_bank[i] | clk_override) ; + // end clock gating section + +`ifdef VERILATOR + el2_ram #(DCCM_INDEX_DEPTH,39) ram ( + // Primary ports + .ME(dccm_clken[i]), + .CLK(clk), + .WE(wren_bank[i]), + .ADR(addr_bank[i]), + .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]), + .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]), + .* + ); + +`else + if (DCCM_INDEX_DEPTH == 32768) begin : dccm + ram_32768x39 dccm_bank ( + // Primary ports + .ME(dccm_clken[i]), + .CLK(clk), + .WE(wren_bank[i]), + .ADR(addr_bank[i]), + .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]), + .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]), + .* + ); + end + else if (DCCM_INDEX_DEPTH == 16384) begin : dccm + ram_16384x39 dccm_bank ( + // Primary ports + .ME(dccm_clken[i]), + .CLK(clk), + .WE(wren_bank[i]), + .ADR(addr_bank[i]), + .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]), + .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]), + .* + ); + end + else if (DCCM_INDEX_DEPTH == 8192) begin : dccm + ram_8192x39 dccm_bank ( + // Primary ports + .ME(dccm_clken[i]), + .CLK(clk), + .WE(wren_bank[i]), + .ADR(addr_bank[i]), + .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]), + .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]), + .* + ); + end + else if (DCCM_INDEX_DEPTH == 4096) begin : dccm + ram_4096x39 dccm_bank ( + // Primary ports + .ME(dccm_clken[i]), + .CLK(clk), + .WE(wren_bank[i]), + .ADR(addr_bank[i]), + 
.D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]), + .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]), + .* + ); + end + else if (DCCM_INDEX_DEPTH == 3072) begin : dccm + ram_3072x39 dccm_bank ( + // Primary ports + .ME(dccm_clken[i]), + .CLK(clk), + .WE(wren_bank[i]), + .ADR(addr_bank[i]), + .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]), + .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]), + .* + ); + end + else if (DCCM_INDEX_DEPTH == 2048) begin : dccm + ram_2048x39 dccm_bank ( + // Primary ports + .ME(dccm_clken[i]), + .CLK(clk), + .WE(wren_bank[i]), + .ADR(addr_bank[i]), + .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]), + .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]), + .* + ); + end + else if (DCCM_INDEX_DEPTH == 1024) begin : dccm + ram_1024x39 dccm_bank ( + // Primary ports + .ME(dccm_clken[i]), + .CLK(clk), + .WE(wren_bank[i]), + .ADR(addr_bank[i]), + .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]), + .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]), + .* + ); + end + else if (DCCM_INDEX_DEPTH == 512) begin : dccm + ram_512x39 dccm_bank ( + // Primary ports + .ME(dccm_clken[i]), + .CLK(clk), + .WE(wren_bank[i]), + .ADR(addr_bank[i]), + .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]), + .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]), + .* + ); + end + else if (DCCM_INDEX_DEPTH == 256) begin : dccm + ram_256x39 dccm_bank ( + // Primary ports + .ME(dccm_clken[i]), + .CLK(clk), + .WE(wren_bank[i]), + .ADR(addr_bank[i]), + .D(wr_data_bank[i][pt.DCCM_FDATA_WIDTH-1:0]), + .Q(dccm_bank_dout[i][pt.DCCM_FDATA_WIDTH-1:0]), + .* + ); + end +`endif // VERILATOR + end : mem_bank + + // Flops: bank-select bits of the read addresses, used by the read-data selection above + rvdffs #(pt.DCCM_BANK_BITS) rd_addr_lo_ff (.*, .din(dccm_rd_addr_lo[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]), .dout(dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]), .en(1'b1)); + rvdffs #(pt.DCCM_BANK_BITS) rd_addr_hi_ff (.*, .din(dccm_rd_addr_hi[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]), .dout(dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]), .en(1'b1)); + +`undef EL2_LOCAL_DCCM_RAM_TEST_PORTS + 
+endmodule // el2_lsu_dccm_mem + + diff --git a/design/lsu/el2_lsu_ecc.sv b/design/lsu/el2_lsu_ecc.sv new file mode 100644 index 0000000..e00d0aa --- /dev/null +++ b/design/lsu/el2_lsu_ecc.sv @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// +// Owner: +// Function: ECC detection/correction of DCCM read data and ECC generation for DCCM write data +// Comments: The decoders check R-stage data when pt.LOAD_TO_USE_PLUS1 == 1, otherwise M-stage data +// +// +// DC1 -> DC2 -> DC3 -> DC4 (Commit) +// +//******************************************************************************** +module el2_lsu_ecc +import el2_pkg::*; +#( +`include "el2_param.vh" + ) +( + input logic clk, + input logic lsu_c2_r_clk, // clocks + input logic rst_l, + input logic scan_mode, // scan + + input el2_lsu_pkt_t lsu_pkt_m, // packet in m + input el2_lsu_pkt_t lsu_pkt_r, // packet in r + input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, + + input logic dec_tlu_core_ecc_disable, // disables the ecc computation and error flagging + + input logic lsu_dccm_rden_r, // dccm rden + input logic addr_in_dccm_r, // address in dccm + input logic [pt.DCCM_BITS-1:0] lsu_addr_r, // start address + input logic [pt.DCCM_BITS-1:0] end_addr_r, // end address + input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r, // data from the dccm + input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r, // data from the dccm 
+ input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_r, // data from the dccm + ecc + input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_r, + output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r, + output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r, + output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff, + output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, + + input logic ld_single_ecc_error_r, // ld has a single ecc error + input logic ld_single_ecc_error_r_ff, // ld has a single ecc error + input logic lsu_dccm_rden_m, // dccm rden + input logic addr_in_dccm_m, // address in dccm + input logic [pt.DCCM_BITS-1:0] lsu_addr_m, // start address + input logic [pt.DCCM_BITS-1:0] end_addr_m, // end address + input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m, // raw data from mem + input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m, // raw data from mem + input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_m, // ecc read out from mem + input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_m, // ecc read out from mem + output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m, + output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m, + + input logic dma_dccm_wen, + input logic [31:0] dma_dccm_wdata_lo, + input logic [31:0] dma_dccm_wdata_hi, + output logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata + output logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, // ECC bits for the DMA wdata + + output logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, + output logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, + output logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_lo_r_ff, + + output logic single_ecc_error_hi_r, // sec detected + output logic single_ecc_error_lo_r, // sec detected on lower dccm bank + output logic lsu_single_ecc_error_r, // or of the 2 + output logic lsu_double_ecc_error_r, // double error detected + + output logic lsu_single_ecc_error_m, // or of the 2 + output logic lsu_double_ecc_error_m // double error detected + + ); + 
+ logic is_ldst_r; + logic is_ldst_hi_any, is_ldst_lo_any; + logic [pt.DCCM_DATA_WIDTH-1:0] dccm_wdata_hi_any, dccm_wdata_lo_any; + logic [pt.DCCM_ECC_WIDTH-1:0] dccm_wdata_ecc_hi_any, dccm_wdata_ecc_lo_any; + logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_any, dccm_rdata_lo_any; + logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_any, dccm_data_ecc_lo_any; + logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_any, sec_data_lo_any; + logic single_ecc_error_hi_any, single_ecc_error_lo_any; + logic double_ecc_error_hi_any, double_ecc_error_lo_any; + + logic double_ecc_error_hi_m, double_ecc_error_lo_m; + logic double_ecc_error_hi_r, double_ecc_error_lo_r; + + logic [6:0] ecc_out_hi_nc, ecc_out_lo_nc; + + + if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1 + logic ldst_dual_m, ldst_dual_r; + logic is_ldst_m; + logic is_ldst_hi_m, is_ldst_lo_m; + logic is_ldst_hi_r, is_ldst_lo_r; + + assign ldst_dual_r = (lsu_addr_r[2] != end_addr_r[2]); + assign is_ldst_r = lsu_pkt_r.valid & (lsu_pkt_r.load | lsu_pkt_r.store) & addr_in_dccm_r & lsu_dccm_rden_r; + assign is_ldst_lo_r = is_ldst_r & ~dec_tlu_core_ecc_disable; + assign is_ldst_hi_r = is_ldst_r & (ldst_dual_r | lsu_pkt_r.dma) & ~dec_tlu_core_ecc_disable; // Always check the ECC Hi/Lo for DMA since we don't align for DMA + + assign is_ldst_hi_any = is_ldst_hi_r; + assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0]; + assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0]; + assign is_ldst_lo_any = is_ldst_lo_r; + assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0]; + assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0]; + + assign sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0] = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0]; + assign single_ecc_error_hi_r = single_ecc_error_hi_any; + assign double_ecc_error_hi_r = double_ecc_error_hi_any; + assign sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0] = 
sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0]; + assign single_ecc_error_lo_r = single_ecc_error_lo_any; + assign double_ecc_error_lo_r = double_ecc_error_lo_any; + + assign lsu_single_ecc_error_r = single_ecc_error_hi_r | single_ecc_error_lo_r; + assign lsu_double_ecc_error_r = double_ecc_error_hi_r | double_ecc_error_lo_r; + + end else begin: L2U_Plus1_0 + + logic ldst_dual_m; + logic is_ldst_m; + logic is_ldst_hi_m, is_ldst_lo_m; + + assign ldst_dual_m = (lsu_addr_m[2] != end_addr_m[2]); + assign is_ldst_m = lsu_pkt_m.valid & (lsu_pkt_m.load | lsu_pkt_m.store) & addr_in_dccm_m & lsu_dccm_rden_m; + assign is_ldst_lo_m = is_ldst_m & ~dec_tlu_core_ecc_disable; + assign is_ldst_hi_m = is_ldst_m & (ldst_dual_m | lsu_pkt_m.dma) & ~dec_tlu_core_ecc_disable; // Always check the ECC Hi/Lo for DMA since we don't align for DMA + + assign is_ldst_hi_any = is_ldst_hi_m; + assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0]; + assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0]; + assign is_ldst_lo_any = is_ldst_lo_m; + assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0]; + assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0]; + + assign sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0] = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0]; + assign double_ecc_error_hi_m = double_ecc_error_hi_any; + assign sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0] = sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0]; + assign double_ecc_error_lo_m = double_ecc_error_lo_any; + + assign lsu_single_ecc_error_m = single_ecc_error_hi_any | single_ecc_error_lo_any; + assign lsu_double_ecc_error_m = double_ecc_error_hi_m | double_ecc_error_lo_m; + + // Flops: carry the M-stage ECC results and corrected data into R + rvdff #(1) lsu_single_ecc_err_r (.din(lsu_single_ecc_error_m), .dout(lsu_single_ecc_error_r), .clk(lsu_c2_r_clk), .*); + rvdff #(1) lsu_double_ecc_err_r (.din(lsu_double_ecc_error_m), .dout(lsu_double_ecc_error_r), 
.clk(lsu_c2_r_clk), .*); + rvdff #(.WIDTH(1)) ldst_sec_lo_rff (.din(single_ecc_error_lo_any), .dout(single_ecc_error_lo_r), .clk(lsu_c2_r_clk), .*); + rvdff #(.WIDTH(1)) ldst_sec_hi_rff (.din(single_ecc_error_hi_any), .dout(single_ecc_error_hi_r), .clk(lsu_c2_r_clk), .*); + rvdff #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_hi_rff (.din(sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .clk(lsu_c2_r_clk), .*); + rvdff #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_lo_rff (.din(sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .clk(lsu_c2_r_clk), .*); + + end + + // Logic for ECC generation during write: ECC-correction write-back data has priority, then DMA write data, then store buffer data + assign dccm_wdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? dma_dccm_wdata_lo[pt.DCCM_DATA_WIDTH-1:0] : stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]); + assign dccm_wdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? 
dma_dccm_wdata_hi[pt.DCCM_DATA_WIDTH-1:0] : stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]); + + assign sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]; + assign sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]; + assign stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]; + assign dma_dccm_wdata_ecc_hi[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]; + assign dma_dccm_wdata_ecc_lo[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]; + + // Instantiate ECC blocks + if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable + + //Detect/Repair for Hi + rvecc_decode lsu_ecc_decode_hi ( + // Inputs + .en(is_ldst_hi_any), + .sed_ded (1'b0), // 1 : means only detection + .din(dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0]), + .ecc_in(dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]), + // Outputs + .dout(sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0]), + .ecc_out (ecc_out_hi_nc[6:0]), + .single_ecc_error(single_ecc_error_hi_any), + .double_ecc_error(double_ecc_error_hi_any), + .* + ); + + //Detect/Repair for Lo + rvecc_decode lsu_ecc_decode_lo ( + // Inputs + .en(is_ldst_lo_any), + .sed_ded (1'b0), // 1 : means only detection + .din(dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] ), + .ecc_in(dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]), + // Outputs + .dout(sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0]), + .ecc_out (ecc_out_lo_nc[6:0]), + .single_ecc_error(single_ecc_error_lo_any), + .double_ecc_error(double_ecc_error_lo_any), + .* + ); + + rvecc_encode lsu_ecc_encode_hi ( + //Inputs + .din(dccm_wdata_hi_any[pt.DCCM_DATA_WIDTH-1:0]), + //Outputs + .ecc_out(dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]), + .* + ); + rvecc_encode lsu_ecc_encode_lo ( + //Inputs + .din(dccm_wdata_lo_any[pt.DCCM_DATA_WIDTH-1:0]), + //Outputs + .ecc_out(dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]), + .* + ); + end else begin: Gen_dccm_disable // block: Gen_dccm_enable + assign 
sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0] = '0; + assign sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0] = '0; + assign single_ecc_error_hi_any = '0; + assign double_ecc_error_hi_any = '0; + assign single_ecc_error_lo_any = '0; + assign double_ecc_error_lo_any = '0; + + assign {dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0], dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]} = '0; // No encoders in this branch: tie off the generated ECC here so stbuf_ecc_any and the other ECC outputs keep exactly one driver and read 0 + end + + rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_hi_rplus1ff (.din(sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]), .en(ld_single_ecc_error_r), .clk(clk), .*); + rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_lo_rplus1ff (.din(sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]), .en(ld_single_ecc_error_r), .clk(clk), .*); + +endmodule // el2_lsu_ecc diff --git a/design/lsu/el2_lsu_lsc_ctl.sv b/design/lsu/el2_lsu_lsc_ctl.sv new file mode 100644 index 0000000..ed748b1 --- /dev/null +++ b/design/lsu/el2_lsu_lsc_ctl.sv @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or it's affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: LSU control
+// Comments: Forms the load/store start and end addresses in D, stages the LSU packet,
+//           addresses, fault flags and store data through M and R, and builds the
+//           load result, the exception packet and the fast-interrupt status.
+// DC1 -> DC2 -> DC3 -> DC4 (Commit)
+// NOTE(review): signals in this module use _d/_m/_r stage suffixes; the DC1..DC4 line above looks stale - confirm.
+//********************************************************************************
+module el2_lsu_lsc_ctl
+import el2_pkg::*;
+#(
+`include "el2_param.vh"
+ )(
+   input logic                rst_l,
+
+   // clocks per pipe
+   input logic                lsu_c1_m_clk,
+   input logic                lsu_c1_r_clk,
+   input logic                lsu_c2_m_clk,              // clocks the M-stage packet valid bit
+   input logic                lsu_c2_r_clk,              // clocks the R-stage packet valid bit (and the error packet when LOAD_TO_USE_PLUS1 == 0)
+   input logic                lsu_store_c1_m_clk,        // clocks the M-stage store data flop
+
+   input logic [31:0]         lsu_ld_data_r,
+   input logic [31:0]         lsu_ld_data_corr_r,        // ECC corrected data
+   input logic                lsu_single_ecc_error_r,
+   input logic                lsu_double_ecc_error_r,
+
+   input logic [31:0]         lsu_ld_data_m,
+   input logic                lsu_single_ecc_error_m,
+   input logic                lsu_double_ecc_error_m,
+
+   input logic                flush_m_up,                // clears the valid of non-DMA packets entering M and R
+   input logic                flush_r,                   // blocks lsu_commit_r
+
+   input logic [31:0]         exu_lsu_rs1_d,             // address
+   input logic [31:0]         exu_lsu_rs2_d,             // store data
+
+   input el2_lsu_pkt_t        lsu_p,                     // lsu control packet
+   input logic                dec_lsu_valid_raw_d,       // Raw valid for address computation
+   input logic [11:0]         dec_lsu_offset_d,          // offset; forced to zero when dec_lsu_valid_raw_d is low
+
+   input logic [31:0]         picm_mask_data_m,          // ANDed into store_data_m when addr_in_pic_m is set
+   input logic [31:0]         bus_read_data_m,           // load data selected when the address is external
+   output logic [31:0]        lsu_result_m,              // size-extracted and sign/zero-extended load data
+   output logic [31:0]        lsu_result_corr_r,         // This is the ECC corrected data going to RF
+   // lsu address down the pipe
+   output logic [31:0]        lsu_addr_d,
+   output logic [31:0]        lsu_addr_m,
+   output logic [31:0]        lsu_addr_r,
+   // lsu address down the pipe - needed to check unaligned
+   output logic [31:0]        end_addr_d,
+   output logic [31:0]        end_addr_m,
+   output logic [31:0]        end_addr_r,
+   // store data down the pipe
+   output logic [31:0]        store_data_m,
+
+   input logic [31:0]         dec_tlu_mrac_ff,           // not referenced directly here; presumably consumed by addrcheck through .* - confirm
+   output logic               lsu_exc_m,                 // access fault or misaligned fault in M
+   output logic               is_sideeffects_m,          // not driven directly here; presumably driven by addrcheck through .* - confirm
+   output logic               lsu_commit_r,              // valid, unflushed, non-DMA load or store in R
+   output logic               lsu_single_ecc_error_incr,
+   output el2_lsu_error_pkt_t lsu_error_pkt_r,
+
+   output logic [31:1]        lsu_fir_addr,              // fast interrupt address
+   output logic [1:0]         lsu_fir_error,             // Error during fast interrupt lookup
+
+   // address in dccm/pic/external per pipe stage
+   output logic               addr_in_dccm_d,
+   output logic               addr_in_dccm_m,
+   output logic               addr_in_dccm_r,
+
+   output logic               addr_in_pic_d,
+   output logic               addr_in_pic_m,
+   output logic               addr_in_pic_r,
+
+   output logic               addr_external_m,
+
+   // DMA slave
+   input logic                dma_dccm_req,              // DMA request; sets the D-stage packet valid
+   input logic [31:0]         dma_mem_addr,              // used as rs1 when dec_lsu_valid_raw_d is low
+   input logic [2:0]          dma_mem_sz,                // 0 = byte, 1 = half, 2 = word, 3 = dword
+   input logic                dma_mem_write,             // 1 = store, 0 = load
+   input logic [63:0]         dma_mem_wdata,
+
+   // Store buffer related signals
+   output el2_lsu_pkt_t       lsu_pkt_d,
+   output el2_lsu_pkt_t       lsu_pkt_m,
+   output el2_lsu_pkt_t       lsu_pkt_r,
+
+   input logic                scan_mode
+
+   );
+
+   logic [31:0]        full_addr_d;
+   logic [31:0]        full_end_addr_d;
+   logic [31:0]        lsu_rs1_d;
+   logic [11:0]        lsu_offset_d;
+   logic [31:0]        rs1_d;
+   logic [11:0]        offset_d;
+   logic [12:0]        end_addr_offset_d;
+   logic [2:0]         addr_offset_d;
+
+   logic [63:0]        dma_mem_wdata_shifted;
+   logic               addr_external_d;
+   logic               addr_external_r;
+   logic               access_fault_d, misaligned_fault_d;
+   logic               access_fault_m, misaligned_fault_m;
+
+   logic               fir_dccm_access_error_d, fir_nondccm_access_error_d;
+   logic               fir_dccm_access_error_m, fir_nondccm_access_error_m;
+
+   logic [2:0]         exc_mscause_d, exc_mscause_m;
+   logic [31:0]        rs1_d_raw;
+   logic [31:0]        store_data_d, store_data_pre_m, store_data_m_in;
+   logic [31:0]        bus_read_data_r;
+
+   el2_lsu_pkt_t           dma_pkt_d;
+   el2_lsu_pkt_t           lsu_pkt_m_in, lsu_pkt_r_in;
+   el2_lsu_error_pkt_t     lsu_error_pkt_m;
+
+
+   // Premux the rs1/offset for dma: a core access uses rs1 + offset, a DMA access uses dma_mem_addr + 0
+   assign lsu_rs1_d[31:0] = dec_lsu_valid_raw_d ? exu_lsu_rs1_d[31:0] : dma_mem_addr[31:0];
+   assign lsu_offset_d[11:0] = dec_lsu_offset_d[11:0] & {12{dec_lsu_valid_raw_d}};
+   assign rs1_d_raw[31:0] = lsu_rs1_d[31:0];
+   assign offset_d[11:0] = lsu_offset_d[11:0];
+
+   assign rs1_d[31:0] = (lsu_pkt_d.load_ldst_bypass_d) ? lsu_result_m[31:0] : rs1_d_raw[31:0];   // bypass the load result as the base address
+
+   // generate the ls address
+   // need to refine this if memory is only 128KB
+   rvlsadder   lsadder  (.rs1(rs1_d[31:0]),
+                       .offset(offset_d[11:0]),
+                       .dout(full_addr_d[31:0])
+                       );
+
+   // Module to generate the memory map of the address
+   el2_lsu_addrcheck addrcheck (
+              .start_addr_d(full_addr_d[31:0]),
+              .end_addr_d(full_end_addr_d[31:0]),
+              .rs1_region_d(rs1_d[31:28]),
+              .*
+  );
+
+   // Calculate start/end address for load/store: end = rs1 + sign-extended offset + (access size in bytes - 1)
+   assign addr_offset_d[2:0]      = ({3{lsu_pkt_d.half}} & 3'b01) | ({3{lsu_pkt_d.word}} & 3'b11) | ({3{lsu_pkt_d.dword}} & 3'b111);
+   assign end_addr_offset_d[12:0] = {offset_d[11],offset_d[11:0]} + {9'b0,addr_offset_d[2:0]};
+   assign full_end_addr_d[31:0]   = rs1_d[31:0] + {{19{end_addr_offset_d[12]}},end_addr_offset_d[12:0]};
+   assign end_addr_d[31:0]        = full_end_addr_d[31:0];
+   assign lsu_exc_m               = access_fault_m | misaligned_fault_m;
+
+   // Goes to TLU to increment the ECC error counter
+   assign lsu_single_ecc_error_incr = (lsu_single_ecc_error_r & ~lsu_double_ecc_error_r) & (lsu_commit_r | lsu_pkt_r.dma) & lsu_pkt_r.valid;
+
+   if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1   // error packet is built directly from R-stage signals
+      logic               access_fault_r, misaligned_fault_r;
+      logic [2:0]         exc_mscause_r;
+      logic               fir_dccm_access_error_r, fir_nondccm_access_error_r;
+
+      // Generate exception packet
+      assign lsu_error_pkt_r.exc_valid = (access_fault_r | misaligned_fault_r | lsu_double_ecc_error_r) & lsu_pkt_r.valid & ~lsu_pkt_r.dma & ~lsu_pkt_r.fast_int;
+      assign lsu_error_pkt_r.single_ecc_error = lsu_single_ecc_error_r & ~lsu_error_pkt_r.exc_valid & ~lsu_pkt_r.dma;
+      assign lsu_error_pkt_r.inst_type = lsu_pkt_r.store;
+      assign lsu_error_pkt_r.exc_type  = ~misaligned_fault_r;
+      assign lsu_error_pkt_r.mscause[2:0] = (lsu_double_ecc_error_r & ~misaligned_fault_r & ~access_fault_r) ? 3'h1 : exc_mscause_r[2:0];
+      assign lsu_error_pkt_r.addr[31:0] = lsu_addr_r[31:0];
+
+      assign lsu_fir_error[1:0] = fir_nondccm_access_error_r ? 2'b11 : (fir_dccm_access_error_r ? 2'b10 : ((lsu_pkt_r.fast_int & lsu_double_ecc_error_r) ? 2'b01 : 2'b00));
+
+      rvdff #(1)                             access_fault_rff             (.din(access_fault_m),             .dout(access_fault_r),             .clk(lsu_c1_r_clk), .*);
+      rvdff #(1)                             misaligned_fault_rff         (.din(misaligned_fault_m),         .dout(misaligned_fault_r),         .clk(lsu_c1_r_clk), .*);
+      rvdff #(3)                             exc_mscause_rff              (.din(exc_mscause_m[2:0]),         .dout(exc_mscause_r[2:0]),         .clk(lsu_c1_r_clk), .*);
+      rvdff #(1)                             fir_dccm_access_error_mff    (.din(fir_dccm_access_error_m),    .dout(fir_dccm_access_error_r),    .clk(lsu_c1_r_clk), .*);   // NOTE(review): M->R flop despite the _mff instance name
+      rvdff #(1)                             fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_m), .dout(fir_nondccm_access_error_r), .clk(lsu_c1_r_clk), .*);   // NOTE(review): M->R flop despite the _mff instance name
+
+   end else begin: L2U_Plus1_0   // error packet is built in M and flopped into R
+      logic [1:0] lsu_fir_error_m;
+
+      // Generate exception packet
+      assign lsu_error_pkt_m.exc_valid = (access_fault_m | misaligned_fault_m | lsu_double_ecc_error_m) & lsu_pkt_m.valid & ~lsu_pkt_m.dma & ~lsu_pkt_m.fast_int & ~flush_m_up;
+      assign lsu_error_pkt_m.single_ecc_error = lsu_single_ecc_error_m & ~lsu_error_pkt_m.exc_valid & ~lsu_pkt_m.dma;
+      assign lsu_error_pkt_m.inst_type = lsu_pkt_m.store;
+      assign lsu_error_pkt_m.exc_type  = ~misaligned_fault_m;
+      assign lsu_error_pkt_m.mscause[2:0] = (lsu_double_ecc_error_m & ~misaligned_fault_m & ~access_fault_m) ? 3'h1 : exc_mscause_m[2:0];
+      assign lsu_error_pkt_m.addr[31:0] = lsu_addr_m[31:0];
+
+      assign lsu_fir_error_m[1:0] = fir_nondccm_access_error_m ? 2'b11 : (fir_dccm_access_error_m ? 2'b10 : ((lsu_pkt_m.fast_int & lsu_double_ecc_error_m) ? 2'b01 : 2'b00));
+
+      rvdff #($bits(el2_lsu_error_pkt_t)) lsu_error_pkt_rff(.*, .din(lsu_error_pkt_m),      .dout(lsu_error_pkt_r),    .clk(lsu_c2_r_clk));
+      rvdff #(2)                          lsu_fir_error_rff(.*, .din(lsu_fir_error_m[1:0]), .dout(lsu_fir_error[1:0]), .clk(lsu_c2_r_clk));
+   end
+
+   //Create DMA packet
+   always_comb begin
+      dma_pkt_d = '0;
+      dma_pkt_d.valid   = dma_dccm_req;
+      dma_pkt_d.dma     = 1'b1;
+      dma_pkt_d.store   = dma_mem_write;
+      dma_pkt_d.load    = ~dma_mem_write;
+      dma_pkt_d.by      = (dma_mem_sz[2:0] == 3'b0);
+      dma_pkt_d.half    = (dma_mem_sz[2:0] == 3'b1);
+      dma_pkt_d.word    = (dma_mem_sz[2:0] == 3'b10);
+      dma_pkt_d.dword   = (dma_mem_sz[2:0] == 3'b11);
+   end
+
+   always_comb begin
+      lsu_pkt_d = dec_lsu_valid_raw_d ? lsu_p : dma_pkt_d;
+      lsu_pkt_m_in = lsu_pkt_d;
+      lsu_pkt_r_in = lsu_pkt_m;
+      // the valid bits are overridden below; flush_m_up never clears a DMA packet (nor a fast_int packet in D)
+      lsu_pkt_d.valid = (lsu_p.valid & ~(flush_m_up & ~lsu_p.fast_int)) | dma_dccm_req;
+      lsu_pkt_m_in.valid = lsu_pkt_d.valid & ~(flush_m_up & ~lsu_pkt_d.dma);
+      lsu_pkt_r_in.valid = lsu_pkt_m.valid & ~(flush_m_up & ~lsu_pkt_m.dma) ;
+   end
+
+   // C2 clock for valid and C1 for other bits of packet
+   rvdff #(1) lsu_pkt_vldmff (.*, .din(lsu_pkt_m_in.valid), .dout(lsu_pkt_m.valid), .clk(lsu_c2_m_clk));
+   rvdff #(1) lsu_pkt_vldrff (.*, .din(lsu_pkt_r_in.valid), .dout(lsu_pkt_r.valid), .clk(lsu_c2_r_clk));
+   // bit 0 of the packet is skipped below; assumes valid is bit 0 of el2_lsu_pkt_t - TODO confirm in el2_pkg
+   rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_mff (.*, .din(lsu_pkt_m_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_m[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_m_clk));
+   rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_rff (.*, .din(lsu_pkt_r_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_r[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_r_clk));
+
+
+   // Load result: external accesses take the bus data, all others the LSU load data
+   if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U1_Plus1_1
+      logic [31:0] lsu_ld_datafn_r, lsu_ld_datafn_corr_r;
+
+      assign lsu_ld_datafn_r[31:0]  = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_r[31:0];
+      assign lsu_ld_datafn_corr_r[31:0]  = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
+
+      // this is really R stage but don't want to make all the changes to support M,R buses
+      assign lsu_result_m[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {24'b0,lsu_ld_datafn_r[7:0]}) |
+                                  ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_r[15:0]}) |
+                                  ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {{24{  lsu_ld_datafn_r[7]}}, lsu_ld_datafn_r[7:0]}) |
+                                  ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{  lsu_ld_datafn_r[15]}},lsu_ld_datafn_r[15:0]}) |
+                                  ({32{lsu_pkt_r.word}}                     & lsu_ld_datafn_r[31:0]);
+
+      // this signal is used for gpr update
+      assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
+                                       ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
+                                       ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {{24{  lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
+                                       ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{  lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
+                                       ({32{lsu_pkt_r.word}}                     & lsu_ld_datafn_corr_r[31:0]);
+
+   end else begin: L2U1_Plus1_0 // block: L2U1_Plus1_1
+      logic [31:0] lsu_ld_datafn_m, lsu_ld_datafn_corr_r;
+
+      assign lsu_ld_datafn_m[31:0]  = addr_external_m ? bus_read_data_m[31:0] : lsu_ld_data_m[31:0];
+      assign lsu_ld_datafn_corr_r[31:0]  = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
+
+      // this result must look at prior stores and merge them in
+      assign lsu_result_m[31:0] = ({32{ lsu_pkt_m.unsign & lsu_pkt_m.by  }} & {24'b0,lsu_ld_datafn_m[7:0]}) |
+                                  ({32{ lsu_pkt_m.unsign & lsu_pkt_m.half}} & {16'b0,lsu_ld_datafn_m[15:0]}) |
+                                  ({32{~lsu_pkt_m.unsign & lsu_pkt_m.by  }} & {{24{  lsu_ld_datafn_m[7]}}, lsu_ld_datafn_m[7:0]}) |
+                                  ({32{~lsu_pkt_m.unsign & lsu_pkt_m.half}} & {{16{  lsu_ld_datafn_m[15]}},lsu_ld_datafn_m[15:0]}) |
+                                  ({32{lsu_pkt_m.word}}                     & lsu_ld_datafn_m[31:0]);
+
+      // this signal is used for gpr update
+      assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
+                                       ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
+                                       ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {{24{  lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
+                                       ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{  lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
+                                       ({32{lsu_pkt_r.word}}                     & lsu_ld_datafn_corr_r[31:0]);
+   end
+
+   // Fast interrupt address
+   assign lsu_fir_addr[31:1]    = lsu_ld_data_corr_r[31:1];
+
+   // absence load/store all 0's
+   assign lsu_addr_d[31:0] = full_addr_d[31:0];
+
+   // Interrupt as a flush source allows the WB to occur
+   assign lsu_commit_r = lsu_pkt_r.valid & (lsu_pkt_r.store | lsu_pkt_r.load) & ~flush_r & ~lsu_pkt_r.dma;
+
+   assign dma_mem_wdata_shifted[63:0] = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000};   // Shift the dma data to lower bits to make it consistent to lsu stores
+   assign store_data_d[31:0] = dma_dccm_req ? dma_mem_wdata_shifted[31:0] : exu_lsu_rs2_d[31:0];  // Write to PIC still happens in r stage
+
+   assign store_data_m_in[31:0] = (lsu_pkt_d.store_data_bypass_d) ? lsu_result_m[31:0] : store_data_d[31:0];
+
+   assign store_data_m[31:0] = (picm_mask_data_m[31:0] | {32{~addr_in_pic_m}}) & ((lsu_pkt_m.store_data_bypass_m) ? lsu_result_m[31:0] : store_data_pre_m[31:0]);
+
+   // Pipeline flops: D->M on the *_m_clk clocks, M->R on lsu_c1_r_clk
+   rvdff #(32) sdmff (.*, .din(store_data_m_in[31:0]), .dout(store_data_pre_m[31:0]), .clk(lsu_store_c1_m_clk));
+
+   rvdff #(32) samff (.*, .din(lsu_addr_d[31:0]), .dout(lsu_addr_m[31:0]), .clk(lsu_c1_m_clk));
+   rvdff #(32) sarff (.*, .din(lsu_addr_m[31:0]), .dout(lsu_addr_r[31:0]), .clk(lsu_c1_r_clk));
+
+   rvdff #(32) end_addr_mff (.*, .din(end_addr_d[31:0]), .dout(end_addr_m[31:0]), .clk(lsu_c1_m_clk));
+   rvdff #(32) end_addr_rff (.*, .din(end_addr_m[31:0]), .dout(end_addr_r[31:0]), .clk(lsu_c1_r_clk));
+
+   rvdff #(1) addr_in_dccm_mff(.din(addr_in_dccm_d), .dout(addr_in_dccm_m), .clk(lsu_c1_m_clk), .*);
+   rvdff #(1) addr_in_dccm_rff(.din(addr_in_dccm_m), .dout(addr_in_dccm_r), .clk(lsu_c1_r_clk), .*);
+
+   rvdff #(1) addr_in_pic_mff(.din(addr_in_pic_d), .dout(addr_in_pic_m), .clk(lsu_c1_m_clk), .*);
+   rvdff #(1) addr_in_pic_rff(.din(addr_in_pic_m), .dout(addr_in_pic_r), .clk(lsu_c1_r_clk), .*);
+
+   rvdff #(1) addr_external_mff(.din(addr_external_d), .dout(addr_external_m), .clk(lsu_c1_m_clk), .*);
+   rvdff #(1) addr_external_rff(.din(addr_external_m), .dout(addr_external_r), .clk(lsu_c1_r_clk), .*);
+
+   rvdff #(1) access_fault_mff     (.din(access_fault_d),     .dout(access_fault_m),     .clk(lsu_c1_m_clk), .*);
+   rvdff #(1) misaligned_fault_mff (.din(misaligned_fault_d), .dout(misaligned_fault_m), .clk(lsu_c1_m_clk), .*);
+   rvdff #(3) exc_mscause_mff      (.din(exc_mscause_d[2:0]), .dout(exc_mscause_m[2:0]), .clk(lsu_c1_m_clk), .*);
+
+   rvdff #(1) fir_dccm_access_error_mff    (.din(fir_dccm_access_error_d),    .dout(fir_dccm_access_error_m),    .clk(lsu_c1_m_clk), .*);
+   rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_d), .dout(fir_nondccm_access_error_m), .clk(lsu_c1_m_clk), .*);
+
+   rvdff #(32) bus_read_data_r_ff (.*, .din(bus_read_data_m[31:0]), .dout(bus_read_data_r[31:0]), .clk(lsu_c1_r_clk));
+
+endmodule
diff --git a/design/lsu/el2_lsu_stbuf.sv b/design/lsu/el2_lsu_stbuf.sv
new file mode 100644
index
0000000..79f339c
--- /dev/null
+++ b/design/lsu/el2_lsu_stbuf.sv
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//********************************************************************************
+// $Id$
+//
+//
+// Owner:
+// Function: Store Buffer
+// Comments: Dual writes and single drain
+//           Entries are allocated at WrPtr and drained at RdPtr. A committed store whose
+//           word address matches a live entry is merged (coalesced) into that entry.
+// DC1 -> DC2 -> DC3 -> DC4 (Commit)
+// Also forwards store bytes from the buffer and from the R-stage store to a load in M.
+//********************************************************************************
+
+
+module el2_lsu_stbuf
+import el2_pkg::*;
+#(
+`include "el2_param.vh"
+ )
+(
+   input logic                           clk,                         // core clock
+   input logic                           rst_l,                       // reset
+
+   input logic                           lsu_c1_m_clk,                // clock
+   input logic                           lsu_c1_r_clk,                // lsu pipe clock
+   input logic                           lsu_stbuf_c1_clk,            // stbuf clock
+   input logic                           lsu_free_c2_clk,             // free clk
+
+   // Store Buffer input
+   input logic                           store_stbuf_reqvld_r,        // core instruction goes to stbuf
+   input logic                           lsu_commit_r,                // lsu commits
+   input logic                           dec_lsu_valid_raw_d,         // Speculative decode valid
+   input logic [pt.DCCM_DATA_WIDTH-1:0]  store_data_hi_r,             // merged data from the dccm for stores. This is used for fwding
+   input logic [pt.DCCM_DATA_WIDTH-1:0]  store_data_lo_r,             // merged data from the dccm for stores. This is used for fwding
+   input logic [pt.DCCM_DATA_WIDTH-1:0]  store_datafn_hi_r,           // merged data from the dccm for stores
+   input logic [pt.DCCM_DATA_WIDTH-1:0]  store_datafn_lo_r,           // merged data from the dccm for stores
+
+   // Store Buffer output
+   output logic                          stbuf_reqvld_any,            // stbuf is draining
+   output logic                          stbuf_reqvld_flushed_any,    // Top entry is flushed
+   output logic [pt.LSU_SB_BITS-1:0]     stbuf_addr_any,              // address
+   output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any,              // stbuf data
+
+   input  logic                          lsu_stbuf_commit_any,        // pop the stbuf as it commits
+   output logic                          lsu_stbuf_full_any,          // stbuf is full
+   output logic                          lsu_stbuf_empty_any,         // stbuf is empty
+   output logic                          ldst_stbuf_reqvld_r,         // needed for clocking
+
+   input logic [pt.LSU_SB_BITS-1:0]      lsu_addr_d,                  // lsu address
+   input logic [31:0]                    lsu_addr_m,
+   input logic [31:0]                    lsu_addr_r,
+
+   input logic [pt.LSU_SB_BITS-1:0]      end_addr_d,                  // lsu end address - needed to check unaligned
+   input logic [31:0]                    end_addr_m,
+   input logic [31:0]                    end_addr_r,
+
+   input logic                           addr_in_dccm_m,              // address is in dccm
+   input logic                           addr_in_dccm_r,              // address is in dccm
+
+   // Forwarding signals
+   input logic                           lsu_cmpen_m,                 // needed for forwarding stbuf - load
+   input el2_lsu_pkt_t                   lsu_pkt_m,
+   input el2_lsu_pkt_t                   lsu_pkt_r,
+
+   output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m,          // stbuf data
+   output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m,
+   output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m,
+   output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m,
+
+   input  logic                          scan_mode
+
+);
+
+
+   localparam DEPTH      = pt.LSU_STBUF_DEPTH;
+   localparam DATA_WIDTH = pt.DCCM_DATA_WIDTH;
+   localparam BYTE_WIDTH = pt.DCCM_BYTE_WIDTH;
+   localparam DEPTH_LOG2 = $clog2(DEPTH);
+
+   // These are the fields in the store queue
+   logic [DEPTH-1:0]                     stbuf_vld;
+   logic [DEPTH-1:0]                     stbuf_dma_kill;
+   logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addr;
+   logic [DEPTH-1:0][BYTE_WIDTH-1:0]     stbuf_byteen;
+   logic [DEPTH-1:0][DATA_WIDTH-1:0]     stbuf_data;
+
+   logic [DEPTH-1:0]                     sel_lo;
+   logic [DEPTH-1:0]                     stbuf_wr_en;
+   logic [DEPTH-1:0]                     stbuf_dma_kill_en;
+   logic [DEPTH-1:0]                     stbuf_reset;
+   logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addrin;
+   logic [DEPTH-1:0][DATA_WIDTH-1:0]     stbuf_datain;
+   logic [DEPTH-1:0][BYTE_WIDTH-1:0]     stbuf_byteenin;
+
+   logic [7:0]             store_byteen_ext_r;
+   logic [BYTE_WIDTH-1:0]  store_byteen_hi_r;
+   logic [BYTE_WIDTH-1:0]  store_byteen_lo_r;
+
+   logic                   WrPtrEn, RdPtrEn;
+   logic [DEPTH_LOG2-1:0]  WrPtr, RdPtr;
+   logic [DEPTH_LOG2-1:0]  NxtWrPtr, NxtRdPtr;
+   logic [DEPTH_LOG2-1:0]  WrPtrPlus1, WrPtrPlus2, RdPtrPlus1;
+   logic                   ldst_dual_d, ldst_dual_m, ldst_dual_r;
+
+   logic                   dual_stbuf_write_r;
+
+   logic                   isdccmst_m, isdccmst_r;
+   logic [3:0]             stbuf_numvld_any, stbuf_specvld_any;
+   logic [1:0]             stbuf_specvld_m, stbuf_specvld_r;//, stbuf_eccvld_m, stbuf_eccvld_r;
+
+   logic                                       cmpen_hi_m, cmpen_lo_m;
+   logic [pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] cmpaddr_hi_m, cmpaddr_lo_m;
+
+   // variables to detect matching from the store queue
+   logic [DEPTH-1:0]                 stbuf_match_hi, stbuf_match_lo;
+   logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_fwdbyteenvec_hi, stbuf_fwdbyteenvec_lo;
+   logic [DATA_WIDTH-1:0]            stbuf_fwddata_hi_pre_m, stbuf_fwddata_lo_pre_m;
+   logic [BYTE_WIDTH-1:0]            stbuf_fwdbyteen_hi_pre_m, stbuf_fwdbyteen_lo_pre_m;
+
+   // logic to detect matching from the pipe - needed for store - load forwarding
+   logic [BYTE_WIDTH-1:0]  ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi;
+   logic                   ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi;
+
+   logic [BYTE_WIDTH-1:0]  ld_byte_hit_lo, ld_byte_rhit_lo;
+   logic [BYTE_WIDTH-1:0]  ld_byte_hit_hi, ld_byte_rhit_hi;
+
+   logic [BYTE_WIDTH-1:0]  ldst_byteen_hi_r;
+   logic [BYTE_WIDTH-1:0]  ldst_byteen_lo_r;
+   // byte_en flowing down
+   logic [7:0]             ldst_byteen_r;
+   logic [7:0]             ldst_byteen_ext_r;
+   // fwd data through the pipe
+   logic [31:0]            ld_fwddata_rpipe_lo;
+   logic [31:0]            ld_fwddata_rpipe_hi;
+
+   // coalescing signals
+   logic [DEPTH-1:0]       store_matchvec_lo_r, store_matchvec_hi_r;
+   logic                   store_coalesce_lo_r, store_coalesce_hi_r;
+
+   //----------------------------------------
+   // Logic starts here
+   //----------------------------------------
+   // Create high/low byte enables
+   assign store_byteen_ext_r[7:0]           = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
+   assign store_byteen_hi_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[7:4] & {4{lsu_pkt_r.store}};
+   assign store_byteen_lo_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[3:0] & {4{lsu_pkt_r.store}};
+
+   assign RdPtrPlus1[DEPTH_LOG2-1:0] = RdPtr[DEPTH_LOG2-1:0] + 1'b1;
+   assign WrPtrPlus1[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 1'b1;
+   assign WrPtrPlus2[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 2'b10;
+
+   // The access is "dual" when bit 2 of the start and end addresses differ, i.e. it spans two 4-byte words
+   assign ldst_dual_d          = (lsu_addr_d[2] != end_addr_d[2]);
+   assign dual_stbuf_write_r   = ldst_dual_r & store_stbuf_reqvld_r;
+   assign ldst_stbuf_reqvld_r  = (lsu_commit_r & store_stbuf_reqvld_r); //|
+
+   // Store Buffer coalescing: compare the R-stage word address against every live, non-killed entry not being reset
+   for (genvar i=0; i<32'(DEPTH); i++) begin: FindMatchEntry
+      assign store_matchvec_lo_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == lsu_addr_r[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & ~stbuf_reset[i];
+      assign store_matchvec_hi_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == end_addr_r[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & dual_stbuf_write_r & ~stbuf_reset[i];
+   end: FindMatchEntry
+
+   assign store_coalesce_lo_r = |store_matchvec_lo_r[DEPTH-1:0];
+   assign store_coalesce_hi_r = |store_matchvec_hi_r[DEPTH-1:0];
+
+
+   // Allocate new in this entry if :
+   // 1. wrptr, single allocate, lo did not coalesce
+   // 2. wrptr, double allocate, lo ^ hi coalesced
+   // 3. wrptr + 1, double allocate, neither lo nor hi coalesced
+   // Also update if there is a hi or a lo coalesce to this entry
+   // Store Buffer instantiation
+   for (genvar i=0; i<32'(DEPTH); i++) begin: GenStBuf
+      assign stbuf_wr_en[i] = ldst_stbuf_reqvld_r & (
+                                                       ( (i == WrPtr[DEPTH_LOG2-1:0])      &  ~store_coalesce_lo_r)                                              |   // Allocate : new Lo
+                                                       ( (i == WrPtr[DEPTH_LOG2-1:0])      &  dual_stbuf_write_r & ~store_coalesce_hi_r)                         |   // Allocate : only 1 new Write Either
+                                                       ( (i == WrPtrPlus1[DEPTH_LOG2-1:0]) &  dual_stbuf_write_r & ~(store_coalesce_lo_r | store_coalesce_hi_r)) |   // Allocate2 : 2 new so Write Hi
+                                                       store_matchvec_lo_r[i] | store_matchvec_hi_r[i]);                                                             // Coalesced Write Lo or Hi
+      assign stbuf_reset[i] = (lsu_stbuf_commit_any | stbuf_reqvld_flushed_any) & (i == RdPtr[DEPTH_LOG2-1:0]);
+
+      // Mux select for start/end address
+      assign sel_lo[i]                           = ((~ldst_dual_r | store_stbuf_reqvld_r) & (i == WrPtr[DEPTH_LOG2-1:0]) & ~store_coalesce_lo_r) |   // lo allocated new entry
+                                                   store_matchvec_lo_r[i];                                                                           // lo coalesced in to this entry
+      assign stbuf_addrin[i][pt.LSU_SB_BITS-1:0] = sel_lo[i] ? lsu_addr_r[pt.LSU_SB_BITS-1:0] : end_addr_r[pt.LSU_SB_BITS-1:0];
+      assign stbuf_byteenin[i][BYTE_WIDTH-1:0]   = sel_lo[i] ? (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_lo_r[BYTE_WIDTH-1:0]) : (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_hi_r[BYTE_WIDTH-1:0]);
+      assign stbuf_datain[i][7:0]                = sel_lo[i] ? ((~stbuf_byteen[i][0] | store_byteen_lo_r[0]) ? store_datafn_lo_r[7:0]   : stbuf_data[i][7:0]) :
+                                                               ((~stbuf_byteen[i][0] | store_byteen_hi_r[0]) ? store_datafn_hi_r[7:0]   : stbuf_data[i][7:0]);
+      assign stbuf_datain[i][15:8]               = sel_lo[i] ? ((~stbuf_byteen[i][1] | store_byteen_lo_r[1]) ? store_datafn_lo_r[15:8]  : stbuf_data[i][15:8]) :
+                                                               ((~stbuf_byteen[i][1] | store_byteen_hi_r[1]) ? store_datafn_hi_r[15:8]  : stbuf_data[i][15:8]);
+      assign stbuf_datain[i][23:16]              = sel_lo[i] ? ((~stbuf_byteen[i][2] | store_byteen_lo_r[2]) ? store_datafn_lo_r[23:16] : stbuf_data[i][23:16]) :
+                                                               ((~stbuf_byteen[i][2] | store_byteen_hi_r[2]) ? store_datafn_hi_r[23:16] : stbuf_data[i][23:16]);
+      assign stbuf_datain[i][31:24]              = sel_lo[i] ? ((~stbuf_byteen[i][3] | store_byteen_lo_r[3]) ? store_datafn_lo_r[31:24] : stbuf_data[i][31:24]) :
+                                                               ((~stbuf_byteen[i][3] | store_byteen_hi_r[3]) ? store_datafn_hi_r[31:24] : stbuf_data[i][31:24]);
+
+      rvdffsc #(.WIDTH(1))              stbuf_vldff         (.din(1'b1),                                .dout(stbuf_vld[i]),                      .en(stbuf_wr_en[i]),       .clear(stbuf_reset[i]), .clk(lsu_free_c2_clk), .*);
+      rvdffsc #(.WIDTH(1))              stbuf_killff        (.din(1'b1),                                .dout(stbuf_dma_kill[i]),                 .en(stbuf_dma_kill_en[i]), .clear(stbuf_reset[i]), .clk(lsu_free_c2_clk), .*);
+      rvdffe  #(.WIDTH(pt.LSU_SB_BITS)) stbuf_addrff        (.din(stbuf_addrin[i][pt.LSU_SB_BITS-1:0]), .dout(stbuf_addr[i][pt.LSU_SB_BITS-1:0]), .en(stbuf_wr_en[i]), .*);
+      rvdffsc #(.WIDTH(BYTE_WIDTH))     stbuf_byteenff      (.din(stbuf_byteenin[i][BYTE_WIDTH-1:0]),   .dout(stbuf_byteen[i][BYTE_WIDTH-1:0]),   .en(stbuf_wr_en[i]),       .clear(stbuf_reset[i]), .clk(lsu_stbuf_c1_clk), .*);
+      rvdffe  #(.WIDTH(DATA_WIDTH))     stbuf_dataff        (.din(stbuf_datain[i][DATA_WIDTH-1:0]),     .dout(stbuf_data[i][DATA_WIDTH-1:0]),     .en(stbuf_wr_en[i]), .*);
+   end
+
+   rvdff #(.WIDTH(1)) ldst_dual_mff (.din(ldst_dual_d), .dout(ldst_dual_m), .clk(lsu_c1_m_clk), .*);
+   rvdff #(.WIDTH(1)) ldst_dual_rff (.din(ldst_dual_m), .dout(ldst_dual_r), .clk(lsu_c1_r_clk), .*);
+
+   // Store Buffer drain logic: the entry at RdPtr is either drained, or dropped when its dma-kill bit is set
+   assign stbuf_reqvld_flushed_any            = stbuf_vld[RdPtr] & stbuf_dma_kill[RdPtr];
+   assign stbuf_reqvld_any                    = stbuf_vld[RdPtr] & ~stbuf_dma_kill[RdPtr] & ~(|stbuf_dma_kill_en[DEPTH-1:0]);  // Don't drain if some kill bit is being set this cycle
+   assign stbuf_addr_any[pt.LSU_SB_BITS-1:0]  = stbuf_addr[RdPtr][pt.LSU_SB_BITS-1:0];
+   assign stbuf_data_any[DATA_WIDTH-1:0]      = stbuf_data[RdPtr][DATA_WIDTH-1:0];
+
+   // Update the RdPtr/WrPtr logic
+   // Need to revert the WrPtr for flush cases. Also revert the pipe WrPtrs
+   assign WrPtrEn                  = (ldst_stbuf_reqvld_r & ~dual_stbuf_write_r & ~(store_coalesce_hi_r | store_coalesce_lo_r))  |  // writing 1 and did not coalesce
+                                     (ldst_stbuf_reqvld_r &  dual_stbuf_write_r & ~(store_coalesce_hi_r & store_coalesce_lo_r));    // writing 2 and at least 1 did not coalesce
+   assign NxtWrPtr[DEPTH_LOG2-1:0] = (ldst_stbuf_reqvld_r & dual_stbuf_write_r & ~(store_coalesce_hi_r | store_coalesce_lo_r)) ? WrPtrPlus2[DEPTH_LOG2-1:0] : WrPtrPlus1[DEPTH_LOG2-1:0];
+   assign RdPtrEn                  = lsu_stbuf_commit_any | stbuf_reqvld_flushed_any;
+   assign NxtRdPtr[DEPTH_LOG2-1:0] = RdPtrPlus1[DEPTH_LOG2-1:0];
+
+   always_comb begin   // population count of the valid entries
+      stbuf_numvld_any[3:0] = '0;
+      for (int i=0; i<32'(DEPTH); i++) begin
+         stbuf_numvld_any[3:0] += {3'b0, stbuf_vld[i]};
+      end
+   end
+
+   // These go to store buffer to detect full
+   assign isdccmst_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_in_dccm_m & ~lsu_pkt_m.dma;
+   assign isdccmst_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~lsu_pkt_r.dma;
+
+   assign stbuf_specvld_m[1:0] = {1'b0,isdccmst_m} << (isdccmst_m & ldst_dual_m);   // 1 entry for a single store, 2 for a dual store
+   assign stbuf_specvld_r[1:0] = {1'b0,isdccmst_r} << (isdccmst_r & ldst_dual_r);
+   assign stbuf_specvld_any[3:0] = stbuf_numvld_any[3:0] + {2'b0, stbuf_specvld_m[1:0]} + {2'b0, stbuf_specvld_r[1:0]};
+
+   assign lsu_stbuf_full_any  = (~ldst_dual_d & dec_lsu_valid_raw_d) ? (stbuf_specvld_any[3:0] >= DEPTH) : (stbuf_specvld_any[3:0] >= (DEPTH-1));
+   assign lsu_stbuf_empty_any = (stbuf_numvld_any[3:0] == 4'b0);
+
+   // Load forwarding logic from the store queue
+   assign cmpen_hi_m                                         = lsu_cmpen_m & ldst_dual_m;   // NOTE(review): not referenced elsewhere in this module - confirm the match terms need no gating
+   assign cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]  = end_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)];
+
+   assign cmpen_lo_m                                         = lsu_cmpen_m;                 // NOTE(review): not referenced elsewhere in this module - confirm
+   assign cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]  = lsu_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)];
+
+   always_comb begin: GenLdFwd
+      stbuf_fwdbyteen_hi_pre_m[BYTE_WIDTH-1:0]   = '0;
+      stbuf_fwdbyteen_lo_pre_m[BYTE_WIDTH-1:0]   = '0;
+
+      for (int i=0; i<32'(DEPTH); i++) begin
+         stbuf_match_hi[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m;
+         stbuf_match_lo[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m;
+
+         // Kill the store buffer entry if there is a dma store since it already updated the dccm
+         stbuf_dma_kill_en[i] = (stbuf_match_hi[i] | stbuf_match_lo[i]) & lsu_pkt_m.valid & lsu_pkt_m.dma & lsu_pkt_m.store;
+
+         for (int j=0; j<32'(BYTE_WIDTH); j++) begin
+            stbuf_fwdbyteenvec_hi[i][j] = stbuf_match_hi[i] & stbuf_byteen[i][j] & stbuf_vld[i];
+            stbuf_fwdbyteen_hi_pre_m[j] |= stbuf_fwdbyteenvec_hi[i][j];
+
+            stbuf_fwdbyteenvec_lo[i][j] = stbuf_match_lo[i] & stbuf_byteen[i][j] & stbuf_vld[i];
+            stbuf_fwdbyteen_lo_pre_m[j] |= stbuf_fwdbyteenvec_lo[i][j];
+         end
+      end
+   end // block: GenLdFwd
+
+   always_comb begin: GenLdData   // OR together the data of every matching entry
+      stbuf_fwddata_hi_pre_m[31:0]   = '0;
+      stbuf_fwddata_lo_pre_m[31:0]   = '0;
+
+      for (int i=0; i<32'(DEPTH); i++) begin
+         stbuf_fwddata_hi_pre_m[31:0] |= {32{stbuf_match_hi[i]}} & stbuf_data[i][31:0];
+         stbuf_fwddata_lo_pre_m[31:0] |= {32{stbuf_match_lo[i]}} & stbuf_data[i][31:0];
+
+      end
+
+   end // block: GenLdData
+
+   // Create Hi/Lo signals - needed for the pipe forwarding
+   assign ldst_byteen_r[7:0] = ({8{lsu_pkt_r.by}}    & 8'b0000_0001) |
+                               ({8{lsu_pkt_r.half}}  & 8'b0000_0011) |
+                               ({8{lsu_pkt_r.word}}  & 8'b0000_1111) |
+                               ({8{lsu_pkt_r.dword}} & 8'b1111_1111);
+
+   assign ldst_byteen_ext_r[7:0] = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
+
+   assign ldst_byteen_hi_r[3:0]   = ldst_byteen_ext_r[7:4];
+   assign ldst_byteen_lo_r[3:0]   = ldst_byteen_ext_r[3:0];
+   // ld_addr_rhit_<s>_<l>: the <s> half of the R-stage store (lo = lsu_addr_r, hi = end_addr_r) hits the <l> half of the M-stage access (lo = lsu_addr_m, hi = end_addr_m)
+   assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma;
+   assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma;
+   assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma & dual_stbuf_write_r;
+   assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma & dual_stbuf_write_r;
+
+   for (genvar i=0; i<32'(BYTE_WIDTH); i++) begin
+      assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo & ldst_byteen_lo_r[i];
+      assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i];
+      assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i];
+      assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i];
+
+      assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
+      assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
+
+      assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
+                                                  ({8{ld_byte_rhit_hi_lo[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
+
+      assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
+                                                  ({8{ld_byte_rhit_hi_hi[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
+
+      assign ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];   // same expression as ld_byte_rhit_lo[i]
+      assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];   // same expression as ld_byte_rhit_hi[i]
+
+      assign stbuf_fwdbyteen_hi_m[i] = ld_byte_hit_hi[i] | stbuf_fwdbyteen_hi_pre_m[i];
+      assign stbuf_fwdbyteen_lo_m[i] = ld_byte_hit_lo[i] | stbuf_fwdbyteen_lo_pre_m[i];
+      // Pipe vs Store Queue priority: the R-stage store data wins over the store buffer data
+      assign stbuf_fwddata_lo_m[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i] ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : stbuf_fwddata_lo_pre_m[(8*i)+7:(8*i)];
+      // Pipe vs Store Queue priority: the R-stage store data wins over the store buffer data
+      assign stbuf_fwddata_hi_m[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i] ? ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : stbuf_fwddata_hi_pre_m[(8*i)+7:(8*i)];
+   end
+
+   // Flops
+   rvdffs #(.WIDTH(DEPTH_LOG2)) WrPtrff (.din(NxtWrPtr[DEPTH_LOG2-1:0]), .dout(WrPtr[DEPTH_LOG2-1:0]), .en(WrPtrEn), .clk(lsu_stbuf_c1_clk), .*);
+   rvdffs #(.WIDTH(DEPTH_LOG2)) RdPtrff (.din(NxtRdPtr[DEPTH_LOG2-1:0]), .dout(RdPtr[DEPTH_LOG2-1:0]), .en(RdPtrEn), .clk(lsu_stbuf_c1_clk), .*);
+
+`ifdef ASSERT_ON
+   // Compare the full 4-bit count: a [2:0] slice wraps at 8 and would hide an overflow (and is always <= 8)
+   assert_stbuf_overflow: assert #0 (stbuf_specvld_any[3:0] <= DEPTH);
+   property stbuf_wren_store_dccm;
+      @(posedge clk)  disable iff(~rst_l) (|stbuf_wr_en[DEPTH-1:0]) |-> (lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~lsu_pkt_r.dma);
+   endproperty
+   assert_stbuf_wren_store_dccm: assert property (stbuf_wren_store_dccm) else
+      $display("Illegal store buffer write");
+
+`endif
+
+endmodule
+
diff --git a/design/lsu/el2_lsu_trigger.sv b/design/lsu/el2_lsu_trigger.sv
new file mode 100644
index 0000000..ab506f3
--- /dev/null
+++ b/design/lsu/el2_lsu_trigger.sv
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2020 Western Digital Corporation or it's affiliates.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +//******************************************************************************** +// $Id$ +// +// +// Owner: +// Function: LSU Trigger logic +// Comments: +// +//******************************************************************************** +module el2_lsu_trigger +import el2_pkg::*; +#( +`include "el2_param.vh" + )( + input el2_trigger_pkt_t [3:0] trigger_pkt_any, // trigger packet from dec + input el2_lsu_pkt_t lsu_pkt_m, // lsu packet + input logic [31:0] lsu_addr_m, // address + input logic [31:0] store_data_m, // store data + + output logic [3:0] lsu_trigger_match_m // match result +); + + logic [3:0][31:0] lsu_match_data; + logic [3:0] lsu_trigger_data_match; + logic [31:0] store_data_trigger_m; + + assign store_data_trigger_m[31:0] = {({16{lsu_pkt_m.word}} & store_data_m[31:16]),({8{(lsu_pkt_m.half | lsu_pkt_m.word)}} & store_data_m[15:8]), store_data_m[7:0]}; + + + for (genvar i=0; i<4; i++) begin + assign lsu_match_data[i][31:0] = ({32{~trigger_pkt_any[i].select}} & lsu_addr_m[31:0]) | + ({32{trigger_pkt_any[i].select & trigger_pkt_any[i].store}} & store_data_trigger_m[31:0]); + + + rvmaskandmatch trigger_match (.mask(trigger_pkt_any[i].tdata2[31:0]), .data(lsu_match_data[i][31:0]), .masken(trigger_pkt_any[i].match), .match(lsu_trigger_data_match[i])); + + assign lsu_trigger_match_m[i] = lsu_pkt_m.valid & ~lsu_pkt_m.dma & + ((trigger_pkt_any[i].store & lsu_pkt_m.store) | (trigger_pkt_any[i].load & lsu_pkt_m.load & ~trigger_pkt_any[i].select)) & + lsu_trigger_data_match[i]; + end + + +endmodule // el2_lsu_trigger diff --git a/docs/RISC-V SweRV EL2 PRM.pdf b/docs/RISC-V SweRV EL2 PRM.pdf new file mode 100755 index 0000000..95730d9 Binary files /dev/null and b/docs/RISC-V SweRV EL2 PRM.pdf differ diff --git a/testbench/ahb_sif.sv b/testbench/ahb_sif.sv new file mode 100644 index 0000000..871e2a0 --- /dev/null +++ b/testbench/ahb_sif.sv @@ -0,0 
+1,195 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +`ifdef RV_BUILD_AHB_LITE + +module ahb_sif ( +input logic [63:0] HWDATA, +input logic HCLK, +input logic HSEL, +input logic [3:0] HPROT, +input logic HWRITE, +input logic [1:0] HTRANS, +input logic [2:0] HSIZE, +input logic HREADY, +input logic HRESETn, +input logic [31:0] HADDR, +input logic [2:0] HBURST, + +output logic HREADYOUT, +output logic HRESP, +output logic [63:0] HRDATA +); + +parameter MEM_SIZE_DW = 8192; +parameter MAILBOX_ADDR = 32'hD0580000; +localparam MEM_SIZE = MEM_SIZE_DW*8; + +logic Write; +logic [31:0] Last_HADDR; +logic [7:0] strb_lat; + +bit [7:0] mem [0:MEM_SIZE-1]; +//bit [7:0] mem [int]; +//int kuku[int]; + +// Wires +wire [63:0] WriteData = HWDATA; +wire [7:0] strb = HSIZE == 3'b000 ? 8'h1 << HADDR[2:0] : + HSIZE == 3'b001 ? 8'h3 << {HADDR[2:1],1'b0} : + HSIZE == 3'b010 ? 
8'hf << {HADDR[2],2'b0} : 8'hff; + +wire[31:0] addr = HADDR & (MEM_SIZE-1); +wire[31:0] laddr = Last_HADDR & (MEM_SIZE-1); + +wire mailbox_write = Write && Last_HADDR==MAILBOX_ADDR; + +wire [63:0] mem_dout = {mem[{addr[31:3],3'd7}], + mem[{addr[31:3],3'd6}], + mem[{addr[31:3],3'd5}], + mem[{addr[31:3],3'd4}], + mem[{addr[31:3],3'd3}], + mem[{addr[31:3],3'd2}], + mem[{addr[31:3],3'd1}], + mem[{addr[31:3],3'd0}]}; + + +always @ (negedge HCLK ) begin + if (Write) begin + if(strb_lat[7]) mem[{laddr[31:3],3'd7}] = HWDATA[63:56]; + if(strb_lat[6]) mem[{laddr[31:3],3'd6}] = HWDATA[55:48]; + if(strb_lat[5]) mem[{laddr[31:3],3'd5}] = HWDATA[47:40]; + if(strb_lat[4]) mem[{laddr[31:3],3'd4}] = HWDATA[39:32]; + if(strb_lat[3]) mem[{laddr[31:3],3'd3}] = HWDATA[31:24]; + if(strb_lat[2]) mem[{laddr[31:3],3'd2}] = HWDATA[23:16]; + if(strb_lat[1]) mem[{laddr[31:3],3'd1}] = HWDATA[15:08]; + if(strb_lat[0]) mem[{laddr[31:3],3'd0}] = HWDATA[07:00]; + end +end + + +assign HREADYOUT = 1; +assign HRESP = 0; + +always @(posedge HCLK or negedge HRESETn) begin + if(~HRESETn) begin + Last_HADDR <= 32'b0; + Write <= 1'b0; + HRDATA <= '0; + end else begin + Last_HADDR <= HADDR; + Write <= HWRITE & |HTRANS; + if(|HTRANS & ~HWRITE) + HRDATA <= mem_dout; + strb_lat <= strb; + end +end + + +endmodule +`endif + +`ifdef RV_BUILD_AXI4 +module axi_slv #(TAGW=1) ( +input aclk, +input rst_l, +input arvalid, +output reg arready, +input [31:0] araddr, +input [TAGW-1:0] arid, +input [7:0] arlen, +input [1:0] arburst, +input [2:0] arsize, + +output reg rvalid, +input rready, +output reg [63:0] rdata, +output reg [1:0] rresp, +output reg [TAGW-1:0] rid, +output rlast, + +input awvalid, +output awready, +input [31:0] awaddr, +input [TAGW-1:0] awid, +input [7:0] awlen, +input [1:0] awburst, +input [2:0] awsize, + +input [63:0] wdata, +input [7:0] wstrb, +input wvalid, +output wready, + +output reg bvalid, +input bready, +output reg [1:0] bresp, +output reg [TAGW-1:0] bid +); + +parameter MAILBOX_ADDR = 
32'hD0580000; +parameter MEM_SIZE_DW = 8192; + +bit [7:0] mem [0:MEM_SIZE_DW*8-1]; +bit [63:0] memdata; +wire [31:0] waddr, raddr; +wire [63:0] WriteData; +wire mailbox_write; + +assign raddr = araddr & (MEM_SIZE_DW*8-1); +assign waddr = awaddr & (MEM_SIZE_DW*8-1); + +assign mailbox_write = awvalid && awaddr==MAILBOX_ADDR && rst_l; +assign WriteData = wdata; + +always @ ( posedge aclk or negedge rst_l) begin + if(!rst_l) begin + rvalid <= 0; + bvalid <= 0; + end + else begin + bid <= awid; + rid <= arid; + rvalid <= arvalid; + bvalid <= awvalid; + rdata <= memdata; + end +end + +always @ ( negedge aclk) begin + if(arvalid) memdata <= {mem[raddr+7], mem[raddr+6], mem[raddr+5], mem[raddr+4], + mem[raddr+3], mem[raddr+2], mem[raddr+1], mem[raddr]}; + if(awvalid) begin + if(wstrb[7]) mem[waddr+7] = wdata[63:56]; + if(wstrb[6]) mem[waddr+6] = wdata[55:48]; + if(wstrb[5]) mem[waddr+5] = wdata[47:40]; + if(wstrb[4]) mem[waddr+4] = wdata[39:32]; + if(wstrb[3]) mem[waddr+3] = wdata[31:24]; + if(wstrb[2]) mem[waddr+2] = wdata[23:16]; + if(wstrb[1]) mem[waddr+1] = wdata[15:08]; + if(wstrb[0]) mem[waddr+0] = wdata[07:00]; + end +end + + +assign arready = 1'b1; +assign awready = 1'b1; +assign wready = 1'b1; +assign rresp = 2'b0; +assign bresp = 2'b0; +assign rlast = 1'b1; + +endmodule +`endif diff --git a/testbench/asm/cmark.c b/testbench/asm/cmark.c new file mode 100644 index 0000000..dab4304 --- /dev/null +++ b/testbench/asm/cmark.c @@ -0,0 +1,2399 @@ +#include "defines.h" + +extern int STACK; +void main(); + + +#define STDOUT 0xd0580000 + +__asm (".section .text"); +__asm (".global _start"); +__asm ("_start:"); + +// Enable Caches in MRAC +__asm ("li t0, 0x5f555555"); +__asm ("csrw 0x7c0, t0"); + +// Set stack pointer. +__asm ("la sp, STACK"); + +__asm ("jal main"); + +// Write 0xff to STDOUT for TB to termiate test. 
+__asm (".global _finish"); +__asm ("_finish:"); +__asm ("li t0, 0xd0580000"); +__asm ("addi t1, zero, 0xff"); +__asm ("sb t1, 0(t0)"); +__asm ("beq x0, x0, _finish"); +__asm (".rept 10"); +__asm ("nop"); +__asm (".endr"); + + +/* +Author : Shay Gal-On, EEMBC + +This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009 +All rights reserved. + +EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the +CoreMark License that is distributed with the official EEMBC COREMARK Software release. +If you received this EEMBC CoreMark Software without the accompanying CoreMark License, +you must discontinue use and download the official release from www.coremark.org. + +Also, if you are publicly displaying scores generated from the EEMBC CoreMark software, +make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file. + +EEMBC +4354 Town Center Blvd. Suite 114-200 +El Dorado Hills, CA, 95762 +*/ + +//#include "/wd/users/jrahmeh/coremark_v1.0/riscv/coremark.h" + +/* +Author : Shay Gal-On, EEMBC + +This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009 +All rights reserved. + +EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the +CoreMark License that is distributed with the official EEMBC COREMARK Software release. +If you received this EEMBC CoreMark Software without the accompanying CoreMark License, +you must discontinue use and download the official release from www.coremark.org. + +Also, if you are publicly displaying scores generated from the EEMBC CoreMark software, +make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file. + +EEMBC +4354 Town Center Blvd. Suite 114-200 +El Dorado Hills, CA, 95762 +*/ +/* Topic: Description + This file contains declarations of the various benchmark functions. 
+*/ + +/* Configuration: TOTAL_DATA_SIZE + Define total size for data algorithms will operate on +*/ +#ifndef TOTAL_DATA_SIZE +#define TOTAL_DATA_SIZE 2*1000 +#endif + +#define SEED_ARG 0 +#define SEED_FUNC 1 +#define SEED_VOLATILE 2 + +#define MEM_STATIC 0 +#define MEM_MALLOC 1 +#define MEM_STACK 2 + +/* File : core_portme.h */ + +/* + Author : Shay Gal-On, EEMBC + Legal : TODO! +*/ +/* Topic : Description + This file contains configuration constants required to execute on different platforms +*/ +#ifndef CORE_PORTME_H +#define CORE_PORTME_H +/************************/ +/* Data types and settings */ +/************************/ +/* Configuration : HAS_FLOAT + Define to 1 if the platform supports floating point. +*/ +#ifndef HAS_FLOAT +#define HAS_FLOAT 0 +#endif +/* Configuration : HAS_TIME_H + Define to 1 if platform has the time.h header file, + and implementation of functions thereof. +*/ +#ifndef HAS_TIME_H +#define HAS_TIME_H 0 +#endif +/* Configuration : USE_CLOCK + Define to 1 if platform has the time.h header file, + and implementation of functions thereof. +*/ +#ifndef USE_CLOCK +#define USE_CLOCK 0 +#endif +/* Configuration : HAS_STDIO + Define to 1 if the platform has stdio.h. +*/ +#ifndef HAS_STDIO +#define HAS_STDIO 0 +#endif +/* Configuration : HAS_PRINTF + Define to 1 if the platform has stdio.h and implements the printf function. +*/ +#ifndef HAS_PRINTF +#define HAS_PRINTF 1 +int whisperPrintf(const char* format, ...); +#define ee_printf whisperPrintf +#endif + +/* Configuration : CORE_TICKS + Define type of return from the timing functions. + */ +#include +typedef clock_t CORE_TICKS; + +/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION + Initialize these strings per platform +*/ +#ifndef COMPILER_VERSION + #ifdef __GNUC__ + #define COMPILER_VERSION "GCC"__VERSION__ + #else + #define COMPILER_VERSION "Please put compiler version here (e.g. 
gcc 4.1)" + #endif +#endif +#ifndef COMPILER_FLAGS + #define COMPILER_FLAGS "-O2" +#endif + +#ifndef MEM_LOCATION +// #define MEM_LOCATION "STACK" + #define MEM_LOCATION "STATIC" +#endif + +/* Data Types : + To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in . + + *Imprtant* : + ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!! +*/ +typedef signed short ee_s16; +typedef unsigned short ee_u16; +typedef signed int ee_s32; +typedef double ee_f32; +typedef unsigned char ee_u8; +typedef unsigned int ee_u32; +typedef ee_u32 ee_ptr_int; +typedef size_t ee_size_t; +/* align_mem : + This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks. +*/ +#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) + +/* Configuration : SEED_METHOD + Defines method to get seed values that cannot be computed at compile time. + + Valid values : + SEED_ARG - from command line. + SEED_FUNC - from a system function. + SEED_VOLATILE - from volatile variables. +*/ +#ifndef SEED_METHOD +#define SEED_METHOD SEED_VOLATILE +#endif + +/* Configuration : MEM_METHOD + Defines method to get a block of memry. + + Valid values : + MEM_MALLOC - for platforms that implement malloc and have malloc.h. + MEM_STATIC - to use a static memory array. + MEM_STACK - to allocate the data block on the stack (NYI). +*/ +#ifndef MEM_METHOD +//#define MEM_METHOD MEM_STACK +#define MEM_METHOD MEM_STATIC +#endif + +/* Configuration : MULTITHREAD + Define for parallel execution + + Valid values : + 1 - only one context (default). + N>1 - will execute N copies in parallel. + + Note : + If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined. + + Two sample implementations are provided. Use or to enable them. + + It is valid to have a different implementation of and in , + to fit a particular architecture. 
+*/ +#ifndef MULTITHREAD +#define MULTITHREAD 1 +#define USE_PTHREAD 0 +#define USE_FORK 0 +#define USE_SOCKET 0 +#endif + +/* Configuration : MAIN_HAS_NOARGC + Needed if platform does not support getting arguments to main. + + Valid values : + 0 - argc/argv to main is supported + 1 - argc/argv to main is not supported + + Note : + This flag only matters if MULTITHREAD has been defined to a value greater then 1. +*/ +#ifndef MAIN_HAS_NOARGC +#define MAIN_HAS_NOARGC 1 +#endif + +/* Configuration : MAIN_HAS_NORETURN + Needed if platform does not support returning a value from main. + + Valid values : + 0 - main returns an int, and return value will be 0. + 1 - platform does not support returning a value from main +*/ +#ifndef MAIN_HAS_NORETURN +#define MAIN_HAS_NORETURN 1 +#endif + +/* Variable : default_num_contexts + Not used for this simple port, must cintain the value 1. +*/ +extern ee_u32 default_num_contexts; + +typedef struct CORE_PORTABLE_S { + ee_u8 portable_id; +} core_portable; + +/* target specific init/fini */ +void portable_init(core_portable *p, int *argc, char *argv[]); +void portable_fini(core_portable *p); + +#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) && !defined(VALIDATION_RUN) +#if (TOTAL_DATA_SIZE==1200) +#define PROFILE_RUN 1 +#elif (TOTAL_DATA_SIZE==2000) +#define PERFORMANCE_RUN 1 +#else +#define VALIDATION_RUN 1 +#endif +#endif + +#endif /* CORE_PORTME_H */ + + +#if HAS_STDIO +#include +#endif +#if HAS_PRINTF +#ifndef ee_printf +#define ee_printf printf +#endif +#endif + +/* Actual benchmark execution in iterate */ +void *iterate(void *pres); + +/* Typedef: secs_ret + For machines that have floating point support, get number of seconds as a double. + Otherwise an unsigned int. 
+*/ +#if HAS_FLOAT +typedef double secs_ret; +#else +typedef ee_u32 secs_ret; +#endif + +#if MAIN_HAS_NORETURN +#define MAIN_RETURN_VAL +#define MAIN_RETURN_TYPE void +#else +#define MAIN_RETURN_VAL 0 +#define MAIN_RETURN_TYPE int +#endif + +void start_time(void); +void stop_time(void); +CORE_TICKS get_time(void); +secs_ret time_in_secs(CORE_TICKS ticks); + +/* Misc useful functions */ +ee_u16 crcu8(ee_u8 data, ee_u16 crc); +ee_u16 crc16(ee_s16 newval, ee_u16 crc); +ee_u16 crcu16(ee_u16 newval, ee_u16 crc); +ee_u16 crcu32(ee_u32 newval, ee_u16 crc); +ee_u8 check_data_types(); +void *portable_malloc(ee_size_t size); +void portable_free(void *p); +ee_s32 parseval(char *valstring); + +/* Algorithm IDS */ +#define ID_LIST (1<<0) +#define ID_MATRIX (1<<1) +#define ID_STATE (1<<2) +#define ALL_ALGORITHMS_MASK (ID_LIST|ID_MATRIX|ID_STATE) +#define NUM_ALGORITHMS 3 + +/* list data structures */ +typedef struct list_data_s { + ee_s16 data16; + ee_s16 idx; +} list_data; + +typedef struct list_head_s { + struct list_head_s *next; + struct list_data_s *info; +} list_head; + + +/*matrix benchmark related stuff */ +#define MATDAT_INT 1 +#if MATDAT_INT +typedef ee_s16 MATDAT; +typedef ee_s32 MATRES; +#else +typedef ee_f16 MATDAT; +typedef ee_f32 MATRES; +#endif + +typedef struct MAT_PARAMS_S { + int N; + MATDAT *A; + MATDAT *B; + MATRES *C; +} mat_params; + +/* state machine related stuff */ +/* List of all the possible states for the FSM */ +typedef enum CORE_STATE { + CORE_START=0, + CORE_INVALID, + CORE_S1, + CORE_S2, + CORE_INT, + CORE_FLOAT, + CORE_EXPONENT, + CORE_SCIENTIFIC, + NUM_CORE_STATES +} core_state_e ; + + +/* Helper structure to hold results */ +typedef struct RESULTS_S { + /* inputs */ + ee_s16 seed1; /* Initializing seed */ + ee_s16 seed2; /* Initializing seed */ + ee_s16 seed3; /* Initializing seed */ + void *memblock[4]; /* Pointer to safe memory location */ + ee_u32 size; /* Size of the data */ + ee_u32 iterations; /* Number of iterations to execute */ + 
ee_u32 execs; /* Bitmask of operations to execute */ + struct list_head_s *list; + mat_params mat; + /* outputs */ + ee_u16 crc; + ee_u16 crclist; + ee_u16 crcmatrix; + ee_u16 crcstate; + ee_s16 err; + /* ultithread specific */ + core_portable port; +} core_results; + +/* Multicore execution handling */ +#if (MULTITHREAD>1) +ee_u8 core_start_parallel(core_results *res); +ee_u8 core_stop_parallel(core_results *res); +#endif + +/* list benchmark functions */ +list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed); +ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx); + +/* state benchmark functions */ +void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p); +ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock, + ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc); + +/* matrix benchmark functions */ +ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p); +ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc); + + + + + +/* +Topic: Description + Benchmark using a linked list. + + Linked list is a common data structure used in many applications. + + For our purposes, this will excercise the memory units of the processor. + In particular, usage of the list pointers to find and alter data. + + We are not using Malloc since some platforms do not support this library. + + Instead, the memory block being passed in is used to create a list, + and the benchmark takes care not to add more items then can be + accomodated by the memory block. The porting layer will make sure + that we have a valid memory block. + + All operations are done in place, without using any extra memory. + + The list itself contains list pointers and pointers to data items. + Data items contain the following: + + idx - An index that captures the initial order of the list. + data - Variable data initialized based on the input parameters. The 16b are divided as follows: + o Upper 8b are backup of original data. 
+ o Bit 7 indicates if the lower 7 bits are to be used as is or calculated. + o Bits 0-2 indicate type of operation to perform to get a 7b value. + o Bits 3-6 provide input for the operation. + +*/ + +/* local functions */ + +list_head *core_list_find(list_head *list,list_data *info); +list_head *core_list_reverse(list_head *list); +list_head *core_list_remove(list_head *item); +list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified); +list_head *core_list_insert_new(list_head *insert_point + , list_data *info, list_head **memblock, list_data **datablock + , list_head *memblock_end, list_data *datablock_end); +typedef ee_s32(*list_cmp)(list_data *a, list_data *b, core_results *res); +list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res); + +ee_s16 calc_func(ee_s16 *pdata, core_results *res) { + ee_s16 data=*pdata; + ee_s16 retval; + ee_u8 optype=(data>>7) & 1; /* bit 7 indicates if the function result has been cached */ + if (optype) /* if cached, use cache */ + return (data & 0x007f); + else { /* otherwise calculate and cache the result */ + ee_s16 flag=data & 0x7; /* bits 0-2 is type of function to perform */ + ee_s16 dtype=((data>>3) & 0xf); /* bits 3-6 is specific data for the operation */ + dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */ + switch (flag) { + case 0: + if (dtype<0x22) /* set min period for bit corruption */ + dtype=0x22; + retval=core_bench_state(res->size,res->memblock[3],res->seed1,res->seed2,dtype,res->crc); + if (res->crcstate==0) + res->crcstate=retval; + break; + case 1: + retval=core_bench_matrix(&(res->mat),dtype,res->crc); + if (res->crcmatrix==0) + res->crcmatrix=retval; + break; + default: + retval=data; + break; + } + res->crc=crcu16(retval,res->crc); + retval &= 0x007f; + *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */ + return retval; + } +} +/* Function: cmp_complex + Compare the data item in a list cell. 
+ + Can be used by mergesort. +*/ +ee_s32 cmp_complex(list_data *a, list_data *b, core_results *res) { + ee_s16 val1=calc_func(&(a->data16),res); + ee_s16 val2=calc_func(&(b->data16),res); + return val1 - val2; +} + +/* Function: cmp_idx + Compare the idx item in a list cell, and regen the data. + + Can be used by mergesort. +*/ +ee_s32 cmp_idx(list_data *a, list_data *b, core_results *res) { + if (res==NULL) { + a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16>>8)); + b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16>>8)); + } + return a->idx - b->idx; +} + +void copy_info(list_data *to,list_data *from) { + to->data16=from->data16; + to->idx=from->idx; +} + +/* Benchmark for linked list: + - Try to find multiple data items. + - List sort + - Operate on data from list (crc) + - Single remove/reinsert + * At the end of this function, the list is back to original state +*/ +ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) { + ee_u16 retval=0; + ee_u16 found=0,missed=0; + list_head *list=res->list; + ee_s16 find_num=res->seed3; + list_head *this_find; + list_head *finder, *remover; + list_data info; + ee_s16 i; + + info.idx=finder_idx; + /* find values in the list, and change the list each time (reverse and cache if value found) */ + for (i=0; inext->info->data16 >> 8) & 1; + } + else { + found++; + if (this_find->info->data16 & 0x1) /* use found value */ + retval+=(this_find->info->data16 >> 9) & 1; + /* and cache next item at the head of the list (if any) */ + if (this_find->next != NULL) { + finder = this_find->next; + this_find->next = finder->next; + finder->next=list->next; + list->next=finder; + } + } + if (info.idx>=0) + info.idx++; +#if CORE_DEBUG + ee_printf("List find %d: [%d,%d,%d]\n",i,retval,missed,found); +#endif + } + retval+=found*4-missed; + /* sort the list by data content and remove one item*/ + if (finder_idx>0) + list=core_list_mergesort(list,cmp_complex,res); + remover=core_list_remove(list->next); + /* CRC data content 
of list from location of index N forward, and then undo remove */ + finder=core_list_find(list,&info); + if (!finder) + finder=list->next; + while (finder) { + retval=crc16(list->info->data16,retval); + finder=finder->next; + } +#if CORE_DEBUG + ee_printf("List sort 1: %04x\n",retval); +#endif + remover=core_list_undo_remove(remover,list->next); + /* sort the list by index, in effect returning the list to original state */ + list=core_list_mergesort(list,cmp_idx,NULL); + /* CRC data content of list */ + finder=list->next; + while (finder) { + retval=crc16(list->info->data16,retval); + finder=finder->next; + } +#if CORE_DEBUG + ee_printf("List sort 2: %04x\n",retval); +#endif + return retval; +} +/* Function: core_list_init + Initialize list with data. + + Parameters: + blksize - Size of memory to be initialized. + memblock - Pointer to memory block. + seed - Actual values chosen depend on the seed parameter. + The seed parameter MUST be supplied from a source that cannot be determined at compile time + + Returns: + Pointer to the head of the list. 
+ +*/ +list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed) { + /* calculated pointers for the list */ + ee_u32 per_item=16+sizeof(struct list_data_s); + ee_u32 size=(blksize/per_item)-2; /* to accomodate systems with 64b pointers, and make sure same code is executed, set max list elements */ + list_head *memblock_end=memblock+size; + list_data *datablock=(list_data *)(memblock_end); + list_data *datablock_end=datablock+size; + /* some useful variables */ + ee_u32 i; + list_head *finder,*list=memblock; + list_data info; + + /* create a fake items for the list head and tail */ + list->next=NULL; + list->info=datablock; + list->info->idx=0x0000; + list->info->data16=(ee_s16)0x8080; + memblock++; + datablock++; + info.idx=0x7fff; + info.data16=(ee_s16)0xffff; + core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end); + + /* then insert size items */ + for (i=0; inext; + i=1; + while (finder->next!=NULL) { + if (iinfo->idx=i++; + else { + ee_u16 pat=(ee_u16)(i++ ^ seed); /* get a pseudo random number */ + finder->info->idx=0x3fff & (((i & 0x07) << 8) | pat); /* make sure the mixed items end up after the ones in sequence */ + } + finder=finder->next; + } + list = core_list_mergesort(list,cmp_idx,NULL); +#if CORE_DEBUG + ee_printf("Initialized list:\n"); + finder=list; + while (finder) { + ee_printf("[%04x,%04x]",finder->info->idx,(ee_u16)finder->info->data16); + finder=finder->next; + } + ee_printf("\n"); +#endif + return list; +} + +/* Function: core_list_insert + Insert an item to the list + + Parameters: + insert_point - where to insert the item. + info - data for the cell. + memblock - pointer for the list header + datablock - pointer for the list data + memblock_end - end of region for list headers + datablock_end - end of region for list data + + Returns: + Pointer to new item. 
+*/ +list_head *core_list_insert_new(list_head *insert_point, list_data *info, list_head **memblock, list_data **datablock + , list_head *memblock_end, list_data *datablock_end) { + list_head *newitem; + + if ((*memblock+1) >= memblock_end) + return NULL; + if ((*datablock+1) >= datablock_end) + return NULL; + + newitem=*memblock; + (*memblock)++; + newitem->next=insert_point->next; + insert_point->next=newitem; + + newitem->info=*datablock; + (*datablock)++; + copy_info(newitem->info,info); + + return newitem; +} + +/* Function: core_list_remove + Remove an item from the list. + + Operation: + For a singly linked list, remove by copying the data from the next item + over to the current cell, and unlinking the next item. + + Note: + since there is always a fake item at the end of the list, no need to check for NULL. + + Returns: + Removed item. +*/ +list_head *core_list_remove(list_head *item) { + list_data *tmp; + list_head *ret=item->next; + /* swap data pointers */ + tmp=item->info; + item->info=ret->info; + ret->info=tmp; + /* and eliminate item */ + item->next=item->next->next; + ret->next=NULL; + return ret; +} + +/* Function: core_list_undo_remove + Undo a remove operation. + + Operation: + Since we want each iteration of the benchmark to be exactly the same, + we need to be able to undo a remove. + Link the removed item back into the list, and switch the info items. + + Parameters: + item_removed - Return value from the + item_modified - List item that was modified during + + Returns: + The item that was linked back to the list. 
+ +*/ +list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified) { + list_data *tmp; + /* swap data pointers */ + tmp=item_removed->info; + item_removed->info=item_modified->info; + item_modified->info=tmp; + /* and insert item */ + item_removed->next=item_modified->next; + item_modified->next=item_removed; + return item_removed; +} + +/* Function: core_list_find + Find an item in the list + + Operation: + Find an item by idx (if not 0) or specific data value + + Parameters: + list - list head + info - idx or data to find + + Returns: + Found item, or NULL if not found. +*/ +list_head *core_list_find(list_head *list,list_data *info) { + if (info->idx>=0) { + while (list && (list->info->idx != info->idx)) + list=list->next; + return list; + } else { + while (list && ((list->info->data16 & 0xff) != info->data16)) + list=list->next; + return list; + } +} +/* Function: core_list_reverse + Reverse a list + + Operation: + Rearrange the pointers so the list is reversed. + + Parameters: + list - list head + info - idx or data to find + + Returns: + Found item, or NULL if not found. +*/ + +list_head *core_list_reverse(list_head *list) { + list_head *next=NULL, *tmp; + while (list) { + tmp=list->next; + list->next=next; + next=list; + list=tmp; + } + return next; +} +/* Function: core_list_mergesort + Sort the list in place without recursion. + + Description: + Use mergesort, as for linked list this is a realistic solution. + Also, since this is aimed at embedded, care was taken to use iterative rather then recursive algorithm. + The sort can either return the list to original order (by idx) , + or use the data item to invoke other other algorithms and change the order of the list. + + Parameters: + list - list to be sorted. + cmp - cmp function to use + + Returns: + New head of the list. + + Note: + We have a special header for the list that will always be first, + but the algorithm could theoretically modify where the list starts. 
+ + */ +list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res) { + list_head *p, *q, *e, *tail; + ee_s32 insize, nmerges, psize, qsize, i; + + insize = 1; + + while (1) { + p = list; + list = NULL; + tail = NULL; + + nmerges = 0; /* count number of merges we do in this pass */ + + while (p) { + nmerges++; /* there exists a merge to be done */ + /* step `insize' places along from p */ + q = p; + psize = 0; + for (i = 0; i < insize; i++) { + psize++; + q = q->next; + if (!q) break; + } + + /* if q hasn't fallen off end, we have two lists to merge */ + qsize = insize; + + /* now we have two lists; merge them */ + while (psize > 0 || (qsize > 0 && q)) { + + /* decide whether next element of merge comes from p or q */ + if (psize == 0) { + /* p is empty; e must come from q. */ + e = q; q = q->next; qsize--; + } else if (qsize == 0 || !q) { + /* q is empty; e must come from p. */ + e = p; p = p->next; psize--; + } else if (cmp(p->info,q->info,res) <= 0) { + /* First element of p is lower (or same); e must come from p. */ + e = p; p = p->next; psize--; + } else { + /* First element of q is lower; e must come from q. */ + e = q; q = q->next; qsize--; + } + + /* add the next element to the merged list */ + if (tail) { + tail->next = e; + } else { + list = e; + } + tail = e; + } + + /* now p has stepped `insize' places along, and q has too */ + p = q; + } + + tail->next = NULL; + + /* If we have done only one merge, we're finished. */ + if (nmerges <= 1) /* allow for nmerges==0, the empty list case */ + return list; + + /* Otherwise repeat, merging lists twice the size */ + insize *= 2; + } +#if COMPILER_REQUIRES_SORT_RETURN + return list; +#endif +} +/* +Author : Shay Gal-On, EEMBC + +This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009 +All rights reserved. 
+ +EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the +CoreMark License that is distributed with the official EEMBC COREMARK Software release. +If you received this EEMBC CoreMark Software without the accompanying CoreMark License, +you must discontinue use and download the official release from www.coremark.org. + +Also, if you are publicly displaying scores generated from the EEMBC CoreMark software, +make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file. + +EEMBC +4354 Town Center Blvd. Suite 114-200 +El Dorado Hills, CA, 95762 +*/ +/* File: core_main.c + This file contains the framework to acquire a block of memory, seed initial parameters, tun t he benchmark and report the results. +*/ +//#include "coremark.h" + +/* Function: iterate + Run the benchmark for a specified number of iterations. + + Operation: + For each type of benchmarked algorithm: + a - Initialize the data block for the algorithm. + b - Execute the algorithm N times. + + Returns: + NULL. 
+*/ +static ee_u16 list_known_crc[] = {(ee_u16)0xd4b0,(ee_u16)0x3340,(ee_u16)0x6a79,(ee_u16)0xe714,(ee_u16)0xe3c1}; +static ee_u16 matrix_known_crc[] = {(ee_u16)0xbe52,(ee_u16)0x1199,(ee_u16)0x5608,(ee_u16)0x1fd7,(ee_u16)0x0747}; +static ee_u16 state_known_crc[] = {(ee_u16)0x5e47,(ee_u16)0x39bf,(ee_u16)0xe5a4,(ee_u16)0x8e3a,(ee_u16)0x8d84}; +void *iterate(void *pres) { + ee_u32 i; + ee_u16 crc; + core_results *res=(core_results *)pres; + ee_u32 iterations=res->iterations; + res->crc=0; + res->crclist=0; + res->crcmatrix=0; + res->crcstate=0; + + for (i=0; icrc=crcu16(crc,res->crc); + crc=core_bench_list(res,-1); + res->crc=crcu16(crc,res->crc); + if (i==0) res->crclist=res->crc; + } + return NULL; +} + +#if (SEED_METHOD==SEED_ARG) +ee_s32 get_seed_args(int i, int argc, char *argv[]); +#define get_seed(x) (ee_s16)get_seed_args(x,argc,argv) +#define get_seed_32(x) get_seed_args(x,argc,argv) +#else /* via function or volatile */ +ee_s32 get_seed_32(int i); +#define get_seed(x) (ee_s16)get_seed_32(x) +#endif + +#if (MEM_METHOD==MEM_STATIC) +ee_u8 static_memblk[TOTAL_DATA_SIZE]; +#endif +char *mem_name[3] = {"Static","Heap","Stack"}; +/* Function: main + Main entry routine for the benchmark. + This function is responsible for the following steps: + + 1 - Initialize input seeds from a source that cannot be determined at compile time. + 2 - Initialize memory block for use. + 3 - Run and time the benchmark. + 4 - Report results, testing the validity of the output if the seeds are known. + + Arguments: + 1 - first seed : Any value + 2 - second seed : Must be identical to first for iterations to be identical + 3 - third seed : Any value, should be at least an order of magnitude less then the input size, but bigger then 32. 
+ 4 - Iterations : Special, if set to 0, iterations will be automatically determined such that the benchmark will run between 10 to 100 secs + +*/ + +#if MAIN_HAS_NOARGC +MAIN_RETURN_TYPE main(void) { + int argc=0; + char *argv[1]; +#else +MAIN_RETURN_TYPE main(int argc, char *argv[]) { +#endif + ee_u16 i,j=0,num_algorithms=0; + ee_s16 known_id=-1,total_errors=0; + ee_u16 seedcrc=0; + CORE_TICKS total_time; + core_results results[MULTITHREAD]; +#if (MEM_METHOD==MEM_STACK) + ee_u8 stack_memblock[TOTAL_DATA_SIZE*MULTITHREAD]; +#endif + /* first call any initializations needed */ + portable_init(&(results[0].port), &argc, argv); + /* First some checks to make sure benchmark will run ok */ + if (sizeof(struct list_head_s)>128) { + ee_printf("list_head structure too big for comparable data!\n"); + return MAIN_RETURN_VAL; + } + results[0].seed1=get_seed(1); + results[0].seed2=get_seed(2); + results[0].seed3=get_seed(3); + results[0].iterations=get_seed_32(4); +#if CORE_DEBUG + results[0].iterations=1; +#endif + results[0].execs=get_seed_32(5); + if (results[0].execs==0) { /* if not supplied, execute all algorithms */ + results[0].execs=ALL_ALGORITHMS_MASK; + } + /* put in some default values based on one seed only for easy testing */ + if ((results[0].seed1==0) && (results[0].seed2==0) && (results[0].seed3==0)) { /* validation run */ + results[0].seed1=0; + results[0].seed2=0; + results[0].seed3=0x66; + } + if ((results[0].seed1==1) && (results[0].seed2==0) && (results[0].seed3==0)) { /* perfromance run */ + results[0].seed1=0x3415; + results[0].seed2=0x3415; + results[0].seed3=0x66; + } +#if (MEM_METHOD==MEM_STATIC) + results[0].memblock[0]=(void *)static_memblk; + results[0].size=TOTAL_DATA_SIZE; + results[0].err=0; + #if (MULTITHREAD>1) + #error "Cannot use a static data area with multiple contexts!" 
+ #endif +#elif (MEM_METHOD==MEM_MALLOC) + for (i=0 ; i1) + if (default_num_contexts>MULTITHREAD) { + default_num_contexts=MULTITHREAD; + } + for (i=0 ; i=0) { + for (i=0 ; i 0) + ee_printf("Iterations/Sec : %f\n",default_num_contexts*results[0].iterations/time_in_secs(total_time)); +#else + ee_printf("Total time (secs): %d\n",time_in_secs(total_time)); + if (time_in_secs(total_time) > 0) + ee_printf("Iterations/Sec : %d\n",default_num_contexts*results[0].iterations/time_in_secs(total_time)); +#endif + if (time_in_secs(total_time) < 10) { + ee_printf("ERROR! Must execute for at least 10 secs for a valid result!\n"); + total_errors++; + } + + ee_printf("Iterations : %u\n",(ee_u32)default_num_contexts*results[0].iterations); + ee_printf("Compiler version : %s\n",COMPILER_VERSION); + ee_printf("Compiler flags : %s\n",COMPILER_FLAGS); +#if (MULTITHREAD>1) + ee_printf("Parallel %s : %d\n",PARALLEL_METHOD,default_num_contexts); +#endif + ee_printf("Memory location : %s\n",MEM_LOCATION); + /* output for verification */ + ee_printf("seedcrc : 0x%04x\n",seedcrc); + if (results[0].execs & ID_LIST) + for (i=0 ; i1) + ee_printf(" / %d:%s",default_num_contexts,PARALLEL_METHOD); +#endif + ee_printf("\n"); + } +#endif + } + if (total_errors>0) + ee_printf("Errors detected\n"); + if (total_errors<0) + ee_printf("Cannot validate operation for these seed values, please compare with results on a known platform.\n"); + +#if (MEM_METHOD==MEM_MALLOC) + for (i=0 ; i>(from)) & (~(0xffffffff << (to)))) + +#if CORE_DEBUG +void printmat(MATDAT *A, ee_u32 N, char *name) { + ee_u32 i,j; + ee_printf("Matrix %s [%dx%d]:\n",name,N,N); + for (i=0; i N times, + changing the matrix values slightly by a constant amount each time. 
+*/ +ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc) { + ee_u32 N=p->N; + MATRES *C=p->C; + MATDAT *A=p->A; + MATDAT *B=p->B; + MATDAT val=(MATDAT)seed; + + crc=crc16(matrix_test(N,C,A,B,val),crc); /* fold the matrix_test result into the running crc */ + + return crc; +} + +/* Function: matrix_test + Perform matrix manipulation. + + Parameters: + N - Dimensions of the matrix. + C - memory for result matrix. + A - input matrix + B - operator matrix (not changed during operations) + + Returns: + A CRC value that captures all results calculated in the function. + In particular, crc of the value calculated on the result matrix + after each step by matrix_sum. + + Operation: + + 1 - Add a constant value to all elements of a matrix. + 2 - Multiply a matrix by a constant. + 3 - Multiply a matrix by a vector. + 4 - Multiply a matrix by a matrix (matrix_mul_matrix, then matrix_mul_matrix_bitextract). + 5 - Add the negated constant value to all elements of the matrix. + + After the last step, matrix A is back to original contents. +*/ +ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val) { + ee_u16 crc=0; + MATDAT clipval=matrix_big(val); + + matrix_add_const(N,A,val); /* make sure data changes */ +#if CORE_DEBUG + printmat(A,N,"matrix_add_const"); +#endif + matrix_mul_const(N,C,A,val); + crc=crc16(matrix_sum(N,C,clipval),crc); +#if CORE_DEBUG + printmatC(C,N,"matrix_mul_const"); +#endif + matrix_mul_vect(N,C,A,B); + crc=crc16(matrix_sum(N,C,clipval),crc); +#if CORE_DEBUG + printmatC(C,N,"matrix_mul_vect"); +#endif + matrix_mul_matrix(N,C,A,B); + crc=crc16(matrix_sum(N,C,clipval),crc); +#if CORE_DEBUG + printmatC(C,N,"matrix_mul_matrix"); +#endif + matrix_mul_matrix_bitextract(N,C,A,B); + crc=crc16(matrix_sum(N,C,clipval),crc); +#if CORE_DEBUG + printmatC(C,N,"matrix_mul_matrix_bitextract"); +#endif + + matrix_add_const(N,A,-val); /* return matrix to initial value */ + return crc; +} + +/* Function : matrix_init + Initialize the memory block for matrix benchmarking. + + Parameters: + blksize - Size of memory to be initialized. + memblk - Pointer to memory block. 
+ seed - Actual values chosen depend on the seed parameter. + p - pointers to containing initialized matrixes. + + Returns: + Matrix dimensions. + + Note: + The seed parameter MUST be supplied from a source that cannot be determined at compile time +*/ +ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p) { + ee_u32 N=0; + MATDAT *A; + MATDAT *B; + ee_s32 order=1; + MATDAT val; + ee_u32 i=0,j=0; + if (seed==0) + seed=1; + while (jA=A; + p->B=B; + p->C=(MATRES *)align_mem(B+N*N); + p->N=N; +#if CORE_DEBUG + printmat(A,N,"A"); + printmat(B,N,"B"); +#endif + return N; +} + +/* Function: matrix_sum + Calculate a function that depends on the values of elements in the matrix. + + For each element, accumulate into a temporary variable. + + As long as this value is under the parameter clipval, + add 1 to the result if the element is bigger then the previous. + + Otherwise, reset the accumulator and add 10 to the result. +*/ +ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval) { + MATRES tmp=0,prev=0,cur=0; + ee_s16 ret=0; + ee_u32 i,j; + for (i=0; iclipval) { + ret+=10; + tmp=0; + } else { + ret += (cur>prev) ? 1 : 0; + } + prev=cur; + } + } + return ret; +} + +/* Function: matrix_mul_const + Multiply a matrix by a constant. + This could be used as a scaler for instance. +*/ +void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val) { + ee_u32 i,j; + for (i=0; i0) { + for(i=0;i>3) & 0x3]; + next=4; + break; + case 3: /* float */ + case 4: /* float */ + buf=floatpat[(seed>>3) & 0x3]; + next=8; + break; + case 5: /* scientific */ + case 6: /* scientific */ + buf=scipat[(seed>>3) & 0x3]; + next=8; + break; + case 7: /* invalid */ + buf=errpat[(seed>>3) & 0x3]; + next=8; + break; + default: /* Never happen, just to make some compilers happy */ + break; + } + } + size++; + while (total='0') & (c<='9')) ? 1 : 0; + return retval; +} + +/* Function: core_state_transition + Actual state machine. 
+ + The state machine will continue scanning until either: + 1 - an invalid input is detected. + 2 - a valid number has been detected. + + The input pointer is updated to point to the end of the token, and the end state is returned (either specific format determined or invalid). +*/ + +enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count) { + ee_u8 *str=*instr; + ee_u8 NEXT_SYMBOL; + enum CORE_STATE state=CORE_START; + for( ; *str && state != CORE_INVALID; str++ ) { + NEXT_SYMBOL = *str; + if (NEXT_SYMBOL==',') /* end of this input */ { + str++; + break; + } + switch(state) { + case CORE_START: + if(ee_isdigit(NEXT_SYMBOL)) { + state = CORE_INT; + } + else if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) { + state = CORE_S1; + } + else if( NEXT_SYMBOL == '.' ) { + state = CORE_FLOAT; + } + else { + state = CORE_INVALID; + transition_count[CORE_INVALID]++; + } + transition_count[CORE_START]++; + break; + case CORE_S1: + if(ee_isdigit(NEXT_SYMBOL)) { + state = CORE_INT; + transition_count[CORE_S1]++; + } + else if( NEXT_SYMBOL == '.' ) { + state = CORE_FLOAT; + transition_count[CORE_S1]++; + } + else { + state = CORE_INVALID; + transition_count[CORE_S1]++; + } + break; + case CORE_INT: + if( NEXT_SYMBOL == '.' 
) { + state = CORE_FLOAT; + transition_count[CORE_INT]++; + } + else if(!ee_isdigit(NEXT_SYMBOL)) { + state = CORE_INVALID; + transition_count[CORE_INT]++; + } + break; + case CORE_FLOAT: + if( NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e' ) { + state = CORE_S2; + transition_count[CORE_FLOAT]++; + } + else if(!ee_isdigit(NEXT_SYMBOL)) { + state = CORE_INVALID; + transition_count[CORE_FLOAT]++; + } + break; + case CORE_S2: + if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) { + state = CORE_EXPONENT; + transition_count[CORE_S2]++; + } + else { + state = CORE_INVALID; + transition_count[CORE_S2]++; + } + break; + case CORE_EXPONENT: + if(ee_isdigit(NEXT_SYMBOL)) { + state = CORE_SCIENTIFIC; + transition_count[CORE_EXPONENT]++; + } + else { + state = CORE_INVALID; + transition_count[CORE_EXPONENT]++; + } + break; + case CORE_SCIENTIFIC: + if(!ee_isdigit(NEXT_SYMBOL)) { + state = CORE_INVALID; + transition_count[CORE_INVALID]++; + } + break; + default: + break; + } + } + *instr=str; /* hand the position where scanning stopped back to the caller */ + return state; +} +/* +Author : Shay Gal-On, EEMBC + +This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009 +All rights reserved. + +EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the +CoreMark License that is distributed with the official EEMBC COREMARK Software release. +If you received this EEMBC CoreMark Software without the accompanying CoreMark License, +you must discontinue use and download the official release from www.coremark.org. + +Also, if you are publicly displaying scores generated from the EEMBC CoreMark software, +make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file. + +EEMBC +4354 Town Center Blvd. Suite 114-200 +El Dorado Hills, CA, 95762 +*/ +//#include "coremark.h" +/* Function: get_seed + Get a value that cannot be determined at compile time. 
+ + Since different embedded systems and compilers are used, 3 different methods are provided: + 1 - Using a volatile variable. This method is only valid if the compiler is forced to generate code that + reads the value of a volatile variable from memory at run time. + Please note, if using this method, you would need to modify core_portme.c to generate training profile. + 2 - Command line arguments. This is the preferred method if command line arguments are supported. + 3 - System function. If none of the first 2 methods is available on the platform, + a system function which is not a stub can be used. + + e.g. read the value on GPIO pins connected to switches, or invoke special simulator functions. +*/ +#if (SEED_METHOD==SEED_VOLATILE) + extern volatile ee_s32 seed1_volatile; + extern volatile ee_s32 seed2_volatile; + extern volatile ee_s32 seed3_volatile; + extern volatile ee_s32 seed4_volatile; + extern volatile ee_s32 seed5_volatile; + ee_s32 get_seed_32(int i) { + ee_s32 retval; + switch (i) { + case 1: + retval=seed1_volatile; + break; + case 2: + retval=seed2_volatile; + break; + case 3: + retval=seed3_volatile; + break; + case 4: + retval=seed4_volatile; + break; + case 5: + retval=seed5_volatile; + break; + default: + retval=0; + break; + } + return retval; + } +#elif (SEED_METHOD==SEED_ARG) +/* Function: parseval + Convert a seed argument string to a 32b value. + + Accepts an optional leading '-', then either decimal digits or a 0x/0X prefix followed by hex digits (either case), + and an optional trailing K (x1024) or M (x1024*1024) qualifier. + Parsing stops at the first character that does not fit; no overflow checking is done. +*/ +ee_s32 parseval(char *valstring) { + ee_s32 retval=0; + ee_s32 neg=1; + int hexmode=0; + if (*valstring == '-') { + neg=-1; + valstring++; + } + if ((valstring[0] == '0') && ((valstring[1] == 'x') || (valstring[1] == 'X'))) { + hexmode=1; + valstring+=2; + } + /* first look for digits */ + if (hexmode) { + while (((*valstring >= '0') && (*valstring <= '9')) || ((*valstring >= 'a') && (*valstring <= 'f')) || ((*valstring >= 'A') && (*valstring <= 'F'))) { + ee_s32 digit=*valstring-'0'; + if (digit>9) /* a letter: fold to lower case so A-F and a-f map to 10..15 */ + digit=10+(*valstring|0x20)-'a'; + retval*=16; + retval+=digit; + valstring++; + } + } else { + while ((*valstring >= '0') && (*valstring <= '9')) { + ee_s32 digit=*valstring-'0'; + retval*=10; + retval+=digit; + valstring++; + } + } + /* now add 
qualifiers */ + if (*valstring=='K') + retval*=1024; + if (*valstring=='M') + retval*=1024*1024; + + retval*=neg; + return retval; +} + +ee_s32 get_seed_args(int i, int argc, char *argv[]) { + if (argc>i) + return parseval(argv[i]); + return 0; +} + +#elif (SEED_METHOD==SEED_FUNC) +/* If using OS based function, you must define and implement the functions below in core_portme.h and core_portme.c ! */ +ee_s32 get_seed_32(int i) { + ee_s32 retval; + switch (i) { + case 1: + retval=portme_sys1(); + break; + case 2: + retval=portme_sys2(); + break; + case 3: + retval=portme_sys3(); + break; + case 4: + retval=portme_sys4(); + break; + case 5: + retval=portme_sys5(); + break; + default: + retval=0; + break; + } + return retval; +} +#endif + +/* Function: crc* + Service functions to calculate 16b CRC code. + +*/ +ee_u16 crcu8(ee_u8 data, ee_u16 crc ) +{ + ee_u8 i=0,x16=0,carry=0; + + for (i = 0; i < 8; i++) + { + x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1)); + data >>= 1; + + if (x16 == 1) + { + crc ^= 0x4002; + carry = 1; + } + else + carry = 0; + crc >>= 1; + if (carry) + crc |= 0x8000; + else + crc &= 0x7fff; + } + return crc; +} +ee_u16 crcu16(ee_u16 newval, ee_u16 crc) { + crc=crcu8( (ee_u8) (newval) ,crc); + crc=crcu8( (ee_u8) ((newval)>>8) ,crc); + return crc; +} +ee_u16 crcu32(ee_u32 newval, ee_u16 crc) { + crc=crc16((ee_s16) newval ,crc); + crc=crc16((ee_s16) (newval>>16) ,crc); + return crc; +} +ee_u16 crc16(ee_s16 newval, ee_u16 crc) { + return crcu16((ee_u16)newval, crc); +} + +ee_u8 check_data_types() { + ee_u8 retval=0; + if (sizeof(ee_u8) != 1) { + ee_printf("ERROR: ee_u8 is not an 8b datatype!\n"); + retval++; + } + if (sizeof(ee_u16) != 2) { + ee_printf("ERROR: ee_u16 is not a 16b datatype!\n"); + retval++; + } + if (sizeof(ee_s16) != 2) { + ee_printf("ERROR: ee_s16 is not a 16b datatype!\n"); + retval++; + } + if (sizeof(ee_s32) != 4) { + ee_printf("ERROR: ee_s32 is not a 32b datatype!\n"); + retval++; + } + if (sizeof(ee_u32) != 4) { + 
ee_printf("ERROR: ee_u32 is not a 32b datatype!\n"); + retval++; + } + if (sizeof(ee_ptr_int) != sizeof(int *)) { + ee_printf("ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n"); + retval++; + } + if (retval>0) { + ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n"); + } + return retval; +} +/* + File : core_portme.c +*/ +/* + Author : Shay Gal-On, EEMBC + Legal : TODO! +*/ +#include +#include +//#include "coremark.h" + +#if VALIDATION_RUN + volatile ee_s32 seed1_volatile=0x3415; + volatile ee_s32 seed2_volatile=0x3415; + volatile ee_s32 seed3_volatile=0x66; +#endif +#if PERFORMANCE_RUN + volatile ee_s32 seed1_volatile=0x0; + volatile ee_s32 seed2_volatile=0x0; + volatile ee_s32 seed3_volatile=0x66; +#endif +#if PROFILE_RUN + volatile ee_s32 seed1_volatile=0x8; + volatile ee_s32 seed2_volatile=0x8; + volatile ee_s32 seed3_volatile=0x8; +#endif +// volatile ee_s32 seed4_volatile=ITERATIONS; + volatile ee_s32 seed4_volatile=1; + volatile ee_s32 seed5_volatile=0; +/* Porting : Timing functions + How to capture time and convert to seconds must be ported to whatever is supported by the platform. + e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc. + Sample implementation for standard time.h and windows.h definitions included. +*/ +/* Define : TIMER_RES_DIVIDER + Divider to trade off timer resolution and total time that can be measured. + + Use lower values to increase resolution, but make sure that overflow does not occur. + If there are issues with the return value overflowing, increase this value. 
+ */ +//#define NSECS_PER_SEC CLOCKS_PER_SEC +#define NSECS_PER_SEC 1000000000 +#define CORETIMETYPE clock_t +//#define GETMYTIME(_t) (*_t=clock()) +#define GETMYTIME(_t) (*_t=0) +#define MYTIMEDIFF(fin,ini) ((fin)-(ini)) +#define TIMER_RES_DIVIDER 1 +#define SAMPLE_TIME_IMPLEMENTATION 1 +#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) + +/** Define Host specific (POSIX), or target specific global time variables. */ +static CORETIMETYPE start_time_val, stop_time_val; + +/* Function : start_time + This function will be called right before starting the timed portion of the benchmark. + + Implementation may be capturing a system timer (as implemented in the example code) + or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. + + This port stores a 32b sample of the mcycle CSR in start_time_val. +*/ +void start_time(void) { +uint32_t mcyclel; + asm volatile ("csrr %0,mcycle" : "=r" (mcyclel) ); /* sample the mcycle CSR */ + start_time_val = mcyclel; +} +/* Function : stop_time + This function will be called right after ending the timed portion of the benchmark. + + Implementation may be capturing a system timer (as implemented in the example code) + or other system parameters - e.g. reading the current value of cpu cycles counter. + + This port stores a 32b sample of the mcycle CSR in stop_time_val. +*/ +void stop_time(void) { +uint32_t mcyclel; + asm volatile ("csrr %0,mcycle" : "=r" (mcyclel) ); /* sample the mcycle CSR */ + stop_time_val = mcyclel; +} +/* Function : get_time + Return an abstract "ticks" number that signifies time on the system. + + Actual value returned may be cpu cycles, milliseconds or any other value, + as long as it can be converted to seconds by time_in_secs. + This methodology is taken to accommodate any hardware or simulated platform. + The sample implementation returns millisecs by default, + and the resolution is controlled by TIMER_RES_DIVIDER. + This port returns stop_time_val minus start_time_val, i.e. the difference of the two mcycle samples. +*/ +CORE_TICKS get_time(void) { + CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); + return elapsed; +} +/* Function : time_in_secs + Convert the value returned by get_time to seconds. 
+ + The secs_ret type is used to accommodate systems with no support for floating point. + Default implementation implemented by the EE_TICKS_PER_SEC macro above. +*/ +secs_ret time_in_secs(CORE_TICKS ticks) { + secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; + return retval; +} + +ee_u32 default_num_contexts=1; + +/* Function : portable_init + Target specific initialization code + Test for some common mistakes. + + Prints an error if ee_ptr_int is not pointer sized or ee_u32 is not 4 bytes, then sets p->portable_id to 1. + The argc and argv parameters are not used by this port. +*/ +void portable_init(core_portable *p, int *argc, char *argv[]) +{ + if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) { + ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n"); + } + if (sizeof(ee_u32) != 4) { + ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); + } + p->portable_id=1; +} +/* Function : portable_fini + Target specific final code + + This port only clears p->portable_id. +*/ +void portable_fini(core_portable *p) +{ + p->portable_id=0; +} + + +#include + +// Special address. Writing (store byte instruction) to this address +// causes the simulator to write to the console. 
+volatile char __whisper_console_io = 0; + + +// Store one character to the STDOUT address; returns the character. +static int +whisperPutc(char c) +{ +// __whisper_console_io = c; +// __whisper_console_io = c; + *(volatile char*)(STDOUT) = c; + return c; +} + + +// Write a NUL-terminated string; returns the number of characters written +// so that the printf character count stays accurate. +static int +whisperPuts(const char* s) +{ + int count = 0; + + while (*s) + { + whisperPutc(*s++); + ++count; + } + + return count; +} + + +// Print an unsigned value in decimal; returns the number of characters written. +static int +whisperPrintUnsigned(unsigned value) +{ + char buffer[20]; + int charCount = 0; + + do + { + char c = '0' + (value % 10); + value = value / 10; + buffer[charCount++] = c; + } + while (value); + + // Digits were produced least-significant first; emit them in reverse. + char* p = buffer + charCount - 1; + for (int i = 0; i < charCount; ++i) + whisperPutc(*p--); + + return charCount; +} + + +// Print a signed value in decimal; returns the number of characters written, +// including a leading '-' for negative values. +static int +whisperPrintDecimal(int value) +{ + if (value < 0) + { + whisperPutc('-'); + // Negate in unsigned arithmetic so that INT_MIN does not overflow. + return 1 + whisperPrintUnsigned(0u - (unsigned) value); + } + + return whisperPrintUnsigned((unsigned) value); +} + + +// Print value in the given base (8, 10 or 16). Bases 8 and 16 treat the value +// as unsigned. Returns the number of characters written, or -1 for any other base. +static int +whisperPrintInt(int value, int base) +{ + if (base == 10) + return whisperPrintDecimal(value); + + char buffer[20]; + int charCount = 0; + + unsigned uu = value; + + if (base == 8) + { + do + { + char c = '0' + (uu & 7); + buffer[charCount++] = c; + uu >>= 3; + } + while (uu); + } + else if (base == 16) + { + do + { + int digit = uu & 0xf; + char c = digit < 10 ? '0' + digit : 'a' + (digit - 10); // 10..15 map to 'a'..'f' + buffer[charCount++] = c; + uu >>= 4; + } + while (uu); + } + else + return -1; + + char* p = buffer + charCount - 1; + for (unsigned i = 0; i < charCount; ++i) + whisperPutc(*p--); + + return charCount; +} + + +// Minimal printf core: handles %d %u %x %X %o %c %s and %%. The '-', '0', '*' +// and numeric width fields are parsed but ignored. Returns the number of +// characters written. +int +whisperPrintfImpl(const char* format, va_list ap) +{ + int count = 0; // Printed character count + + for (const char* fp = format; *fp; fp++) + { + if (*fp != '%') + { + whisperPutc(*fp); + ++count; + continue; + } + + ++fp; // Skip % + + if (*fp == 0) + break; + + if (*fp == '%') + { + whisperPutc('%'); + ++count; + continue; + } + + if (*fp == '-') + { + fp++; // Pad right not yet implemented. + } + + while (*fp == '0') + { + fp++; // Pad zero not yet implemented. + } + + if (*fp == '*') + { + int width = va_arg(ap, int); + fp++; // Width not yet implemented. 
+ } + else + { + while (*fp >= '0' && *fp <= '9') + ++fp; // Width not yet implemented. + } + + switch (*fp) + { + case 'd': + count += whisperPrintDecimal(va_arg(ap, int)); + break; + + case 'u': + count += whisperPrintUnsigned(va_arg(ap, unsigned)); + break; + + case 'x': + case 'X': + count += whisperPrintInt(va_arg(ap, int), 16); + break; + + case 'o': + count += whisperPrintInt(va_arg(ap, int), 8); + break; + + case 'c': + whisperPutc(va_arg(ap, int)); + ++count; + break; + + case 's': + count += whisperPuts(va_arg(ap, char*)); + break; + } + } + + return count; +} + + +int +whisperPrintf(const char* format, ...) +{ + va_list ap; + + va_start(ap, format); + int code = whisperPrintfImpl(format, ap); + va_end(ap); + + return code; +} + + +int +printf(const char* format, ...) +{ + va_list ap; + + va_start(ap, format); + int code = whisperPrintfImpl(format, ap); + va_end(ap); + + return code; +} + + +void* memset(void* s, int c, size_t n) +{ + asm("mv t0, a0"); + asm("add a2, a2, a0"); // end = s + n + asm(".memset_loop: bge a0, a2, .memset_end"); + asm("sb a1, 0(a0)"); + asm("addi a0, a0, 1"); + asm("j .memset_loop"); + asm(".memset_end:"); + asm("mv a0, t0"); + asm("jr ra"); +} diff --git a/testbench/asm/cmark_dccm.c b/testbench/asm/cmark_dccm.c new file mode 120000 index 0000000..712dfb9 --- /dev/null +++ b/testbench/asm/cmark_dccm.c @@ -0,0 +1 @@ +cmark.c \ No newline at end of file diff --git a/testbench/asm/cmark_dccm.ld b/testbench/asm/cmark_dccm.ld new file mode 120000 index 0000000..ae51d23 --- /dev/null +++ b/testbench/asm/cmark_dccm.ld @@ -0,0 +1 @@ +hello_world_dccm.ld \ No newline at end of file diff --git a/testbench/asm/hello_world.s b/testbench/asm/hello_world.s new file mode 100644 index 0000000..5f2c90a --- /dev/null +++ b/testbench/asm/hello_world.s @@ -0,0 +1,67 @@ + + +.global _start +_start: + csrrw x2, 0xb02, x3 + + + lui x5, 974848 + ori x5, x5, 0 + csrrw x2, 0x305, x5 + + + lui x6, 382293 + ori x6, x6, 1365 + csrrw x1, 0x7c0, x6 
+ + + + + lui x5, 0 + ori x5, x5, 0 + csrrw x2, 0x7f8, x5 + + + + + lui x5, 0 + ori x5, x5, 0 + csrrw x2, 0x7f9, x5 + + + addi x0, x0, 0 + lui x11, 853376 + ori x9, x0, 'H' + sw x9, 0 (x11) + ori x9, x0, 'E' + sw x9, 0 (x11) + ori x9, x0, 'L' + sw x9, 0 (x11) + sw x9, 0 (x11) + ori x9, x0, 'O' + sw x9, 0 (x11) + ori x9, x0, ' ' + sw x9, 0 (x11) + addi x9, x0, 'W' + sw x9, 0 (x11) + ori x9, x0, 'O' + sw x9, 0 (x11) + ori x9, x0, 'R' + sw x9, 0 (x11) + ori x9, x0, 'L' + sw x9, 0 (x11) + ori x9, x0, 'D' + sw x9, 0 (x11) + ori x9, x0, '!' + sw x9, 0 (x11) + ori x9, x0, 255 + sw x9, 0 (x11) + addi x1,x0,0 + +finish: + addi x1,x1,1 + jal x0, finish; + addi x0,x0,0 + addi x0,x0,0 + addi x0,x0,0 + addi x0,x0,0 diff --git a/testbench/asm/hello_world2.s b/testbench/asm/hello_world2.s new file mode 100644 index 0000000..0097c63 --- /dev/null +++ b/testbench/asm/hello_world2.s @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +// Assembly code for Hello World +// Not using only ALU ops for creating the string + + +#include "defines.h" + +#define STDOUT 0xd0580000 + + +// Code to execute +.section .text +.global _start +_start: + + // Clear minstret + csrw minstret, zero + csrw minstreth, zero + + // Set up MTVEC - not expecting to use it though + li x1, RV_ICCM_SADR + csrw mtvec, x1 + + + // Enable Caches in MRAC + li x1, 0x5f555555 + csrw 0x7c0, x1 + + // Load string from hw_data + // and write to stdout address + + li x3, STDOUT + la x4, hw_data + +loop: + lb x5, 0(x4) + sb x5, 0(x3) + addi x4, x4, 1 + bnez x5, loop // falls through only after the terminating 0 byte has been stored too + +// Write 0xff to STDOUT for TB to terminate test. +_finish: + li x3, STDOUT + addi x5, x0, 0xff + sb x5, 0(x3) + beq x0, x0, _finish +.rept 100 + nop +.endr + +.data +hw_data: +.ascii "----------------------------------\n" +.ascii "Hello World from SweRV EL2 @WDC !!\n" +.ascii "----------------------------------\n" +.byte 0 diff --git a/testbench/asm/hello_world_dccm.ld b/testbench/asm/hello_world_dccm.ld new file mode 100644 index 0000000..eea3cbe --- /dev/null +++ b/testbench/asm/hello_world_dccm.ld @@ -0,0 +1,12 @@ + +OUTPUT_ARCH( "riscv" ) +ENTRY(_start) + +SECTIONS { + .text : { *(.text*) } + _end = .; + . = 0xfff8; + .data.ctl : { LONG(0xf0040000); LONG(STACK) } + . 
= 0xf0040000; + .data : AT(0x10000) { *(.*data) *(.rodata*) STACK = ALIGN(16) + 0x8000;} +} diff --git a/testbench/asm/hello_world_dccm.s b/testbench/asm/hello_world_dccm.s new file mode 120000 index 0000000..2c91c93 --- /dev/null +++ b/testbench/asm/hello_world_dccm.s @@ -0,0 +1 @@ +hello_world2.s \ No newline at end of file diff --git a/testbench/flist b/testbench/flist new file mode 100644 index 0000000..fb1cd8f --- /dev/null +++ b/testbench/flist @@ -0,0 +1,52 @@ ++libext+.v+.sv +//-incdir $RV_ROOT/design/lib +//-incdir $RV_ROOT/design/include +//-incdir $RV_ROOT/design/dmi +//-incdir $SYNOPSYS_SYN_ROOT/dw/sim_ver +//-y $SYNOPSYS_SYN_ROOT/dw/sim_ver +$RV_ROOT/design/el2_swerv_wrapper.sv +$RV_ROOT/design/el2_mem.sv +$RV_ROOT/design/el2_pic_ctrl.sv +$RV_ROOT/design/el2_swerv.sv +$RV_ROOT/design/el2_dma_ctrl.sv +$RV_ROOT/design/ifu/el2_ifu_aln_ctl.sv +$RV_ROOT/design/ifu/el2_ifu_compress_ctl.sv +$RV_ROOT/design/ifu/el2_ifu_ifc_ctl.sv +$RV_ROOT/design/ifu/el2_ifu_bp_ctl.sv +$RV_ROOT/design/ifu/el2_ifu_ic_mem.sv +$RV_ROOT/design/ifu/el2_ifu_mem_ctl.sv +$RV_ROOT/design/ifu/el2_ifu_iccm_mem.sv +$RV_ROOT/design/ifu/el2_ifu.sv +$RV_ROOT/design/dec/el2_dec_decode_ctl.sv +$RV_ROOT/design/dec/el2_dec_gpr_ctl.sv +$RV_ROOT/design/dec/el2_dec_ib_ctl.sv +$RV_ROOT/design/dec/el2_dec_tlu_ctl.sv +$RV_ROOT/design/dec/el2_dec_trigger.sv +$RV_ROOT/design/dec/el2_dec.sv +$RV_ROOT/design/exu/el2_exu_alu_ctl.sv +//$RV_ROOT/design/exu/el2_exu_br_ctl.sv +$RV_ROOT/design/exu/el2_exu_mul_ctl.sv +$RV_ROOT/design/exu/el2_exu_div_ctl.sv +$RV_ROOT/design/exu/el2_exu.sv +$RV_ROOT/design/lsu/el2_lsu.sv +$RV_ROOT/design/lsu/el2_lsu_clkdomain.sv +$RV_ROOT/design/lsu/el2_lsu_addrcheck.sv +$RV_ROOT/design/lsu/el2_lsu_lsc_ctl.sv +$RV_ROOT/design/lsu/el2_lsu_stbuf.sv +$RV_ROOT/design/lsu/el2_lsu_bus_buffer.sv +$RV_ROOT/design/lsu/el2_lsu_bus_intf.sv +$RV_ROOT/design/lsu/el2_lsu_ecc.sv +$RV_ROOT/design/lsu/el2_lsu_dccm_mem.sv +$RV_ROOT/design/lsu/el2_lsu_dccm_ctl.sv 
+$RV_ROOT/design/lsu/el2_lsu_trigger.sv +$RV_ROOT/design/dbg/el2_dbg.sv +$RV_ROOT/design/dmi/dmi_wrapper.v +$RV_ROOT/design/dmi/dmi_jtag_to_core_sync.v +$RV_ROOT/design/dmi/rvjtag_tap.v +$RV_ROOT/design/lib/el2_lib.sv +-v $RV_ROOT/design/lib/beh_lib.sv +-v $RV_ROOT/design/lib/mem_lib.sv +-y $RV_ROOT/design/lib +//$RV_ROOT/design/lib/ahb_to_svci.sv +//$RV_ROOT/design/lib/ahb_to_axi4.sv +//$RV_ROOT/design/lib/axi4_to_ahb.sv diff --git a/testbench/hex/data.hex b/testbench/hex/data.hex new file mode 100755 index 0000000..b71d0ef --- /dev/null +++ b/testbench/hex/data.hex @@ -0,0 +1,8 @@ +@00001000 +2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D +2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D +2D 2D 0A 48 65 6C 6C 6F 20 57 6F 72 6C 64 20 66 +72 6F 6D 20 53 77 65 52 56 20 45 4C 32 20 40 57 +44 43 20 21 21 0A 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D +2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D 2D +2D 2D 2D 2D 2D 2D 2D 2D 0A 00 diff --git a/testbench/hex/program.hex b/testbench/hex/program.hex new file mode 100644 index 0000000..0e2bf73 --- /dev/null +++ b/testbench/hex/program.hex @@ -0,0 +1,18 @@ +@00000000 +73 10 20 B0 73 10 20 B8 B7 00 00 EE 73 90 50 30 +B7 50 55 5F 93 80 50 55 73 90 00 7C B7 01 58 D0 +17 12 00 00 13 02 02 FE 83 02 02 00 23 80 51 00 +05 02 E3 9B 02 FE B7 01 58 D0 93 02 F0 0F 23 80 +51 00 E3 0A 00 FE 01 00 01 00 01 00 01 00 01 00 +01 00 01 00 01 00 01 00 01 00 01 00 01 00 01 00 +01 00 01 00 01 00 01 00 01 00 01 00 01 00 01 00 +01 00 01 00 01 00 01 00 01 00 01 00 01 00 01 00 +01 00 01 00 01 00 01 00 01 00 01 00 01 00 01 00 +01 00 01 00 01 00 01 00 01 00 01 00 01 00 01 00 +01 00 01 00 01 00 01 00 01 00 01 00 01 00 01 00 +01 00 01 00 01 00 01 00 01 00 01 00 01 00 01 00 +01 00 01 00 01 00 01 00 01 00 01 00 01 00 01 00 +01 00 01 00 01 00 01 00 01 00 01 00 01 00 01 00 +01 00 01 00 01 00 01 00 01 00 01 00 01 00 01 00 +01 00 01 00 01 00 01 00 01 00 01 00 01 00 01 00 +01 00 01 00 01 00 01 00 01 00 01 00 01 00 diff --git a/testbench/input.tcl b/testbench/input.tcl new 
file mode 100644 index 0000000..3de45c5 --- /dev/null +++ b/testbench/input.tcl @@ -0,0 +1,4 @@ +database -open waves -into waves.shm -default +probe -create tb_top -depth all -database waves +run +exit diff --git a/testbench/link.ld b/testbench/link.ld new file mode 100644 index 0000000..de779f8 --- /dev/null +++ b/testbench/link.ld @@ -0,0 +1,12 @@ + +OUTPUT_ARCH( "riscv" ) +ENTRY(_start) + +SECTIONS +{ + . = 0; + .text : { *(.text*) } + _end = .; + . = 0x10000; + .data : ALIGN(0x800) { *(.*data) *(.rodata*) STACK = ALIGN(16) + 0x8000; } +} diff --git a/testbench/tb_top.sv b/testbench/tb_top.sv new file mode 100644 index 0000000..d62c800 --- /dev/null +++ b/testbench/tb_top.sv @@ -0,0 +1,882 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +`ifndef VERILATOR +module tb_top; +`else +module tb_top ( input bit core_clk ); +`endif + +`ifndef VERILATOR + bit core_clk; +`endif + logic rst_l; + logic nmi_int; + + logic [31:0] reset_vector; + logic [31:0] nmi_vector; + logic [31:1] jtag_id; + + logic [31:0] ic_haddr ; + logic [2:0] ic_hburst ; + logic ic_hmastlock ; + logic [3:0] ic_hprot ; + logic [2:0] ic_hsize ; + logic [1:0] ic_htrans ; + logic ic_hwrite ; + logic [63:0] ic_hrdata ; + logic ic_hready ; + logic ic_hresp ; + + logic [31:0] lsu_haddr ; + logic [2:0] lsu_hburst ; + logic lsu_hmastlock ; + logic [3:0] lsu_hprot ; + logic [2:0] lsu_hsize ; + logic [1:0] lsu_htrans ; + logic lsu_hwrite ; + logic [63:0] lsu_hrdata ; + logic [63:0] lsu_hwdata ; + logic lsu_hready ; + logic lsu_hresp ; + + logic [31:0] sb_haddr ; + logic [2:0] sb_hburst ; + logic sb_hmastlock ; + logic [3:0] sb_hprot ; + logic [2:0] sb_hsize ; + logic [1:0] sb_htrans ; + logic sb_hwrite ; + + logic [63:0] sb_hrdata ; + logic [63:0] sb_hwdata ; + logic sb_hready ; + logic sb_hresp ; + + logic [31:0] trace_rv_i_insn_ip; + logic [31:0] trace_rv_i_address_ip; + logic [1:0] trace_rv_i_valid_ip; + logic [1:0] trace_rv_i_exception_ip; + logic [4:0] trace_rv_i_ecause_ip; + logic [2:0] trace_rv_i_interrupt_ip; + logic [31:0] trace_rv_i_tval_ip; + + logic o_debug_mode_status; + + + logic jtag_tdo; + logic o_cpu_halt_ack; + logic o_cpu_halt_status; + logic o_cpu_run_ack; + + logic mailbox_write; + logic [63:0] dma_hrdata ; + logic [63:0] dma_hwdata ; + logic dma_hready ; + logic dma_hresp ; + + logic mpc_debug_halt_req; + logic mpc_debug_run_req; + logic mpc_reset_run_req; + logic mpc_debug_halt_ack; + logic mpc_debug_run_ack; + logic debug_brkpt_status; + + bit [31:0] cycleCnt ; + logic mailbox_data_val; + + wire dma_hready_out; + int commit_count; + + logic wb_valid[1:0]; + logic [4:0] wb_dest[1:0]; + logic [31:0] wb_data[1:0]; + +`ifdef RV_BUILD_AXI4 + //-------------------------- LSU AXI signals-------------------------- + // AXI 
Write Channels + wire lsu_axi_awvalid; + wire lsu_axi_awready; + wire [`RV_LSU_BUS_TAG-1:0] lsu_axi_awid; + wire [31:0] lsu_axi_awaddr; + wire [3:0] lsu_axi_awregion; + wire [7:0] lsu_axi_awlen; + wire [2:0] lsu_axi_awsize; + wire [1:0] lsu_axi_awburst; + wire lsu_axi_awlock; + wire [3:0] lsu_axi_awcache; + wire [2:0] lsu_axi_awprot; + wire [3:0] lsu_axi_awqos; + + wire lsu_axi_wvalid; + wire lsu_axi_wready; + wire [63:0] lsu_axi_wdata; + wire [7:0] lsu_axi_wstrb; + wire lsu_axi_wlast; + + wire lsu_axi_bvalid; + wire lsu_axi_bready; + wire [1:0] lsu_axi_bresp; + wire [`RV_LSU_BUS_TAG-1:0] lsu_axi_bid; + + // AXI Read Channels + wire lsu_axi_arvalid; + wire lsu_axi_arready; + wire [`RV_LSU_BUS_TAG-1:0] lsu_axi_arid; + wire [31:0] lsu_axi_araddr; + wire [3:0] lsu_axi_arregion; + wire [7:0] lsu_axi_arlen; + wire [2:0] lsu_axi_arsize; + wire [1:0] lsu_axi_arburst; + wire lsu_axi_arlock; + wire [3:0] lsu_axi_arcache; + wire [2:0] lsu_axi_arprot; + wire [3:0] lsu_axi_arqos; + + wire lsu_axi_rvalid; + wire lsu_axi_rready; + wire [`RV_LSU_BUS_TAG-1:0] lsu_axi_rid; + wire [63:0] lsu_axi_rdata; + wire [1:0] lsu_axi_rresp; + wire lsu_axi_rlast; + + //-------------------------- IFU AXI signals-------------------------- + // AXI Write Channels + wire ifu_axi_awvalid; + wire ifu_axi_awready; + wire [`RV_IFU_BUS_TAG-1:0] ifu_axi_awid; + wire [31:0] ifu_axi_awaddr; + wire [3:0] ifu_axi_awregion; + wire [7:0] ifu_axi_awlen; + wire [2:0] ifu_axi_awsize; + wire [1:0] ifu_axi_awburst; + wire ifu_axi_awlock; + wire [3:0] ifu_axi_awcache; + wire [2:0] ifu_axi_awprot; + wire [3:0] ifu_axi_awqos; + + wire ifu_axi_wvalid; + wire ifu_axi_wready; + wire [63:0] ifu_axi_wdata; + wire [7:0] ifu_axi_wstrb; + wire ifu_axi_wlast; + + wire ifu_axi_bvalid; + wire ifu_axi_bready; + wire [1:0] ifu_axi_bresp; + wire [`RV_IFU_BUS_TAG-1:0] ifu_axi_bid; + + // AXI Read Channels + wire ifu_axi_arvalid; + wire ifu_axi_arready; + wire [`RV_IFU_BUS_TAG-1:0] ifu_axi_arid; + wire [31:0] ifu_axi_araddr; + wire 
[3:0] ifu_axi_arregion; + wire [7:0] ifu_axi_arlen; + wire [2:0] ifu_axi_arsize; + wire [1:0] ifu_axi_arburst; + wire ifu_axi_arlock; + wire [3:0] ifu_axi_arcache; + wire [2:0] ifu_axi_arprot; + wire [3:0] ifu_axi_arqos; + + wire ifu_axi_rvalid; + wire ifu_axi_rready; + wire [`RV_IFU_BUS_TAG-1:0] ifu_axi_rid; + wire [63:0] ifu_axi_rdata; + wire [1:0] ifu_axi_rresp; + wire ifu_axi_rlast; + + //-------------------------- SB AXI signals-------------------------- + // AXI Write Channels + wire sb_axi_awvalid; + wire sb_axi_awready; + wire [`RV_SB_BUS_TAG-1:0] sb_axi_awid; + wire [31:0] sb_axi_awaddr; + wire [3:0] sb_axi_awregion; + wire [7:0] sb_axi_awlen; + wire [2:0] sb_axi_awsize; + wire [1:0] sb_axi_awburst; + wire sb_axi_awlock; + wire [3:0] sb_axi_awcache; + wire [2:0] sb_axi_awprot; + wire [3:0] sb_axi_awqos; + + wire sb_axi_wvalid; + wire sb_axi_wready; + wire [63:0] sb_axi_wdata; + wire [7:0] sb_axi_wstrb; + wire sb_axi_wlast; + + wire sb_axi_bvalid; + wire sb_axi_bready; + wire [1:0] sb_axi_bresp; + wire [`RV_SB_BUS_TAG-1:0] sb_axi_bid; + + // AXI Read Channels + wire sb_axi_arvalid; + wire sb_axi_arready; + wire [`RV_SB_BUS_TAG-1:0] sb_axi_arid; + wire [31:0] sb_axi_araddr; + wire [3:0] sb_axi_arregion; + wire [7:0] sb_axi_arlen; + wire [2:0] sb_axi_arsize; + wire [1:0] sb_axi_arburst; + wire sb_axi_arlock; + wire [3:0] sb_axi_arcache; + wire [2:0] sb_axi_arprot; + wire [3:0] sb_axi_arqos; + + wire sb_axi_rvalid; + wire sb_axi_rready; + wire [`RV_SB_BUS_TAG-1:0] sb_axi_rid; + wire [63:0] sb_axi_rdata; + wire [1:0] sb_axi_rresp; + wire sb_axi_rlast; + + //-------------------------- DMA AXI signals-------------------------- + // AXI Write Channels + wire dma_axi_awvalid; + wire dma_axi_awready; + wire [`RV_DMA_BUS_TAG-1:0] dma_axi_awid; + wire [31:0] dma_axi_awaddr; + wire [2:0] dma_axi_awsize; + wire [2:0] dma_axi_awprot; + wire [7:0] dma_axi_awlen; + wire [1:0] dma_axi_awburst; + + + wire dma_axi_wvalid; + wire dma_axi_wready; + wire [63:0] dma_axi_wdata; + 
wire [7:0] dma_axi_wstrb; + wire dma_axi_wlast; + + wire dma_axi_bvalid; + wire dma_axi_bready; + wire [1:0] dma_axi_bresp; + wire [`RV_DMA_BUS_TAG-1:0] dma_axi_bid; + + // AXI Read Channels + wire dma_axi_arvalid; + wire dma_axi_arready; + wire [`RV_DMA_BUS_TAG-1:0] dma_axi_arid; + wire [31:0] dma_axi_araddr; + wire [2:0] dma_axi_arsize; + wire [2:0] dma_axi_arprot; + wire [7:0] dma_axi_arlen; + wire [1:0] dma_axi_arburst; + + wire dma_axi_rvalid; + wire dma_axi_rready; + wire [`RV_DMA_BUS_TAG-1:0] dma_axi_rid; + wire [63:0] dma_axi_rdata; + wire [1:0] dma_axi_rresp; + wire dma_axi_rlast; + +`endif + wire[63:0] WriteData; + + + assign mailbox_write = lmem.mailbox_write; + assign WriteData = lmem.WriteData; + assign mailbox_data_val = WriteData[7:0] > 8'h5 && WriteData[7:0] < 8'h7f; + + parameter MAX_CYCLES = 2_000_000; + + integer fd, tp, el; + + always @(negedge core_clk) begin + cycleCnt <= cycleCnt+1; + // Test timeout monitor + if(cycleCnt == MAX_CYCLES) begin + $display ("Hit max cycle count (%0d) .. 
stopping",cycleCnt); + $finish; + end + // console Monitor + if( mailbox_data_val & mailbox_write) begin + $fwrite(fd,"%c", WriteData[7:0]); + $write("%c", WriteData[7:0]); + end + // End Of test monitor + if(mailbox_write && WriteData[7:0] == 8'hff) begin + $display("TEST_PASSED"); + $display("\nFinished : minstret = %0d, mcycle = %0d", rvtop.swerv.dec.tlu.minstretl[31:0],rvtop.swerv.dec.tlu.mcyclel[31:0]); + $display("See \"exec.log\" for execution trace with register updates..\n"); + $finish; + end + else if(mailbox_write && WriteData[7:0] == 8'h1) begin + $display("TEST_FAILED"); + $finish; + end + end + + + // trace monitor + always @(posedge core_clk) begin + wb_valid[0] <= rvtop.swerv.dec.dec_i0_wen_r; + wb_dest[0] <= rvtop.swerv.dec.dec_i0_waddr_r; + wb_data[0] <= rvtop.swerv.dec.dec_i0_wdata_r; + if (rvtop.trace_rv_i_valid_ip !== 0) begin + $fwrite(tp,"%b,%h,%h,%0h,%0h,3,%b,%h,%h,%b\n", rvtop.trace_rv_i_valid_ip, 0, trace_rv_i_address_ip, + 0, trace_rv_i_insn_ip,trace_rv_i_exception_ip,trace_rv_i_ecause_ip, + trace_rv_i_tval_ip,trace_rv_i_interrupt_ip); + // Basic trace - no exception register updates + // #1 0 ee000000 b0201073 c 0b02 00000000 + for (int i=0; i<1; i++) + if (rvtop.trace_rv_i_valid_ip[i]==1) begin + commit_count++; + $fwrite (el, "%10d : %6s 0 %h %h %s\n", cycleCnt, $sformatf("#%0d",commit_count), + trace_rv_i_address_ip[31+i*32 -:32], trace_rv_i_insn_ip[31+i*32-:32], + wb_dest[i] !=0 ? 
$sformatf("r%0d=%h", wb_dest[i], wb_data[i]) : ""); + end + end + end + + + initial begin + // tie offs + jtag_id[31:28] = 4'b1; + jtag_id[27:12] = '0; + jtag_id[11:1] = 11'h45; + reset_vector = 32'h0; + nmi_vector = 32'hee000000; + nmi_int = 0; + + $readmemh("data.hex", lmem.mem); + $readmemh("program.hex", imem.mem); + tp = $fopen("trace_port.csv","w"); + el = $fopen("exec.log","w"); + $fwrite (el, "//Cycle : #inst 0 pc opcode reg regnum value\n"); + fd = $fopen("console.log","w"); + commit_count = 0; + preload_dccm(); + +`ifndef VERILATOR + if($test$plusargs("dumpon")) $dumpvars; + forever core_clk = #5 ~core_clk; +`endif + end + + + assign rst_l = cycleCnt > 5; + + //=========================================================================- + // RTL instance + //=========================================================================- +el2_swerv_wrapper rvtop ( + .rst_l ( rst_l ), + .clk ( core_clk ), + .rst_vec ( reset_vector[31:1]), + .nmi_int ( nmi_int ), + .nmi_vec ( nmi_vector[31:1]), + .jtag_id ( jtag_id[31:1]), + +`ifdef RV_BUILD_AHB_LITE + .haddr ( ic_haddr ), + .hburst ( ic_hburst ), + .hmastlock ( ic_hmastlock ), + .hprot ( ic_hprot ), + .hsize ( ic_hsize ), + .htrans ( ic_htrans ), + .hwrite ( ic_hwrite ), + + .hrdata ( ic_hrdata[63:0]), + .hready ( ic_hready ), + .hresp ( ic_hresp ), + + //--------------------------------------------------------------- + // Debug AHB Master + //--------------------------------------------------------------- + .sb_haddr ( sb_haddr ), + .sb_hburst ( sb_hburst ), + .sb_hmastlock ( sb_hmastlock ), + .sb_hprot ( sb_hprot ), + .sb_hsize ( sb_hsize ), + .sb_htrans ( sb_htrans ), + .sb_hwrite ( sb_hwrite ), + .sb_hwdata ( sb_hwdata ), + + .sb_hrdata ( sb_hrdata ), + .sb_hready ( sb_hready ), + .sb_hresp ( sb_hresp ), + + //--------------------------------------------------------------- + // LSU AHB Master + //--------------------------------------------------------------- + .lsu_haddr ( lsu_haddr ), + .lsu_hburst ( 
lsu_hburst ), + .lsu_hmastlock ( lsu_hmastlock ), + .lsu_hprot ( lsu_hprot ), + .lsu_hsize ( lsu_hsize ), + .lsu_htrans ( lsu_htrans ), + .lsu_hwrite ( lsu_hwrite ), + .lsu_hwdata ( lsu_hwdata ), + + .lsu_hrdata ( lsu_hrdata[63:0]), + .lsu_hready ( lsu_hready ), + .lsu_hresp ( lsu_hresp ), + + //--------------------------------------------------------------- + // DMA Slave + //--------------------------------------------------------------- + .dma_haddr ( '0 ), + .dma_hburst ( '0 ), + .dma_hmastlock ( '0 ), + .dma_hprot ( '0 ), + .dma_hsize ( '0 ), + .dma_htrans ( '0 ), + .dma_hwrite ( '0 ), + .dma_hwdata ( '0 ), + + .dma_hrdata ( dma_hrdata ), + .dma_hresp ( dma_hresp ), + .dma_hsel ( 1'b1 ), + .dma_hreadyin ( dma_hready_out ), + .dma_hreadyout ( dma_hready_out ), +`endif +`ifdef RV_BUILD_AXI4 + //-------------------------- LSU AXI signals-------------------------- + // AXI Write Channels + .lsu_axi_awvalid (lsu_axi_awvalid), + .lsu_axi_awready (lsu_axi_awready), + .lsu_axi_awid (lsu_axi_awid), + .lsu_axi_awaddr (lsu_axi_awaddr), + .lsu_axi_awregion (lsu_axi_awregion), + .lsu_axi_awlen (lsu_axi_awlen), + .lsu_axi_awsize (lsu_axi_awsize), + .lsu_axi_awburst (lsu_axi_awburst), + .lsu_axi_awlock (lsu_axi_awlock), + .lsu_axi_awcache (lsu_axi_awcache), + .lsu_axi_awprot (lsu_axi_awprot), + .lsu_axi_awqos (lsu_axi_awqos), + + .lsu_axi_wvalid (lsu_axi_wvalid), + .lsu_axi_wready (lsu_axi_wready), + .lsu_axi_wdata (lsu_axi_wdata), + .lsu_axi_wstrb (lsu_axi_wstrb), + .lsu_axi_wlast (lsu_axi_wlast), + + .lsu_axi_bvalid (lsu_axi_bvalid), + .lsu_axi_bready (lsu_axi_bready), + .lsu_axi_bresp (lsu_axi_bresp), + .lsu_axi_bid (lsu_axi_bid), + + + .lsu_axi_arvalid (lsu_axi_arvalid), + .lsu_axi_arready (lsu_axi_arready), + .lsu_axi_arid (lsu_axi_arid), + .lsu_axi_araddr (lsu_axi_araddr), + .lsu_axi_arregion (lsu_axi_arregion), + .lsu_axi_arlen (lsu_axi_arlen), + .lsu_axi_arsize (lsu_axi_arsize), + .lsu_axi_arburst (lsu_axi_arburst), + .lsu_axi_arlock (lsu_axi_arlock), + 
.lsu_axi_arcache (lsu_axi_arcache), + .lsu_axi_arprot (lsu_axi_arprot), + .lsu_axi_arqos (lsu_axi_arqos), + + .lsu_axi_rvalid (lsu_axi_rvalid), + .lsu_axi_rready (lsu_axi_rready), + .lsu_axi_rid (lsu_axi_rid), + .lsu_axi_rdata (lsu_axi_rdata), + .lsu_axi_rresp (lsu_axi_rresp), + .lsu_axi_rlast (lsu_axi_rlast), + + //-------------------------- IFU AXI signals-------------------------- + // AXI Write Channels + .ifu_axi_awvalid (ifu_axi_awvalid), + .ifu_axi_awready (ifu_axi_awready), + .ifu_axi_awid (ifu_axi_awid), + .ifu_axi_awaddr (ifu_axi_awaddr), + .ifu_axi_awregion (ifu_axi_awregion), + .ifu_axi_awlen (ifu_axi_awlen), + .ifu_axi_awsize (ifu_axi_awsize), + .ifu_axi_awburst (ifu_axi_awburst), + .ifu_axi_awlock (ifu_axi_awlock), + .ifu_axi_awcache (ifu_axi_awcache), + .ifu_axi_awprot (ifu_axi_awprot), + .ifu_axi_awqos (ifu_axi_awqos), + + .ifu_axi_wvalid (ifu_axi_wvalid), + .ifu_axi_wready (ifu_axi_wready), + .ifu_axi_wdata (ifu_axi_wdata), + .ifu_axi_wstrb (ifu_axi_wstrb), + .ifu_axi_wlast (ifu_axi_wlast), + + .ifu_axi_bvalid (ifu_axi_bvalid), + .ifu_axi_bready (ifu_axi_bready), + .ifu_axi_bresp (ifu_axi_bresp), + .ifu_axi_bid (ifu_axi_bid), + + .ifu_axi_arvalid (ifu_axi_arvalid), + .ifu_axi_arready (ifu_axi_arready), + .ifu_axi_arid (ifu_axi_arid), + .ifu_axi_araddr (ifu_axi_araddr), + .ifu_axi_arregion (ifu_axi_arregion), + .ifu_axi_arlen (ifu_axi_arlen), + .ifu_axi_arsize (ifu_axi_arsize), + .ifu_axi_arburst (ifu_axi_arburst), + .ifu_axi_arlock (ifu_axi_arlock), + .ifu_axi_arcache (ifu_axi_arcache), + .ifu_axi_arprot (ifu_axi_arprot), + .ifu_axi_arqos (ifu_axi_arqos), + + .ifu_axi_rvalid (ifu_axi_rvalid), + .ifu_axi_rready (ifu_axi_rready), + .ifu_axi_rid (ifu_axi_rid), + .ifu_axi_rdata (ifu_axi_rdata), + .ifu_axi_rresp (ifu_axi_rresp), + .ifu_axi_rlast (ifu_axi_rlast), + + //-------------------------- SB AXI signals-------------------------- + // AXI Write Channels + .sb_axi_awvalid (sb_axi_awvalid), + .sb_axi_awready (sb_axi_awready), + .sb_axi_awid 
(sb_axi_awid), + .sb_axi_awaddr (sb_axi_awaddr), + .sb_axi_awregion (sb_axi_awregion), + .sb_axi_awlen (sb_axi_awlen), + .sb_axi_awsize (sb_axi_awsize), + .sb_axi_awburst (sb_axi_awburst), + .sb_axi_awlock (sb_axi_awlock), + .sb_axi_awcache (sb_axi_awcache), + .sb_axi_awprot (sb_axi_awprot), + .sb_axi_awqos (sb_axi_awqos), + + .sb_axi_wvalid (sb_axi_wvalid), + .sb_axi_wready (sb_axi_wready), + .sb_axi_wdata (sb_axi_wdata), + .sb_axi_wstrb (sb_axi_wstrb), + .sb_axi_wlast (sb_axi_wlast), + + .sb_axi_bvalid (sb_axi_bvalid), + .sb_axi_bready (sb_axi_bready), + .sb_axi_bresp (sb_axi_bresp), + .sb_axi_bid (sb_axi_bid), + + + .sb_axi_arvalid (sb_axi_arvalid), + .sb_axi_arready (sb_axi_arready), + .sb_axi_arid (sb_axi_arid), + .sb_axi_araddr (sb_axi_araddr), + .sb_axi_arregion (sb_axi_arregion), + .sb_axi_arlen (sb_axi_arlen), + .sb_axi_arsize (sb_axi_arsize), + .sb_axi_arburst (sb_axi_arburst), + .sb_axi_arlock (sb_axi_arlock), + .sb_axi_arcache (sb_axi_arcache), + .sb_axi_arprot (sb_axi_arprot), + .sb_axi_arqos (sb_axi_arqos), + + .sb_axi_rvalid (sb_axi_rvalid), + .sb_axi_rready (sb_axi_rready), + .sb_axi_rid (sb_axi_rid), + .sb_axi_rdata (sb_axi_rdata), + .sb_axi_rresp (sb_axi_rresp), + .sb_axi_rlast (sb_axi_rlast), + + //-------------------------- DMA AXI signals-------------------------- + // AXI Write Channels + .dma_axi_awvalid (1'b0), + .dma_axi_awready (dma_axi_awready), + .dma_axi_awid (dma_axi_awid), + .dma_axi_awaddr (dma_axi_awaddr), + .dma_axi_awsize (dma_axi_awsize), + .dma_axi_awprot (dma_axi_awprot), + .dma_axi_awlen (dma_axi_awlen), + .dma_axi_awburst (dma_axi_awburst), + + + .dma_axi_wvalid (1'b0), + .dma_axi_wready (dma_axi_wready), + .dma_axi_wdata (dma_axi_wdata), + .dma_axi_wstrb (dma_axi_wstrb), + .dma_axi_wlast (dma_axi_wlast), + + .dma_axi_bvalid (dma_axi_bvalid), + .dma_axi_bready (1'b0), + .dma_axi_bresp (dma_axi_bresp), + .dma_axi_bid (dma_axi_bid), + + + .dma_axi_arvalid (1'b0), + .dma_axi_arready (dma_axi_arready), + .dma_axi_arid 
(dma_axi_arid), + .dma_axi_araddr (dma_axi_araddr), + .dma_axi_arsize (dma_axi_arsize), + .dma_axi_arprot (dma_axi_arprot), + .dma_axi_arlen (dma_axi_arlen), + .dma_axi_arburst (dma_axi_arburst), + + .dma_axi_rvalid (dma_axi_rvalid), + .dma_axi_rready (1'b0), + .dma_axi_rid (dma_axi_rid), + .dma_axi_rdata (dma_axi_rdata), + .dma_axi_rresp (dma_axi_rresp), + .dma_axi_rlast (dma_axi_rlast), +`endif + .timer_int ( 1'b0 ), + .extintsrc_req ( '0 ), + + .lsu_bus_clk_en ( 1'b1 ),// Clock ratio b/w cpu core clk & AHB master interface + .ifu_bus_clk_en ( 1'b1 ),// Clock ratio b/w cpu core clk & AHB master interface + .dbg_bus_clk_en ( 1'b1 ),// Clock ratio b/w cpu core clk & AHB Debug master interface + .dma_bus_clk_en ( 1'b1 ),// Clock ratio b/w cpu core clk & AHB slave interface + + .trace_rv_i_insn_ip (trace_rv_i_insn_ip), + .trace_rv_i_address_ip (trace_rv_i_address_ip), + .trace_rv_i_valid_ip (trace_rv_i_valid_ip), + .trace_rv_i_exception_ip(trace_rv_i_exception_ip), + .trace_rv_i_ecause_ip (trace_rv_i_ecause_ip), + .trace_rv_i_interrupt_ip(trace_rv_i_interrupt_ip), + .trace_rv_i_tval_ip (trace_rv_i_tval_ip), + + .jtag_tck ( 1'b0 ), + .jtag_tms ( 1'b0 ), + .jtag_tdi ( 1'b0 ), + .jtag_trst_n ( 1'b0 ), + .jtag_tdo ( jtag_tdo ), + + .mpc_debug_halt_ack ( mpc_debug_halt_ack), + .mpc_debug_halt_req ( 1'b0), + .mpc_debug_run_ack ( mpc_debug_run_ack), + .mpc_debug_run_req ( 1'b1), + .mpc_reset_run_req ( 1'b1), // Start running after reset + .debug_brkpt_status (debug_brkpt_status), + + .i_cpu_halt_req ( 1'b0 ), // Async halt req to CPU + .o_cpu_halt_ack ( o_cpu_halt_ack ), // core response to halt + .o_cpu_halt_status ( o_cpu_halt_status ), // 1'b1 indicates core is halted + .i_cpu_run_req ( 1'b0 ), // Async restart req to CPU + .o_debug_mode_status (o_debug_mode_status), + .o_cpu_run_ack ( o_cpu_run_ack ), // Core response to run req + + .dec_tlu_perfcnt0 (), + .dec_tlu_perfcnt1 (), + .dec_tlu_perfcnt2 (), + .dec_tlu_perfcnt3 (), + + .soft_int ('0), + .core_id ('0), + 
.scan_mode ( 1'b0 ), // To enable scan mode + .mbist_mode ( 1'b0 ) // to enable mbist + +); + + + //=========================================================================- + // AHB I$ instance + //=========================================================================- +`ifdef RV_BUILD_AHB_LITE + +ahb_sif imem ( + // Inputs + .HWDATA(64'h0), + .HCLK(core_clk), + .HSEL(1'b1), + .HPROT(ic_hprot), + .HWRITE(ic_hwrite), + .HTRANS(ic_htrans), + .HSIZE(ic_hsize), + .HREADY(ic_hready), + .HRESETn(rst_l), + .HADDR(ic_haddr), + .HBURST(ic_hburst), + + // Outputs + .HREADYOUT(ic_hready), + .HRESP(ic_hresp), + .HRDATA(ic_hrdata[63:0]) +); + + +ahb_sif lmem ( + // Inputs + .HWDATA(lsu_hwdata), + .HCLK(core_clk), + .HSEL(1'b1), + .HPROT(lsu_hprot), + .HWRITE(lsu_hwrite), + .HTRANS(lsu_htrans), + .HSIZE(lsu_hsize), + .HREADY(lsu_hready), + .HRESETn(rst_l), + .HADDR(lsu_haddr), + .HBURST(lsu_hburst), + + // Outputs + .HREADYOUT(lsu_hready), + .HRESP(lsu_hresp), + .HRDATA(lsu_hrdata[63:0]) +); + +`endif +`ifdef RV_BUILD_AXI4 +axi_slv #(.TAGW(`RV_IFU_BUS_TAG)) imem( + .aclk(core_clk), + .rst_l(rst_l), + .arvalid(ifu_axi_arvalid), + .arready(ifu_axi_arready), + .araddr(ifu_axi_araddr), + .arid(ifu_axi_arid), + .arlen(ifu_axi_arlen), + .arburst(ifu_axi_arburst), + .arsize(ifu_axi_arsize), + + .rvalid(ifu_axi_rvalid), + .rready(ifu_axi_rready), + .rdata(ifu_axi_rdata), + .rresp(ifu_axi_rresp), + .rid(ifu_axi_rid), + .rlast(ifu_axi_rlast), + + .awvalid(1'b0), + .awready(), + .awaddr('0), + .awid('0), + .awlen('0), + .awburst('0), + .awsize('0), + + .wdata('0), + .wstrb('0), + .wvalid(1'b0), + .wready(), + + .bvalid(), + .bready(1'b0), + .bresp(), + .bid() +); + +defparam lmem.TAGW =`RV_LSU_BUS_TAG; + +//axi_slv #(.TAGW(`RV_LSU_BUS_TAG)) lmem( +axi_slv lmem( + .aclk(core_clk), + .rst_l(rst_l), + .arvalid(lsu_axi_arvalid), + .arready(lsu_axi_arready), + .araddr(lsu_axi_araddr), + .arid(lsu_axi_arid), + .arlen(lsu_axi_arlen), + .arburst(lsu_axi_arburst), + .arsize(lsu_axi_arsize), + 
+ .rvalid(lsu_axi_rvalid), + .rready(lsu_axi_rready), + .rdata(lsu_axi_rdata), + .rresp(lsu_axi_rresp), + .rid(lsu_axi_rid), + .rlast(lsu_axi_rlast), + + .awvalid(lsu_axi_awvalid), + .awready(lsu_axi_awready), + .awaddr(lsu_axi_awaddr), + .awid(lsu_axi_awid), + .awlen(lsu_axi_awlen), + .awburst(lsu_axi_awburst), + .awsize(lsu_axi_awsize), + + .wdata(lsu_axi_wdata), + .wstrb(lsu_axi_wstrb), + .wvalid(lsu_axi_wvalid), + .wready(lsu_axi_wready), + + .bvalid(lsu_axi_bvalid), + .bready(lsu_axi_bready), + .bresp(lsu_axi_bresp), + .bid(lsu_axi_bid) +); +`endif + + +task preload_dccm; +bit[31:0] data; +bit[31:0] addr, eaddr; +int adr; + +addr = 'hfff8; +eaddr = {lmem.mem[addr+3],lmem.mem[addr+2],lmem.mem[addr+1],lmem.mem[addr]}; +if (eaddr != `RV_DCCM_SADR) return; +addr = 'hfffc; +eaddr = {lmem.mem[addr+3],lmem.mem[addr+2],lmem.mem[addr+1],lmem.mem[addr]}; +$display("DCCM pre-load from %h to %h", `RV_DCCM_SADR, eaddr); + +for(addr=`RV_DCCM_SADR; addr <= eaddr; addr+=4) begin + adr = addr & 'hffff; + data = {lmem.mem[adr+3],lmem.mem[adr+2],lmem.mem[adr+1],lmem.mem[adr]}; + slam_dccm_ram(addr, data == 0 ? 
0 : {riscv_ecc32(data),data}); +end + +endtask + +`ifdef VERILATOR +`define DRAM(bank) rvtop.mem.Gen_dccm_enable.dccm.mem_bank[bank].ram.ram_core +`else +`define DRAM(bank) rvtop.mem.Gen_dccm_enable.dccm.mem_bank[bank].dccm.dccm_bank.ram_core +`endif + +task slam_dccm_ram(input [31:0] addr, input[38:0] data); +int bank, indx; +bank = get_dccm_bank(addr, indx); +//`ifndef VERILATOR +case(bank) +0: `DRAM(0)[indx] = data; +1: `DRAM(1)[indx] = data; +`ifdef RV_DCCM_NUM_BANKS_4 +2: `DRAM(2)[indx] = data; +3: `DRAM(3)[indx] = data; +`endif +`ifdef RV_DCCM_NUM_BANKS_8 +2: `DRAM(2)[indx] = data; +3: `DRAM(3)[indx] = data; +4: `DRAM(4)[indx] = data; +5: `DRAM(5)[indx] = data; +6: `DRAM(6)[indx] = data; +7: `DRAM(7)[indx] = data; +`endif +endcase +//`endif +endtask + + +function[6:0] riscv_ecc32(input[31:0] data); +reg[6:0] synd; +synd[0] = ^(data & 32'h56aa_ad5b); +synd[1] = ^(data & 32'h9b33_366d); +synd[2] = ^(data & 32'he3c3_c78e); +synd[3] = ^(data & 32'h03fc_07f0); +synd[4] = ^(data & 32'h03ff_f800); +synd[5] = ^(data & 32'hfc00_0000); +synd[6] = ^{data, synd[5:0]}; +return synd; +endfunction + +function int get_dccm_bank(input int addr, output int bank_idx); +`ifdef RV_DCCM_NUM_BANKS_2 + bank_idx = int'(addr[`RV_DCCM_BITS-1:3]); + return int'( addr[2]); +`elsif RV_DCCM_NUM_BANKS_4 + bank_idx = int'(addr[`RV_DCCM_BITS-1:4]); + return int'(addr[3:2]); +`elsif RV_DCCM_NUM_BANKS_8 + bank_idx = int'(addr[`RV_DCCM_BITS-1:5]); + return int'( addr[4:2]); +`endif +endfunction + +endmodule diff --git a/testbench/test_tb_top.cpp b/testbench/test_tb_top.cpp new file mode 100644 index 0000000..c88bb2b --- /dev/null +++ b/testbench/test_tb_top.cpp @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include <stdlib.h> +#include <iostream> +#include <utility> +#include <string> +#include "Vtb_top.h" +#include "verilated.h" +#include "verilated_vcd_c.h" + + +vluint64_t main_time = 0; + +double sc_time_stamp () { + return main_time; +} + + +int main(int argc, char** argv) { + std::cout << "\nVerilatorTB: Start of sim\n" << std::endl; + + // Check for +dumpon and remove it from argv + bool dumpWaves = false; + int newArgc = 0; + for (int i = 0; i < argc; ++i) + if (strcmp(argv[i], "+dumpon") == 0) + dumpWaves = true; + else + argv[newArgc++] = argv[i]; + argc = newArgc; + + Verilated::commandArgs(argc, argv); + + Vtb_top* tb = new Vtb_top; + + // init trace dump + Verilated::traceEverOn(true); + VerilatedVcdC* tfp = new VerilatedVcdC; + tb->trace (tfp, 24); + if (dumpWaves) + tfp->open ("sim.vcd"); + + // Simulate + while(!Verilated::gotFinish()){ + if (dumpWaves) + tfp->dump (main_time); + main_time += 5; + tb->core_clk = !tb->core_clk; + tb->eval(); + } + + if (dumpWaves) + tfp->close(); + + std::cout << "\nVerilatorTB: End of sim" << std::endl; + exit(EXIT_SUCCESS); + +} diff --git a/tools/JSON.pm b/tools/JSON.pm new file mode 100644 index 0000000..6fb7a90 --- /dev/null +++ b/tools/JSON.pm @@ -0,0 +1,2267 @@ +package JSON; + + +use strict; +use Carp (); +use base qw(Exporter); +@JSON::EXPORT = qw(from_json to_json jsonToObj objToJson encode_json decode_json); + +BEGIN { + $JSON::VERSION = '2.53'; + $JSON::DEBUG = 0 unless (defined $JSON::DEBUG); + $JSON::DEBUG = $ENV{ PERL_JSON_DEBUG } if exists $ENV{ PERL_JSON_DEBUG }; +} + +my $Module_XS = 'JSON::XS'; +my $Module_PP = 
'JSON::PP'; +my $Module_bp = 'JSON::backportPP'; # included in JSON distribution +my $PP_Version = '2.27200'; +my $XS_Version = '2.27'; + + +# XS and PP common methods + +my @PublicMethods = qw/ + ascii latin1 utf8 pretty indent space_before space_after relaxed canonical allow_nonref + allow_blessed convert_blessed filter_json_object filter_json_single_key_object + shrink max_depth max_size encode decode decode_prefix allow_unknown +/; + +my @Properties = qw/ + ascii latin1 utf8 indent space_before space_after relaxed canonical allow_nonref + allow_blessed convert_blessed shrink max_depth max_size allow_unknown +/; + +my @XSOnlyMethods = qw//; # Currently nothing + +my @PPOnlyMethods = qw/ + indent_length sort_by + allow_singlequote allow_bignum loose allow_barekey escape_slash as_nonblessed +/; # JSON::PP specific + + +# used in _load_xs and _load_pp ($INSTALL_ONLY is not used currently) +my $_INSTALL_DONT_DIE = 1; # When _load_xs fails to load XS, don't die. +my $_INSTALL_ONLY = 2; # Don't call _set_methods() +my $_ALLOW_UNSUPPORTED = 0; +my $_UNIV_CONV_BLESSED = 0; +my $_USSING_bpPP = 0; + + +# Check the environment variable to decide worker module. + +unless ($JSON::Backend) { + $JSON::DEBUG and Carp::carp("Check used worker module..."); + + my $backend = exists $ENV{PERL_JSON_BACKEND} ? 
$ENV{PERL_JSON_BACKEND} : 1; + + if ($backend eq '1' or $backend =~ /JSON::XS\s*,\s*JSON::PP/) { + _load_xs($_INSTALL_DONT_DIE) or _load_pp(); + } + elsif ($backend eq '0' or $backend eq 'JSON::PP') { + _load_pp(); + } + elsif ($backend eq '2' or $backend eq 'JSON::XS') { + _load_xs(); + } + elsif ($backend eq 'JSON::backportPP') { + $_USSING_bpPP = 1; + _load_pp(); + } + else { + Carp::croak "The value of environmental variable 'PERL_JSON_BACKEND' is invalid."; + } +} + + +sub import { + my $pkg = shift; + my @what_to_export; + my $no_export; + + for my $tag (@_) { + if ($tag eq '-support_by_pp') { + if (!$_ALLOW_UNSUPPORTED++) { + JSON::Backend::XS + ->support_by_pp(@PPOnlyMethods) if ($JSON::Backend eq $Module_XS); + } + next; + } + elsif ($tag eq '-no_export') { + $no_export++, next; + } + elsif ( $tag eq '-convert_blessed_universally' ) { + eval q| + require B; + *UNIVERSAL::TO_JSON = sub { + my $b_obj = B::svref_2object( $_[0] ); + return $b_obj->isa('B::HV') ? { %{ $_[0] } } + : $b_obj->isa('B::AV') ? [ @{ $_[0] } ] + : undef + ; + } + | if ( !$_UNIV_CONV_BLESSED++ ); + next; + } + push @what_to_export, $tag; + } + + return if ($no_export); + + __PACKAGE__->export_to_level(1, $pkg, @what_to_export); +} + + +# OBSOLETED + +sub jsonToObj { + my $alternative = 'from_json'; + if (defined $_[0] and UNIVERSAL::isa($_[0], 'JSON')) { + shift @_; $alternative = 'decode'; + } + Carp::carp "'jsonToObj' will be obsoleted. Please use '$alternative' instead."; + return JSON::from_json(@_); +}; + +sub objToJson { + my $alternative = 'to_json'; + if (defined $_[0] and UNIVERSAL::isa($_[0], 'JSON')) { + shift @_; $alternative = 'encode'; + } + Carp::carp "'objToJson' will be obsoleted. 
Please use '$alternative' instead."; + JSON::to_json(@_); +}; + + +# INTERFACES + +sub to_json ($@) { + if ( + ref($_[0]) eq 'JSON' + or (@_ > 2 and $_[0] eq 'JSON') + ) { + Carp::croak "to_json should not be called as a method."; + } + my $json = new JSON; + + if (@_ == 2 and ref $_[1] eq 'HASH') { + my $opt = $_[1]; + for my $method (keys %$opt) { + $json->$method( $opt->{$method} ); + } + } + + $json->encode($_[0]); +} + + +sub from_json ($@) { + if ( ref($_[0]) eq 'JSON' or $_[0] eq 'JSON' ) { + Carp::croak "from_json should not be called as a method."; + } + my $json = new JSON; + + if (@_ == 2 and ref $_[1] eq 'HASH') { + my $opt = $_[1]; + for my $method (keys %$opt) { + $json->$method( $opt->{$method} ); + } + } + + return $json->decode( $_[0] ); +} + + +sub true { $JSON::true } + +sub false { $JSON::false } + +sub null { undef; } + + +sub require_xs_version { $XS_Version; } + +sub backend { + my $proto = shift; + $JSON::Backend; +} + +#*module = *backend; + + +sub is_xs { + return $_[0]->backend eq $Module_XS; # 'module' alias above is disabled; ask backend() directly +} + + +sub is_pp { + return not $_[0]->is_xs; # no 'xs' method exists; PP is simply "not XS" +} + + +sub pureperl_only_methods { @PPOnlyMethods; } + + +sub property { + my ($self, $name, $value) = @_; + + if (@_ == 1) { + my %props; + for $name (@Properties) { + my $method = 'get_' . $name; + if ($name eq 'max_size') { + my $value = $self->$method(); + $props{$name} = $value == 1 ? 0 : $value; + next; + } + $props{$name} = $self->$method(); + } + return \%props; + } + elsif (@_ > 3) { + Carp::croak('property() can take only the option within 2 arguments.'); + } + elsif (@_ == 2) { + if ( my $method = $self->can('get_' . $name) ) { + if ($name eq 'max_size') { + my $value = $self->$method(); + return $value == 1 ? 0 : $value; + } + $self->$method(); + } + } + else { + $self->$name($value); + } + +} + + + +# INTERNAL + +sub _load_xs { + my $opt = shift; + + $JSON::DEBUG and Carp::carp "Load $Module_XS."; + + # if called after install module, overload is disable.... why? 
+ JSON::Boolean::_overrride_overload($Module_XS); + JSON::Boolean::_overrride_overload($Module_PP); + + eval qq| + use $Module_XS $XS_Version (); + |; + + if ($@) { + if (defined $opt and $opt & $_INSTALL_DONT_DIE) { + $JSON::DEBUG and Carp::carp "Can't load $Module_XS...($@)"; + return 0; + } + Carp::croak $@; + } + + unless (defined $opt and $opt & $_INSTALL_ONLY) { + _set_module( $JSON::Backend = $Module_XS ); + my $data = join("", <DATA>); # this code is from Jcode 2.xx. + close(DATA); + eval $data; + JSON::Backend::XS->init; + } + + return 1; +}; + + +sub _load_pp { + my $opt = shift; + my $backend = $_USSING_bpPP ? $Module_bp : $Module_PP; + + $JSON::DEBUG and Carp::carp "Load $backend."; + + # if called after install module, overload is disable.... why? + JSON::Boolean::_overrride_overload($Module_XS); + JSON::Boolean::_overrride_overload($backend); + + if ( $_USSING_bpPP ) { + eval qq| require $backend |; + } + else { + eval qq| use $backend $PP_Version () |; + } + + if ($@) { + if ( $backend eq $Module_PP ) { + $JSON::DEBUG and Carp::carp "Can't load $Module_PP ($@), so try to load $Module_bp"; + $_USSING_bpPP++; + $backend = $Module_bp; + JSON::Boolean::_overrride_overload($backend); + local $^W; # if PP installed but invalid version, backportPP redefines methods. + eval qq| require $Module_bp |; + } + Carp::croak $@ if $@; + } + + unless (defined $opt and $opt & $_INSTALL_ONLY) { + _set_module( $JSON::Backend = $Module_PP ); # even if backportPP, set $Backend with 'JSON::PP' + JSON::Backend::PP->init; + } +}; + + +sub _set_module { + return if defined $JSON::true; + + my $module = shift; + + local $^W; + no strict qw(refs); + + $JSON::true = ${"$module\::true"}; + $JSON::false = ${"$module\::false"}; + + push @JSON::ISA, $module; + push @{"$module\::Boolean::ISA"}, qw(JSON::Boolean); + + *{"JSON::is_bool"} = \&{"$module\::is_bool"}; + + for my $method ($module eq $Module_XS ? 
@PPOnlyMethods : @XSOnlyMethods) { + *{"JSON::$method"} = sub { + Carp::carp("$method is not supported in $module."); + $_[0]; + }; + } + + return 1; +} + + + +# +# JSON Boolean +# + +package JSON::Boolean; + +my %Installed; # counts calls per package name so the overloads are installed only once + +sub _overrride_overload { # installs the '""' and 'eq' overloads into the ::Boolean class of the package named in $_[0] + return if ($Installed{ $_[0] }++); + + my $boolean = $_[0] . '::Boolean'; + + eval sprintf(q| + package %s; + use overload ( + '""' => sub { ${$_[0]} == 1 ? 'true' : 'false' }, + 'eq' => sub { + my ($obj, $op) = ref ($_[0]) ? ($_[0], $_[1]) : ($_[1], $_[0]); + if ($op eq 'true' or $op eq 'false') { + return "$obj" eq 'true' ? 'true' eq $op : 'false' eq $op; + } + else { + return $obj ? 1 == $op : 0 == $op; + } + }, + ); + |, $boolean); + + if ($@) { Carp::croak $@; } + + return 1; +} + + +# +# Helper classes for Backend Module (PP) +# + +package JSON::Backend::PP; + +sub init { # aliases JSON::decode_json/encode_json to the JSON::PP functions and defines JSON::PP::is_xs/is_pp + local $^W; + no strict qw(refs); # this routine may be called after JSON::Backend::XS init was called. + *{"JSON::decode_json"} = \&{"JSON::PP::decode_json"}; + *{"JSON::encode_json"} = \&{"JSON::PP::encode_json"}; + *{"JSON::PP::is_xs"} = sub { 0 }; + *{"JSON::PP::is_pp"} = sub { 1 }; + return 1; +} + +# +# To save memory, the below lines are read only when XS backend is used. 
+# + +package JSON; + +1; +__DATA__ + + +# +# Helper classes for Backend Module (XS) +# + +package JSON::Backend::XS; + +use constant INDENT_LENGTH_FLAG => 15 << 12; # mask for the four bits (12..15) used to store the indent length in an object's flag word + +use constant UNSUPPORTED_ENCODE_FLAG => { + ESCAPE_SLASH => 0x00000010, + ALLOW_BIGNUM => 0x00000020, + AS_NONBLESSED => 0x00000040, + EXPANDED => 0x10000000, # for developer's +}; + +use constant UNSUPPORTED_DECODE_FLAG => { + LOOSE => 0x00000001, + ALLOW_BIGNUM => 0x00000002, + ALLOW_BAREKEY => 0x00000004, + ALLOW_SINGLEQUOTE => 0x00000008, + EXPANDED => 0x20000000, # for developer's +}; + + +sub init { # aliases JSON::decode_json/encode_json to the JSON::XS functions and defines JSON::XS::is_xs/is_pp + local $^W; + no strict qw(refs); + *{"JSON::decode_json"} = \&{"JSON::XS::decode_json"}; + *{"JSON::encode_json"} = \&{"JSON::XS::encode_json"}; + *{"JSON::XS::is_xs"} = sub { 1 }; + *{"JSON::XS::is_pp"} = sub { 0 }; + return 1; +} + + +sub support_by_pp { # reroutes JSON::XS encode/decode/incr_parse through the Supportable wrappers and creates flag methods for @methods + my ($class, @methods) = @_; + + local $^W; + no strict qw(refs); + + my $JSON_XS_encode_orignal = \&JSON::XS::encode; + my $JSON_XS_decode_orignal = \&JSON::XS::decode; + my $JSON_XS_incr_parse_orignal = \&JSON::XS::incr_parse; + + *JSON::XS::decode = \&JSON::Backend::XS::Supportable::_decode; + *JSON::XS::encode = \&JSON::Backend::XS::Supportable::_encode; + *JSON::XS::incr_parse = \&JSON::Backend::XS::Supportable::_incr_parse; + + *{JSON::XS::_original_decode} = $JSON_XS_decode_orignal; + *{JSON::XS::_original_encode} = $JSON_XS_encode_orignal; + *{JSON::XS::_original_incr_parse} = $JSON_XS_incr_parse_orignal; + + push @JSON::Backend::XS::Supportable::ISA, 'JSON'; + + my $pkg = 'JSON::Backend::XS::Supportable'; + + *{JSON::new} = sub { # replacement constructor: a JSON::XS object whose scalar is set to 0, reblessed into $pkg + my $proto = new JSON::XS; $$proto = 0; + bless $proto, $pkg; + }; + + + for my $method (@methods) { + my $flag = uc($method); + my $type |= (UNSUPPORTED_ENCODE_FLAG->{$flag} || 0); + $type |= (UNSUPPORTED_DECODE_FLAG->{$flag} || 0); + + next unless($type); # names found in neither flag table get no generated method + + $pkg->_make_unsupported_method($method => $type); + } + + push @{"JSON::XS::Boolean::ISA"}, qw(JSON::PP::Boolean); + push 
@{"JSON::PP::Boolean::ISA"}, qw(JSON::Boolean); + + $JSON::DEBUG and Carp::carp("set -support_by_pp mode."); + + return 1; +} + + + + +# +# Helper classes for XS +# + +package JSON::Backend::XS::Supportable; + +$Carp::Internal{'JSON::Backend::XS::Supportable'} = 1; + +sub _make_unsupported_method { # defines $pkg::$method (sets or clears the $type bits in the object's scalar) and $pkg::get_$method + my ($pkg, $method, $type) = @_; + + local $^W; + no strict qw(refs); + + *{"$pkg\::$method"} = sub { + local $^W; + if (defined $_[1] ? $_[1] : 1) { # a missing argument counts as enable + ${$_[0]} |= $type; + } + else { + ${$_[0]} &= ~$type; + } + $_[0]; + }; + + *{"$pkg\::get_$method"} = sub { + ${$_[0]} & $type ? 1 : ''; + }; + +} + + +sub _set_for_pp { # called as _set_for_pp($type, $self, ...): builds a JSON::PP object carrying the same properties and flag bits as $self + JSON::_load_pp( $_INSTALL_ONLY ); + + my $type = shift; + my $pp = new JSON::PP; + my $prop = $_[0]->property; + + for my $name (keys %$prop) { + $pp->$name( $prop->{$name} ? $prop->{$name} : 0 ); + } + + my $unsupported = $type eq 'encode' ? JSON::Backend::XS::UNSUPPORTED_ENCODE_FLAG + : JSON::Backend::XS::UNSUPPORTED_DECODE_FLAG; + my $flags = ${$_[0]} || 0; + + for my $name (keys %$unsupported) { + next if ($name eq 'EXPANDED'); # for developer's + my $enable = ($flags & $unsupported->{$name}) ? 
1 : 0; + my $method = lc $name; + $pp->$method($enable); + } + + $pp->indent_length( $_[0]->get_indent_length ); + + return $pp; +} + +sub _encode { # if an unsupported flag is set, encode with PP + if (${$_[0]}) { + _set_for_pp('encode' => @_)->encode($_[1]); + } + else { + $_[0]->_original_encode( $_[1] ); + } +} + + +sub _decode { # if unsupported-flag is set, use PP + if (${$_[0]}) { + _set_for_pp('decode' => @_)->decode($_[1]); + } + else { + $_[0]->_original_decode( $_[1] ); + } +} + + +sub decode_prefix { # always decodes with a PP object, whatever the flag word holds + _set_for_pp('decode' => @_)->decode_prefix($_[1]); +} + + +sub _incr_parse { + if (${$_[0]}) { + _set_for_pp('decode' => @_)->incr_parse($_[1]); + } + else { + $_[0]->_original_incr_parse( $_[1] ); + } +} + + +sub get_indent_length { # returns the 0..15 value stored by indent_length(); masking keeps higher flag bits out of the result on 64-bit perls + (${$_[0]} & JSON::Backend::XS::INDENT_LENGTH_FLAG()) >> 12; +} + + +sub indent_length { # stores a length of 0..15 in bits 12..15 of the flag word; warns and leaves the object unchanged otherwise + my $length = $_[1]; + + if (!defined $length or $length > 15 or $length < 0) { + Carp::carp "The acceptable range of indent_length() is 0 to 15."; + } + else { + local $^W; + $length <<= 12; + ${$_[0]} &= ~ JSON::Backend::XS::INDENT_LENGTH_FLAG; + ${$_[0]} |= $length; + *JSON::XS::encode = \&JSON::Backend::XS::Supportable::_encode; + } + + $_[0]; +} + + +1; +__END__ + +=head1 NAME + +JSON - JSON (JavaScript Object Notation) encoder/decoder + +=head1 SYNOPSIS + + use JSON; # imports encode_json, decode_json, to_json and from_json. + + # simple and fast interfaces (expect/generate UTF-8) + + $utf8_encoded_json_text = encode_json $perl_hash_or_arrayref; + $perl_hash_or_arrayref = decode_json $utf8_encoded_json_text; + + # OO-interface + + $json = JSON->new->allow_nonref; + + $json_text = $json->encode( $perl_scalar ); + $perl_scalar = $json->decode( $json_text ); + + $pretty_printed = $json->pretty->encode( $perl_scalar ); # pretty-printing + + # If you want to use PP only support features, call with '-support_by_pp' + # When XS unsupported feature is enable, using PP (de|en)code instead of XS ones. 
+ + use JSON -support_by_pp; + + # option-acceptable interfaces (expect/generate UNICODE by default) + + $json_text = to_json( $perl_scalar, { ascii => 1, pretty => 1 } ); + $perl_scalar = from_json( $json_text, { utf8 => 1 } ); + + # Between (en|de)code_json and (to|from)_json, if you want to write + # a code which communicates to an outer world (encoded in UTF-8), + # recommend to use (en|de)code_json. + +=head1 VERSION + + 2.53 + +This version is compatible with JSON::XS B<2.27> and later. + + +=head1 NOTE + +JSON::PP was included in the C<JSON> distribution. +It became a Perl core module in Perl 5.14. +And L<JSON::PP> will be split away from it. + +The C<JSON> distribution will include yet another set of JSON::PP modules. +They are JSON::backportPP and so on. JSON.pm should keep working as it did before. + +=head1 DESCRIPTION + + ************************** CAUTION ******************************** + * This is 'JSON module version 2' and there are many differences * + * to version 1.xx * + * Please check your applications using old version. * + * See to 'INCOMPATIBLE CHANGES TO OLD VERSION' * + ******************************************************************* + +JSON (JavaScript Object Notation) is a simple data format. +See L<http://www.json.org/> and C<RFC4627> (L<http://www.ietf.org/rfc/rfc4627.txt>). + +This module converts Perl data structures to JSON and vice versa using either +L<JSON::XS> or L<JSON::PP>. + +JSON::XS is the fastest and most proper JSON module on CPAN which must be +compiled and installed in your environment. +JSON::PP is a pure-Perl module which is bundled in this distribution and +has a strong compatibility to JSON::XS. + +This module tries to use JSON::XS by default and, if that fails, uses JSON::PP instead. +So its features completely depend on JSON::XS or JSON::PP. + +See L<BACKEND MODULE DECISION>. + +To distinguish the module name 'JSON' and the format type JSON, +the former is quoted by CE<lt>E<gt> (its results vary with your using media), +and the latter is left just as it is. 
+ +Module name : C<JSON> + +Format type : JSON + +=head2 FEATURES + +=over + +=item * correct unicode handling + +This module (i.e. backend modules) knows how to handle Unicode, documents +how and when it does so, and even documents what "correct" means. + +Even though there are limitations, this feature is available since Perl version 5.6. + +JSON::XS requires Perl 5.8.2 (but works correctly in 5.8.8 or later), so in older versions +C<JSON> should call JSON::PP as the backend which can be used since Perl 5.005. + +With Perl 5.8.x JSON::PP works, but from 5.8.0 to 5.8.2, because of a Perl side problem, +JSON::PP works slower in the versions. And in 5.005, the Unicode handling is not available. +See to L for more information. + +See also to L +and L. + + +=item * round-trip integrity + +When you serialise a perl data structure using only data types supported +by JSON and Perl, the deserialised data structure is identical on the Perl +level. (e.g. the string "2.0" doesn't suddenly become "2" just because +it looks like a number). There I<are> minor exceptions to this, read the +L<MAPPING> section below to learn about those. + + +=item * strict checking of JSON correctness + +There is no guessing, no generating of illegal JSON texts by default, +and only JSON is accepted as input by default (the latter is a security +feature). + +See to L and L. + +=item * fast + +This module returns a JSON::XS object itself if available. +Compared to other JSON modules and other serialisers such as Storable, +JSON::XS usually compares favourably in terms of speed, too. + +If not available, C<JSON> returns a JSON::PP object instead of JSON::XS and +it is very slow as pure-Perl. + +=item * simple to use + +This module has both a simple functional interface as well as an +object oriented interface. 
+ +=item * reasonably versatile output formats + +You can choose between the most compact guaranteed-single-line format possible +(nice for simple line-based protocols), a pure-ASCII format (for when your transport +is not 8-bit clean, still supports the whole Unicode range), or a pretty-printed +format (for when you want to read that stuff). Or you can combine those features +in whatever way you like. + +=back + +=head1 FUNCTIONAL INTERFACE + +Some documents are copied and modified from L. +C and C are additional functions. + +=head2 encode_json + + $json_text = encode_json $perl_scalar + +Converts the given Perl data structure to a UTF-8 encoded, binary string. + +This function call is functionally identical to: + + $json_text = JSON->new->utf8->encode($perl_scalar) + +=head2 decode_json + + $perl_scalar = decode_json $json_text + +The opposite of C: expects an UTF-8 (binary) string and tries +to parse that as an UTF-8 encoded JSON text, returning the resulting +reference. + +This function call is functionally identical to: + + $perl_scalar = JSON->new->utf8->decode($json_text) + + +=head2 to_json + + $json_text = to_json($perl_scalar) + +Converts the given Perl data structure to a json string. + +This function call is functionally identical to: + + $json_text = JSON->new->encode($perl_scalar) + +Takes a hash reference as the second. + + $json_text = to_json($perl_scalar, $flag_hashref) + +So, + + $json_text = to_json($perl_scalar, {utf8 => 1, pretty => 1}) + +equivalent to: + + $json_text = JSON->new->utf8(1)->pretty(1)->encode($perl_scalar) + +If you want to write a modern perl code which communicates to outer world, +you should use C (supposed that JSON data are encoded in UTF-8). + +=head2 from_json + + $perl_scalar = from_json($json_text) + +The opposite of C: expects a json string and tries +to parse it, returning the resulting reference. 
+ +This function call is functionally identical to: + + $perl_scalar = JSON->new->decode($json_text) + +Takes a hash reference as the second argument. + + $perl_scalar = from_json($json_text, $flag_hashref) + +So, + + $perl_scalar = from_json($json_text, {utf8 => 1}) + +equivalent to: + + $perl_scalar = JSON->new->utf8(1)->decode($json_text) + +If you want to write a modern perl code which communicates to outer world, +you should use C<decode_json> (supposed that JSON data are encoded in UTF-8). + +=head2 JSON::is_bool + + $is_boolean = JSON::is_bool($scalar) + +Returns true if the passed scalar represents either JSON::true or +JSON::false, two constants that act like C<1> and C<0> respectively +and are also used to represent JSON C<true> and C<false> in Perl strings. + +=head2 JSON::true + +Returns JSON true value which is blessed object. +It C<isa> JSON::Boolean object. + +=head2 JSON::false + +Returns JSON false value which is blessed object. +It C<isa> JSON::Boolean object. + +=head2 JSON::null + +Returns C<undef>. + +See L<MAPPING>, below, for more information on how JSON values are mapped to +Perl. + +=head1 HOW DO I DECODE A DATA FROM OUTER AND ENCODE TO OUTER + +This section supposes that your perl version is 5.8 or later. + +If you know a JSON text from an outer world - a network, a file content, and so on, +is encoded in UTF-8, you should use C<decode_json> or C<JSON> module object +with C<utf8> enable. And the decoded result will contain UNICODE characters. + + # from network + my $json = JSON->new->utf8; + my $json_text = CGI->new->param( 'json_data' ); + my $perl_scalar = $json->decode( $json_text ); + + # from file content + local $/; + open( my $fh, '<', 'json.data' ); + $json_text = <$fh>; + $perl_scalar = decode_json( $json_text ); + +If an outer data is not encoded in UTF-8, firstly you should C<decode> it. + + use Encode; + local $/; + open( my $fh, '<', 'json.data' ); + my $encoding = 'cp932'; + my $unicode_json_text = decode( $encoding, <$fh> ); # UNICODE + + # or you can write the below code. 
+ # + # open( my $fh, "<:encoding($encoding)", 'json.data' ); + # $unicode_json_text = <$fh>; + +In this case, C<$unicode_json_text> is of course UNICODE string. +So you B use C nor C module object with C enable. +Instead of them, you use C module object with C disable or C. + + $perl_scalar = $json->utf8(0)->decode( $unicode_json_text ); + # or + $perl_scalar = from_json( $unicode_json_text ); + +Or C and C: + + $perl_scalar = decode_json( encode( 'utf8', $unicode_json_text ) ); + # this way is not efficient. + +And now, you want to convert your C<$perl_scalar> into JSON data and +send it to an outer world - a network or a file content, and so on. + +Your data usually contains UNICODE strings and you want the converted data to be encoded +in UTF-8, you should use C or C module object with C enable. + + print encode_json( $perl_scalar ); # to a network? file? or display? + # or + print $json->utf8->encode( $perl_scalar ); + +If C<$perl_scalar> does not contain UNICODE but C<$encoding>-encoded strings +for some reason, then its characters are regarded as B for perl +(because it does not concern with your $encoding). +You B use C nor C module object with C enable. +Instead of them, you use C module object with C disable or C. +Note that the resulted text is a UNICODE string but no problem to print it. + + # $perl_scalar contains $encoding encoded string values + $unicode_json_text = $json->utf8(0)->encode( $perl_scalar ); + # or + $unicode_json_text = to_json( $perl_scalar ); + # $unicode_json_text consists of characters less than 0x100 + print $unicode_json_text; + +Or C all string values and C: + + $perl_scalar->{ foo } = decode( $encoding, $perl_scalar->{ foo } ); + # ... do it to each string values, then encode_json + $json_text = encode_json( $perl_scalar ); + +This method is a proper way but probably not efficient. + +See to L, L. 
+ + +=head1 COMMON OBJECT-ORIENTED INTERFACE + +=head2 new + + $json = new JSON + +Returns a new C<JSON> object inherited from either JSON::XS or JSON::PP +that can be used to de/encode JSON strings. + +All boolean flags described below are by default I<disabled>. + +The mutators for flags all return the JSON object again and thus calls can +be chained: + + my $json = JSON->new->utf8->space_after->encode({a => [1,2]}) + => {"a": [1, 2]} + +=head2 ascii + + $json = $json->ascii([$enable]) + + $enabled = $json->get_ascii + +If $enable is true (or missing), then the encode method will not generate characters outside +the code range 0..127. Any Unicode characters outside that range will be escaped using either +a single \uXXXX or a double \uHHHH\uLLLL escape sequence, as per RFC4627. + +If $enable is false, then the encode method will not escape Unicode characters unless +required by the JSON syntax or other flags. This results in a faster and more compact format. + +This feature depends on the used Perl version and environment. + +See to L<JSON::PP/UNICODE HANDLING ON PERLS> if the backend is PP. + + JSON->new->ascii(1)->encode([chr 0x10401]) + => ["\ud801\udc01"] + +=head2 latin1 + + $json = $json->latin1([$enable]) + + $enabled = $json->get_latin1 + +If $enable is true (or missing), then the encode method will encode the resulting JSON +text as latin1 (or iso-8859-1), escaping any characters outside the code range 0..255. + +If $enable is false, then the encode method will not escape Unicode characters +unless required by the JSON syntax or other flags. + + JSON->new->latin1->encode (["\x{89}\x{abc}"]) + => ["\x{89}\\u0abc"] # (perl syntax, U+abc escaped, U+89 not) + +=head2 utf8 + + $json = $json->utf8([$enable]) + + $enabled = $json->get_utf8 + +If $enable is true (or missing), then the encode method will encode the JSON result +into UTF-8, as required by many protocols, while the decode method expects to be handed +an UTF-8-encoded string. 
Please note that UTF-8-encoded strings do not contain any +characters outside the range 0..255, they are thus useful for bytewise/binary I/O. + +In future versions, enabling this option might enable autodetection of the UTF-16 and UTF-32 +encoding families, as described in RFC4627. + +If $enable is false, then the encode method will return the JSON string as a (non-encoded) +Unicode string, while decode expects thus a Unicode string. Any decoding or encoding +(e.g. to UTF-8 or UTF-16) needs to be done yourself, e.g. using the Encode module. + + +Example, output UTF-16BE-encoded JSON: + + use Encode; + $jsontext = encode "UTF-16BE", JSON::XS->new->encode ($object); + +Example, decode UTF-32LE-encoded JSON: + + use Encode; + $object = JSON::XS->new->decode (decode "UTF-32LE", $jsontext); + +See to L<JSON::PP/UNICODE HANDLING ON PERLS> if the backend is PP. + + +=head2 pretty + + $json = $json->pretty([$enable]) + +This enables (or disables) all of the C<indent>, C<space_before> and +C<space_after> (and in the future possibly more) flags in one call to +generate the most readable (or most compact) form possible. + +Equivalent to: + + $json->indent->space_before->space_after + +The indent space length is three and JSON::XS cannot change the indent +space length. + +=head2 indent + + $json = $json->indent([$enable]) + + $enabled = $json->get_indent + +If C<$enable> is true (or missing), then the C<encode> method will use a multiline +format as output, putting every array member or object/hash key-value pair +into its own line, indenting them properly. + +If C<$enable> is false, no newlines or indenting will be produced, and the +resulting JSON text is guaranteed not to contain any C<newlines>. + +This setting has no effect when decoding JSON texts. + +The indent space length is three. +With JSON::PP, you can also access C<indent_length> to change indent space length. 
+ + +=head2 space_before + + $json = $json->space_before([$enable]) + + $enabled = $json->get_space_before + +If C<$enable> is true (or missing), then the C method will add an extra +optional space before the C<:> separating keys from values in JSON objects. + +If C<$enable> is false, then the C method will not add any extra +space at those places. + +This setting has no effect when decoding JSON texts. + +Example, space_before enabled, space_after and indent disabled: + + {"key" :"value"} + + +=head2 space_after + + $json = $json->space_after([$enable]) + + $enabled = $json->get_space_after + +If C<$enable> is true (or missing), then the C method will add an extra +optional space after the C<:> separating keys from values in JSON objects +and extra whitespace after the C<,> separating key-value pairs and array +members. + +If C<$enable> is false, then the C method will not add any extra +space at those places. + +This setting has no effect when decoding JSON texts. + +Example, space_before and indent disabled, space_after enabled: + + {"key": "value"} + + +=head2 relaxed + + $json = $json->relaxed([$enable]) + + $enabled = $json->get_relaxed + +If C<$enable> is true (or missing), then C will accept some +extensions to normal JSON syntax (see below). C will not be +affected in anyway. I. I suggest only to use this option to +parse application-specific files written by humans (configuration files, +resource files etc.) + +If C<$enable> is false (the default), then C will only accept +valid JSON texts. + +Currently accepted extensions are: + +=over 4 + +=item * list items can have an end-comma + +JSON I array elements and key-value pairs with commas. 
This +can be annoying if you write JSON texts manually and want to be able to +quickly append elements, so this extension accepts comma at the end of +such items not just between them: + + [ + 1, + 2, <- this comma not normally allowed + ] + { + "k1": "v1", + "k2": "v2", <- this comma not normally allowed + } + +=item * shell-style '#'-comments + +Whenever JSON allows whitespace, shell-style comments are additionally +allowed. They are terminated by the first carriage-return or line-feed +character, after which more white-space and comments are allowed. + + [ + 1, # this comment not allowed in JSON + # neither this one... + ] + +=back + + +=head2 canonical + + $json = $json->canonical([$enable]) + + $enabled = $json->get_canonical + +If C<$enable> is true (or missing), then the C method will output JSON objects +by sorting their keys. This is adding a comparatively high overhead. + +If C<$enable> is false, then the C method will output key-value +pairs in the order Perl stores them (which will likely change between runs +of the same script). + +This option is useful if you want the same data structure to be encoded as +the same JSON text (given the same overall settings). If it is disabled, +the same hash might be encoded differently even if contains the same data, +as key-value pairs have no inherent ordering in Perl. + +This setting has no effect when decoding JSON texts. + +=head2 allow_nonref + + $json = $json->allow_nonref([$enable]) + + $enabled = $json->get_allow_nonref + +If C<$enable> is true (or missing), then the C method can convert a +non-reference into its corresponding string, number or null JSON value, +which is an extension to RFC4627. Likewise, C will accept those JSON +values instead of croaking. + +If C<$enable> is false, then the C method will croak if it isn't +passed an arrayref or hashref, as JSON texts must either be an object +or array. Likewise, C will croak if given something that is not a +JSON object or array. 
+ + JSON->new->allow_nonref->encode ("Hello, World!") + => "Hello, World!" + +=head2 allow_unknown + + $json = $json->allow_unknown ([$enable]) + + $enabled = $json->get_allow_unknown + +If $enable is true (or missing), then "encode" will *not* throw an +exception when it encounters values it cannot represent in JSON (for +example, filehandles) but instead will encode a JSON "null" value. +Note that blessed objects are not included here and are handled +separately by c. + +If $enable is false (the default), then "encode" will throw an +exception when it encounters anything it cannot encode as JSON. + +This option does not affect "decode" in any way, and it is +recommended to leave it off unless you know your communications +partner. + +=head2 allow_blessed + + $json = $json->allow_blessed([$enable]) + + $enabled = $json->get_allow_blessed + +If C<$enable> is true (or missing), then the C method will not +barf when it encounters a blessed reference. Instead, the value of the +B option will decide whether C (C +disabled or no C method found) or a representation of the +object (C enabled and C method found) is being +encoded. Has no effect on C. + +If C<$enable> is false (the default), then C will throw an +exception when it encounters a blessed object. + + +=head2 convert_blessed + + $json = $json->convert_blessed([$enable]) + + $enabled = $json->get_convert_blessed + +If C<$enable> is true (or missing), then C, upon encountering a +blessed object, will check for the availability of the C method +on the object's class. If found, it will be called in scalar context +and the resulting scalar will be encoded instead of the object. If no +C method is found, the value of C will decide what +to do. + +The C method may safely call die if it wants. If C +returns other blessed objects, those will be handled in the same +way. C must take care of not causing an endless recursion cycle +(== crash) in this case. 
The name of C was chosen because other +methods called by the Perl core (== not by the user of the object) are +usually in upper case letters and to avoid collisions with the C +function or method. + +This setting does not yet influence C in any way. + +If C<$enable> is false, then the C setting will decide what +to do when a blessed object is found. + +=over + +=item convert_blessed_universally mode + +If use C with C<-convert_blessed_universally>, the C +subroutine is defined as the below code: + + *UNIVERSAL::TO_JSON = sub { + my $b_obj = B::svref_2object( $_[0] ); + return $b_obj->isa('B::HV') ? { %{ $_[0] } } + : $b_obj->isa('B::AV') ? [ @{ $_[0] } ] + : undef + ; + } + +This will cause that C method converts simple blessed objects into +JSON objects as non-blessed object. + + JSON -convert_blessed_universally; + $json->allow_blessed->convert_blessed->encode( $blessed_object ) + +This feature is experimental and may be removed in the future. + +=back + +=head2 filter_json_object + + $json = $json->filter_json_object([$coderef]) + +When C<$coderef> is specified, it will be called from C each +time it decodes a JSON object. The only argument passed to the coderef +is a reference to the newly-created hash. If the code references returns +a single scalar (which need not be a reference), this value +(i.e. a copy of that scalar to avoid aliasing) is inserted into the +deserialised data structure. If it returns an empty list +(NOTE: I C, which is a valid scalar), the original deserialised +hash will be inserted. This setting can slow down decoding considerably. + +When C<$coderef> is omitted or undefined, any existing callback will +be removed and C will not change the deserialised hash in any +way. + +Example, convert all JSON objects into the integer 5: + + my $js = JSON->new->filter_json_object (sub { 5 }); + # returns [5] + $js->decode ('[{}]'); # the given subroutine takes a hash reference. 
+ # throw an exception because allow_nonref is not enabled + # so a lone 5 is not allowed. + $js->decode ('{"a":1, "b":2}'); + + +=head2 filter_json_single_key_object + + $json = $json->filter_json_single_key_object($key [=> $coderef]) + +Works remotely similar to C, but is only called for +JSON objects having a single key named C<$key>. + +This C<$coderef> is called before the one specified via +C, if any. It gets passed the single value in the JSON +object. If it returns a single value, it will be inserted into the data +structure. If it returns nothing (not even C but the empty list), +the callback from C will be called next, as if no +single-key callback were specified. + +If C<$coderef> is omitted or undefined, the corresponding callback will be +disabled. There can only ever be one callback for a given key. + +As this callback gets called less often then the C +one, decoding speed will not usually suffer as much. Therefore, single-key +objects make excellent targets to serialise Perl objects into, especially +as single-key JSON objects are as close to the type-tagged value concept +as JSON gets (it's basically an ID/VALUE tuple). Of course, JSON does not +support this in any way, so you need to make sure your data never looks +like a serialised Perl hash. + +Typical names for the single object key are C<__class_whatever__>, or +C<$__dollars_are_rarely_used__$> or C<}ugly_brace_placement>, or even +things like C<__class_md5sum(classname)__>, to reduce the risk of clashing +with real hashes. 
+ +Example, decode JSON objects of the form C<< { "__widget__" => <id> } >> +into the corresponding C<< $WIDGET{<id>} >> object: + + # return whatever is in $WIDGET{5}: + JSON + ->new + ->filter_json_single_key_object (__widget__ => sub { + $WIDGET{ $_[0] } + }) + ->decode ('{"__widget__": 5}') + + # this can be used with a TO_JSON method in some "widget" class + # for serialisation to json: + sub WidgetBase::TO_JSON { + my ($self) = @_; + + unless ($self->{id}) { + $self->{id} = ..get..some..id..; + $WIDGET{$self->{id}} = $self; + } + + { __widget__ => $self->{id} } + } + + +=head2 shrink + + $json = $json->shrink([$enable]) + + $enabled = $json->get_shrink + +With JSON::XS, this flag resizes strings generated by either +C<encode> or C<decode> to their minimum size possible. This can save +memory when your JSON texts are either very very long or you have many +short strings. It will also try to downgrade any strings to octet-form +if possible: perl stores strings internally either in an encoding called +UTF-X or in octet-form. The latter cannot store everything but uses less +space in general (and some buggy Perl or C code might even rely on that +internal representation being used). + +With JSON::PP, it is noop about resizing strings but tries +C<utf8::downgrade> to the returned string by C<encode>. See to L<utf8>. + +See to L and L. + +=head2 max_depth + + $json = $json->max_depth([$maximum_nesting_depth]) + + $max_depth = $json->get_max_depth + +Sets the maximum nesting level (default C<512>) accepted while encoding +or decoding. If a higher nesting level is detected in JSON text or a Perl +data structure, then the encoder and decoder will stop and croak at that +point. + +Nesting level is defined by number of hash- or arrayrefs that the encoder +needs to traverse to reach a given point or the number of C<{> or C<[> +characters without their matching closing parenthesis crossed to reach a +given character in a string. + +If no argument is given, the highest possible setting will be used, which +is rarely useful. 
+ +Note that nesting is implemented by recursion in C. The default value has +been chosen to be as large as typical operating systems allow without +crashing. (JSON::XS) + +With JSON::PP as the backend, when a large value (100 or more) was set and +it de/encodes a deep nested object/text, it may raise a warning +'Deep recursion on subroutine' at the perl runtime phase. + +See L for more info on why this is useful. + +=head2 max_size + + $json = $json->max_size([$maximum_string_size]) + + $max_size = $json->get_max_size + +Set the maximum length a JSON text may have (in bytes) where decoding is +being attempted. The default is C<0>, meaning no limit. When C<decode> +is called on a string that is longer than this many bytes, it will not +attempt to decode the string but throw an exception. This setting has no +effect on C<encode> (yet). + +If no argument is given, the limit check will be deactivated (same as when +C<0> is specified). + +See L, below, for more info on why this is useful. + +=head2 encode + + $json_text = $json->encode($perl_scalar) + +Converts the given Perl data structure (a simple scalar or a reference +to a hash or array) to its JSON representation. Simple scalars will be +converted into JSON string or number sequences, while references to arrays +become JSON arrays and references to hashes become JSON objects. Undefined +Perl values (e.g. C<undef>) become JSON C<null> values. +References to the integers C<0> and C<1> are converted into C<false> and C<true>. + +=head2 decode + + $perl_scalar = $json->decode($json_text) + +The opposite of C<encode>: expects a JSON text and tries to parse it, +returning the resulting simple scalar or reference. Croaks on error. + +JSON numbers and strings become simple Perl scalars. JSON arrays become +Perl arrayrefs and JSON objects become Perl hashrefs. C<true> becomes +C<1> (C<JSON::true>), C<false> becomes C<0> (C<JSON::false>) and +C<null> becomes C<undef>. 
+ +=head2 decode_prefix + + ($perl_scalar, $characters) = $json->decode_prefix($json_text) + +This works like the C<decode> method, but instead of raising an exception +when there is trailing garbage after the first JSON object, it will +silently stop parsing there and return the number of characters consumed +so far. + + JSON->new->decode_prefix ("[1] the tail") + => ([], 3) + +See L<JSON::XS/OBJECT-ORIENTED INTERFACE>. + +=head2 property + + $boolean = $json->property($property_name) + +Returns the boolean value of the given property. + +The available properties are C<ascii>, C<latin1>, C<utf8>, +C<indent>, C<space_before>, C<space_after>, C<relaxed>, C<canonical>, +C<allow_nonref>, C<allow_unknown>, C<allow_blessed>, C<convert_blessed>, +C<shrink>, C<max_depth> and C<max_size>. + + $boolean = $json->property('utf8'); + => 0 + $json->utf8; + $boolean = $json->property('utf8'); + => 1 + +Sets the property with a given boolean value. + + $json = $json->property($property_name => $boolean); + +With no argument, it returns all the above properties as a hash reference. + + $flag_hashref = $json->property(); + +=head1 INCREMENTAL PARSING + +Most of this section is copied and modified from L<JSON::XS/INCREMENTAL PARSING>. + +In some cases, there is the need for incremental parsing of JSON texts. +This module does allow you to parse a JSON stream incrementally. +It does so by accumulating text until it has a full JSON object, which +it then can decode. This process is similar to using C<decode_prefix> +to see if a full JSON object is available, but is much more efficient +(and can be implemented with a minimum of method calls). + +The backend module will only attempt to parse the JSON text once it is sure it +has enough text to get a decisive result, using a very simple but +truly incremental parser. This means that it sometimes won't stop as +early as the full parser, for example, it doesn't detect parenthesis +mismatches. The only thing it guarantees is that it starts decoding as +soon as a syntactically valid JSON text has been seen. This means you need +to set resource limits (e.g. C<max_size>) to ensure the parser will stop +parsing in the presence of syntax errors. 
+ +The following methods implement this incremental parser. + +=head2 incr_parse + + $json->incr_parse( [$string] ) # void context + + $obj_or_undef = $json->incr_parse( [$string] ) # scalar context + + @obj_or_empty = $json->incr_parse( [$string] ) # list context + +This is the central parsing function. It can both append new text and +extract objects from the stream accumulated so far (both of these +functions are optional). + +If C<$string> is given, then this string is appended to the already +existing JSON fragment stored in the C<$json> object. + +After that, if the function is called in void context, it will simply +return without doing anything further. This can be used to add more text +in as many chunks as you want. + +If the method is called in scalar context, then it will try to extract +exactly I<one> JSON object. If that is successful, it will return this +object, otherwise it will return C<undef>. If there is a parse error, +this method will croak just as C<decode> would do (one can then use +C<incr_skip> to skip the erroneous part). This is the most common way of +using the method. + +And finally, in list context, it will try to extract as many objects +from the stream as it can find and return them, or the empty list +otherwise. For this to work, there must be no separators between the JSON +objects or arrays, instead they must be concatenated back-to-back. If +an error occurs, an exception will be raised as in the scalar context +case. Note that in this case, any previously-parsed JSON texts will be +lost. + +Example: Parse some JSON arrays/objects in a given string and return them. + + my @objs = JSON->new->incr_parse ("[5][7][1,2]"); + +=head2 incr_text + + $lvalue_string = $json->incr_text + +This method returns the currently stored JSON fragment as an lvalue, that +is, you can manipulate it. This I<only> works when a preceding call to +C<incr_parse> in I<scalar context> successfully returned an object. Under +all other circumstances you must not call this function (I mean it. 
+although in simple tests it might actually work, it I fail under +real world conditions). As a special exception, you can also call this +method before having parsed anything. + +This function is useful in two cases: a) finding the trailing text after a +JSON object or b) parsing multiple JSON objects separated by non-JSON text +(such as commas). + + $json->incr_text =~ s/\s*,\s*//; + +In Perl 5.005, C attribute is not available. +You must write codes like the below: + + $string = $json->incr_text; + $string =~ s/\s*,\s*//; + $json->incr_text( $string ); + +=head2 incr_skip + + $json->incr_skip + +This will reset the state of the incremental parser and will remove the +parsed text from the input buffer. This is useful after C +died, in which case the input buffer and incremental parser state is left +unchanged, to skip the text parsed so far and to reset the parse state. + +=head2 incr_reset + + $json->incr_reset + +This completely resets the incremental parser, that is, after this call, +it will be as if the parser had never parsed anything. + +This is useful if you want ot repeatedly parse JSON objects and want to +ignore any trailing data, which means you have to reset the parser after +each successful decode. + +See to L for examples. + + +=head1 JSON::PP SUPPORT METHODS + +The below methods are JSON::PP own methods, so when C works +with JSON::PP (i.e. the created object is a JSON::PP object), available. +See to L in detail. + +If you use C with additonal C<-support_by_pp>, some methods +are available even with JSON::XS. See to L. + + BEING { $ENV{PERL_JSON_BACKEND} = 'JSON::XS' } + + use JSON -support_by_pp; + + my $json = new JSON; + $json->allow_nonref->escape_slash->encode("/"); + + # functional interfaces too. + print to_json(["/"], {escape_slash => 1}); + print from_json('["foo"]', {utf8 => 1}); + +If you do not want to all functions but C<-support_by_pp>, +use C<-no_export>. 
+ + use JSON -support_by_pp, -no_export; + # functional interfaces are not exported. + +=head2 allow_singlequote + + $json = $json->allow_singlequote([$enable]) + +If C<$enable> is true (or missing), then C will accept +any JSON strings quoted by single quotations that are invalid JSON +format. + + $json->allow_singlequote->decode({"foo":'bar'}); + $json->allow_singlequote->decode({'foo':"bar"}); + $json->allow_singlequote->decode({'foo':'bar'}); + +As same as the C option, this option may be used to parse +application-specific files written by humans. + +=head2 allow_barekey + + $json = $json->allow_barekey([$enable]) + +If C<$enable> is true (or missing), then C will accept +bare keys of JSON object that are invalid JSON format. + +As same as the C option, this option may be used to parse +application-specific files written by humans. + + $json->allow_barekey->decode('{foo:"bar"}'); + +=head2 allow_bignum + + $json = $json->allow_bignum([$enable]) + +If C<$enable> is true (or missing), then C will convert +the big integer Perl cannot handle as integer into a L +object and convert a floating number (any) into a L. + +On the contary, C converts C objects and C +objects into JSON numbers with C enable. + + $json->allow_nonref->allow_blessed->allow_bignum; + $bigfloat = $json->decode('2.000000000000000000000000001'); + print $json->encode($bigfloat); + # => 2.000000000000000000000000001 + +See to L aboout the conversion of JSON number. + +=head2 loose + + $json = $json->loose([$enable]) + +The unescaped [\x00-\x1f\x22\x2f\x5c] strings are invalid in JSON strings +and the module doesn't allow to C to these (except for \x2f). +If C<$enable> is true (or missing), then C will accept these +unescaped strings. + + $json->loose->decode(qq|["abc + def"]|); + +See to L. + +=head2 escape_slash + + $json = $json->escape_slash([$enable]) + +According to JSON Grammar, I (U+002F) is escaped. But by default +JSON backend modules encode strings without escaping slash. 
+ +If C<$enable> is true (or missing), then C will escape slashes. + +=head2 indent_length + + $json = $json->indent_length($length) + +With JSON::XS, The indent space length is 3 and cannot be changed. +With JSON::PP, it sets the indent space length with the given $length. +The default is 3. The acceptable range is 0 to 15. + +=head2 sort_by + + $json = $json->sort_by($function_name) + $json = $json->sort_by($subroutine_ref) + +If $function_name or $subroutine_ref are set, its sort routine are used. + + $js = $pc->sort_by(sub { $JSON::PP::a cmp $JSON::PP::b })->encode($obj); + # is($js, q|{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6,"g":7,"h":8,"i":9}|); + + $js = $pc->sort_by('own_sort')->encode($obj); + # is($js, q|{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6,"g":7,"h":8,"i":9}|); + + sub JSON::PP::own_sort { $JSON::PP::a cmp $JSON::PP::b } + +As the sorting routine runs in the JSON::PP scope, the given +subroutine name and the special variables C<$a>, C<$b> will begin +with 'JSON::PP::'. + +If $integer is set, then the effect is same as C on. + +See to L. + +=head1 MAPPING + +This section is copied from JSON::XS and modified to C. +JSON::XS and JSON::PP mapping mechanisms are almost equivalent. + +See to L. + +=head2 JSON -> PERL + +=over 4 + +=item object + +A JSON object becomes a reference to a hash in Perl. No ordering of object +keys is preserved (JSON does not preserver object key ordering itself). + +=item array + +A JSON array becomes a reference to an array in Perl. + +=item string + +A JSON string becomes a string scalar in Perl - Unicode codepoints in JSON +are represented by the same codepoints in the Perl string, so no manual +decoding is necessary. + +=item number + +A JSON number becomes either an integer, numeric (floating point) or +string scalar in perl, depending on its range and any fractional parts. 
On +the Perl level, there is no difference between those as Perl handles all +the conversion details, but an integer may take slightly less memory and +might represent more values exactly than floating point numbers. + +If the number consists of digits only, C will try to represent +it as an integer value. If that fails, it will try to represent it as +a numeric (floating point) value if that is possible without loss of +precision. Otherwise it will preserve the number as a string value (in +which case you lose roundtripping ability, as the JSON number will be +re-encoded toa JSON string). + +Numbers containing a fractional or exponential part will always be +represented as numeric (floating point) values, possibly at a loss of +precision (in which case you might lose perfect roundtripping ability, but +the JSON number will still be re-encoded as a JSON number). + +Note that precision is not accuracy - binary floating point values cannot +represent most decimal fractions exactly, and when converting from and to +floating point, C only guarantees precision up to but not including +the leats significant bit. + +If the backend is JSON::PP and C is enable, the big integers +and the numeric can be optionally converted into L and +L objects. + +=item true, false + +These JSON atoms become C and C, +respectively. They are overloaded to act almost exactly like the numbers +C<1> and C<0>. You can check wether a scalar is a JSON boolean by using +the C function. + +If C and C are used as strings or compared as strings, +they represent as C and C respectively. + + print JSON::true . "\n"; + => true + print JSON::true + 1; + => 1 + + ok(JSON::true eq 'true'); + ok(JSON::true eq '1'); + ok(JSON::true == 1); + +C will install these missing overloading features to the backend modules. + + +=item null + +A JSON null atom becomes C in Perl. + +C returns C. 
+ +=back + + +=head2 PERL -> JSON + +The mapping from Perl to JSON is slightly more difficult, as Perl is a +truly typeless language, so we can only guess which JSON type is meant by +a Perl value. + +=over 4 + +=item hash references + +Perl hash references become JSON objects. As there is no inherent ordering +in hash keys (or JSON objects), they will usually be encoded in a +pseudo-random order that can change between runs of the same program but +stays generally the same within a single run of a program. C +optionally sort the hash keys (determined by the I flag), so +the same datastructure will serialise to the same JSON text (given same +settings and version of JSON::XS), but this incurs a runtime overhead +and is only rarely useful, e.g. when you want to compare some JSON text +against another for equality. + +In future, the ordered object feature will be added to JSON::PP using C mechanism. + + +=item array references + +Perl array references become JSON arrays. + +=item other references + +Other unblessed references are generally not allowed and will cause an +exception to be thrown, except for references to the integers C<0> and +C<1>, which get turned into C and C atoms in JSON. You can +also use C and C to improve readability. + + to_json [\0,JSON::true] # yields [false,true] + +=item JSON::true, JSON::false, JSON::null + +These special values become JSON true and JSON false values, +respectively. You can also use C<\1> and C<\0> directly if you want. + +JSON::null returns C. + +=item blessed objects + +Blessed objects are not directly representable in JSON. See the +C and C methods on various options on +how to deal with this: basically, you can choose between throwing an +exception, encoding the reference as if it weren't blessed, or provide +your own serialiser method. + +With C mode, C converts blessed +hash references or blessed array references (contains other blessed references) +into JSON members and arrays. 
+ + use JSON -convert_blessed_universally; + JSON->new->allow_blessed->convert_blessed->encode( $blessed_object ); + +See to L. + +=item simple scalars + +Simple Perl scalars (any scalar that is not a reference) are the most +difficult objects to encode: JSON::XS and JSON::PP will encode undefined scalars as +JSON C values, scalars that have last been used in a string context +before encoding as JSON strings, and anything else as number value: + + # dump as number + encode_json [2] # yields [2] + encode_json [-3.0e17] # yields [-3e+17] + my $value = 5; encode_json [$value] # yields [5] + + # used as string, so dump as string + print $value; + encode_json [$value] # yields ["5"] + + # undef becomes null + encode_json [undef] # yields [null] + +You can force the type to be a string by stringifying it: + + my $x = 3.1; # some variable containing a number + "$x"; # stringified + $x .= ""; # another, more awkward way to stringify + print $x; # perl does it for you, too, quite often + +You can force the type to be a number by numifying it: + + my $x = "3"; # some variable containing a string + $x += 0; # numify it, ensuring it will be dumped as a number + $x *= 1; # same thing, the choise is yours. + +You can not currently force the type in other, less obscure, ways. + +Note that numerical precision has the same meaning as under Perl (so +binary to decimal conversion follows the same rules as in Perl, which +can differ to other languages). Also, your perl interpreter might expose +extensions to the floating point numbers of your platform, such as +infinities or NaN's - these cannot be represented in JSON, and it is an +error to pass those in. + +=item Big Number + +If the backend is JSON::PP and C is enable, +C converts C objects and C +objects into JSON numbers. + + +=back + +=head1 JSON and ECMAscript + +See to L. + +=head1 JSON and YAML + +JSON is not a subset of YAML. +See to L. + + +=head1 BACKEND MODULE DECISION + +When you use C, C tries to C JSON::XS. 
If this call failed, it will +C JSON::PP. The required JSON::XS version is I<2.2> or later. + +The C constructor method returns an object inherited from the backend module, +and JSON::XS object is a blessed scaler reference while JSON::PP is a blessed hash +reference. + +So, your program should not depend on the backend module, especially +returned objects should not be modified. + + my $json = JSON->new; # XS or PP? + $json->{stash} = 'this is xs object'; # this code may raise an error! + +To check the backend module, there are some methods - C, C and C. + + JSON->backend; # 'JSON::XS' or 'JSON::PP' + + JSON->backend->is_pp: # 0 or 1 + + JSON->backend->is_xs: # 1 or 0 + + $json->is_xs; # 1 or 0 + + $json->is_pp; # 0 or 1 + + +If you set an enviornment variable C, The calling action will be changed. + +=over + +=item PERL_JSON_BACKEND = 0 or PERL_JSON_BACKEND = 'JSON::PP' + +Always use JSON::PP + +=item PERL_JSON_BACKEND == 1 or PERL_JSON_BACKEND = 'JSON::XS,JSON::PP' + +(The default) Use compiled JSON::XS if it is properly compiled & installed, +otherwise use JSON::PP. + +=item PERL_JSON_BACKEND == 2 or PERL_JSON_BACKEND = 'JSON::XS' + +Always use compiled JSON::XS, die if it isn't properly compiled & installed. + +=item PERL_JSON_BACKEND = 'JSON::backportPP' + +Always use JSON::backportPP. +JSON::backportPP is JSON::PP back port module. +C includs JSON::backportPP instead of JSON::PP. + +=back + +These ideas come from L mechanism. + +example: + + BEGIN { $ENV{PERL_JSON_BACKEND} = 'JSON::PP' } + use JSON; # always uses JSON::PP + +In future, it may be able to specify another module. + +=head1 USE PP FEATURES EVEN THOUGH XS BACKEND + +Many methods are available with either JSON::XS or JSON::PP and +when the backend module is JSON::XS, if any JSON::PP specific (i.e. JSON::XS unspported) +method is called, it will C and be noop. + +But If you C C passing the optional string C<-support_by_pp>, +it makes a part of those unupported methods available. 
+This feature is achieved by using JSON::PP in C. + + BEGIN { $ENV{PERL_JSON_BACKEND} = 2 } # with JSON::XS + use JSON -support_by_pp; + my $json = new JSON; + $json->allow_nonref->escape_slash->encode("/"); + +At this time, the returned object is a C +object (re-blessed XS object), and by checking JSON::XS unsupported flags +in de/encoding, can support some unsupported methods - C, C, +C, C, C and C. + +When any unsupported methods are not enable, C will be +used as is. The switch is achieved by changing the symbolic tables. + +C<-support_by_pp> is effective only when the backend module is JSON::XS +and it makes the de/encoding speed down a bit. + +See to L. + +=head1 INCOMPATIBLE CHANGES TO OLD VERSION + +There are big incompatibility between new version (2.00) and old (1.xx). +If you use old C 1.xx in your code, please check it. + +See to L + +=over + +=item jsonToObj and objToJson are obsoleted. + +Non Perl-style name C and C are obsoleted +(but not yet deleted from the source). +If you use these functions in your code, please replace them +with C and C. + + +=item Global variables are no longer available. + +C class variables - C<$JSON::AUTOCONVERT>, C<$JSON::BareKey>, etc... +- are not available any longer. +Instead, various features can be used through object methods. + + +=item Package JSON::Converter and JSON::Parser are deleted. + +Now C bundles with JSON::PP which can handle JSON more properly than them. + +=item Package JSON::NotString is deleted. + +There was C class which represents JSON value C, C, C +and numbers. It was deleted and replaced by C. + +C represents C and C. + +C does not represent C. + +C returns C. + +C makes L and L is-a relation +to L. + +=item function JSON::Number is obsoleted. + +C is now needless because JSON::XS and JSON::PP have +round-trip integrity. + +=item JSONRPC modules are deleted. + +Perl implementation of JSON-RPC protocol - C, C +and C are deleted in this distribution. 
+Instead of them, there is L which supports JSON-RPC protocol version 1.1. + +=back + +=head2 Transition ways from 1.xx to 2.xx. + +You should set C mode firstly, because +it is always successful for the below codes even with JSON::XS. + + use JSON -support_by_pp; + +=over + +=item Exported jsonToObj (simple) + + from_json($json_text); + +=item Exported objToJson (simple) + + to_json($perl_scalar); + +=item Exported jsonToObj (advanced) + + $flags = {allow_barekey => 1, allow_singlequote => 1}; + from_json($json_text, $flags); + +equivalent to: + + $JSON::BareKey = 1; + $JSON::QuotApos = 1; + jsonToObj($json_text); + +=item Exported objToJson (advanced) + + $flags = {allow_blessed => 1, allow_barekey => 1}; + to_json($perl_scalar, $flags); + +equivalent to: + + $JSON::BareKey = 1; + objToJson($perl_scalar); + +=item jsonToObj as object method + + $json->decode($json_text); + +=item objToJson as object method + + $json->encode($perl_scalar); + +=item new method with parameters + +The C method in 2.x takes any parameters no longer. +You can set parameters instead; + + $json = JSON->new->pretty; + +=item $JSON::Pretty, $JSON::Indent, $JSON::Delimiter + +If C is enable, that means C<$JSON::Pretty> flag set. And +C<$JSON::Delimiter> was substituted by C and C. +In conclusion: + + $json->indent->space_before->space_after; + +Equivalent to: + + $json->pretty; + +To change indent length, use C. + +(Only with JSON::PP, if C<-support_by_pp> is not used.) + + $json->pretty->indent_length(2)->encode($perl_scalar); + +=item $JSON::BareKey + +(Only with JSON::PP, if C<-support_by_pp> is not used.) + + $json->allow_barekey->decode($json_text) + +=item $JSON::ConvBlessed + +use C<-convert_blessed_universally>. See to L. + +=item $JSON::QuotApos + +(Only with JSON::PP, if C<-support_by_pp> is not used.) + + $json->allow_singlequote->decode($json_text) + +=item $JSON::SingleQuote + +Disable. C does not make such a invalid JSON string any longer. 
+ +=item $JSON::KeySort + + $json->canonical->encode($perl_scalar) + +This is the ascii sort. + +If you want to use with your own sort routine, check the C method. + +(Only with JSON::PP, even if C<-support_by_pp> is used currently.) + + $json->sort_by($sort_routine_ref)->encode($perl_scalar) + + $json->sort_by(sub { $JSON::PP::a <=> $JSON::PP::b })->encode($perl_scalar) + +Can't access C<$a> and C<$b> but C<$JSON::PP::a> and C<$JSON::PP::b>. + +=item $JSON::SkipInvalid + + $json->allow_unknown + +=item $JSON::AUTOCONVERT + +Needless. C backend modules have the round-trip integrity. + +=item $JSON::UTF8 + +Needless because C (JSON::XS/JSON::PP) sets +the UTF8 flag on properly. + + # With UTF8-flagged strings + + $json->allow_nonref; + $str = chr(1000); # UTF8-flagged + + $json_text = $json->utf8(0)->encode($str); + utf8::is_utf8($json_text); + # true + $json_text = $json->utf8(1)->encode($str); + utf8::is_utf8($json_text); + # false + + $str = '"' . chr(1000) . '"'; # UTF8-flagged + + $perl_scalar = $json->utf8(0)->decode($str); + utf8::is_utf8($perl_scalar); + # true + $perl_scalar = $json->utf8(1)->decode($str); + # died because of 'Wide character in subroutine' + +See to L. + +=item $JSON::UnMapping + +Disable. See to L. + +=item $JSON::SelfConvert + +This option was deleted. +Instead of it, if a givien blessed object has the C method, +C will be executed with C. + + $json->convert_blessed->encode($bleesed_hashref_or_arrayref) + # if need, call allow_blessed + +Note that it was C in old version, but now not C but C. + +=back + +=head1 TODO + +=over + +=item example programs + +=back + +=head1 THREADS + +No test with JSON::PP. If with JSON::XS, See to L. + + +=head1 BUGS + +Please report bugs relevant to C to Emakamaka[at]cpan.orgE. + + +=head1 SEE ALSO + +Most of the document is copied and modified from JSON::XS doc. 
+ +L, L + +C(L) + +=head1 AUTHOR + +Makamaka Hannyaharamitu, Emakamaka[at]cpan.orgE + +JSON::XS was written by Marc Lehmann + +The relese of this new version owes to the courtesy of Marc Lehmann. + + +=head1 COPYRIGHT AND LICENSE + +Copyright 2005-2011 by Makamaka Hannyaharamitu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + diff --git a/tools/Makefile b/tools/Makefile new file mode 100755 index 0000000..c968134 --- /dev/null +++ b/tools/Makefile @@ -0,0 +1,152 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright 2019 Western Digital Corporation or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Check for RV_ROOT +ifeq (,$(wildcard ${RV_ROOT}/configs/swerv.config)) +$(error env var RV_ROOT does not point to a valid dir! Exiting!) 
+endif + +# Allow snapshot override +target = default +snapshot = $(target) + +# Allow tool override +SWERV_CONFIG = ${RV_ROOT}/configs/swerv.config +IRUN = xrun +VCS = vcs +VERILATOR = verilator +GCC_PREFIX = riscv64-unknown-elf +BUILD_DIR = snapshots/${snapshot} +TBDIR = ${RV_ROOT}/testbench + +# Define test name +TEST = hello_world2 + +# Define test directory (searched via VPATH for the test source and for an optional $(TEST).ld / $(TEST).makefile) +TEST_DIR = ${RV_ROOT}/testbench/asm +# Defining 'debug' adds the +dumpon plusarg and the extra xrun elaborate/run options below. +ifdef debug + DEBUG_PLUS = +dumpon + IRUN_DEBUG = -access +rc + IRUN_DEBUG_RUN = -input ${RV_ROOT}/testbench/input.tcl +endif + +# Use a test-specific linker script when $(TEST_DIR)/$(TEST).ld exists, otherwise the testbench default +ifeq (,$(wildcard $(TEST_DIR)/$(TEST).ld)) + LINK = $(TBDIR)/link.ld +else + LINK = $(TEST_DIR)/$(TEST).ld +endif + +VPATH = $(TEST_DIR) $(BUILD_DIR) $(TBDIR) +TBFILES = $(TBDIR)/tb_top.sv $(TBDIR)/ahb_sif.sv + +defines = $(BUILD_DIR)/common_defines.vh +defines += ${RV_ROOT}/design/include/el2_def.sv +defines += $(BUILD_DIR)/el2_pdef.vh +includes = -I${BUILD_DIR} + +# CFLAGS for verilator generated Makefiles. Without -std=c++11 it +# complains for `auto` variables +CFLAGS += "-std=c++11" + +# Optimization for better performance; alternative is nothing for +# slower runtime (faster compiles) -O2 for faster runtime (slower +# compiles), or -O for balance. +VERILATOR_MAKE_FLAGS = OPT_FAST="-O2" + +# Targets +all: clean verilator + +clean: + rm -rf *.log *.s *.hex *.dis *.tbl irun* vcs* simv* snapshots swerv* \ + verilator* *.exe obj* *.o ucli.key vc_hdrs.h csrc *.csv + +# If define files do not exist, then run swerv.config. 
+${BUILD_DIR}/defines.h : + BUILD_PATH=${BUILD_DIR} ${RV_ROOT}/configs/swerv.config -target=$(target) +# Verilator build: append `undef ASSERT_ON to common_defines.vh, verilate the testbench, build obj_dir, then touch a stamp file. +verilator-build: ${TBFILES} ${BUILD_DIR}/defines.h test_tb_top.cpp + echo '`undef ASSERT_ON' >> ${BUILD_DIR}/common_defines.vh + $(VERILATOR) '-UASSERT_ON' --cc -CFLAGS ${CFLAGS} $(defines) \ + $(includes) -I${RV_ROOT}/testbench -f ${RV_ROOT}/testbench/flist \ + -Wno-WIDTH -Wno-UNOPTFLAT ${TBFILES} --top-module tb_top \ + -exe test_tb_top.cpp --trace --autoflush + cp ${RV_ROOT}/testbench/test_tb_top.cpp obj_dir/ + $(MAKE) -C obj_dir/ -f Vtb_top.mk $(VERILATOR_MAKE_FLAGS) + touch verilator-build +# VCS build: compile the testbench (log in vcs.log), then touch a stamp file. +vcs-build: ${TBFILES} ${BUILD_DIR}/defines.h + $(VCS) -full64 -assert svaext -sverilog +define+RV_OPENSOURCE \ + +error+500 +incdir+${RV_ROOT}/design/lib \ + +incdir+${RV_ROOT}/design/include ${BUILD_DIR}/common_defines.vh \ + +incdir+$(BUILD_DIR) +libext+.v $(defines) \ + -f ${RV_ROOT}/testbench/flist ${TBFILES} -l vcs.log + touch vcs-build +# xrun build: elaborate the testbench into snapshot ${snapshot}, then touch a stamp file. +irun-build: ${TBFILES} ${BUILD_DIR}/defines.h + $(IRUN) -64bit -elaborate $(IRUN_DEBUG) -q -sv -sysv -nowarn CUVIHR \ + -xmlibdirpath . -xmlibdirname swerv.build \ + -incdir ${RV_ROOT}/design/lib -incdir ${RV_ROOT}/design/include \ + -vlog_ext +.vh+.h $(defines) -incdir $(BUILD_DIR) \ + -f ${RV_ROOT}/testbench/flist -top tb_top ${TBFILES} \ + -I${RV_ROOT}/testbench -elaborate -snapshot ${snapshot} + touch irun-build +# Run targets: each one needs program.hex plus the matching *-build stamp. +verilator: program.hex verilator-build + ./obj_dir/Vtb_top ${DEBUG_PLUS} + +irun: program.hex irun-build + $(IRUN) -64bit -abvglobalfailurelimit 1 +lic_queue -licqueue \ + -status -xmlibdirpath . -xmlibdirname swerv.build \ + -snapshot ${snapshot} -r $(snapshot) $(IRUN_DEBUG_RUN) + +vcs: program.hex vcs-build + ./simv $(DEBUG_PLUS) +vcs+lic+wait -l vcs.log +# program.hex: when $(GCC_PREFIX)-as is not on PATH the canned hex files are copied instead of building the test. +program.hex: $(TEST).o $(LINK) + @echo Building $(TEST) +ifeq ($(shell which $(GCC_PREFIX)-as),) + @echo " !!! No $(GCC_PREFIX)-as in path, using canned hex files !!" + cp ${RV_ROOT}/testbench/hex/*.hex . 
+else +ifeq (,$(wildcard $(TEST_DIR)/$(TEST).makefile)) + $(GCC_PREFIX)-ld -m elf32lriscv --discard-none -T$(LINK) -o $(TEST).exe $(TEST).o + $(GCC_PREFIX)-objcopy -O verilog --only-section ".data*" --change-section-lma .data=0 $(TEST).exe data.hex + $(GCC_PREFIX)-objcopy -O verilog --only-section ".text" $(TEST).exe program.hex + $(GCC_PREFIX)-objdump -S $(TEST).exe > $(TEST).dis + $(GCC_PREFIX)-nm -f posix -C $(TEST).exe > $(TEST).tbl + @echo Completed building $(TEST) +else + $(MAKE) -f $(TEST_DIR)/$(TEST).makefile +endif +endif + +%.o : %.s ${BUILD_DIR}/defines.h + $(GCC_PREFIX)-cpp -I${BUILD_DIR} $< > $(TEST).cpp.s + $(GCC_PREFIX)-as -march=rv32gc $(TEST).cpp.s -o $(TEST).o + +TEST_CFLAGS = -g -O3 -funroll-all-loops +ABI = -mabi=ilp32 -march=rv32imc + +%.o : %.c ${BUILD_DIR}/defines.h + $(GCC_PREFIX)-gcc -I${BUILD_DIR} ${TEST_CFLAGS} ${ABI} -nostdlib -c $< -o $@ + +help: + @echo Make sure the environment variable RV_ROOT is set. + @echo Possible targets: verilator vcs irun help clean all verilator-build irun-build vcs-build program.hex + +.PHONY: help clean verilator vcs irun diff --git a/tools/addassign b/tools/addassign new file mode 100755 index 0000000..c1b9998 --- /dev/null +++ b/tools/addassign @@ -0,0 +1,46 @@ +#!/usr/bin/perl +# addassign: reads the equation file named by -in and prints 'logic' declarations followed by 'assign' statements on stdout. +use Getopt::Long; +# Usage text shown when option parsing fails (still a placeholder). +$helpusage = "placeholder"; +# -in: input file; -prefix: text inserted right after 'assign ' in every generated statement. +GetOptions ('in=s' => \$in, + 'prefix=s' => \$prefix) || die("$helpusage"); + + +# Slurp the input via the shell. NOTE(review): $in is interpolated unquoted into the command line. +@in=`cat $in`; + +# Pass 1: declare the text before the first '=' of each line (whitespace removed) as 'logic'. +foreach $line (@in) { + # Any line containing '#' is skipped entirely. + if ($line=~/\#/) { next; } + + if ($line=~/([^=]+)=/) { + $sig=$1; + $sig=~s/\s+//g; + printf("logic $sig;\n"); + } +} +# Pass 2: emit the assigns. An empty right-hand side ('= ;' or '= ( );') is tied to 1'b0. +foreach $line (@in) { + + if ($line=~/\#/) { next; } + + if ($line=~/([^=]+)=\s*;/) { + printf("assign ${prefix}$1 = 1'b0;\n"); + next; + } + + if ($line=~/([^=]+)=\s*\(\s*\);/) { + printf("assign ${prefix}$1 = 1'b0;\n"); + next; + } + # Other lines with '=' get 'assign <prefix>' prepended; the rest are echoed. NOTE(review): $line is used as the printf format, so a '%' in the input would be interpreted. + if ($line =~ /=/) { printf("assign ${prefix}$line"); } + else { printf("$line"); } +} + + +exit; + diff --git a/tools/coredecode b/tools/coredecode new file mode 100755 index 0000000..f2ce7ef ---
/dev/null +++ b/tools/coredecode @@ -0,0 +1,198 @@ +#!/usr/bin/perl +# coredecode: reads the decode description named by -in and prints a truth table (.i/.o/.ilb/.ob/.type header plus one row per instruction) on stdout. +use Getopt::Long; +# Usage text shown when option parsing fails (still a placeholder). +$helpusage = "placeholder"; +# -legal: emit a single output column named 'legal'; -view: selects which view's inputs, outputs and decode entries are printed. +GetOptions ('legal' => \$legal, + 'in=s' => \$in, + 'out=s' => \$out, + 'view=s' => \$view ) || die("$helpusage"); + +# -in is required. NOTE(review): $out is defaulted and echoed but never opened in this script; the table goes to stdout. +if (!defined($in)) { die("must define -in=input"); } +if (!defined($out)) { $out="${in}.out"; } +# The three well-known input names force the view, overriding any -view given. +if ($in eq "decode") { $view="rv32i"; } +elsif ($in eq "cdecode") { $view="rv32c"; } +elsif ($in eq "csrdecode") { $view="csr"; } +# Echo the effective settings (these lines also go to stdout, ahead of the table). +if (defined($in)) { printf("in=$in\n"); } +if (defined($out)) { printf("out=$out\n"); } +if (defined($view)) { printf("view=$view\n"); } +# Slurp the input via the shell. NOTE(review): $in is interpolated unquoted into the command line. +@in=`cat $in`; +# $gather is 1 while the lines of a '{ ... }' label list are being collected. +$gather=0; +# Iteration cap used when expanding an inst{lo-hi} range in the .decode section. +$TIMEOUT=50; +# Single pass over the input. $pstate is the current section: 1=.definition, 2=.input, 3=.output, 4=.decode. +foreach $line (@in) { + + #printf("$pstate: $line"); + + if ($line=~/^\s*\#/) { #printf("skip $line"); + next; } + # Inside a label list: the first token of each line (commas removed) gets the next position; a line containing '}' ends the list. + if ($gather==1) { + if ($line=~/(\S+)/) { + if ($line=~/}/) { $gather=0; $position=0; next; } + $label=$1; # $1 is still the (\S+) capture: the failed /}/ match above leaves it untouched + $label=~s/,//g; + if ($pstate==2) { + if (defined($INPUT{$CVIEW}{$label})) { die("input $label already defined"); } + $INPUT{$CVIEW}{$label}=$position++; + $INPUTLEN{$CVIEW}++; + $INPUTSTR{$CVIEW}.=" $label"; + } + elsif ($pstate==3) { + if (defined($OUTPUT{$CVIEW}{$label})) { die("output $label already defined"); } + $OUTPUT{$CVIEW}{$label}=$position++; + $OUTPUTLEN{$CVIEW}++; + $OUTPUTSTR{$CVIEW}.=" $label"; + } + else { die("unknown pstate $pstate in gather"); } + } + } + # Section markers at the start of a line switch $pstate. + if ($line=~/^.definition/) { + $pstate=1; next; + } + if ($pstate==1) { # definition + if ($line!~/^.output/) { + if ($line=~/(\S+)\s*=\s*(\S+)/) { + $key=$1; $value=$2; # name = pattern; below, '.' becomes '-' and brackets are dropped + $value=~s/\./-/g; + $value=~s/\[//g; + $value=~s/\]//g; + $DEFINITION{$key}=$value; + } + } + else { $pstate=2; next; } # NOTE(review): a '.output' line seen here selects state 2 (input), not 3 - confirm intended + } + + if ($line=~/^.input/) { + $pstate=2; next; + } + + if ($pstate==2) { # input + if ($line=~/(\S+)\s*=\s*\{/) { + $CVIEW=$1; $gather=1; next; + } + } + + if ($line=~/^.output/) { + $pstate=3; next; + } + + if ($pstate==3) { # output + if ($line=~/(\S+)\s*=\s*\{/) { + $CVIEW=$1; $gather=1; next; + } + } + + if ($line=~/^.decode/) 
{ + $pstate=4; next; + } + # Decode entries have the form view[inst] = { sig ... }; inst may end in {lo-hi}, which expands to one entry per value. + if ($pstate==4) { # decode + if ($line=~/([^\[]+)\[([^\]]+)\]\s*=\s*\{([^\}]+)\}/) { + $dview=$1; $inst=$2; $body=$3; + $dview=~s/\s+//g; + $inst=~s/\s+//g; + #printf("$dview $inst $body\n"); + if ($inst=~/([^\{]+)\{([^-]+)-([^\}]+)\}/) { + $base=$1; $lo=$2; $hi=$3; + $hi++; # makes the range inclusive: the loop below stops once $lo reaches the original $hi plus one + for ($i=0; $i<$TIMEOUT && $lo ne $hi; $i++) { + #printf("decode $dview $base$lo\n"); + + $expand=$base.$lo; + if (!defined($DEFINITION{$expand})) { die("could not find instruction definition for inst $expand"); } + + $DECODE{$dview}{$expand}=$body; + $lo++; + } + if ($i == $TIMEOUT) { die("timeout in decode expansion"); } # NOTE(review): also fires when a range has exactly $TIMEOUT entries + + } + else { + if (!defined($DEFINITION{$inst})) { die("could not find instruction definition for inst $inst"); } + $DECODE{$dview}{$inst}=$body; + } + } + } + +} + + +#printf("view $view len %d\n",$OUTPUTLEN{$view}); + +#printf("$OUTPUTSTR{$view}\n"); + +# Header: input/output counts and labels of the selected view; with -legal there is one output named 'legal'. +# need to switch this somehow based on 16/32 +printf(".i %d\n",$INPUTLEN{$view}); + +if (defined($legal)) { + printf(".o 1\n"); +} +else { + printf(".o %d\n",$OUTPUTLEN{$view}); +} + +printf(".ilb %s\n",$INPUTSTR{$view}); + +if (defined($legal)) { + printf(".ob legal\n"); +} +else { + printf(".ob %s\n",$OUTPUTSTR{$view}); +} + +if (defined($legal)) { + printf(".type fd\n"); +} +else { + printf(".type fr\n"); +} +# Output row template: one '0' per output signal of the view. +$DEFAULT_TEMPLATE='0'x$OUTPUTLEN{$view}; + +foreach $inst (sort keys 
%{ $DECODE{$view} }) { + # One row per instruction: a '1' is written at the position of every signal listed in its body. + $body=$DECODE{$view}{$inst}; + @sigs=split(' ',$body); + + $template=$DEFAULT_TEMPLATE; + foreach $sig (@sigs) { + if (!defined($OUTPUT{$view}{$sig})) { die("could not find output definition for sig $sig in view $view"); } + $position=$OUTPUT{$view}{$sig}; + substr($template,$position,1,1); + } + +# if (!defined($DEFINITION{$inst})) { die("could not find instruction definition for inst $inst"); } + + printf("# $inst\n"); + if (defined($legal)) { + printf("$DEFINITION{$inst} 1\n"); + } + else { + printf("$DEFINITION{$inst} $template\n"); + } + +} + + +exit; +# Unreachable debug dumps (they follow the exit above): definitions and output positions. +foreach $inst (sort keys %DEFINITION) { + $value=$DEFINITION{$inst}; + printf("%-10s = $value\n",$inst); +} + + +foreach $sig (sort keys %{ $OUTPUT{$view} }) { + $position=$OUTPUT{$view}{$sig}; + printf("$sig $position\n"); +} diff --git a/tools/picmap b/tools/picmap new file mode 100755 index 0000000..06df0d5 --- /dev/null +++ b/tools/picmap @@ -0,0 +1,59 @@ +#!/usr/bin/perl + +use Getopt::Long; + +use integer; + +$helpusage = "placeholder"; + +GetOptions ('total_int=s' => \$total_int)|| die("$helpusage"); + +$LEN=15; + +#printf("logic [2:0] mask;\n"); + +printf("// mask[3:0] = { 4'b1000 - 30b mask,4'b0100 - 31b mask, 4'b0010 - 28b mask, 4'b0001 - 32b mask }\n"); +printf("always_comb begin\n"); +printf(" case \(address[14:0]\)\n"); +printf(" 15'b011000000000000 : mask[3:0] = 4'b0100;\n"); +for ($i=1; $i<=$total_int; $i++) { + $j=hex("4000"); + printf(" 15'b%s : mask[3:0] = 4'b1000;\n",d2b($j+$i*4)); +} +for ($i=1; $i<=$total_int; $i++) { + $j=hex("2000"); + printf(" 15'b%s : mask[3:0] = 4'b0100;\n",d2b($j+$i*4)); +} +for ($i=1; $i<=$total_int; $i++) { + $j=hex("0"); + printf(" 15'b%s : mask[3:0] = 4'b0010;\n",d2b($j+$i*4)); +} + printf(" %-17s : mask[3:0] = 4'b0001;\n","default"); +printf(" endcase\n"); +printf("end\n"); + + +sub b2d { + my ($v) = @_; + + $v = oct("0b" . 
$v); + + return($v); +} + +sub d2b { + my ($v) = @_; + + my $repeat; + + $v = sprintf "%b",$v; + if (length($v)<$LEN) { + $repeat=$LEN-length($v); + $v="0"x$repeat.$v; + } + elsif (length($v)>$LEN) { + $v=substr($v,length($v)-$LEN,$LEN); + } + + return($v); +} diff --git a/tools/smalldiv b/tools/smalldiv new file mode 100755 index 0000000..48495be --- /dev/null +++ b/tools/smalldiv @@ -0,0 +1,121 @@ +#!/usr/bin/perl + +use Getopt::Long; + +use integer; + +$helpusage = "placeholder"; + +GetOptions ('len=s' => \$len, + 'num=s' => \$num, + 'den=s' => \$den, + 'skip' => \$skip) || die("$helpusage"); + +if (!defined($len)) { $len=8; } +$LEN=$len; + +$n=d2b($num); # numerator - quotient +$m=d2b($den); # denominator - divisor + + +printf(".i 8\n"); +printf(".o 4\n"); +printf(".ilb q_ff[3] q_ff[2] q_ff[1] q_ff[0] m_ff[3] m_ff[2] m_ff[1] m_ff[0]\n"); +printf(".ob smallnum[3] smallnum[2] smallnum[1] smallnum[0]\n"); +printf(".type fr\n"); +for ($q=0; $q<16; $q++) { + for ($m=0; $m<16; $m++) { + if ($m==0) { next; } + $result=int($q/$m); + printf("%s %s %s\n",d2bl($q,4),d2bl($m,4),d2bl($result,4)); + } +} + +exit; + +#$LEN=length($n); + +$a="0"x$LEN; +$q=$n; + +#printf("n=%s, m=%s\n",$n,$m); +#printf("a=%s, q=%s\n",$a,$q); + +for ($i=1; $i<=$LEN; $i++) { + + #printf("iteration $n:\n"); + + printf("$i: a=%s q=%s\n",$a,$q); + + + $signa = substr($a,0,1); + + + $a = substr($a.$q,1,$LEN); # new a with q shifted in + + if ($signa==0) { $a=b2d($a)-b2d($m); } + else { $a=b2d($a)+b2d($m); } + + $a=d2b($a); + + + $signa = substr($a,0,1); + if ($signa==0) { $q=substr($q,1,$LEN-1)."1"; } + else { $q=substr($q,1,$LEN-1)."0"; } + +} + + +#printf("a=$a\n"); +$signa = substr($a,0,1); +if ($signa==1 && !defined($skip)) { + printf("correction:\n"); + $a=b2d($a)+b2d($m); + $a=d2b($a); +} +#printf("a=$a\n"); +printf("%d / %d = %d R %d ",b2d($n),b2d($m),b2d($q),b2d($a)); +if ($a eq $n) { printf("-> remainder equal numerator\n"); } +else { printf("\n"); } + +sub b2d { + my ($v) = @_; + + $v = 
oct("0b" . $v); + + return($v); +} + +sub d2b { + my ($v) = @_; + + my $repeat; + + $v = sprintf "%b",$v; + if (length($v)<$LEN) { + $repeat=$LEN-length($v); + $v="0"x$repeat.$v; + } + elsif (length($v)>$LEN) { + $v=substr($v,length($v)-$LEN,$LEN); + } + + return($v); +} + +sub d2bl { + my ($v,$LEN) = @_; + + my $repeat; + + $v = sprintf "%b",$v; + if (length($v)<$LEN) { + $repeat=$LEN-length($v); + $v="0"x$repeat.$v; + } + elsif (length($v)>$LEN) { + $v=substr($v,length($v)-$LEN,$LEN); + } + + return($v); +} diff --git a/tools/unrollforverilator b/tools/unrollforverilator new file mode 100755 index 0000000..1b686fc --- /dev/null +++ b/tools/unrollforverilator @@ -0,0 +1,169 @@ +#!/usr/bin/perl +#use strict; +#use warnings; + +my $RV_ROOT = $ENV{RV_ROOT}; + +my $TOTAL_INT=$ARGV[0]; +print "// argv=".$ARGV[0]."\n"; +my $NUM_LEVELS; +if($TOTAL_INT==2){$NUM_LEVELS=1;} +elsif ($TOTAL_INT==4){$NUM_LEVELS=2;} +elsif ($TOTAL_INT==8){$NUM_LEVELS=3;} +elsif ($TOTAL_INT==16){$NUM_LEVELS=4;} +elsif ($TOTAL_INT==32){$NUM_LEVELS=5;} +elsif ($TOTAL_INT==64){$NUM_LEVELS=6;} +elsif ($TOTAL_INT==128){$NUM_LEVELS=7;} +elsif ($TOTAL_INT==256){$NUM_LEVELS=8;} +elsif ($TOTAL_INT==512){$NUM_LEVELS=9;} +elsif ($TOTAL_INT==1024){$NUM_LEVELS=10;} +else {$NUM_LEVELS=int(log($TOTAL_INT)/log(2))+1;} +print ("// TOTAL_INT=".$TOTAL_INT." 
NUM_LEVELS=".$NUM_LEVELS."\n"); +$next_level = 1; +print ("`ifdef RV_PIC_2CYCLE\n"); +if($TOTAL_INT > 2){ +print ("// LEVEL0\n"); +print ("logic [TOTAL_INT+2:0] [INTPRIORITY_BITS-1:0] level_intpend_w_prior_en_".$next_level.";\n"); +print ("logic [TOTAL_INT+2:0] [ID_BITS-1:0] level_intpend_id_".$next_level.";\n"); +print (" for (m=0; m<=(TOTAL_INT)/(2**(".$next_level.")) ; m++) begin : COMPARE0\n"); +print (" if ( m == (TOTAL_INT)/(2**(".$next_level."))) begin \n"); +print (" assign level_intpend_w_prior_en_".$next_level."[m+1] = '0 ;\n"); +print (" assign level_intpend_id_".$next_level."[m+1] = '0 ;\n"); +print (" end\n"); +print (" el2_cmp_and_mux #(\n"); +print (" .ID_BITS(ID_BITS),\n"); +print (" .INTPRIORITY_BITS(INTPRIORITY_BITS)) cmp_l".$next_level." (\n"); +print (" .a_id(level_intpend_id[0][2*m]),\n"); +print (" .a_priority(level_intpend_w_prior_en[0][2*m]),\n"); +print (" .b_id(level_intpend_id[0][2*m+1]),\n"); +print (" .b_priority(level_intpend_w_prior_en[0][2*m+1]),\n"); +print (" .out_id(level_intpend_id_".$next_level."[m]),\n"); +print (" .out_priority(level_intpend_w_prior_en_".$next_level."[m])) ;\n"); +print (" \n"); +print (" end\n\n"); +for (my $l=1; $l