cores-swerv-el2/design/lsu/el2_lsu.sv

425 lines
22 KiB
Systemverilog

// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Function: Top level file for load store unit
// Comments:
//
//
// DC1 -> DC2 -> DC3 -> DC4 (Commit)
//
//********************************************************************************
module el2_lsu
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk_override, // Override non-functional clock gating
input logic dec_tlu_flush_lower_r, // I0/I1 writeback flush. This is used to flush the old packets only
input logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state
input logic dec_tlu_force_halt, // This will be high till TLU goes to debug halt
// chicken signals
input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals
input logic dec_tlu_wb_coalescing_disable, // disable the write buffer coalesce
input logic dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus
input logic dec_tlu_core_ecc_disable, // disable the generation of the ecc
input logic [31:0] exu_lsu_rs1_d, // address rs operand
input logic [31:0] exu_lsu_rs2_d, // store data
input logic [11:0] dec_lsu_offset_d, // address offset operand
input el2_lsu_pkt_t lsu_p, // lsu control packet
input logic dec_lsu_valid_raw_d, // Raw valid for address computation
input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control
output logic [31:0] lsu_result_m, // lsu load data
output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF
output logic lsu_load_stall_any, // This is for blocking loads in the decode
output logic lsu_store_stall_any, // This is for blocking stores in the decode
output logic lsu_fastint_stall_any, // Stall the fastint in decode-1 stage
output logic lsu_idle_any, // lsu buffers are empty and no instruction in the pipeline. Doesn't include DMA
output logic lsu_active, // Used to turn off top level clk
output logic [31:1] lsu_fir_addr, // fast interrupt address
output logic [1:0] lsu_fir_error, // Error during fast interrupt lookup
output logic lsu_single_ecc_error_incr, // Increment the ecc counter
output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet
output logic lsu_imprecise_error_load_any, // bus load imprecise error
output logic lsu_imprecise_error_store_any, // bus store imprecise error
output logic [31:0] lsu_imprecise_error_addr_any, // bus store imprecise error address
// Non-blocking loads
output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load
output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the enrty which needs to be invalidated
output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam
output logic lsu_nonblock_load_data_error, // non block load has an error
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error
output logic [31:0] lsu_nonblock_load_data, // Data of the non block load
output logic lsu_pmu_load_external_m, // PMU : Bus loads
output logic lsu_pmu_store_external_m, // PMU : Bus loads
output logic lsu_pmu_misaligned_m, // PMU : misaligned
output logic lsu_pmu_bus_trxn, // PMU : bus transaction
output logic lsu_pmu_bus_misaligned, // PMU : misaligned access going to the bus
output logic lsu_pmu_bus_error, // PMU : bus sending error back
output logic lsu_pmu_bus_busy, // PMU : bus is not ready
// Trigger signals
input el2_trigger_pkt_t [3:0] trigger_pkt_any, // Trigger info from the decode
output logic [3:0] lsu_trigger_match_m, // lsu trigger hit (one bit per trigger)
// DCCM ports
output logic dccm_wren, // DCCM write enable
output logic dccm_rden, // DCCM read enable
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // DCCM write address low bank
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // DCCM write address hi bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // DCCM read address low bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // DCCM read address hi bank (hi and low same if aligned read)
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // DCCM write data for lo bank
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // DCCM write data for hi bank
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // DCCM read data low bank
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // DCCM read data hi bank
// PIC ports
output logic picm_wren, // PIC memory write enable
output logic picm_rden, // PIC memory read enable
output logic picm_mken, // Need to read the mask for stores to determine which bits to write/forward
output logic [31:0] picm_rdaddr, // address for pic read access
output logic [31:0] picm_wraddr, // address for pic write access
output logic [31:0] picm_wr_data, // PIC memory write data
input logic [31:0] picm_rd_data, // PIC memory read/mask data
// AXI Write Channels
output logic lsu_axi_awvalid,
input logic lsu_axi_awready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid,
output logic [31:0] lsu_axi_awaddr,
output logic [3:0] lsu_axi_awregion,
output logic [7:0] lsu_axi_awlen,
output logic [2:0] lsu_axi_awsize,
output logic [1:0] lsu_axi_awburst,
output logic lsu_axi_awlock,
output logic [3:0] lsu_axi_awcache,
output logic [2:0] lsu_axi_awprot,
output logic [3:0] lsu_axi_awqos,
output logic lsu_axi_wvalid,
input logic lsu_axi_wready,
output logic [63:0] lsu_axi_wdata,
output logic [7:0] lsu_axi_wstrb,
output logic lsu_axi_wlast,
input logic lsu_axi_bvalid,
output logic lsu_axi_bready,
input logic [1:0] lsu_axi_bresp,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid,
// AXI Read Channels
output logic lsu_axi_arvalid,
input logic lsu_axi_arready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid,
output logic [31:0] lsu_axi_araddr,
output logic [3:0] lsu_axi_arregion,
output logic [7:0] lsu_axi_arlen,
output logic [2:0] lsu_axi_arsize,
output logic [1:0] lsu_axi_arburst,
output logic lsu_axi_arlock,
output logic [3:0] lsu_axi_arcache,
output logic [2:0] lsu_axi_arprot,
output logic [3:0] lsu_axi_arqos,
input logic lsu_axi_rvalid,
output logic lsu_axi_rready,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid,
input logic [63:0] lsu_axi_rdata,
input logic [1:0] lsu_axi_rresp,
input logic lsu_axi_rlast,
input logic lsu_bus_clk_en, // external drives a clock_en to control bus ratio
// DMA slave
input logic dma_dccm_req, // DMA read/write to dccm
input logic [2:0] dma_mem_tag, // DMA request tag
input logic [31:0] dma_mem_addr, // DMA address
input logic [2:0] dma_mem_sz, // DMA access size
input logic dma_mem_write, // DMA access is a write
input logic [63:0] dma_mem_wdata, // DMA write data
output logic dccm_dma_rvalid, // lsu data valid for DMA dccm read
output logic dccm_dma_ecc_error, // DMA load had ecc error
output logic [2:0] dccm_dma_rtag, // DMA request tag
output logic [63:0] dccm_dma_rdata, // lsu data for DMA dccm read
output logic dccm_ready, // lsu ready for DMA access
input logic scan_mode, // scan mode
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic rst_l // reset, active low
);
logic lsu_dccm_rden_m;
logic lsu_dccm_rden_r;
logic [31:0] store_data_m;
logic [31:0] store_data_r;
logic [31:0] store_data_hi_r, store_data_lo_r;
logic [31:0] store_datafn_hi_r, store_datafn_lo_r;
logic [31:0] sec_data_lo_m, sec_data_hi_m;
logic [31:0] sec_data_lo_r, sec_data_hi_r;
logic [31:0] lsu_ld_data_m;
logic [31:0] dccm_rdata_hi_m, dccm_rdata_lo_m;
logic [6:0] dccm_data_ecc_hi_m, dccm_data_ecc_lo_m;
logic lsu_single_ecc_error_m;
logic lsu_double_ecc_error_m;
logic [31:0] lsu_ld_data_r;
logic [31:0] lsu_ld_data_corr_r;
logic [31:0] dccm_rdata_hi_r, dccm_rdata_lo_r;
logic [6:0] dccm_data_ecc_hi_r, dccm_data_ecc_lo_r;
logic single_ecc_error_hi_r, single_ecc_error_lo_r;
logic lsu_single_ecc_error_r;
logic lsu_double_ecc_error_r;
logic ld_single_ecc_error_r, ld_single_ecc_error_r_ff;
logic [31:0] picm_mask_data_m;
logic [31:0] lsu_addr_d, lsu_addr_m, lsu_addr_r;
logic [31:0] end_addr_d, end_addr_m, end_addr_r;
el2_lsu_pkt_t lsu_pkt_d, lsu_pkt_m, lsu_pkt_r;
logic lsu_i0_valid_d, lsu_i0_valid_m, lsu_i0_valid_r;
// Store Buffer signals
logic store_stbuf_reqvld_r;
logic ldst_stbuf_reqvld_r;
logic lsu_commit_r;
logic lsu_exc_m;
logic addr_in_dccm_d, addr_in_dccm_m, addr_in_dccm_r;
logic addr_in_pic_d, addr_in_pic_m, addr_in_pic_r;
logic ldst_dual_d, ldst_dual_m, ldst_dual_r;
logic addr_external_m;
logic stbuf_reqvld_any;
logic stbuf_reqvld_flushed_any;
logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any;
logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any;
logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any;
logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, sec_data_hi_r_ff;
logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, sec_data_ecc_lo_r_ff;
logic lsu_cmpen_m;
logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m;
logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m;
logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m;
logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m;
logic lsu_stbuf_commit_any;
logic lsu_stbuf_empty_any; // This is for blocking loads
logic lsu_stbuf_full_any;
// Bus signals
logic lsu_busreq_r;
logic lsu_bus_buffer_pend_any;
logic lsu_bus_buffer_empty_any;
logic lsu_bus_buffer_full_any;
logic lsu_busreq_m;
logic [31:0] bus_read_data_m;
logic flush_m_up, flush_r;
logic is_sideeffects_m;
logic [2:0] dma_mem_tag_d, dma_mem_tag_m;
logic ldst_nodma_mtor;
logic dma_dccm_wen, dma_pic_wen;
logic [31:0] dma_dccm_wdata_lo, dma_dccm_wdata_hi;
logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, dma_dccm_wdata_ecc_hi;
// Clocks
logic lsu_busm_clken;
logic lsu_bus_obuf_c1_clken;
logic lsu_c1_m_clk, lsu_c1_r_clk;
logic lsu_c2_m_clk, lsu_c2_r_clk;
logic lsu_store_c1_m_clk, lsu_store_c1_r_clk;
logic lsu_stbuf_c1_clk;
logic lsu_bus_ibuf_c1_clk, lsu_bus_obuf_c1_clk, lsu_bus_buf_c1_clk;
logic lsu_busm_clk;
logic lsu_free_c2_clk;
logic lsu_raw_fwd_lo_m, lsu_raw_fwd_hi_m;
logic lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r;
assign lsu_raw_fwd_lo_m = (|stbuf_fwdbyteen_lo_m[pt.DCCM_BYTE_WIDTH-1:0]);
assign lsu_raw_fwd_hi_m = (|stbuf_fwdbyteen_hi_m[pt.DCCM_BYTE_WIDTH-1:0]);
el2_lsu_lsc_ctl #(.pt(pt)) lsu_lsc_ctl (.*);
// block stores in decode - for either bus or stbuf reasons
assign lsu_store_stall_any = lsu_stbuf_full_any | lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff;
assign lsu_load_stall_any = lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff;
assign lsu_fastint_stall_any = ld_single_ecc_error_r; // Stall the fastint in decode-1 stage
// Ready to accept dma trxns
// There can't be any inpipe forwarding from non-dma packet to dma packet since they can be flushed so we can't have st in r when dma is in m
assign dma_mem_tag_d[2:0] = dma_mem_tag[2:0];
assign ldst_nodma_mtor = (lsu_pkt_m.valid & ~lsu_pkt_m.dma & (addr_in_dccm_m | addr_in_pic_m) & lsu_pkt_m.store);
assign dccm_ready = ~(dec_lsu_valid_raw_d | ldst_nodma_mtor | ld_single_ecc_error_r_ff);
assign dma_dccm_wen = dma_dccm_req & dma_mem_write & addr_in_dccm_d & dma_mem_sz[1]; // Perform DMA writes only for word/dword
assign dma_pic_wen = dma_dccm_req & dma_mem_write & addr_in_pic_d;
assign {dma_dccm_wdata_hi[31:0], dma_dccm_wdata_lo[31:0]} = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores
// Generate per cycle flush signals
assign flush_m_up = dec_tlu_flush_lower_r;
assign flush_r = dec_tlu_i0_kill_writeb_r;
// lsu idle
// lsu halt idle. This is used for entering the halt mode. Also, DMA accesses are allowed during fence.
// Indicates non-idle if there is a instruction valid in d-r or read/write buffers are non-empty since they can come with error
// Store buffer now have only non-dma dccm stores
// stbuf_empty not needed since it has only dccm stores
assign lsu_idle_any = ~((lsu_pkt_m.valid & ~lsu_pkt_m.dma) |
(lsu_pkt_r.valid & ~lsu_pkt_r.dma)) &
lsu_bus_buffer_empty_any;
assign lsu_active = (lsu_pkt_m.valid | lsu_pkt_r.valid | ld_single_ecc_error_r_ff) | ~lsu_bus_buffer_empty_any; // This includes DMA. Used for gating top clock
// Instantiate the store buffer
assign store_stbuf_reqvld_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~flush_r & (~lsu_pkt_r.dma | ((lsu_pkt_r.by | lsu_pkt_r.half) & ~lsu_double_ecc_error_r));
// Disable Forwarding for now
assign lsu_cmpen_m = lsu_pkt_m.valid & (lsu_pkt_m.load | lsu_pkt_m.store) & (addr_in_dccm_m | addr_in_pic_m);
// Bus signals
assign lsu_busreq_m = lsu_pkt_m.valid & ((lsu_pkt_m.load | lsu_pkt_m.store) & addr_external_m) & ~flush_m_up & ~lsu_exc_m & ~lsu_pkt_m.fast_int;
// Dual signals
assign ldst_dual_d = (lsu_addr_d[2] != end_addr_d[2]);
assign ldst_dual_m = (lsu_addr_m[2] != end_addr_m[2]);
assign ldst_dual_r = (lsu_addr_r[2] != end_addr_r[2]);
// PMU signals
assign lsu_pmu_misaligned_m = lsu_pkt_m.valid & ((lsu_pkt_m.half & lsu_addr_m[0]) | (lsu_pkt_m.word & (|lsu_addr_m[1:0])));
assign lsu_pmu_load_external_m = lsu_pkt_m.valid & lsu_pkt_m.load & addr_external_m;
assign lsu_pmu_store_external_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_external_m;
el2_lsu_dccm_ctl #(.pt(pt)) dccm_ctl (
.lsu_addr_d(lsu_addr_d[31:0]),
.end_addr_d(end_addr_d[pt.DCCM_BITS-1:0]),
.lsu_addr_m(lsu_addr_m[pt.DCCM_BITS-1:0]),
.lsu_addr_r(lsu_addr_r[31:0]),
.end_addr_m(end_addr_m[pt.DCCM_BITS-1:0]),
.end_addr_r(end_addr_r[pt.DCCM_BITS-1:0]),
.*
);
el2_lsu_stbuf #(.pt(pt)) stbuf (
.lsu_addr_d(lsu_addr_d[pt.LSU_SB_BITS-1:0]),
.end_addr_d(end_addr_d[pt.LSU_SB_BITS-1:0]),
.*
);
el2_lsu_ecc #(.pt(pt)) ecc (
.lsu_addr_r(lsu_addr_r[pt.DCCM_BITS-1:0]),
.end_addr_r(end_addr_r[pt.DCCM_BITS-1:0]),
.lsu_addr_m(lsu_addr_m[pt.DCCM_BITS-1:0]),
.end_addr_m(end_addr_m[pt.DCCM_BITS-1:0]),
.*
);
el2_lsu_trigger #(.pt(pt)) trigger (
.store_data_m(store_data_m[31:0]),
.*
);
// Clk domain
el2_lsu_clkdomain #(.pt(pt)) clkdomain (.*);
// Bus interface
el2_lsu_bus_intf #(.pt(pt)) bus_intf (
.lsu_addr_m(lsu_addr_m[31:0] & {32{addr_external_m & lsu_pkt_m.valid}}),
.lsu_addr_r(lsu_addr_r[31:0] & {32{lsu_busreq_r}}),
.end_addr_m(end_addr_m[31:0] & {32{addr_external_m & lsu_pkt_m.valid}}),
.end_addr_r(end_addr_r[31:0] & {32{lsu_busreq_r}}),
.store_data_r(store_data_r[31:0] & {32{lsu_busreq_r}}),
.*
);
//Flops
rvdff #(3) dma_mem_tag_mff (.*, .din(dma_mem_tag_d[2:0]), .dout(dma_mem_tag_m[2:0]), .clk(lsu_c1_m_clk));
rvdff #(2) lsu_raw_fwd_r_ff (.*, .din({lsu_raw_fwd_hi_m, lsu_raw_fwd_lo_m}), .dout({lsu_raw_fwd_hi_r, lsu_raw_fwd_lo_r}), .clk(lsu_c2_r_clk));
`ifdef RV_ASSERT_ON
logic [1:0] store_data_bypass_sel;
assign store_data_bypass_sel[1:0] = {lsu_p.store_data_bypass_d, lsu_p.store_data_bypass_m};
property exception_no_lsu_flush;
@(posedge clk) disable iff(~rst_l) lsu_lsc_ctl.lsu_error_pkt_m.exc_valid |-> ##[1:2] (flush_r );
endproperty
assert_exception_no_lsu_flush: assert property (exception_no_lsu_flush) else
$display("No flush within 2 cycles of exception");
// offset should be zero for fast interrupt
property offset_0_fastint;
@(posedge clk) disable iff(~rst_l) (lsu_p.valid & lsu_p.fast_int) |-> (dec_lsu_offset_d[11:0] == 12'b0);
endproperty
assert_offset_0_fastint: assert property (offset_0_fastint) else
$display("dec_tlu_offset_d not zero for fast interrupt redirect");
// DMA req should assert dccm rden/wren
property dmareq_dccm_wren_or_rden;
@(posedge clk) disable iff(~rst_l) dma_dccm_req |-> (dccm_rden | dccm_wren | addr_in_pic_d);
endproperty
assert_dmareq_dccm_wren_or_rden: assert property(dmareq_dccm_wren_or_rden) else
$display("dccm rden or wren not asserted during DMA request");
// fastint_stall should cause load/store stall next cycle
property fastint_stall_imply_loadstore_stall;
@(posedge clk) disable iff(~rst_l) (lsu_fastint_stall_any & (lsu_commit_r | lsu_pkt_r.dma)) |-> ##1 ((lsu_load_stall_any | lsu_store_stall_any) | ~ld_single_ecc_error_r_ff);
endproperty
assert_fastint_stall_imply_loadstore_stall: assert property (fastint_stall_imply_loadstore_stall) else
$display("fastint_stall should be followed by lsu_load/store_stall_any");
// Single ECC error implies rfnpc flush
property single_ecc_error_rfnpc_flush;
@(posedge clk) disable iff(~rst_l) (lsu_error_pkt_r.single_ecc_error & lsu_pkt_r.load) |=> ~lsu_commit_r;
endproperty
assert_single_ecc_error_rfnpc_flush: assert property (single_ecc_error_rfnpc_flush) else
$display("LSU commit next cycle after single ecc error");
`endif
endmodule // el2_lsu