// File: cores-swerv-el2/design/lsu/el2_lsu_lsc_ctl.sv
// Language: SystemVerilog (original listing metadata: 342 lines, 19 KiB)

// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: LSU control
// Comments:
//
//
// DC1 -> DC2 -> DC3 -> DC4 (Commit)
//
//********************************************************************************
// LSU control block.
// - Computes the load/store start and end address in the D stage.
// - Carries the request packet, addresses, address-region flags and store data
//   through the D -> M -> R stages.
// - Sign/zero-extends returning load data and builds the exception packet.
// - A DMA request takes over the D stage whenever dec_lsu_valid_raw_d is low.
module el2_lsu_lsc_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
)(
input logic rst_l, // reset, active low
input logic clk_override, // Override non-functional clock gating
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
// clocks per pipe
input logic lsu_c1_m_clk, // clocks the M-stage address, packet (non-valid bits), region and fault flops
input logic lsu_c1_r_clk, // clocks the R-stage address, packet (non-valid bits) and region flops
input logic lsu_c2_m_clk, // clocks the M-stage packet valid flop
input logic lsu_c2_r_clk, // clocks the R-stage packet valid flop (and the R-stage error flops when LOAD_TO_USE_PLUS1 != 1)
input logic lsu_store_c1_m_clk, // clocks the M-stage store data flop
input logic [31:0] lsu_ld_data_r, // Load data R-stage
input logic [31:0] lsu_ld_data_corr_r, // ECC corrected data R-stage
input logic lsu_single_ecc_error_r, // ECC single bit error R-stage
input logic lsu_double_ecc_error_r, // ECC double bit error R-stage
input logic [31:0] lsu_ld_data_m, // Load data M-stage
input logic lsu_single_ecc_error_m, // ECC single bit error M-stage
input logic lsu_double_ecc_error_m, // ECC double bit error M-stage
input logic flush_m_up, // Flush M and D stage
input logic flush_r, // Flush R-stage
input logic ldst_dual_d, // load/store is unaligned at 32 bit boundary D-stage
input logic ldst_dual_m, // load/store is unaligned at 32 bit boundary M-stage
input logic ldst_dual_r, // load/store is unaligned at 32 bit boundary R-stage
input logic [31:0] exu_lsu_rs1_d, // address
input logic [31:0] exu_lsu_rs2_d, // store data
input el2_lsu_pkt_t lsu_p, // lsu control packet
input logic dec_lsu_valid_raw_d, // Raw valid for address computation
input logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses
input logic [31:0] picm_mask_data_m, // PIC data M-stage
input logic [31:0] bus_read_data_m, // the bus return data
output logic [31:0] lsu_result_m, // lsu load data
output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF
// lsu address down the pipe
output logic [31:0] lsu_addr_d,
output logic [31:0] lsu_addr_m,
output logic [31:0] lsu_addr_r,
// lsu address down the pipe - needed to check unaligned
output logic [31:0] end_addr_d,
output logic [31:0] end_addr_m,
output logic [31:0] end_addr_r,
// store data down the pipe
output logic [31:0] store_data_m,
input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control
output logic lsu_exc_m, // Access or misaligned fault
output logic is_sideeffects_m, // is side-effects space
output logic lsu_commit_r, // lsu instruction in r commits
output logic lsu_single_ecc_error_incr,// LSU inc SB error counter
output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet
output logic [31:1] lsu_fir_addr, // fast interrupt address
output logic [1:0] lsu_fir_error, // Error during fast interrupt lookup
// address in dccm/pic/external per pipe stage
output logic addr_in_dccm_d,
output logic addr_in_dccm_m,
output logic addr_in_dccm_r,
output logic addr_in_pic_d,
output logic addr_in_pic_m,
output logic addr_in_pic_r,
output logic addr_external_m,
// DMA slave
input logic dma_dccm_req, // DMA request valid; forces the D-stage packet valid
input logic [31:0] dma_mem_addr, // DMA address; used as the base address when dec_lsu_valid_raw_d is low
input logic [2:0] dma_mem_sz, // DMA size code: 0 = byte, 1 = half, 2 = word, 3 = dword
input logic dma_mem_write, // 1 = DMA store, 0 = DMA load
input logic [63:0] dma_mem_wdata, // DMA write data, shifted down by the address byte offset before use
// Store buffer related signals
output el2_lsu_pkt_t lsu_pkt_d,
output el2_lsu_pkt_t lsu_pkt_m,
output el2_lsu_pkt_t lsu_pkt_r,
input logic scan_mode // Scan mode
);
// ---------------------------------------------------------------------------
// Internal signals
// ---------------------------------------------------------------------------
// Flopped upper end-address bits; only consumed when the access is "dual"
// (crosses a 32-bit boundary), see the end_addr_m/end_addr_r muxes.
logic [31:3] end_addr_pre_m, end_addr_pre_r;
// D-stage start address (adder output) and end address (last byte accessed).
logic [31:0] full_addr_d;
logic [31:0] full_end_addr_d;
// Base address / offset after the core-vs-DMA premux.
logic [31:0] lsu_rs1_d;
logic [11:0] lsu_offset_d;
// Base address / offset actually fed to the adder (rs1_d includes the load bypass).
logic [31:0] rs1_d;
logic [11:0] offset_d;
// offset + (access size - 1), one bit wider than the offset to keep the sign.
logic [12:0] end_addr_offset_d;
// access size - 1 in bytes (0, 1, 3 or 7).
logic [2:0] addr_offset_d;
logic [63:0] dma_mem_wdata_shifted;
// NOTE(review): addr_external_d, access_fault_d, misaligned_fault_d, exc_mscause_d and
// the fir_*_d signals have no driver visible in this module; presumably they are
// outputs of el2_lsu_addrcheck connected through its `.*` - confirm against that module.
logic addr_external_d;
logic addr_external_r;
logic access_fault_d, misaligned_fault_d;
logic access_fault_m, misaligned_fault_m;
logic fir_dccm_access_error_d, fir_nondccm_access_error_d;
logic fir_dccm_access_error_m, fir_nondccm_access_error_m;
logic [3:0] exc_mscause_d, exc_mscause_m;
// Base address before the load-result bypass mux.
logic [31:0] rs1_d_raw;
// Store data: D-stage selection, M-stage flop output, and M-stage flop input.
logic [31:0] store_data_d, store_data_pre_m, store_data_m_in;
// Bus read data captured into the R stage.
logic [31:0] bus_read_data_r;
// D-stage packet synthesized for a DMA request.
el2_lsu_pkt_t dma_pkt_d;
// Next-state packets for the M and R stage flops.
el2_lsu_pkt_t lsu_pkt_m_in, lsu_pkt_r_in;
// M-stage exception packet; only driven in the LOAD_TO_USE_PLUS1 != 1 configuration.
el2_lsu_error_pkt_t lsu_error_pkt_m;
// Premux the rs1/offset for dma
// When dec_lsu_valid_raw_d is low, the DMA address becomes the base address and
// the offset is forced to zero, so the adder output equals dma_mem_addr.
assign lsu_rs1_d[31:0] = dec_lsu_valid_raw_d ? exu_lsu_rs1_d[31:0] : dma_mem_addr[31:0];
assign lsu_offset_d[11:0] = dec_lsu_offset_d[11:0] & {12{dec_lsu_valid_raw_d}};
assign rs1_d_raw[31:0] = lsu_rs1_d[31:0];
assign offset_d[11:0] = lsu_offset_d[11:0];
// Base-address bypass: when the D-stage packet requests it, use lsu_result_m
// instead of the register value as the base address.
assign rs1_d[31:0] = (lsu_pkt_d.load_ldst_bypass_d) ? lsu_result_m[31:0] : rs1_d_raw[31:0];
// generate the ls address
rvlsadder lsadder (.rs1(rs1_d[31:0]),
.offset(offset_d[11:0]),
.dout(full_addr_d[31:0])
);
// Module to generate the memory map of the address
// Remaining ports are connected by name through `.*`.
el2_lsu_addrcheck addrcheck (
.start_addr_d(full_addr_d[31:0]),
.end_addr_d(full_end_addr_d[31:0]),
.rs1_region_d(rs1_d[31:28]),
.*
);
// Calculate start/end address for load/store
// addr_offset_d = access size in bytes minus one: byte -> 0, half -> 1, word -> 3, dword -> 7.
assign addr_offset_d[2:0] = ({3{lsu_pkt_d.half}} & 3'b01) | ({3{lsu_pkt_d.word}} & 3'b11) | ({3{lsu_pkt_d.dword}} & 3'b111);
// Sign-extend the 12-bit offset by one bit, then add (size - 1).
assign end_addr_offset_d[12:0] = {offset_d[11],offset_d[11:0]} + {9'b0,addr_offset_d[2:0]};
// End address = base + sign-extended (offset + size - 1), i.e. the last byte touched.
assign full_end_addr_d[31:0] = rs1_d[31:0] + {{19{end_addr_offset_d[12]}},end_addr_offset_d[12:0]};
assign end_addr_d[31:0] = full_end_addr_d[31:0];
// M-stage exception indication: either fault type.
assign lsu_exc_m = access_fault_m | misaligned_fault_m;
// Goes to TLU to increment the ECC error counter
// Counts a single-bit error only when there is no double-bit error, the R-stage
// packet is valid, and the access either commits or is a DMA access.
assign lsu_single_ecc_error_incr = (lsu_single_ecc_error_r & ~lsu_double_ecc_error_r) & (lsu_commit_r | lsu_pkt_r.dma) & lsu_pkt_r.valid;
// Exception packet generation. Two build-time variants selected by pt.LOAD_TO_USE_PLUS1:
//   == 1 : fault flags are flopped M -> R and the packet is built combinationally in R.
//   else : the packet is built in M and then flopped into R.
// lsu_fir_error encoding in both variants:
//   2'b11 non-DCCM access error, 2'b10 DCCM access error,
//   2'b01 double-bit ECC error on a fast-interrupt access, 2'b00 no error.
if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1
logic access_fault_r, misaligned_fault_r;
logic [3:0] exc_mscause_r;
logic fir_dccm_access_error_r, fir_nondccm_access_error_r;
// Generate exception packet
// An exception is reported only for a valid R-stage packet that is neither DMA nor fast-interrupt.
assign lsu_error_pkt_r.exc_valid = (access_fault_r | misaligned_fault_r | lsu_double_ecc_error_r) & lsu_pkt_r.valid & ~lsu_pkt_r.dma & ~lsu_pkt_r.fast_int;
// A single-bit ECC error is reported only when no exception is raised and the access is not DMA.
assign lsu_error_pkt_r.single_ecc_error = lsu_single_ecc_error_r & ~lsu_error_pkt_r.exc_valid & ~lsu_pkt_r.dma;
assign lsu_error_pkt_r.inst_type = lsu_pkt_r.store;
// exc_type is 0 for a misaligned fault and 1 for everything else.
assign lsu_error_pkt_r.exc_type = ~misaligned_fault_r;
// mscause is forced to 4'h1 when a double-bit ECC error is the only cause; otherwise the pipelined cause is used.
assign lsu_error_pkt_r.mscause[3:0] = (lsu_double_ecc_error_r & ~misaligned_fault_r & ~access_fault_r) ? 4'h1 : exc_mscause_r[3:0];
assign lsu_error_pkt_r.addr[31:0] = lsu_addr_r[31:0];
assign lsu_fir_error[1:0] = fir_nondccm_access_error_r ? 2'b11 : (fir_dccm_access_error_r ? 2'b10 : ((lsu_pkt_r.fast_int & lsu_double_ecc_error_r) ? 2'b01 : 2'b00));
// M -> R pipeline flops for the fault information.
rvdff #(1) access_fault_rff (.din(access_fault_m), .dout(access_fault_r), .clk(lsu_c1_r_clk), .*);
rvdff #(1) misaligned_fault_rff (.din(misaligned_fault_m), .dout(misaligned_fault_r), .clk(lsu_c1_r_clk), .*);
rvdff #(4) exc_mscause_rff (.din(exc_mscause_m[3:0]), .dout(exc_mscause_r[3:0]), .clk(lsu_c1_r_clk), .*);
// NOTE(review): the next two instances are named *_mff although they capture the R-stage
// value; they do not clash with same-named module-level instances because they live in
// this generate scope. Renaming would change hierarchy paths, so it is left as is.
rvdff #(1) fir_dccm_access_error_mff (.din(fir_dccm_access_error_m), .dout(fir_dccm_access_error_r), .clk(lsu_c1_r_clk), .*);
rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_m), .dout(fir_nondccm_access_error_r), .clk(lsu_c1_r_clk), .*);
end else begin: L2U_Plus1_0
logic [1:0] lsu_fir_error_m;
// Generate exception packet
// Same qualification as the R-stage variant, plus the M-stage flush kills the exception.
assign lsu_error_pkt_m.exc_valid = (access_fault_m | misaligned_fault_m | lsu_double_ecc_error_m) & lsu_pkt_m.valid & ~lsu_pkt_m.dma & ~lsu_pkt_m.fast_int & ~flush_m_up;
assign lsu_error_pkt_m.single_ecc_error = lsu_single_ecc_error_m & ~lsu_error_pkt_m.exc_valid & ~lsu_pkt_m.dma;
assign lsu_error_pkt_m.inst_type = lsu_pkt_m.store;
// exc_type is 0 for a misaligned fault and 1 for everything else.
assign lsu_error_pkt_m.exc_type = ~misaligned_fault_m;
// mscause is forced to 4'h1 when a double-bit ECC error is the only cause.
assign lsu_error_pkt_m.mscause[3:0] = (lsu_double_ecc_error_m & ~misaligned_fault_m & ~access_fault_m) ? 4'h1 : exc_mscause_m[3:0];
assign lsu_error_pkt_m.addr[31:0] = lsu_addr_m[31:0];
assign lsu_fir_error_m[1:0] = fir_nondccm_access_error_m ? 2'b11 : (fir_dccm_access_error_m ? 2'b10 : ((lsu_pkt_m.fast_int & lsu_double_ecc_error_m) ? 2'b01 : 2'b00));
// The two status bits are flopped every cycle on the C2 R clock; the rest of the packet
// is only captured when there is something to report (or clock gating is overridden).
// Assumes exc_valid and single_ecc_error occupy bits [1:0] of el2_lsu_error_pkt_t, as the
// [$bits-1:2] slice below implies - TODO confirm against el2_pkg.
rvdff #(1) lsu_exc_valid_rff (.*, .din(lsu_error_pkt_m.exc_valid), .dout(lsu_error_pkt_r.exc_valid), .clk(lsu_c2_r_clk));
rvdff #(1) lsu_single_ecc_error_rff(.*, .din(lsu_error_pkt_m.single_ecc_error), .dout(lsu_error_pkt_r.single_ecc_error), .clk(lsu_c2_r_clk));
rvdffe #($bits(el2_lsu_error_pkt_t)-2) lsu_error_pkt_rff (.*, .din(lsu_error_pkt_m[$bits(el2_lsu_error_pkt_t)-1:2]), .dout(lsu_error_pkt_r[$bits(el2_lsu_error_pkt_t)-1:2]), .en(lsu_error_pkt_m.exc_valid | lsu_error_pkt_m.single_ecc_error | clk_override));
rvdff #(2) lsu_fir_error_rff (.*, .din(lsu_fir_error_m[1:0]), .dout(lsu_fir_error[1:0]), .clk(lsu_c2_r_clk));
end
// Build the D-stage packet that stands in for a core load/store when the
// request comes from the DMA port. Every field not assigned explicitly
// below stays cleared by the initial '0 assignment.
always_comb begin
   dma_pkt_d       = '0;

   // Request qualifiers: always tagged as DMA, valid follows the DMA request.
   dma_pkt_d.dma   = 1'b1;
   dma_pkt_d.valid = dma_dccm_req;

   // Direction: exactly one of load/store is set.
   dma_pkt_d.load  = ~dma_mem_write;
   dma_pkt_d.store = dma_mem_write;

   // Size decode (at most one set): 0 = byte, 1 = half, 2 = word, 3 = dword.
   // Size codes 4..7 leave all four size flags cleared.
   dma_pkt_d.dword = (dma_mem_sz[2:0] == 3'b011);
   dma_pkt_d.word  = (dma_mem_sz[2:0] == 3'b010);
   dma_pkt_d.half  = (dma_mem_sz[2:0] == 3'b001);
   dma_pkt_d.by    = (dma_mem_sz[2:0] == 3'b000);
end
// Packet selection and per-stage valid qualification.
// Statement order matters: each packet is first copied as a whole struct and
// its .valid field is then overridden by the later assignments.
always_comb begin
// D-stage packet: core packet when dec_lsu_valid_raw_d is set, otherwise the DMA packet.
lsu_pkt_d = dec_lsu_valid_raw_d ? lsu_p : dma_pkt_d;
lsu_pkt_m_in = lsu_pkt_d;
lsu_pkt_r_in = lsu_pkt_m;
// D valid: the core request is killed by flush_m_up unless it is a fast-interrupt
// access; a DMA request always makes the packet valid.
lsu_pkt_d.valid = (lsu_p.valid & ~(flush_m_up & ~lsu_p.fast_int)) | dma_dccm_req;
// M/R next-state valid: flush_m_up kills the packet unless it is a DMA access.
lsu_pkt_m_in.valid = lsu_pkt_d.valid & ~(flush_m_up & ~lsu_pkt_d.dma);
lsu_pkt_r_in.valid = lsu_pkt_m.valid & ~(flush_m_up & ~lsu_pkt_m.dma) ;
end
// C2 clock for valid and C1 for other bits of packet
// Assumes .valid is bit 0 of el2_lsu_pkt_t, as the [$bits-1:1] slices below imply -
// TODO confirm against el2_pkg.
rvdff #(1) lsu_pkt_vldmff (.*, .din(lsu_pkt_m_in.valid), .dout(lsu_pkt_m.valid), .clk(lsu_c2_m_clk));
rvdff #(1) lsu_pkt_vldrff (.*, .din(lsu_pkt_r_in.valid), .dout(lsu_pkt_r.valid), .clk(lsu_c2_r_clk));
rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_mff (.*, .din(lsu_pkt_m_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_m[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_m_clk));
rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_rff (.*, .din(lsu_pkt_r_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_r[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_r_clk));
// Load data selection and formatting. Two build-time variants selected by pt.LOAD_TO_USE_PLUS1:
//   == 1 : lsu_result_m is produced from R-stage data and the R-stage packet.
//   else : lsu_result_m is produced from M-stage data and the M-stage packet.
// In both variants lsu_result_corr_r is produced from the ECC-corrected R-stage data.
// Formatting is an AND-OR mux: byte and half are zero-extended when .unsign is set and
// sign-extended otherwise; word passes through unchanged. If none of by/half/word is
// set, the result is all zeros.
if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U1_Plus1_1
logic [31:0] lsu_ld_datafn_r, lsu_ld_datafn_corr_r;
// External accesses take the flopped bus read data; all others take the lsu_ld_data_* inputs.
assign lsu_ld_datafn_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_r[31:0];
assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
// this is really R stage signal
assign lsu_result_m[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by }} & {24'b0,lsu_ld_datafn_r[7:0]}) |
({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_r[15:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.by }} & {{24{ lsu_ld_datafn_r[7]}}, lsu_ld_datafn_r[7:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_r[15]}},lsu_ld_datafn_r[15:0]}) |
({32{lsu_pkt_r.word}} & lsu_ld_datafn_r[31:0]);
// this signal is used for gpr update
assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.by }} & {{24{ lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
({32{lsu_pkt_r.word}} & lsu_ld_datafn_corr_r[31:0]);
end else begin: L2U1_Plus1_0 // block: L2U1_Plus1_1
logic [31:0] lsu_ld_datafn_m, lsu_ld_datafn_corr_r;
// M-stage result uses the unflopped bus data; the corrected R-stage result uses the flopped copy.
assign lsu_ld_datafn_m[31:0] = addr_external_m ? bus_read_data_m[31:0] : lsu_ld_data_m[31:0];
assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
// this result must look at prior stores and merge them in
assign lsu_result_m[31:0] = ({32{ lsu_pkt_m.unsign & lsu_pkt_m.by }} & {24'b0,lsu_ld_datafn_m[7:0]}) |
({32{ lsu_pkt_m.unsign & lsu_pkt_m.half}} & {16'b0,lsu_ld_datafn_m[15:0]}) |
({32{~lsu_pkt_m.unsign & lsu_pkt_m.by }} & {{24{ lsu_ld_datafn_m[7]}}, lsu_ld_datafn_m[7:0]}) |
({32{~lsu_pkt_m.unsign & lsu_pkt_m.half}} & {{16{ lsu_ld_datafn_m[15]}},lsu_ld_datafn_m[15:0]}) |
({32{lsu_pkt_m.word}} & lsu_ld_datafn_m[31:0]);
// this signal is used for gpr update
assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.by }} & {{24{ lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
({32{lsu_pkt_r.word}} & lsu_ld_datafn_corr_r[31:0]);
end
// Fast interrupt address
// Taken from bits [31:1] of the ECC-corrected R-stage load data.
assign lsu_fir_addr[31:1] = lsu_ld_data_corr_r[31:1];
// absence load/store all 0's
assign lsu_addr_d[31:0] = full_addr_d[31:0];
// Interrupt as a flush source allows the WB to occur
// Commit: valid load or store in R that is not flushed and is not a DMA access.
assign lsu_commit_r = lsu_pkt_r.valid & (lsu_pkt_r.store | lsu_pkt_r.load) & ~flush_r & ~lsu_pkt_r.dma;
// Right shift by 8 * dma_mem_addr[2:0] bits, i.e. by the byte offset within the 64-bit word.
assign dma_mem_wdata_shifted[63:0] = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores
assign store_data_d[31:0] = dma_dccm_req ? dma_mem_wdata_shifted[31:0] : exu_lsu_rs2_d[31:0]; // Write to PIC still happens in r stage
// D-stage store data bypass: take lsu_result_m instead of the register/DMA data when requested.
assign store_data_m_in[31:0] = (lsu_pkt_d.store_data_bypass_d) ? lsu_result_m[31:0] : store_data_d[31:0];
// M-stage store data: optional M-stage bypass from lsu_result_m, then masked with
// picm_mask_data_m only when the address is in the PIC (mask is all ones otherwise).
assign store_data_m[31:0] = (picm_mask_data_m[31:0] | {32{~addr_in_pic_m}}) & ((lsu_pkt_m.store_data_bypass_m) ? lsu_result_m[31:0] : store_data_pre_m[31:0]);
// Store data and start address pipeline flops.
rvdff #(32) sdmff (.*, .din(store_data_m_in[31:0]), .dout(store_data_pre_m[31:0]), .clk(lsu_store_c1_m_clk));
rvdff #(32) samff (.*, .din(lsu_addr_d[31:0]), .dout(lsu_addr_m[31:0]), .clk(lsu_c1_m_clk));
rvdff #(32) sarff (.*, .din(lsu_addr_m[31:0]), .dout(lsu_addr_r[31:0]), .clk(lsu_c1_r_clk));
// End address bits [31:3]: the flopped copy is used only for dual accesses; for
// all other accesses the start address bits are reused, so the enable flops
// below only capture a new value when the access is dual.
assign end_addr_m[31:3] = ldst_dual_m ? end_addr_pre_m[31:3] : lsu_addr_m[31:3]; // This is for power saving
assign end_addr_r[31:3] = ldst_dual_r ? end_addr_pre_r[31:3] : lsu_addr_r[31:3]; // This is for power saving
rvdffe #(29) end_addr_hi_mff (.*, .din(end_addr_d[31:3]), .dout(end_addr_pre_m[31:3]), .en((lsu_pkt_d.valid & ldst_dual_d) | clk_override));
rvdffe #(29) end_addr_hi_rff (.*, .din(end_addr_m[31:3]), .dout(end_addr_pre_r[31:3]), .en((lsu_pkt_m.valid & ldst_dual_m) | clk_override));
// End address bits [2:0] are always flopped.
rvdff #(3) end_addr_lo_mff (.*, .din(end_addr_d[2:0]), .dout(end_addr_m[2:0]), .clk(lsu_c1_m_clk));
rvdff #(3) end_addr_lo_rff (.*, .din(end_addr_m[2:0]), .dout(end_addr_r[2:0]), .clk(lsu_c1_r_clk));
// Address-region flags down the pipe (D -> M -> R).
rvdff #(1) addr_in_dccm_mff(.din(addr_in_dccm_d), .dout(addr_in_dccm_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) addr_in_dccm_rff(.din(addr_in_dccm_m), .dout(addr_in_dccm_r), .clk(lsu_c1_r_clk), .*);
rvdff #(1) addr_in_pic_mff(.din(addr_in_pic_d), .dout(addr_in_pic_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) addr_in_pic_rff(.din(addr_in_pic_m), .dout(addr_in_pic_r), .clk(lsu_c1_r_clk), .*);
rvdff #(1) addr_external_mff(.din(addr_external_d), .dout(addr_external_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) addr_external_rff(.din(addr_external_m), .dout(addr_external_r), .clk(lsu_c1_r_clk), .*);
// Fault information D -> M.
rvdff #(1) access_fault_mff (.din(access_fault_d), .dout(access_fault_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) misaligned_fault_mff (.din(misaligned_fault_d), .dout(misaligned_fault_m), .clk(lsu_c1_m_clk), .*);
rvdff #(4) exc_mscause_mff (.din(exc_mscause_d[3:0]), .dout(exc_mscause_m[3:0]), .clk(lsu_c1_m_clk), .*);
rvdff #(1) fir_dccm_access_error_mff (.din(fir_dccm_access_error_d), .dout(fir_dccm_access_error_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_d), .dout(fir_nondccm_access_error_m), .clk(lsu_c1_m_clk), .*);
// Bus read data M -> R, captured only for external accesses (or when clock gating is overridden).
rvdffe #(32) bus_read_data_r_ff (.*, .din(bus_read_data_m[31:0]), .dout(bus_read_data_r[31:0]), .en(addr_external_m | clk_override));
endmodule