// SystemVerilog source: 329 lines, 17 KiB
// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or it's affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//********************************************************************************
// $Id$
//
//
// Owner:
// Function: LSU control
// Comments:
//
//
//  DC1 -> DC2 -> DC3 -> DC4 (Commit)
//
//********************************************************************************
// el2_lsu_lsc_ctl : load/store unit control.
//
// Signals carry a _d / _m / _r suffix for the pipeline stage they belong to; the
// flops at the bottom of the module move D-stage values to M and M-stage values to R.
//
// What this module does, as visible in this file:
//   * Selects the address source (core rs1 + 12-bit offset, or the DMA address with a
//     zero offset) and computes the start address (lsu_addr_d) and the address of
//     the last byte accessed (end_addr_d).
//   * Passes both addresses to el2_lsu_addrcheck. The *_d fault / region signals
//     (access_fault_d, misaligned_fault_d, exc_mscause_d, fir_*_access_error_d,
//     addr_in_dccm_d, addr_in_pic_d, addr_external_d) and is_sideeffects_m are not
//     assigned anywhere in this file - presumably they are driven by that instance
//     through its implicit .* connections (TODO confirm against el2_lsu_addrcheck).
//   * Builds the D-stage control packet from either the decode packet (lsu_p) or a
//     locally constructed DMA packet, and pipelines it to M and R.
//   * Builds the exception packet (lsu_error_pkt_r) and the fast-interrupt error
//     code (lsu_fir_error); the stage in which they are formed depends on
//     pt.LOAD_TO_USE_PLUS1.
//   * Sign/zero-extends load data into lsu_result_m and lsu_result_corr_r.
//   * Selects, bypasses and registers store data.
//
// rst_l and scan_mode are not referenced explicitly; they are picked up by the
// rvdff instances through .* (presumably as their reset / scan pins).
module el2_lsu_lsc_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
 )(
   input logic                rst_l,

   // clocks per pipe
   input logic                lsu_c1_m_clk,
   input logic                lsu_c1_r_clk,
   input logic                lsu_c2_m_clk,
   input logic                lsu_c2_r_clk,
   input logic                lsu_store_c1_m_clk,

   input logic [31:0]         lsu_ld_data_r,
   input logic [31:0]         lsu_ld_data_corr_r,           // ECC corrected data
   input logic                lsu_single_ecc_error_r,
   input logic                lsu_double_ecc_error_r,

   input logic [31:0]         lsu_ld_data_m,
   input logic                lsu_single_ecc_error_m,
   input logic                lsu_double_ecc_error_m,

   input logic                flush_m_up,
   input logic                flush_r,

   input logic [31:0]         exu_lsu_rs1_d,                // address
   input logic [31:0]         exu_lsu_rs2_d,                // store data

   input el2_lsu_pkt_t        lsu_p,                        // lsu control packet
   input logic                dec_lsu_valid_raw_d,          // Raw valid for address computation
   input logic [11:0]         dec_lsu_offset_d,

   input  logic [31:0]        picm_mask_data_m,
   input  logic [31:0]        bus_read_data_m,
   output logic [31:0]        lsu_result_m,
   output logic [31:0]        lsu_result_corr_r,            // This is the ECC corrected data going to RF
   // lsu address down the pipe
   output logic [31:0]        lsu_addr_d,
   output logic [31:0]        lsu_addr_m,
   output logic [31:0]        lsu_addr_r,
   // lsu address down the pipe - needed to check unaligned
   output logic [31:0]        end_addr_d,
   output logic [31:0]        end_addr_m,
   output logic [31:0]        end_addr_r,
   // store data down the pipe
   output logic [31:0]        store_data_m,

   input  logic [31:0]        dec_tlu_mrac_ff,
   output logic               lsu_exc_m,
   output logic               is_sideeffects_m,
   output logic               lsu_commit_r,
   output logic               lsu_single_ecc_error_incr,
   output el2_lsu_error_pkt_t lsu_error_pkt_r,

   output logic [31:1]        lsu_fir_addr,                 // fast interrupt address
   output logic [1:0]         lsu_fir_error,                // Error during fast interrupt lookup

   // address in dccm/pic/external per pipe stage
   output logic               addr_in_dccm_d,
   output logic               addr_in_dccm_m,
   output logic               addr_in_dccm_r,

   output logic               addr_in_pic_d,
   output logic               addr_in_pic_m,
   output logic               addr_in_pic_r,

   output logic               addr_external_m,

   // DMA slave
   input logic                dma_dccm_req,
   input logic [31:0]         dma_mem_addr,
   input logic [2:0]          dma_mem_sz,
   input logic                dma_mem_write,
   input logic [63:0]         dma_mem_wdata,

   // Store buffer related signals
   output el2_lsu_pkt_t       lsu_pkt_d,
   output el2_lsu_pkt_t       lsu_pkt_m,
   output el2_lsu_pkt_t       lsu_pkt_r,

   input  logic               scan_mode

   );

   logic [31:0]        full_addr_d;
   logic [31:0]        full_end_addr_d;
   logic [31:0]        lsu_rs1_d;
   logic [11:0]        lsu_offset_d;
   logic [31:0]        rs1_d;
   logic [11:0]        offset_d;
   logic [12:0]        end_addr_offset_d;
   logic [2:0]         addr_offset_d;

   logic [63:0]        dma_mem_wdata_shifted;
   logic               addr_external_d;
   logic               addr_external_r;
   logic               access_fault_d, misaligned_fault_d;
   logic               access_fault_m, misaligned_fault_m;

   logic               fir_dccm_access_error_d, fir_nondccm_access_error_d;
   logic               fir_dccm_access_error_m, fir_nondccm_access_error_m;

   logic [3:0]         exc_mscause_d, exc_mscause_m;
   logic [31:0]        rs1_d_raw;
   logic [31:0]        store_data_d, store_data_pre_m, store_data_m_in;
   logic [31:0]        bus_read_data_r;

   el2_lsu_pkt_t           dma_pkt_d;
   el2_lsu_pkt_t           lsu_pkt_m_in, lsu_pkt_r_in;
   el2_lsu_error_pkt_t     lsu_error_pkt_m;


   // Premux the rs1/offset for dma
   // When dec_lsu_valid_raw_d is low the base is the DMA address and the offset is forced to zero.
   assign lsu_rs1_d[31:0]      = dec_lsu_valid_raw_d ? exu_lsu_rs1_d[31:0] : dma_mem_addr[31:0];
   assign lsu_offset_d[11:0]   = dec_lsu_offset_d[11:0] & {12{dec_lsu_valid_raw_d}};
   assign rs1_d_raw[31:0]      = lsu_rs1_d[31:0];
   assign offset_d[11:0]       = lsu_offset_d[11:0];

   // With load_ldst_bypass_d set, the base address is taken from lsu_result_m instead of the muxed rs1.
   assign rs1_d[31:0] = (lsu_pkt_d.load_ldst_bypass_d) ? lsu_result_m[31:0] : rs1_d_raw[31:0];

   // generate the ls address
   // need to refine this if memory is only 128KB
   rvlsadder   lsadder  (.rs1(rs1_d[31:0]),
                         .offset(offset_d[11:0]),
                         .dout(full_addr_d[31:0])
                         );

   // Module to generate the memory map of the address
   // All remaining ports are connected by name through .* (see the module header note).
   el2_lsu_addrcheck addrcheck (
              .start_addr_d(full_addr_d[31:0]),
              .end_addr_d(full_end_addr_d[31:0]),
              .rs1_region_d(rs1_d[31:28]),
              .*
   );

   // Calculate start/end address for load/store
   // addr_offset_d is (access size in bytes - 1): byte -> 0, half -> 1, word -> 3, dword -> 7.
   assign addr_offset_d[2:0]      = ({3{lsu_pkt_d.half}} & 3'b01) | ({3{lsu_pkt_d.word}} & 3'b11) | ({3{lsu_pkt_d.dword}} & 3'b111);
   // Sign-extend the 12-bit offset to 13 bits before adding the size term, then
   // sign-extend the 13-bit sum to 32 bits and add it to the base.
   assign end_addr_offset_d[12:0] = {offset_d[11],offset_d[11:0]} + {9'b0,addr_offset_d[2:0]};
   assign full_end_addr_d[31:0]   = rs1_d[31:0] + {{19{end_addr_offset_d[12]}},end_addr_offset_d[12:0]};
   assign end_addr_d[31:0]        = full_end_addr_d[31:0];
   assign lsu_exc_m               = access_fault_m | misaligned_fault_m;

   // Goes to TLU to increment the ECC error counter
   // Asserted only for a single-bit error without a double-bit error, on a valid
   // R-stage packet that either commits or belongs to the DMA.
   assign lsu_single_ecc_error_incr = (lsu_single_ecc_error_r & ~lsu_double_ecc_error_r) & (lsu_commit_r | lsu_pkt_r.dma) & lsu_pkt_r.valid;

   // Exception packet / fast-interrupt error generation.
   //   LOAD_TO_USE_PLUS1 == 1 : fault flags are registered into R and the packet is built in R.
   //   otherwise              : the packet is built in M and registered into R as a whole.
   // lsu_fir_error encoding (both branches): 2'b11 non-DCCM access error, 2'b10 DCCM
   // access error, 2'b01 double ECC error on a fast_int packet, 2'b00 no error.
   if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1
      logic        access_fault_r, misaligned_fault_r;
      logic [3:0]  exc_mscause_r;
      logic        fir_dccm_access_error_r, fir_nondccm_access_error_r;

      // Generate exception packet
      // No exception is reported for DMA or fast_int packets.
      assign lsu_error_pkt_r.exc_valid = (access_fault_r | misaligned_fault_r | lsu_double_ecc_error_r) & lsu_pkt_r.valid & ~lsu_pkt_r.dma & ~lsu_pkt_r.fast_int;
      assign lsu_error_pkt_r.single_ecc_error = lsu_single_ecc_error_r & ~lsu_error_pkt_r.exc_valid & ~lsu_pkt_r.dma;
      assign lsu_error_pkt_r.inst_type = lsu_pkt_r.store;
      assign lsu_error_pkt_r.exc_type  = ~misaligned_fault_r;
      // mscause is forced to 4'h1 when a double ECC error is the only fault present.
      assign lsu_error_pkt_r.mscause[3:0] = (lsu_double_ecc_error_r & ~misaligned_fault_r & ~access_fault_r) ? 4'h1 : exc_mscause_r[3:0];
      assign lsu_error_pkt_r.addr[31:0] = lsu_addr_r[31:0];

      assign lsu_fir_error[1:0] = fir_nondccm_access_error_r ? 2'b11 : (fir_dccm_access_error_r ? 2'b10 : ((lsu_pkt_r.fast_int & lsu_double_ecc_error_r) ? 2'b01 : 2'b00));

      rvdff #(1)   access_fault_rff             (.din(access_fault_m),             .dout(access_fault_r),             .clk(lsu_c1_r_clk), .*);
      rvdff #(1)   misaligned_fault_rff         (.din(misaligned_fault_m),         .dout(misaligned_fault_r),         .clk(lsu_c1_r_clk), .*);
      rvdff #(4)   exc_mscause_rff              (.din(exc_mscause_m[3:0]),         .dout(exc_mscause_r[3:0]),         .clk(lsu_c1_r_clk), .*);
      // NOTE(review): the next two instances register M into R but keep an _mff suffix.
      // The names are left untouched because hierarchical references may depend on them.
      rvdff #(1)   fir_dccm_access_error_mff    (.din(fir_dccm_access_error_m),    .dout(fir_dccm_access_error_r),    .clk(lsu_c1_r_clk), .*);
      rvdff #(1)   fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_m), .dout(fir_nondccm_access_error_r), .clk(lsu_c1_r_clk), .*);

   end else begin: L2U_Plus1_0
      logic [1:0] lsu_fir_error_m;

      // Generate exception packet
      // Same as the R-stage version above, additionally qualified with ~flush_m_up.
      assign lsu_error_pkt_m.exc_valid = (access_fault_m | misaligned_fault_m | lsu_double_ecc_error_m) & lsu_pkt_m.valid & ~lsu_pkt_m.dma & ~lsu_pkt_m.fast_int & ~flush_m_up;
      assign lsu_error_pkt_m.single_ecc_error = lsu_single_ecc_error_m & ~lsu_error_pkt_m.exc_valid & ~lsu_pkt_m.dma;
      assign lsu_error_pkt_m.inst_type = lsu_pkt_m.store;
      assign lsu_error_pkt_m.exc_type  = ~misaligned_fault_m;
      // mscause is forced to 4'h1 when a double ECC error is the only fault present.
      assign lsu_error_pkt_m.mscause[3:0] = (lsu_double_ecc_error_m & ~misaligned_fault_m & ~access_fault_m) ? 4'h1 : exc_mscause_m[3:0];
      assign lsu_error_pkt_m.addr[31:0] = lsu_addr_m[31:0];

      assign lsu_fir_error_m[1:0] = fir_nondccm_access_error_m ? 2'b11 : (fir_dccm_access_error_m ? 2'b10 : ((lsu_pkt_m.fast_int & lsu_double_ecc_error_m) ? 2'b01 : 2'b00));

      rvdff #($bits(el2_lsu_error_pkt_t)) lsu_error_pkt_rff(.*, .din(lsu_error_pkt_m), .dout(lsu_error_pkt_r), .clk(lsu_c2_r_clk));
      rvdff #(2)                          lsu_fir_error_rff(.*, .din(lsu_fir_error_m[1:0]), .dout(lsu_fir_error[1:0]), .clk(lsu_c2_r_clk));
   end

   //Create DMA packet
   // All fields default to 0; dma_mem_sz selects the access size: 0 byte, 1 half, 2 word, 3 dword.
   always_comb begin
      dma_pkt_d = '0;
      dma_pkt_d.valid   = dma_dccm_req;
      dma_pkt_d.dma     = 1'b1;
      dma_pkt_d.store   = dma_mem_write;
      dma_pkt_d.load    = ~dma_mem_write;
      dma_pkt_d.by      = (dma_mem_sz[2:0] == 3'b0);
      dma_pkt_d.half    = (dma_mem_sz[2:0] == 3'b1);
      dma_pkt_d.word    = (dma_mem_sz[2:0] == 3'b10);
      dma_pkt_d.dword   = (dma_mem_sz[2:0] == 3'b11);
   end

   // Packet selection and per-stage valid qualification.
   // Statement order matters: the whole-packet copies come first and the .valid
   // fields are then overridden individually.
   always_comb begin
      lsu_pkt_d = dec_lsu_valid_raw_d ? lsu_p : dma_pkt_d;
      lsu_pkt_m_in = lsu_pkt_d;
      lsu_pkt_r_in = lsu_pkt_m;

      // A core packet is killed by flush_m_up unless it is a fast_int packet; a DMA request is always valid.
      lsu_pkt_d.valid = (lsu_p.valid & ~(flush_m_up & ~lsu_p.fast_int)) | dma_dccm_req;
      // Further down the pipe flush_m_up kills everything except DMA packets.
      lsu_pkt_m_in.valid = lsu_pkt_d.valid & ~(flush_m_up & ~lsu_pkt_d.dma);
      lsu_pkt_r_in.valid = lsu_pkt_m.valid & ~(flush_m_up & ~lsu_pkt_m.dma) ;
   end

   // C2 clock for valid and C1 for other bits of packet
   // The [$bits-1:1] slices skip bit 0 of the packed packet - assumes .valid is bit 0
   // of el2_lsu_pkt_t (TODO confirm against el2_pkg).
   rvdff #(1) lsu_pkt_vldmff (.*, .din(lsu_pkt_m_in.valid), .dout(lsu_pkt_m.valid), .clk(lsu_c2_m_clk));
   rvdff #(1) lsu_pkt_vldrff (.*, .din(lsu_pkt_r_in.valid), .dout(lsu_pkt_r.valid), .clk(lsu_c2_r_clk));

   rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_mff (.*, .din(lsu_pkt_m_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_m[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_m_clk));
   rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_rff (.*, .din(lsu_pkt_r_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_r[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_r_clk));


   // Load result formatting.
   // Data is taken from the bus read data when the access is external, otherwise from
   // the lsu_ld_data* inputs, and is then zero-extended (unsign) or sign-extended for
   // byte / half accesses, or passed through for word accesses.
   if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U1_Plus1_1
      logic [31:0] lsu_ld_datafn_r, lsu_ld_datafn_corr_r;

      assign lsu_ld_datafn_r[31:0]      = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_r[31:0];
      assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];

      // this is really R stage but don't want to make all the changes to support M,R buses
      assign lsu_result_m[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {24'b0,lsu_ld_datafn_r[7:0]}) |
                                  ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_r[15:0]}) |
                                  ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {{24{  lsu_ld_datafn_r[7]}}, lsu_ld_datafn_r[7:0]}) |
                                  ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{  lsu_ld_datafn_r[15]}},lsu_ld_datafn_r[15:0]}) |
                                  ({32{lsu_pkt_r.word}}                     & lsu_ld_datafn_r[31:0]);

      // this signal is used for gpr update
      assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
                                       ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
                                       ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {{24{  lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
                                       ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{  lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
                                       ({32{lsu_pkt_r.word}}                     & lsu_ld_datafn_corr_r[31:0]);

   end else begin: L2U1_Plus1_0 // end of block L2U1_Plus1_1
      logic [31:0] lsu_ld_datafn_m, lsu_ld_datafn_corr_r;

      assign lsu_ld_datafn_m[31:0]      = addr_external_m ? bus_read_data_m[31:0] : lsu_ld_data_m[31:0];
      assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];

      // this result must look at prior stores and merge them in
      assign lsu_result_m[31:0] = ({32{ lsu_pkt_m.unsign & lsu_pkt_m.by  }} & {24'b0,lsu_ld_datafn_m[7:0]}) |
                                  ({32{ lsu_pkt_m.unsign & lsu_pkt_m.half}} & {16'b0,lsu_ld_datafn_m[15:0]}) |
                                  ({32{~lsu_pkt_m.unsign & lsu_pkt_m.by  }} & {{24{  lsu_ld_datafn_m[7]}}, lsu_ld_datafn_m[7:0]}) |
                                  ({32{~lsu_pkt_m.unsign & lsu_pkt_m.half}} & {{16{  lsu_ld_datafn_m[15]}},lsu_ld_datafn_m[15:0]}) |
                                  ({32{lsu_pkt_m.word}}                     & lsu_ld_datafn_m[31:0]);

      // this signal is used for gpr update
      assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
                                       ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
                                       ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {{24{  lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
                                       ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{  lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
                                       ({32{lsu_pkt_r.word}}                     & lsu_ld_datafn_corr_r[31:0]);
   end

   // Fast interrupt address
   // Taken directly from the ECC-corrected R-stage load data; bit 0 is dropped.
   assign lsu_fir_addr[31:1]    = lsu_ld_data_corr_r[31:1];

   // absence load/store all 0's
   assign lsu_addr_d[31:0] = full_addr_d[31:0];

   // Interrupt as a flush source allows the WB to occur
   // Commit requires a valid, non-DMA load or store in R that is not flushed by flush_r.
   assign lsu_commit_r = lsu_pkt_r.valid & (lsu_pkt_r.store | lsu_pkt_r.load) & ~flush_r & ~lsu_pkt_r.dma;

   assign dma_mem_wdata_shifted[63:0] = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000};   // Shift the dma data to lower bits to make it consistent to lsu stores
   assign store_data_d[31:0] = dma_dccm_req ? dma_mem_wdata_shifted[31:0] : exu_lsu_rs2_d[31:0];  // Write to PIC still happens in r stage

   // D-stage store data bypass: take the load result instead of the selected store data.
   assign store_data_m_in[31:0] = (lsu_pkt_d.store_data_bypass_d) ? lsu_result_m[31:0] : store_data_d[31:0];

   // M-stage store data: optional bypass from the load result, then masked with
   // picm_mask_data_m only when the address is in the PIC (the mask is all ones otherwise).
   assign store_data_m[31:0] = (picm_mask_data_m[31:0] | {32{~addr_in_pic_m}}) & ((lsu_pkt_m.store_data_bypass_m) ? lsu_result_m[31:0] : store_data_pre_m[31:0]);


   // Pipeline registers: store data, addresses, region decode and fault flags.
   rvdff #(32) sdmff (.*, .din(store_data_m_in[31:0]), .dout(store_data_pre_m[31:0]), .clk(lsu_store_c1_m_clk));

   rvdff #(32) samff (.*, .din(lsu_addr_d[31:0]), .dout(lsu_addr_m[31:0]), .clk(lsu_c1_m_clk));
   rvdff #(32) sarff (.*, .din(lsu_addr_m[31:0]), .dout(lsu_addr_r[31:0]), .clk(lsu_c1_r_clk));

   rvdff #(32) end_addr_mff (.*, .din(end_addr_d[31:0]), .dout(end_addr_m[31:0]), .clk(lsu_c1_m_clk));
   rvdff #(32) end_addr_rff (.*, .din(end_addr_m[31:0]), .dout(end_addr_r[31:0]), .clk(lsu_c1_r_clk));

   rvdff #(1) addr_in_dccm_mff(.din(addr_in_dccm_d), .dout(addr_in_dccm_m), .clk(lsu_c1_m_clk), .*);
   rvdff #(1) addr_in_dccm_rff(.din(addr_in_dccm_m), .dout(addr_in_dccm_r), .clk(lsu_c1_r_clk), .*);

   rvdff #(1) addr_in_pic_mff(.din(addr_in_pic_d), .dout(addr_in_pic_m), .clk(lsu_c1_m_clk), .*);
   rvdff #(1) addr_in_pic_rff(.din(addr_in_pic_m), .dout(addr_in_pic_r), .clk(lsu_c1_r_clk), .*);

   rvdff #(1) addr_external_mff(.din(addr_external_d), .dout(addr_external_m), .clk(lsu_c1_m_clk), .*);
   rvdff #(1) addr_external_rff(.din(addr_external_m), .dout(addr_external_r), .clk(lsu_c1_r_clk), .*);

   rvdff #(1) access_fault_mff     (.din(access_fault_d),     .dout(access_fault_m),     .clk(lsu_c1_m_clk), .*);
   rvdff #(1) misaligned_fault_mff (.din(misaligned_fault_d), .dout(misaligned_fault_m), .clk(lsu_c1_m_clk), .*);
   rvdff #(4) exc_mscause_mff      (.din(exc_mscause_d[3:0]), .dout(exc_mscause_m[3:0]), .clk(lsu_c1_m_clk), .*);

   rvdff #(1) fir_dccm_access_error_mff    (.din(fir_dccm_access_error_d),    .dout(fir_dccm_access_error_m),    .clk(lsu_c1_m_clk), .*);
   rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_d), .dout(fir_nondccm_access_error_m), .clk(lsu_c1_m_clk), .*);

   rvdff #(32) bus_read_data_r_ff (.*, .din(bus_read_data_m[31:0]), .dout(bus_read_data_r[31:0]), .clk(lsu_c1_r_clk));

endmodule