abstractaccelerator/design/lsu/lsu_lsc_ctl.sv

340 lines
18 KiB
Systemverilog

// SPDX-License-Identifier: Apache-2.0
// Copyright 2019 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//********************************************************************************
// $Id$
//
//
// Owner:
// Function: LSU control
// Comments:
//
//
// DC1 -> DC2 -> DC3 -> DC4 (Commit)
//
//********************************************************************************
module lsu_lsc_ctl
import swerv_types::*;
(
input logic rst_l,
input logic clk,
// clocks per pipe
input logic lsu_c1_dc4_clk,
input logic lsu_c1_dc5_clk,
input logic lsu_c2_dc4_clk,
input logic lsu_c2_dc5_clk,
// freez clocks per pipe
input logic lsu_freeze_c1_dc1_clken,
input logic lsu_freeze_c1_dc2_clken,
input logic lsu_freeze_c1_dc2_clk,
input logic lsu_freeze_c1_dc3_clken,
input logic lsu_freeze_c1_dc3_clk,
input logic lsu_freeze_c2_dc1_clk,
input logic lsu_freeze_c2_dc2_clk,
input logic lsu_freeze_c2_dc3_clk,
input logic lsu_store_c1_dc1_clken,
input logic lsu_store_c1_dc2_clken,
input logic lsu_store_c1_dc3_clken,
input logic lsu_store_c1_dc4_clk,
input logic lsu_store_c1_dc5_clk,
input logic [31:0] i0_result_e4_eff,
input logic [31:0] i1_result_e4_eff,
input logic [31:0] i0_result_e2,
input logic ld_bus_error_dc3,
input logic [31:0] ld_bus_error_addr_dc3,
input logic lsu_single_ecc_error_dc3,
input logic lsu_single_ecc_error_dc5,
input logic lsu_double_ecc_error_dc3,
input logic lsu_freeze_dc3,
input logic lsu_i0_valid_dc3,
input logic flush_dc2_up,
input logic flush_dc3,
input logic flush_dc4,
input logic flush_dc5,
input logic [31:0] exu_lsu_rs1_d, // address
input logic [31:0] exu_lsu_rs2_d, // store data
input lsu_pkt_t lsu_p, // lsu control packet
input logic [11:0] dec_lsu_offset_d,
input logic [31:0] picm_mask_data_dc3,
input logic [31:0] lsu_ld_data_dc3,
input logic [31:0] lsu_ld_data_corr_dc3,
input logic [31:0] bus_read_data_dc3,
output logic [31:0] lsu_result_dc3,
output logic [31:0] lsu_result_corr_dc4, // This is the ECC corrected data going to RF
// lsu address down the pipe
output logic [31:0] lsu_addr_dc1,
output logic [31:0] lsu_addr_dc2,
output logic [31:0] lsu_addr_dc3,
output logic [31:0] lsu_addr_dc4,
output logic [31:0] lsu_addr_dc5,
// lsu address down the pipe - needed to check unaligned
output logic [31:0] end_addr_dc1,
output logic [31:0] end_addr_dc2,
output logic [31:0] end_addr_dc3,
output logic [31:0] end_addr_dc4,
output logic [31:0] end_addr_dc5,
// store data down the pipe
output logic [63:0] store_data_dc2,
output logic [63:0] store_data_dc3,
output logic [31:0] store_data_dc4,
output logic [31:0] store_data_dc5,
input logic [31:0] dec_tlu_mrac_ff,
output logic lsu_exc_dc2,
output lsu_error_pkt_t lsu_error_pkt_dc3,
output logic lsu_single_ecc_error_incr, // Increment the counter for Single ECC error
output logic lsu_freeze_external_ints_dc3,
output logic is_sideeffects_dc2,
output logic is_sideeffects_dc3,
output logic lsu_commit_dc5,
// address in dccm/pic/external per pipe stage
output logic addr_in_dccm_dc1,
output logic addr_in_dccm_dc2,
output logic addr_in_dccm_dc3,
output logic addr_in_pic_dc1,
output logic addr_in_pic_dc2,
output logic addr_in_pic_dc3,
output logic addr_external_dc2,
output logic addr_external_dc3,
output logic addr_external_dc4,
output logic addr_external_dc5,
// DMA slave
input logic dma_dccm_req,
input logic [31:0] dma_mem_addr,
input logic [2:0] dma_mem_sz,
input logic dma_mem_write,
input logic [63:0] dma_mem_wdata,
// Store buffer related signals
output lsu_pkt_t lsu_pkt_dc1,
output lsu_pkt_t lsu_pkt_dc2,
output lsu_pkt_t lsu_pkt_dc3,
output lsu_pkt_t lsu_pkt_dc4,
output lsu_pkt_t lsu_pkt_dc5,
input logic scan_mode
);
`include "global.h"
logic [31:0] full_addr_dc1;
logic [31:0] full_end_addr_dc1;
logic [31:0] lsu_rs1_d;
logic [11:0] lsu_offset_d;
logic [31:0] rs1_dc1;
logic [11:0] offset_dc1;
logic [12:0] end_addr_offset_dc1;
logic [31:0] lsu_ld_datafn_dc3;
logic [31:0] lsu_ld_datafn_corr_dc3;
logic [31:0] lsu_result_corr_dc3;
logic [2:0] addr_offset_dc1;
logic [63:0] dma_mem_wdata_shifted;
logic addr_external_dc1;
logic access_fault_dc1, misaligned_fault_dc1;
logic access_fault_dc2, misaligned_fault_dc2;
logic access_fault_dc3, misaligned_fault_dc3;
logic [63:0] store_data_d;
logic [63:0] store_data_dc1;
logic [63:0] store_data_pre_dc2;
logic [63:0] store_data_pre_dc3;
logic [63:0] store_data_dc2_in;
logic [31:0] rs1_dc1_raw;
lsu_pkt_t dma_pkt_d;
lsu_pkt_t lsu_pkt_dc1_in, lsu_pkt_dc2_in, lsu_pkt_dc3_in, lsu_pkt_dc4_in, lsu_pkt_dc5_in;
// Premux the rs1/offset for dma
assign lsu_rs1_d[31:0] = dma_dccm_req ? dma_mem_addr[31:0] : exu_lsu_rs1_d[31:0];
assign lsu_offset_d[11:0] = dec_lsu_offset_d[11:0] & ~{12{dma_dccm_req}};
rvdffe #(32) rs1ff (.*, .din(lsu_rs1_d[31:0]), .dout(rs1_dc1_raw[31:0]), .en(lsu_freeze_c1_dc1_clken));
rvdffe #(12) offsetff (.*, .din(lsu_offset_d[11:0]), .dout(offset_dc1[11:0]), .en(lsu_freeze_c1_dc1_clken));
assign rs1_dc1[31:0] = (lsu_pkt_dc1.load_ldst_bypass_c1) ? lsu_result_dc3[31:0] : rs1_dc1_raw[31:0];
// generate the ls address
// need to refine this is memory is only 128KB
rvlsadder lsadder (.rs1(rs1_dc1[31:0]),
.offset(offset_dc1[11:0]),
.dout(full_addr_dc1[31:0])
);
// Module to generate the memory map of the address
lsu_addrcheck addrcheck (
.start_addr_dc1(full_addr_dc1[31:0]),
.end_addr_dc1(full_end_addr_dc1[31:0]),
.*
);
// Calculate start/end address for load/store
assign addr_offset_dc1[2:0] = ({3{lsu_pkt_dc1.half}} & 3'b01) | ({3{lsu_pkt_dc1.word}} & 3'b11) | ({3{lsu_pkt_dc1.dword}} & 3'b111);
assign end_addr_offset_dc1[12:0] = {offset_dc1[11],offset_dc1[11:0]} + {9'b0,addr_offset_dc1[2:0]};
assign full_end_addr_dc1[31:0] = rs1_dc1[31:0] + {{19{end_addr_offset_dc1[12]}},end_addr_offset_dc1[12:0]};
assign end_addr_dc1[31:0] = full_end_addr_dc1[31:0];
assign lsu_exc_dc2 = access_fault_dc2 | misaligned_fault_dc2;
assign lsu_freeze_external_ints_dc3 = lsu_freeze_dc3 & is_sideeffects_dc3;
// Increment the single bit ecc counter
assign lsu_single_ecc_error_incr = lsu_single_ecc_error_dc5 & (lsu_commit_dc5 | lsu_pkt_dc5.dma);
// Generate exception packet
assign lsu_error_pkt_dc3.exc_valid = (access_fault_dc3 | misaligned_fault_dc3 | ld_bus_error_dc3 | lsu_double_ecc_error_dc3) & lsu_pkt_dc3.valid & ~lsu_pkt_dc3.dma & ~flush_dc3;
assign lsu_error_pkt_dc3.single_ecc_error = lsu_single_ecc_error_dc3 & ~(access_fault_dc3 | misaligned_fault_dc3 | lsu_double_ecc_error_dc3);
assign lsu_error_pkt_dc3.inst_type = lsu_pkt_dc3.store;
assign lsu_error_pkt_dc3.dma_valid = lsu_pkt_dc3.dma;
assign lsu_error_pkt_dc3.inst_pipe = ~lsu_i0_valid_dc3;
assign lsu_error_pkt_dc3.exc_type = ~misaligned_fault_dc3;
// assign lsu_error_pkt_dc3.addr[31:0] = (access_fault_dc3 | misaligned_fault_dc3) ? lsu_addr_dc3[31:0] : ld_bus_error_addr_dc3[31:0];
assign lsu_error_pkt_dc3.addr[31:0] = lsu_addr_dc3[31:0];
//Create DMA packet
assign dma_pkt_d.valid = dma_dccm_req;
assign dma_pkt_d.dma = 1'b1;
assign dma_pkt_d.unsign = '0;
assign dma_pkt_d.store = dma_mem_write;
assign dma_pkt_d.load = ~dma_mem_write;
assign dma_pkt_d.by = (dma_mem_sz[2:0] == 3'b0);
assign dma_pkt_d.half = (dma_mem_sz[2:0] == 3'b1);
assign dma_pkt_d.word = (dma_mem_sz[2:0] == 3'b10);
assign dma_pkt_d.dword = (dma_mem_sz[2:0] == 3'b11);
assign dma_pkt_d.load_ldst_bypass_c1 = '0;
assign dma_pkt_d.store_data_bypass_c1 = '0;
assign dma_pkt_d.store_data_bypass_c2 = '0;
assign dma_pkt_d.store_data_bypass_i0_e2_c2 = '0;
assign dma_pkt_d.store_data_bypass_e4_c1 = '0;
assign dma_pkt_d.store_data_bypass_e4_c2 = '0;
assign dma_pkt_d.store_data_bypass_e4_c3 = '0;
always_comb begin
lsu_pkt_dc1_in = dma_dccm_req ? dma_pkt_d : lsu_p;
lsu_pkt_dc2_in = lsu_pkt_dc1;
lsu_pkt_dc3_in = lsu_pkt_dc2;
lsu_pkt_dc4_in = lsu_pkt_dc3;
lsu_pkt_dc5_in = lsu_pkt_dc4;
lsu_pkt_dc1_in.valid = (lsu_p.valid & ~flush_dc2_up) | dma_dccm_req;
lsu_pkt_dc2_in.valid = lsu_pkt_dc1.valid & ~(flush_dc2_up & ~lsu_pkt_dc1.dma);
lsu_pkt_dc3_in.valid = lsu_pkt_dc2.valid & ~(flush_dc2_up & ~lsu_pkt_dc2.dma);
lsu_pkt_dc4_in.valid = lsu_pkt_dc3.valid & ~(flush_dc3 & ~lsu_pkt_dc3.dma) & ~lsu_freeze_dc3;
lsu_pkt_dc5_in.valid = lsu_pkt_dc4.valid & ~(flush_dc4 & ~lsu_pkt_dc4.dma);
end
// C2 clock for valid and C1 for other bits of packet
rvdff #(1) lsu_pkt_vlddc1ff (.*, .din(lsu_pkt_dc1_in.valid), .dout(lsu_pkt_dc1.valid), .clk(lsu_freeze_c2_dc1_clk));
rvdff #(1) lsu_pkt_vlddc2ff (.*, .din(lsu_pkt_dc2_in.valid), .dout(lsu_pkt_dc2.valid), .clk(lsu_freeze_c2_dc2_clk));
rvdff #(1) lsu_pkt_vlddc3ff (.*, .din(lsu_pkt_dc3_in.valid), .dout(lsu_pkt_dc3.valid), .clk(lsu_freeze_c2_dc3_clk));
rvdff #(1) lsu_pkt_vlddc4ff (.*, .din(lsu_pkt_dc4_in.valid), .dout(lsu_pkt_dc4.valid), .clk(lsu_c2_dc4_clk));
rvdff #(1) lsu_pkt_vlddc5ff (.*, .din(lsu_pkt_dc5_in.valid), .dout(lsu_pkt_dc5.valid), .clk(lsu_c2_dc5_clk));
rvdffe #($bits(lsu_pkt_t)-1) lsu_pkt_dc1ff (.*, .din(lsu_pkt_dc1_in[$bits(lsu_pkt_t)-1:1]), .dout(lsu_pkt_dc1[$bits(lsu_pkt_t)-1:1]), .en(lsu_freeze_c1_dc1_clken));
rvdffe #($bits(lsu_pkt_t)-1) lsu_pkt_dc2ff (.*, .din(lsu_pkt_dc2_in[$bits(lsu_pkt_t)-1:1]), .dout(lsu_pkt_dc2[$bits(lsu_pkt_t)-1:1]), .en(lsu_freeze_c1_dc2_clken));
rvdffe #($bits(lsu_pkt_t)-1) lsu_pkt_dc3ff (.*, .din(lsu_pkt_dc3_in[$bits(lsu_pkt_t)-1:1]), .dout(lsu_pkt_dc3[$bits(lsu_pkt_t)-1:1]), .en(lsu_freeze_c1_dc3_clken));
rvdff #($bits(lsu_pkt_t)-1) lsu_pkt_dc4ff (.*, .din(lsu_pkt_dc4_in[$bits(lsu_pkt_t)-1:1]), .dout(lsu_pkt_dc4[$bits(lsu_pkt_t)-1:1]), .clk(lsu_c1_dc4_clk));
rvdff #($bits(lsu_pkt_t)-1) lsu_pkt_dc5ff (.*, .din(lsu_pkt_dc5_in[$bits(lsu_pkt_t)-1:1]), .dout(lsu_pkt_dc5[$bits(lsu_pkt_t)-1:1]), .clk(lsu_c1_dc5_clk));
assign lsu_ld_datafn_dc3[31:0] = addr_external_dc3 ? bus_read_data_dc3[31:0] : lsu_ld_data_dc3[31:0];
assign lsu_ld_datafn_corr_dc3[31:0] = addr_external_dc3 ? bus_read_data_dc3[31:0] : lsu_ld_data_corr_dc3[31:0];
// this result must look at prior stores and merge them in
assign lsu_result_dc3[31:0] = ({32{ lsu_pkt_dc3.unsign & lsu_pkt_dc3.by }} & {24'b0,lsu_ld_datafn_dc3[7:0]}) |
({32{ lsu_pkt_dc3.unsign & lsu_pkt_dc3.half}} & {16'b0,lsu_ld_datafn_dc3[15:0]}) |
({32{~lsu_pkt_dc3.unsign & lsu_pkt_dc3.by }} & {{24{ lsu_ld_datafn_dc3[7]}}, lsu_ld_datafn_dc3[7:0]}) |
({32{~lsu_pkt_dc3.unsign & lsu_pkt_dc3.half}} & {{16{ lsu_ld_datafn_dc3[15]}},lsu_ld_datafn_dc3[15:0]}) |
({32{lsu_pkt_dc3.word}} & lsu_ld_datafn_dc3[31:0]);
assign lsu_result_corr_dc3[31:0] = ({32{ lsu_pkt_dc3.unsign & lsu_pkt_dc3.by }} & {24'b0,lsu_ld_datafn_corr_dc3[7:0]}) |
({32{ lsu_pkt_dc3.unsign & lsu_pkt_dc3.half}} & {16'b0,lsu_ld_datafn_corr_dc3[15:0]}) |
({32{~lsu_pkt_dc3.unsign & lsu_pkt_dc3.by }} & {{24{ lsu_ld_datafn_corr_dc3[7]}}, lsu_ld_datafn_corr_dc3[7:0]}) |
({32{~lsu_pkt_dc3.unsign & lsu_pkt_dc3.half}} & {{16{ lsu_ld_datafn_corr_dc3[15]}},lsu_ld_datafn_corr_dc3[15:0]}) |
({32{lsu_pkt_dc3.word}} & lsu_ld_datafn_corr_dc3[31:0]);
// absence load/store all 0's
assign lsu_addr_dc1[31:0] = full_addr_dc1[31:0];
// Interrupt as a flush source allows the WB to occur
assign lsu_commit_dc5 = lsu_pkt_dc5.valid & (lsu_pkt_dc5.store | lsu_pkt_dc5.load) & ~flush_dc5 & ~lsu_pkt_dc5.dma;
assign dma_mem_wdata_shifted[63:0] = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores
assign store_data_d[63:0] = dma_dccm_req ? dma_mem_wdata_shifted[63:0] : {32'b0,exu_lsu_rs2_d[31:0]};
assign store_data_dc2_in[63:32] = store_data_dc1[63:32];
assign store_data_dc2_in[31:0] = (lsu_pkt_dc1.store_data_bypass_c1) ? lsu_result_dc3[31:0] :
(lsu_pkt_dc1.store_data_bypass_e4_c1[1]) ? i1_result_e4_eff[31:0] :
(lsu_pkt_dc1.store_data_bypass_e4_c1[0]) ? i0_result_e4_eff[31:0] : store_data_dc1[31:0];
assign store_data_dc2[63:32] = store_data_pre_dc2[63:32];
assign store_data_dc2[31:0] = (lsu_pkt_dc2.store_data_bypass_i0_e2_c2) ? i0_result_e2[31:0] :
(lsu_pkt_dc2.store_data_bypass_c2) ? lsu_result_dc3[31:0] :
(lsu_pkt_dc2.store_data_bypass_e4_c2[1]) ? i1_result_e4_eff[31:0] :
(lsu_pkt_dc2.store_data_bypass_e4_c2[0]) ? i0_result_e4_eff[31:0] : store_data_pre_dc2[31:0];
assign store_data_dc3[63:32] = store_data_pre_dc3[63:32];
assign store_data_dc3[31:0] = (picm_mask_data_dc3[31:0] | {32{~addr_in_pic_dc3}}) &
((lsu_pkt_dc3.store_data_bypass_e4_c3[1]) ? i1_result_e4_eff[31:0] :
(lsu_pkt_dc3.store_data_bypass_e4_c3[0]) ? i0_result_e4_eff[31:0] : store_data_pre_dc3[31:0]);
rvdff #(32) lsu_result_corr_dc4ff (.*, .din(lsu_result_corr_dc3[31:0]), .dout(lsu_result_corr_dc4[31:0]), .clk(lsu_c1_dc4_clk));
rvdffe #(64) sddc1ff (.*, .din(store_data_d[63:0]), .dout(store_data_dc1[63:0]), .en(lsu_store_c1_dc1_clken));
rvdffe #(64) sddc2ff (.*, .din(store_data_dc2_in[63:0]), .dout(store_data_pre_dc2[63:0]), .en(lsu_store_c1_dc2_clken));
rvdffe #(64) sddc3ff (.*, .din(store_data_dc2[63:0]), .dout(store_data_pre_dc3[63:0]), .en(~lsu_freeze_dc3 & lsu_store_c1_dc3_clken) );
rvdff #(32) sddc4ff (.*, .din(store_data_dc3[31:0]), .dout(store_data_dc4[31:0]), .clk(lsu_store_c1_dc4_clk));
rvdff #(32) sddc5ff (.*, .din(store_data_dc4[31:0]), .dout(store_data_dc5[31:0]), .clk(lsu_store_c1_dc5_clk));
rvdffe #(32) sadc2ff (.*, .din(lsu_addr_dc1[31:0]), .dout(lsu_addr_dc2[31:0]), .en(lsu_freeze_c1_dc2_clken));
rvdffe #(32) sadc3ff (.*, .din(lsu_addr_dc2[31:0]), .dout(lsu_addr_dc3[31:0]), .en(lsu_freeze_c1_dc3_clken));
rvdff #(32) sadc4ff (.*, .din(lsu_addr_dc3[31:0]), .dout(lsu_addr_dc4[31:0]), .clk(lsu_c1_dc4_clk));
rvdff #(32) sadc5ff (.*, .din(lsu_addr_dc4[31:0]), .dout(lsu_addr_dc5[31:0]), .clk(lsu_c1_dc5_clk));
rvdffe #(32) end_addr_dc2ff (.*, .din(end_addr_dc1[31:0]), .dout(end_addr_dc2[31:0]), .en(lsu_freeze_c1_dc2_clken));
rvdffe #(32) end_addr_dc3ff (.*, .din(end_addr_dc2[31:0]), .dout(end_addr_dc3[31:0]), .en(lsu_freeze_c1_dc3_clken));
rvdff #(32) end_addr_dc4ff (.*, .din(end_addr_dc3[31:0]), .dout(end_addr_dc4[31:0]), .clk(lsu_c1_dc4_clk));
rvdff #(32) end_addr_dc5ff (.*, .din(end_addr_dc4[31:0]), .dout(end_addr_dc5[31:0]), .clk(lsu_c1_dc5_clk));
rvdff #(1) addr_in_dccm_dc2ff(.din(addr_in_dccm_dc1), .dout(addr_in_dccm_dc2), .clk(lsu_freeze_c1_dc2_clk), .*);
rvdff #(1) addr_in_dccm_dc3ff(.din(addr_in_dccm_dc2), .dout(addr_in_dccm_dc3), .clk(lsu_freeze_c1_dc3_clk), .*);
rvdff #(1) addr_in_pic_dc2ff(.din(addr_in_pic_dc1), .dout(addr_in_pic_dc2), .clk(lsu_freeze_c1_dc2_clk), .*);
rvdff #(1) addr_in_pic_dc3ff(.din(addr_in_pic_dc2), .dout(addr_in_pic_dc3), .clk(lsu_freeze_c1_dc3_clk), .*);
rvdff #(1) addr_external_dc2ff(.din(addr_external_dc1), .dout(addr_external_dc2), .clk(lsu_freeze_c1_dc2_clk), .*);
rvdff #(1) addr_external_dc3ff(.din(addr_external_dc2), .dout(addr_external_dc3), .clk(lsu_freeze_c1_dc3_clk), .*);
rvdff #(1) addr_external_dc4ff(.din(addr_external_dc3), .dout(addr_external_dc4), .clk(lsu_c1_dc4_clk), .*);
rvdff #(1) addr_external_dc5ff(.din(addr_external_dc4), .dout(addr_external_dc5), .clk(lsu_c1_dc5_clk), .*);
rvdff #(1) access_fault_dc2ff(.din(access_fault_dc1), .dout(access_fault_dc2), .clk(lsu_freeze_c1_dc2_clk), .*);
rvdff #(1) access_fault_dc3ff(.din(access_fault_dc2), .dout(access_fault_dc3), .clk(lsu_freeze_c1_dc3_clk), .*);
rvdff #(1) misaligned_fault_dc2ff(.din(misaligned_fault_dc1), .dout(misaligned_fault_dc2), .clk(lsu_freeze_c1_dc2_clk), .*);
rvdff #(1) misaligned_fault_dc3ff(.din(misaligned_fault_dc2), .dout(misaligned_fault_dc3), .clk(lsu_freeze_c1_dc3_clk), .*);
endmodule