// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//********************************************************************************
// $Id$
//
//
// Owner:
// Function: lsu interface with interface queue
// Comments:
//
//********************************************************************************
//
// Overview of what this module does itself (everything else is delegated to
// the el2_lsu_bus_buffer instance, which is wired up by name through `.*`):
//   * derives the byte enables of the access in the m stage and registers
//     them into the r stage,
//   * splits byte enables and r-stage store data into lo/hi 32-bit word halves,
//   * compares the m-stage access against the r-stage store and builds the
//     per-byte forwarding hits and forwarding data,
//   * merges r-pipe forwarding with the forwarding result coming back from the
//     bus buffer and produces bus_read_data_m / ld_full_hit_m.
//
// NOTE: because of the `.*` connections, every module-scope signal name below
// is potentially part of the interface to the sub-instances. Do not rename
// or add module-scope signals without checking the sub-module port lists.
module el2_lsu_bus_intf
  import el2_pkg::*;
#(
    `include "el2_param.vh"
) (
    input logic clk,           // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
    input logic clk_override,  // Override non-functional clock gating
    input logic rst_l,         // reset, active low
    input logic scan_mode,     // scan mode

    input logic dec_tlu_external_ldfwd_disable,    // disable load to load forwarding for externals
    input logic dec_tlu_wb_coalescing_disable,     // disable write buffer coalescing
    input logic dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus

    // various clocks needed for the bus reads and writes
    input logic lsu_bus_obuf_c1_clken,  // obuf clock enable
    input logic lsu_busm_clken,         // bus clock enable
    input logic lsu_c1_r_clk,           // r pipe single pulse clock
    input logic lsu_c2_r_clk,           // r pipe double pulse clock
    input logic lsu_bus_ibuf_c1_clk,    // ibuf single pulse clock
    input logic lsu_bus_obuf_c1_clk,    // obuf single pulse clock
    input logic lsu_bus_buf_c1_clk,     // buf single pulse clock
    input logic lsu_free_c2_clk,        // free clock double pulse clock
    input logic active_clk,             // Clock only while core active. Through two clock headers. For flops without second clock header built in.
    input logic lsu_busm_clk,           // bus clock

    input logic dec_lsu_valid_raw_d,  // Raw valid for address computation
    input logic lsu_busreq_m,         // bus request is in m

    input el2_lsu_pkt_t        lsu_pkt_m,     // lsu packet flowing down the pipe
    input el2_lsu_pkt_t        lsu_pkt_r,     // lsu packet flowing down the pipe
    input logic         [31:0] lsu_addr_m,    // lsu address flowing down the pipe
    input logic         [31:0] lsu_addr_r,    // lsu address flowing down the pipe
    input logic         [31:0] end_addr_m,    // lsu address flowing down the pipe
    input logic         [31:0] end_addr_r,    // lsu address flowing down the pipe
    input logic         [31:0] store_data_r,  // store data flowing down the pipe
    input logic                dec_tlu_force_halt,

    input logic lsu_commit_r,      // lsu instruction in r commits
    input logic is_sideeffects_m,  // lsu attribute is side_effects
    input logic flush_m_up,        // flush
    input logic flush_r,           // flush
    input logic ldst_dual_d,
    input logic ldst_dual_m,
    input logic ldst_dual_r,

    output logic lsu_busreq_r,              // bus request is in r
    output logic lsu_bus_buffer_pend_any,   // bus buffer has a pending bus entry
    output logic lsu_bus_buffer_full_any,   // write buffer is full
    output logic lsu_bus_buffer_empty_any,  // write buffer is empty
    output logic [31:0] bus_read_data_m,    // the bus return data

    output logic        lsu_imprecise_error_load_any,   // imprecise load bus error
    output logic        lsu_imprecise_error_store_any,  // imprecise store bus error
    output logic [31:0] lsu_imprecise_error_addr_any,   // address of the imprecise error

    // Non-blocking loads
    output logic                               lsu_nonblock_load_valid_m,     // there is an external load -> put in the cam
    output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m,       // the tag of the external non block load
    output logic                               lsu_nonblock_load_inv_r,       // invalidate signal for the cam entry for non block loads
    output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r,   // tag of the entry which needs to be invalidated
    output logic                               lsu_nonblock_load_data_valid,  // the non block is valid - sending information back to the cam
    output logic                               lsu_nonblock_load_data_error,  // non block load has an error
    output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag,    // the tag of the non block load sending the data/error
    output logic [                       31:0] lsu_nonblock_load_data,        // Data of the non block load

    // PMU events
    output logic lsu_pmu_bus_trxn,
    output logic lsu_pmu_bus_misaligned,
    output logic lsu_pmu_bus_error,
    output logic lsu_pmu_bus_busy,

    // AXI Write Channels
    output logic                      lsu_axi_awvalid,
    input  logic                      lsu_axi_awready,
    output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid,
    output logic [              31:0] lsu_axi_awaddr,
    output logic [               3:0] lsu_axi_awregion,
    output logic [               7:0] lsu_axi_awlen,
    output logic [               2:0] lsu_axi_awsize,
    output logic [               1:0] lsu_axi_awburst,
    output logic                      lsu_axi_awlock,
    output logic [               3:0] lsu_axi_awcache,
    output logic [               2:0] lsu_axi_awprot,
    output logic [               3:0] lsu_axi_awqos,

    output logic        lsu_axi_wvalid,
    input  logic        lsu_axi_wready,
    output logic [63:0] lsu_axi_wdata,
    output logic [ 7:0] lsu_axi_wstrb,
    output logic        lsu_axi_wlast,

    input  logic                      lsu_axi_bvalid,
    output logic                      lsu_axi_bready,
    input  logic [               1:0] lsu_axi_bresp,
    input  logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid,

    // AXI Read Channels
    output logic                      lsu_axi_arvalid,
    input  logic                      lsu_axi_arready,
    output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid,
    output logic [              31:0] lsu_axi_araddr,
    output logic [               3:0] lsu_axi_arregion,
    output logic [               7:0] lsu_axi_arlen,
    output logic [               2:0] lsu_axi_arsize,
    output logic [               1:0] lsu_axi_arburst,
    output logic                      lsu_axi_arlock,
    output logic [               3:0] lsu_axi_arcache,
    output logic [               2:0] lsu_axi_arprot,
    output logic [               3:0] lsu_axi_arqos,

    input  logic                      lsu_axi_rvalid,
    output logic                      lsu_axi_rready,
    input  logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid,
    input  logic [              63:0] lsu_axi_rdata,
    input  logic [               1:0] lsu_axi_rresp,

    input logic lsu_bus_clk_en
);

  // ------------------------------------------------------------------
  // Local signals
  // ------------------------------------------------------------------
  logic        lsu_bus_clk_en_q;  // lsu_bus_clk_en registered on active_clk

  // Byte enables: 4-bit per access, 8-bit after alignment to the word offset,
  // then split into the lo word ([3:0]) and the hi word ([7:4]).
  logic [ 3:0] ldst_byteen_m, ldst_byteen_r;
  logic [ 7:0] ldst_byteen_ext_m, ldst_byteen_ext_r;
  logic [ 3:0] ldst_byteen_hi_m, ldst_byteen_hi_r;
  logic [ 3:0] ldst_byteen_lo_m, ldst_byteen_lo_r;
  logic        is_sideeffects_r;  // is_sideeffects_m registered on lsu_c1_r_clk

  // r-stage store data aligned to its byte offset and split into lo/hi words.
  logic [63:0] store_data_ext_r;
  logic [31:0] store_data_hi_r;
  logic [31:0] store_data_lo_r;

  // Address comparisons between the r-stage and m-stage accesses.
  logic        addr_match_dw_lo_r_m;
  logic        addr_match_word_lo_r_m;
  logic        no_word_merge_r, no_dword_merge_r;

  // Naming of the *_rhit_X_Y signals: X = word half of the r-stage store,
  // Y = word half of the m-stage access.
  logic        ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi;
  logic [ 3:0] ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi;

  logic [ 3:0] ld_byte_hit_lo, ld_byte_rhit_lo;
  logic [ 3:0] ld_byte_hit_hi, ld_byte_rhit_hi;

  logic [31:0] ld_fwddata_rpipe_lo;
  logic [31:0] ld_fwddata_rpipe_hi;

  // Not driven in this module; presumably outputs of bus_buffer picked up
  // through `.*` -- confirm against el2_lsu_bus_buffer.
  logic [ 3:0] ld_byte_hit_buf_lo, ld_byte_hit_buf_hi;
  logic [31:0] ld_fwddata_buf_lo, ld_fwddata_buf_hi;

  // Only bits [31:0] of these two vectors are driven and read below.
  logic [63:0] ld_fwddata_lo, ld_fwddata_hi;
  logic [63:0] ld_fwddata_m;

  logic        ld_full_hit_hi_m, ld_full_hit_lo_m;
  logic        ld_full_hit_m;

  // ------------------------------------------------------------------
  // Byte enables of the m-stage access (before address alignment):
  // byte -> 0001, half -> 0011, word -> 1111
  // ------------------------------------------------------------------
  assign ldst_byteen_m = (4'b1111 & {4{lsu_pkt_m.word}}) |
                         (4'b0011 & {4{lsu_pkt_m.half}}) |
                         (4'b0001 & {4{lsu_pkt_m.by}});

  // Read/Write Buffer (all ports connected by name)
  el2_lsu_bus_buffer #(
      .pt(pt)
  ) bus_buffer (
      .*
  );

  // ------------------------------------------------------------------
  // Logic to determine if the store in r can be coalesced or not with
  // younger stores. Bypass ibuf if it cannot be coalesced.
  // ------------------------------------------------------------------
  // Same 64-bit aligned double word / same 32-bit aligned word in r and m.
  assign addr_match_dw_lo_r_m   = (lsu_addr_m[31:3] == lsu_addr_r[31:3]);
  assign addr_match_word_lo_r_m = (lsu_addr_r[2] ~^ lsu_addr_m[2]) & addr_match_dw_lo_r_m;

  // No merge when a non-dual bus request sits in r, another bus request is in
  // m, and that m access is a load or targets a different word / double word.
  assign no_word_merge_r  = (lsu_pkt_m.load | ~addr_match_word_lo_r_m) &
                            lsu_busreq_m & lsu_busreq_r & ~ldst_dual_r;
  assign no_dword_merge_r = (lsu_pkt_m.load | ~addr_match_dw_lo_r_m) &
                            lsu_busreq_m & lsu_busreq_r & ~ldst_dual_r;

  // ------------------------------------------------------------------
  // Create Hi/Lo signals
  // ------------------------------------------------------------------
  // Zero-extend to two words, then shift by the byte offset inside the word.
  assign ldst_byteen_ext_m = {4'b0000, ldst_byteen_m} << lsu_addr_m[1:0];
  assign ldst_byteen_ext_r = {4'b0000, ldst_byteen_r} << lsu_addr_r[1:0];

  // Same alignment for the store data; shift amount is byte offset * 8 bits.
  assign store_data_ext_r  = {32'h0000_0000, store_data_r} << {lsu_addr_r[1:0], 3'b000};

  assign {ldst_byteen_hi_m, ldst_byteen_lo_m} = ldst_byteen_ext_m;
  assign {ldst_byteen_hi_r, ldst_byteen_lo_r} = ldst_byteen_ext_r;
  assign {store_data_hi_r, store_data_lo_r}   = store_data_ext_r;

  // ------------------------------------------------------------------
  // Word-address hits of the m-stage access against the r-stage store.
  // Each hit additionally requires a valid store in r and bus requests
  // in both m and r.
  // ------------------------------------------------------------------
  assign ld_addr_rhit_lo_lo = lsu_busreq_m & lsu_busreq_r & lsu_pkt_r.store & lsu_pkt_r.valid &
                              (lsu_addr_r[31:2] == lsu_addr_m[31:2]);
  assign ld_addr_rhit_lo_hi = lsu_busreq_m & lsu_busreq_r & lsu_pkt_r.store & lsu_pkt_r.valid &
                              (lsu_addr_r[31:2] == end_addr_m[31:2]);
  assign ld_addr_rhit_hi_lo = lsu_busreq_m & lsu_busreq_r & lsu_pkt_r.store & lsu_pkt_r.valid &
                              (end_addr_r[31:2] == lsu_addr_m[31:2]);
  assign ld_addr_rhit_hi_hi = lsu_busreq_m & lsu_busreq_r & lsu_pkt_r.store & lsu_pkt_r.valid &
                              (end_addr_r[31:2] == end_addr_m[31:2]);

  // Per-byte hits: word address hit and byte enabled in both r and m.
  assign ld_byte_rhit_lo_lo = {4{ld_addr_rhit_lo_lo}} & ldst_byteen_lo_r & ldst_byteen_lo_m;
  assign ld_byte_rhit_lo_hi = {4{ld_addr_rhit_lo_hi}} & ldst_byteen_lo_r & ldst_byteen_hi_m;
  assign ld_byte_rhit_hi_lo = {4{ld_addr_rhit_hi_lo}} & ldst_byteen_hi_r & ldst_byteen_lo_m;
  assign ld_byte_rhit_hi_hi = {4{ld_addr_rhit_hi_hi}} & ldst_byteen_hi_r & ldst_byteen_hi_m;

  // r-pipe hits per m-stage word half, and total hits including the buffer.
  assign ld_byte_rhit_lo = ld_byte_rhit_lo_lo | ld_byte_rhit_hi_lo;
  assign ld_byte_rhit_hi = ld_byte_rhit_lo_hi | ld_byte_rhit_hi_hi;
  assign ld_byte_hit_lo  = ld_byte_rhit_lo | ld_byte_hit_buf_lo;
  assign ld_byte_hit_hi  = ld_byte_rhit_hi | ld_byte_hit_buf_hi;

  // Byte-lane forwarding data.
  for (genvar b = 0; b < 4; b++) begin : GenBusBufFwd
    // Data forwarded from the store in r (AND-OR select of lo/hi store word).
    assign ld_fwddata_rpipe_lo[8*b +: 8] =
        (store_data_lo_r[8*b +: 8] & {8{ld_byte_rhit_lo_lo[b]}}) |
        (store_data_hi_r[8*b +: 8] & {8{ld_byte_rhit_hi_lo[b]}});

    assign ld_fwddata_rpipe_hi[8*b +: 8] =
        (store_data_lo_r[8*b +: 8] & {8{ld_byte_rhit_lo_hi[b]}}) |
        (store_data_hi_r[8*b +: 8] & {8{ld_byte_rhit_hi_hi[b]}});

    // Final muxing between r pipe and buffer: an r-pipe hit takes priority.
    assign ld_fwddata_lo[8*b +: 8] = ld_byte_rhit_lo[b] ? ld_fwddata_rpipe_lo[8*b +: 8]
                                                        : ld_fwddata_buf_lo[8*b +: 8];
    assign ld_fwddata_hi[8*b +: 8] = ld_byte_rhit_hi[b] ? ld_fwddata_rpipe_hi[8*b +: 8]
                                                        : ld_fwddata_buf_hi[8*b +: 8];
  end

  // A word half is fully hit when every enabled byte of it has a hit
  // (bytes that are not enabled do not need one).
  assign ld_full_hit_lo_m = &(ld_byte_hit_lo | ~ldst_byteen_lo_m);
  assign ld_full_hit_hi_m = &(ld_byte_hit_hi | ~ldst_byteen_hi_m);

  // This will be high if all the bytes of load hit the stores in pipe/write buffer (m/r/wrbuf)
  assign ld_full_hit_m = lsu_busreq_m & lsu_pkt_m.load & ~is_sideeffects_m &
                         ld_full_hit_lo_m & ld_full_hit_hi_m;

  // Re-align the forwarded data to the load's byte offset (offset * 8 bits)
  // and return the low word.
  assign ld_fwddata_m    = {ld_fwddata_hi[31:0], ld_fwddata_lo[31:0]} >> {lsu_addr_m[1:0], 3'b000};
  assign bus_read_data_m = ld_fwddata_m[31:0];

  // ------------------------------------------------------------------
  // Fifo flops (remaining rvdff ports are connected by name through `.*`)
  // ------------------------------------------------------------------
  rvdff #(
      .WIDTH(1)
  ) clken_ff (
      .*,
      .clk (active_clk),
      .din (lsu_bus_clk_en),
      .dout(lsu_bus_clk_en_q)
  );

  rvdff #(
      .WIDTH(1)
  ) is_sideeffects_rff (
      .*,
      .clk (lsu_c1_r_clk),
      .din (is_sideeffects_m),
      .dout(is_sideeffects_r)
  );

  rvdff #(4) lsu_byten_rff (
      .*,
      .clk (lsu_c1_r_clk),
      .din (ldst_byteen_m),
      .dout(ldst_byteen_r)
  );

endmodule  // el2_lsu_bus_intf