From 8065eef6770f4d1ea15683fe88022beae83f732d Mon Sep 17 00:00:00 2001 From: Joseph Rahmeh Date: Thu, 25 Jun 2020 19:59:36 -0700 Subject: [PATCH] Branch for version 1.7 --- README.md | 13 +++-- configs/swerv.config | 89 +++++++++++++++-------------- design/dbg/dbg.sv | 43 +++++++++----- design/dec/dec_tlu_ctl.sv | 2 +- design/dma_ctrl.sv | 48 +++++++++------- design/exu/exu_mul_ctl.sv | 30 +++++----- design/ifu/ifu.sv | 4 +- design/ifu/ifu_bp_ctl.sv | 15 +++-- design/ifu/ifu_ic_mem.sv | 65 ++++++++++++--------- design/ifu/ifu_iccm_mem.sv | 3 +- design/ifu/ifu_mem_ctl.sv | 106 ++++++++++++++++++++--------------- design/lib/ahb_to_axi4.sv | 54 +++++++++++------- design/lib/axi4_to_ahb.sv | 73 +++++++++++++----------- design/lib/beh_lib.sv | 75 ++++++++++++++++++++++++- design/lsu/lsu.sv | 12 ++-- design/lsu/lsu_addrcheck.sv | 8 ++- design/lsu/lsu_bus_buffer.sv | 49 ++++++++-------- design/lsu/lsu_bus_intf.sv | 16 ++++-- design/lsu/lsu_clkdomain.sv | 55 +++++++++++------- design/lsu/lsu_dccm_ctl.sv | 16 ++++-- design/lsu/lsu_dccm_mem.sv | 20 ++++++- design/lsu/lsu_lsc_ctl.sv | 29 +++++----- design/lsu/lsu_stbuf.sv | 12 ++-- design/mem.sv | 5 +- design/pic_ctrl.sv | 6 +- design/swerv.sv | 2 +- design/swerv_wrapper.sv | 2 +- docs/README.md | 4 +- release-notes.md | 11 ++++ 29 files changed, 553 insertions(+), 314 deletions(-) diff --git a/README.md b/README.md index fc8f36c..7e3161a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# EH1 SweRV RISC-V CoreTM 1.6 from Western Digital +# EH1 SweRV RISC-V CoreTM 1.7 from Western Digital -This repository contains the SweRV EH1 ver 1.6 CoreTM design RTL +This repository contains the SweRV EH1 CoreTM design RTL ## License @@ -28,7 +28,7 @@ Files under the [tools](tools/) directory may be available under a different lic ## Dependencies -- Verilator **(4.020 or later)** must be installed on the system if running with verilator +- Verilator **(4.030 or later)** must be installed on the system if running with verilator - If adding/removing instructions, espresso must be installed (used by *tools/coredecode*) - RISCV tool chain (based on gcc version 7.3 or higher) must be installed so that it can be used to prepare RISCV binaries to run. @@ -192,7 +192,12 @@ The `$RV_ROOT/testbench/hex` directory contains precompiled hex files of the tes **Building an FPGA speed optimized model:** -Use ``-fpga_optimize=1`` option to ``swerv.config`` to build a model that is removes clock gating logic from flop model so that the FPGA builds can run a higher speeds. +Use ``-fpga_optimize=1`` option to ``swerv.config`` to build a model that removes clock gating logic from flop model so that the FPGA builds can run at higher speeds. **This is now the default option for +targets other than ``default_pd``.** + +**Building a Power optimized model (ASIC flows):** +Use ``-fpga_optimize=0`` option to ``swerv.config`` to build a model that **enables** clock gating logic into the flop model so that the ASIC flows get a better power footprint. **This is now the default option for +target``default_pd``.** ---- Western Digital, the Western Digital logo, G-Technology, SanDisk, Tegile, Upthere, WD, SweRV Core, SweRV ISS, diff --git a/configs/swerv.config b/configs/swerv.config index 4ff6d4f..f4d6dc4 100755 --- a/configs/swerv.config +++ b/configs/swerv.config @@ -270,6 +270,7 @@ if ($target eq "default") { if (!defined($pic_offset)) { $pic_offset="0xc0000"; } # 3*256*1024 if (!defined($pic_size)) { $pic_size=32; } if (!defined($pic_total_int)) { $pic_total_int=8; } + if (!defined($fpga_optimize)) { $fpga_optimize=1; } # default is AXI bus } @@ -295,6 +296,7 @@ elsif ($target eq "default_ahb") { if (!defined($pic_offset)) { $pic_offset="0xc0000"; } # 3*256*1024 if (!defined($pic_size)) { $pic_size=32; } if (!defined($pic_total_int)) { $pic_total_int=8; } + if (!defined($fpga_optimize)) { $fpga_optimize=1; } $ahb_lite = 1; } elsif ($target eq "default_pd") { @@ -319,6 +321,7 @@ elsif ($target eq "default_ahb") { if (!defined($pic_offset)) { $pic_offset="0xc0000"; } # 3*256*1024 if (!defined($pic_size)) { $pic_size=32; } if (!defined($pic_total_int)) { $pic_total_int=8; } + if (!defined($fpga_optimize)) { $fpga_optimize=0; } } elsif ($target eq "high_perf") { if (!defined($ret_stack_size)) { $ret_stack_size=4; } @@ -342,6 +345,7 @@ elsif ($target eq "default_ahb") { if (!defined($pic_offset)) { $pic_offset="0xc0000"; } # 3*256*1024 if (!defined($pic_size)) { $pic_size=32; } if (!defined($pic_total_int)) { $pic_total_int=8; } + if (!defined($fpga_optimize)) { $fpga_optimize=1; } } else { die "$self: ERROR! Unsupported target \"$target\". Supported targets are: \"default, default_ahb, default_pd, high_perf\"!\n"; @@ -413,7 +417,7 @@ our %csr = (#{{{ "exists" => "true", }, "mimpid" => { - "reset" => "0x3", + "reset" => "0x4", "mask" => "0x0", "exists" => "true", }, @@ -890,7 +894,7 @@ our %config = (#{{{ "external_prog" => 'derived, overridable', # Testbench only "debug_sb_mem" => 'derived, overridable', # Testbench only "external_data_1" => 'derived, overridable', # Testbench only - "external_mem_hole" => 'derived, overridable', # Testbench only + "external_mem_hole" => 'default disabled', # Testbench only # "consoleio" => 'derived', # Part of serial io. }, "bus" => { @@ -1376,55 +1380,54 @@ if (hex($config{protection}{data_access_enable0}) > 0 || hex($config{protection}{inst_access_enable4}) > 0 || hex($config{protection}{inst_access_enable5}) > 0 || hex($config{protection}{inst_access_enable6}) > 0 || - hex($config{protection}{inst_access_enable7}) > 0) { + hex($config{protection}{inst_access_enable7}) > 0 || + $config{memmap}{external_mem_hole} eq "default disabled"){ delete($config{memmap}{external_mem_hole}) ; } else { - # Unused region to create a memory map hole - for (my $rgn = 15;$rgn >= 0; $rgn--) { - if (!defined($regions_used{$rgn})) { - $config{memmap}{external_mem_hole} = ($rgn << 28); - $regions_used{$rgn} = 1; - last; + # Unused region to create a memory map hole, if not already specified + if ($config{memmap}{external_mem_hole} eq "derived, overridable") { + for (my $rgn = 15;$rgn >= 0; $rgn--) { + if (!defined($regions_used{$rgn})) { + $config{memmap}{external_mem_hole} = ($rgn << 28); + $regions_used{$rgn} = 1; + last; + } } - } - if ($config{memmap}{external_mem_hole} == 0) { - $config{protection}{data_access_addr0} = "0x10000000"; - $config{protection}{data_access_mask0} = "0xffffffff"; - $config{protection}{data_access_enable0} = "1"; - } elsif (($config{memmap}{external_mem_hole}>>28) == 16) { - $config{protection}{data_access_addr0} = "0x00000000"; - $config{protection}{data_access_mask0} = "0xefffffff"; - $config{protection}{data_access_enable0} = "1"; } else { - my $hreg = $config{memmap}{external_mem_hole}>>28; - $config{protection}{data_access_addr0} = sprintf("0x%x", (($hreg^8)&8)<<28); - $config{protection}{data_access_mask0} = "0x7fffffff"; - $config{protection}{data_access_addr1} = sprintf("0x%x", ($hreg&8) << 28 |(($hreg^4)&4)<<28); - $config{protection}{data_access_mask1} = "0x3fffffff"; - $config{protection}{data_access_addr2} = sprintf("0x%x", ($hreg&12) <<28 | (($hreg^2)&2) <<28); - $config{protection}{data_access_mask2} = "0x1fffffff"; - $config{protection}{data_access_addr3} = sprintf("0x%x", ($hreg&14) << 28 |(($hreg^1)&1)<<28); - $config{protection}{data_access_mask3} = "0x0fffffff"; - $config{protection}{data_access_enable0} = "1"; - $config{protection}{data_access_enable1} = "1"; - $config{protection}{data_access_enable2} = "1"; - $config{protection}{data_access_enable3} = "1"; - $config{protection}{inst_access_addr0} = sprintf("0x%x", (($hreg^8)&8)<<28); - $config{protection}{inst_access_mask0} = "0x7fffffff"; - $config{protection}{inst_access_addr1} = sprintf("0x%x", ($hreg&8) << 28 |(($hreg^4)&4)<<28); - $config{protection}{inst_access_mask1} = "0x3fffffff"; - $config{protection}{inst_access_addr2} = sprintf("0x%x", ($hreg&12) <<28 | (($hreg^2)&2) <<28); - $config{protection}{inst_access_mask2} = "0x1fffffff"; - $config{protection}{inst_access_addr3} = sprintf("0x%x", ($hreg&14) << 28 |(($hreg^1)&1)<<28); - $config{protection}{inst_access_mask3} = "0x0fffffff"; - $config{protection}{inst_access_enable0} = "1"; - $config{protection}{inst_access_enable1} = "1"; - $config{protection}{inst_access_enable2} = "1"; - $config{protection}{inst_access_enable3} = "1"; + my $rgn = hex($config{memmap}{external_mem_hole})>>28; + $config{memmap}{external_mem_hole} = ($rgn << 28); + $regions_used{$rgn} =1; } + my $hreg = $config{memmap}{external_mem_hole}>>28; + $config{protection}{data_access_addr0} = sprintf("0x%x", (($hreg^8)&8)<<28); + $config{protection}{data_access_mask0} = "0x7fffffff"; + $config{protection}{data_access_addr1} = sprintf("0x%x", ($hreg&8) << 28 |(($hreg^4)&4)<<28); + $config{protection}{data_access_mask1} = "0x3fffffff"; + $config{protection}{data_access_addr2} = sprintf("0x%x", ($hreg&12) <<28 | (($hreg^2)&2) <<28); + $config{protection}{data_access_mask2} = "0x1fffffff"; + $config{protection}{data_access_addr3} = sprintf("0x%x", ($hreg&14) << 28 |(($hreg^1)&1)<<28); + $config{protection}{data_access_mask3} = "0x0fffffff"; + $config{protection}{data_access_enable0} = "1"; + $config{protection}{data_access_enable1} = "1"; + $config{protection}{data_access_enable2} = "1"; + $config{protection}{data_access_enable3} = "1"; + $config{protection}{inst_access_addr0} = sprintf("0x%x", (($hreg^8)&8)<<28); + $config{protection}{inst_access_mask0} = "0x7fffffff"; + $config{protection}{inst_access_addr1} = sprintf("0x%x", ($hreg&8) << 28 |(($hreg^4)&4)<<28); + $config{protection}{inst_access_mask1} = "0x3fffffff"; + $config{protection}{inst_access_addr2} = sprintf("0x%x", ($hreg&12) <<28 | (($hreg^2)&2) <<28); + $config{protection}{inst_access_mask2} = "0x1fffffff"; + $config{protection}{inst_access_addr3} = sprintf("0x%x", ($hreg&14) << 28 |(($hreg^1)&1)<<28); + $config{protection}{inst_access_mask3} = "0x0fffffff"; + $config{protection}{inst_access_enable0} = "1"; + $config{protection}{inst_access_enable1} = "1"; + $config{protection}{inst_access_enable2} = "1"; + $config{protection}{inst_access_enable3} = "1"; + $config{memmap}{external_mem_hole} = sprintf("0x%08x", $config{memmap}{external_mem_hole}); } + #Define 5 unused regions for used in TG foreach my $unr (reverse(0 .. 15)) { diff --git a/design/dbg/dbg.sv b/design/dbg/dbg.sv index 6e46423..0027eea 100644 --- a/design/dbg/dbg.sv +++ b/design/dbg/dbg.sv @@ -161,6 +161,8 @@ module dbg ( logic dmcontrol_wren, dmcontrol_wren_Q; // command logic command_wren; + logic command_transfer_din; + logic command_postexec_din; logic [31:0] command_din; // needed to send the read data back for dmi reads logic [31:0] dmi_reg_rdata_din; @@ -180,6 +182,7 @@ module dbg ( logic [2:0] sbcs_sberror_din; logic sbcs_unaligned; logic sbcs_illegal_size; + logic [19:15] sbcs_reg_int; // data logic sbdata0_reg_wren0; @@ -239,7 +242,10 @@ module dbg ( rvoclkhdr dbg_free_cgc (.en(dbg_free_clken), .l1clk(dbg_free_clk), .*); rvoclkhdr sb_free_cgc (.en(sb_free_clken), .l1clk(sb_free_clk), .*); - rvclkhdr bus_cgc (.en(bus_clken), .l1clk(bus_clk), .*); + +`ifndef RV_FPGA_OPTIMIZE + rvclkhdr bus_cgc (.en(bus_clken), .l1clk(bus_clk), .*); // ifndef FPGA_OPTIMIZE +`endif // end clocking section @@ -251,6 +257,7 @@ module dbg ( // sbcs[31:29], sbcs - [22]:sbbusyerror, [21]: sbbusy, [20]:sbreadonaddr, [19:17]:sbaccess, [16]:sbautoincrement, [15]:sbreadondata, [14:12]:sberror, sbsize=32, 128=0, 64/32/16/8 are legal assign sbcs_reg[31:29] = 3'b1; assign sbcs_reg[28:23] = '0; + assign sbcs_reg[19:15] = {sbcs_reg_int[19], ~sbcs_reg_int[18], sbcs_reg_int[17:15]}; assign sbcs_reg[11:5] = 7'h20; assign sbcs_reg[4:0] = 5'b01111; assign sbcs_wren = (dmi_reg_addr == 7'h38) & dmi_reg_en & dmi_reg_wr_en & (sb_state == SBIDLE); // & (sbcs_reg[14:12] == 3'b000); @@ -261,7 +268,8 @@ module dbg ( rvdffs #(1) sbcs_sbbusyerror_reg (.din(sbcs_sbbusyerror_din), .dout(sbcs_reg[22]), .en(sbcs_sbbusyerror_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); rvdffs #(1) sbcs_sbbusy_reg (.din(sbcs_sbbusy_din), .dout(sbcs_reg[21]), .en(sbcs_sbbusy_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); rvdffs #(1) sbcs_sbreadonaddr_reg (.din(dmi_reg_wdata[20]), .dout(sbcs_reg[20]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); - rvdffs #(5) sbcs_misc_reg (.din(dmi_reg_wdata[19:15]), .dout(sbcs_reg[19:15]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + rvdffs #(5) sbcs_misc_reg (.din({dmi_reg_wdata[19],~dmi_reg_wdata[18],dmi_reg_wdata[17:15]}), + .dout(sbcs_reg_int[19:15]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); rvdffs #(3) sbcs_error_reg (.din(sbcs_sberror_din[2:0]), .dout(sbcs_reg[14:12]), .en(sbcs_sberror_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); assign sbcs_unaligned = ((sbcs_reg[19:17] == 3'b001) & sbaddress0_reg[0]) | @@ -354,11 +362,14 @@ module dbg ( assign abstractcs_reg[7:4] = '0; assign abstractcs_reg[3:0] = 4'h2; // One data register assign abstractcs_error_sel0 = abstractcs_reg[12] & dmi_reg_en & ((dmi_reg_wr_en & ( (dmi_reg_addr == 7'h16) | (dmi_reg_addr == 7'h17))) | (dmi_reg_addr == 7'h4)); - assign abstractcs_error_sel1 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h17) & ~((dmi_reg_wdata[31:24] == 8'b0) | (dmi_reg_wdata[31:24] == 8'h2)); + assign abstractcs_error_sel1 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h17) & + ((~((dmi_reg_wdata[31:24] == 8'b0) | (dmi_reg_wdata[31:24] == 8'h2))) | // Illegal command + ((dmi_reg_wdata[22:20] != 3'b010) & ((dmi_reg_wdata[31:24] == 8'h2) | ((dmi_reg_wdata[31:24] == 8'h0) & dmi_reg_wdata[17]))) | // Illegal size + ((dmi_reg_wdata[31:24] == 8'h0) & (dmi_reg_wdata[19] | dmi_reg_wdata[18])) | //aarpostincrement/postexec for abstract register access + ((dmi_reg_wdata[31:24] == 8'h2) & dmi_reg_wdata[19])); //aampostincrement for abstract memory access assign abstractcs_error_sel2 = core_dbg_cmd_done & core_dbg_cmd_fail; assign abstractcs_error_sel3 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h17) & (dbg_state != HALTED); - assign abstractcs_error_sel4 = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en & - ((dmi_reg_wdata[22:20] != 3'b010) | ((dmi_reg_wdata[31:24] == 8'h2) && (|data1_reg[1:0]))); // Only word size is allowed + assign abstractcs_error_sel4 = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en & ((dmi_reg_wdata[31:24] == 8'h2) && (|data1_reg[1:0])); //Unaligned address for abstract memory assign abstractcs_error_sel5 = (dmi_reg_addr == 7'h16) & dmi_reg_en & dmi_reg_wr_en; @@ -379,7 +390,9 @@ module dbg ( // command register - implemented all the bits in this register // command[16] = 1: write, 0: read assign command_wren = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en & (dbg_state == HALTED); - assign command_din[31:0] = {dmi_reg_wdata[31:24],1'b0,dmi_reg_wdata[22:20],3'b0,dmi_reg_wdata[16:0]}; + assign command_postexec_din = (dmi_reg_wdata[31:24] == 8'h0) & dmi_reg_wdata[18]; + assign command_transfer_din = (dmi_reg_wdata[31:24] == 8'h0) & dmi_reg_wdata[17]; + assign command_din[31:0] = {dmi_reg_wdata[31:24],1'b0,dmi_reg_wdata[22:19],command_postexec_din,command_transfer_din, dmi_reg_wdata[16:0]}; rvdffe #(32) dmcommand_reg (.*, .din(command_din[31:0]), .dout(command_reg[31:0]), .en(command_wren), .rst_l(dbg_dm_rst_l)); // data0 reg @@ -388,7 +401,7 @@ module dbg ( assign data0_reg_wren = data0_reg_wren0 | data0_reg_wren1; assign data0_din[31:0] = ({32{data0_reg_wren0}} & dmi_reg_wdata[31:0]) | - ({32{data0_reg_wren1}} & core_dbg_rddata[31:0]); + ({32{data0_reg_wren1}} & core_dbg_rddata[31:0]); rvdffe #(32) dbg_data0_reg (.*, .din(data0_din[31:0]), .dout(data0_reg[31:0]), .en(data0_reg_wren), .rst_l(dbg_dm_rst_l)); @@ -434,8 +447,9 @@ module dbg ( dbg_resume_req = dbg_state_en & (dbg_nxtstate == RESUMING); // single cycle pulse to core if resuming end CMD_START: begin - dbg_nxtstate = dmcontrol_reg[1] ? IDLE : (|abstractcs_reg[10:8]) ? CMD_DONE : CMD_WAIT; // new command sent to the core - dbg_state_en = dbg_cmd_valid | (|abstractcs_reg[10:8]) | dmcontrol_reg[1]; + // Don't execute the command if cmderror or transfer=0 for abstract register access + dbg_nxtstate = dmcontrol_reg[1] ? IDLE : ((|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17])) ? CMD_DONE : CMD_WAIT; // new command sent to the core + dbg_state_en = dbg_cmd_valid | (|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17]) | dmcontrol_reg[1]; end CMD_WAIT: begin dbg_nxtstate = dmcontrol_reg[1] ? IDLE : CMD_DONE; @@ -464,7 +478,7 @@ module dbg ( assign dmi_reg_rdata_din[31:0] = ({32{dmi_reg_addr == 7'h4}} & data0_reg[31:0]) | ({32{dmi_reg_addr == 7'h5}} & data1_reg[31:0]) | - ({32{dmi_reg_addr == 7'h10}} & dmcontrol_reg[31:0]) | + ({32{dmi_reg_addr == 7'h10}} & {2'b0,dmcontrol_reg[29],1'b0,dmcontrol_reg[27:0]}) | // Read0 to Write only bits ({32{dmi_reg_addr == 7'h11}} & dmstatus_reg[31:0]) | ({32{dmi_reg_addr == 7'h16}} & abstractcs_reg[31:0]) | ({32{dmi_reg_addr == 7'h17}} & command_reg[31:0]) | @@ -483,7 +497,7 @@ module dbg ( // interface for the core assign dbg_cmd_addr[31:0] = (command_reg[31:24] == 8'h2) ? {data1_reg[31:2],2'b0} : {20'b0, command_reg[11:0]}; // Only word addresses for abstract memory assign dbg_cmd_wrdata[31:0] = data0_reg[31:0]; - assign dbg_cmd_valid = (dbg_state == CMD_START) & ~(|abstractcs_reg[10:8]) & dma_dbg_ready; + assign dbg_cmd_valid = (dbg_state == CMD_START) & ~((|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17])) & dma_dbg_ready; assign dbg_cmd_write = command_reg[16]; assign dbg_cmd_type[1:0] = (command_reg[31:24] == 8'h2) ? 2'b10 : {1'b0, (command_reg[15:12] == 4'b0)}; assign dbg_cmd_size[1:0] = command_reg[21:20]; @@ -554,7 +568,7 @@ module dbg ( sb_state_en = 1'b1; sbcs_sbbusy_wren = 1'b1; // reset the single read sbcs_sbbusy_din = 1'b0; - sbaddress0_reg_wren1 = sbcs_reg[16]; // auto increment was set. Update to new address after completing the current command + sbaddress0_reg_wren1 = sbcs_reg[16] & (sbcs_reg[14:12] == 3'b0); // auto increment was set and no error. Update to new address after completing the current command end default : begin sb_nxtstate = SBIDLE; @@ -572,6 +586,9 @@ module dbg ( //rvdff #(.WIDTH(1)) bus_clken_ff (.din(dbg_bus_clk_en), .dout(dbg_bus_clk_en_q), .rst_l(dbg_dm_rst_l), .clk(dbg_sb_c2_free_clk), .*); + rvdff_fpga #(2) axi_bresp_ff (.din(sb_axi_bresp[1:0]), .dout(sb_axi_bresp_q[1:0]), .rst_l(dbg_dm_rst_l), .clk(bus_clk), .clken(bus_clken), .rawclk(clk), .*); + rvdff_fpga #(2) axi_rresp_ff (.din(sb_axi_rresp[1:0]), .dout(sb_axi_rresp_q[1:0]), .rst_l(dbg_dm_rst_l), .clk(bus_clk), .clken(bus_clken), .rawclk(clk), .*); + rvdffs #(.WIDTH(1)) axi_awvalid_ff (.din(sb_axi_awvalid), .dout(sb_axi_awvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); rvdffs #(.WIDTH(1)) axi_awready_ff (.din(sb_axi_awready), .dout(sb_axi_awready_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); rvdffs #(.WIDTH(1)) axi_wvalid_ff (.din(sb_axi_wvalid), .dout(sb_axi_wvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); @@ -581,10 +598,8 @@ module dbg ( rvdffs #(.WIDTH(1)) axi_bvalid_ff (.din(sb_axi_bvalid), .dout(sb_axi_bvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); rvdffs #(.WIDTH(1)) axi_bready_ff (.din(sb_axi_bready), .dout(sb_axi_bready_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); - rvdff #(.WIDTH(2)) axi_bresp_ff (.din(sb_axi_bresp[1:0]), .dout(sb_axi_bresp_q[1:0]), .rst_l(dbg_dm_rst_l), .clk(bus_clk), .*); rvdffs #(.WIDTH(1)) axi_rvalid_ff (.din(sb_axi_rvalid), .dout(sb_axi_rvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); rvdffs #(.WIDTH(1)) axi_rready_ff (.din(sb_axi_rready), .dout(sb_axi_rready_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*); - rvdff #(.WIDTH(2)) axi_rresp_ff (.din(sb_axi_rresp[1:0]), .dout(sb_axi_rresp_q[1:0]), .rst_l(dbg_dm_rst_l), .clk(bus_clk), .*); rvdffe #(.WIDTH(64)) axi_rdata_ff (.din(sb_axi_rdata[63:0]), .dout(sb_axi_rdata_q[63:0]), .rst_l(dbg_dm_rst_l), .en(bus_clken), .*); // AXI Request signals diff --git a/design/dec/dec_tlu_ctl.sv b/design/dec/dec_tlu_ctl.sv index 18fcdb1..4ec04ac 100644 --- a/design/dec/dec_tlu_ctl.sv +++ b/design/dec/dec_tlu_ctl.sv @@ -2533,7 +2533,7 @@ assign dec_csr_legal_d = ( dec_csr_any_unq_d & assign dec_csr_rddata_d[31:0] = ( ({32{csr_misa}} & 32'h40001104) | ({32{csr_mvendorid}} & 32'h00000045) | ({32{csr_marchid}} & 32'h0000000b) | - ({32{csr_mimpid}} & 32'h3) | + ({32{csr_mimpid}} & 32'h4) | ({32{csr_mstatus}} & {19'b0, 2'b11, 3'b0, mstatus[1], 3'b0, mstatus[0], 3'b0}) | ({32{csr_mtvec}} & {mtvec[30:1], 1'b0, mtvec[0]}) | ({32{csr_mip}} & {1'b0, mip[5:3], 16'b0, mip[2], 3'b0, mip[1], 3'b0, mip[0], 3'b0}) | diff --git a/design/dma_ctrl.sv b/design/dma_ctrl.sv index 4238b28..8a4baac 100644 --- a/design/dma_ctrl.sv +++ b/design/dma_ctrl.sv @@ -396,9 +396,6 @@ module dma_ctrl ( // Inputs rvdff #(1) ahbs_bus_clken_ff (.din(dma_bus_clk_en), .dout(dma_bus_clk_en_q), .clk(free_clk), .*); - rvdff #(1) fifo_full_bus_ff (.din(fifo_full_spec), .dout(fifo_full_spec_bus), .clk(dma_bus_clk), .*); - rvdff #(1) dbg_dma_bubble_ff (.din(dbg_dma_bubble), .dout(dbg_dma_bubble_bus), .clk(dma_bus_clk), .*); - rvdff #(1) dec_tlu_stall_dma_ff (.din(dec_tlu_stall_dma), .dout(dec_tlu_stall_dma_bus), .clk(dma_bus_clk), .*); rvdff #(1) dma_dbg_cmd_doneff (.din(dma_dbg_cmd_done), .dout(dma_dbg_cmd_done_q), .clk(free_clk), .*); // Clock Gating logic @@ -407,7 +404,34 @@ module dma_ctrl ( rvoclkhdr dma_buffer_c1cgc ( .en(dma_buffer_c1_clken), .l1clk(dma_buffer_c1_clk), .* ); rvoclkhdr dma_free_cgc (.en(dma_free_clken), .l1clk(dma_free_clk), .*); - rvclkhdr dma_bus_cgc (.en(dma_bus_clk_en), .l1clk(dma_bus_clk), .*); + +`ifndef RV_FPGA_OPTIMIZE + rvclkhdr dma_bus_cgc (.en(dma_bus_clk_en), .l1clk(dma_bus_clk), .*); // ifndef FPGA_OPTIMIZE +`endif + + rvdffsc_fpga #(1) wrbuf_vldff (.din(1'b1), .clear(wrbuf_rst), .dout(wrbuf_vld), .en(wrbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdffsc_fpga #(1) wrbuf_data_vldff(.din(1'b1), .clear(wrbuf_data_rst), .dout(wrbuf_data_vld), .en(wrbuf_data_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdffsc_fpga #(1) rdbuf_vldff (.din(1'b1), .clear(rdbuf_rst), .dout(rdbuf_vld), .en(rdbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + + + rvdff_fpga #(1) fifo_full_bus_ff (.din(fifo_full_spec), .dout(fifo_full_spec_bus), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) dbg_dma_bubble_ff (.din(dbg_dma_bubble), .dout(dbg_dma_bubble_bus), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) dec_tlu_stall_dma_ff (.din(dec_tlu_stall_dma), .dout(dec_tlu_stall_dma_bus), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdffs_fpga #(1) wrbuf_postedff (.din(1'b0), .dout(wrbuf_posted), .en(wrbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdffs_fpga #(DMA_BUS_TAG) wrbuf_tagff (.din(dma_axi_awid[DMA_BUS_TAG-1:0]), .dout(wrbuf_tag[DMA_BUS_TAG-1:0]), .en(wrbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdffs_fpga #(3) wrbuf_sizeff (.din(dma_axi_awsize[2:0]), .dout(wrbuf_size[2:0]), .en(wrbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdffs_fpga #(8) wrbuf_byteenff (.din(dma_axi_wstrb[7:0]), .dout(wrbuf_byteen[7:0]), .en(wrbuf_data_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdffs_fpga #(DMA_BUS_TAG) rdbuf_tagff (.din(dma_axi_arid[DMA_BUS_TAG-1:0]), .dout(rdbuf_tag[DMA_BUS_TAG-1:0]), .en(rdbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdffs_fpga #(3) rdbuf_sizeff (.din(dma_axi_arsize[2:0]), .dout(rdbuf_size[2:0]), .en(rdbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdffs_fpga #(1) mstr_prtyff (.din(axi_mstr_prty_in), .dout(axi_mstr_priority), .en(axi_mstr_prty_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + + rvdff_fpga #(1) axi_mstr_validff ( .din(axi_mstr_valid), .dout(axi_mstr_valid_q), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) axi_slv_sentff ( .din(axi_slv_sent), .dout(axi_slv_sent_q), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + + + rvdffe #(32) wrbuf_addrff(.din(dma_axi_awaddr[31:0]), .dout(wrbuf_addr[31:0]), .en(wrbuf_en & dma_bus_clk_en), .*); + rvdffe #(64) wrbuf_dataff(.din(dma_axi_wdata[63:0]), .dout(wrbuf_data[63:0]), .en(wrbuf_data_en & dma_bus_clk_en), .*); + rvdffe #(32) rdbuf_addrff(.din(dma_axi_araddr[31:0]), .dout(rdbuf_addr[31:0]), .en(rdbuf_en & dma_bus_clk_en), .*); // Write channel buffer assign wrbuf_en = dma_axi_awvalid & dma_axi_awready; @@ -416,23 +440,11 @@ module dma_ctrl ( assign wrbuf_rst = wrbuf_cmd_sent & ~wrbuf_en; assign wrbuf_data_rst = wrbuf_cmd_sent & ~wrbuf_data_en; - rvdffsc #(.WIDTH(1)) wrbuf_vldff(.din(1'b1), .dout(wrbuf_vld), .en(wrbuf_en), .clear(wrbuf_rst), .clk(dma_bus_clk), .*); - rvdffsc #(.WIDTH(1)) wrbuf_data_vldff(.din(1'b1), .dout(wrbuf_data_vld), .en(wrbuf_data_en), .clear(wrbuf_data_rst), .clk(dma_bus_clk), .*); - rvdffs #(.WIDTH(1)) wrbuf_postedff(.din(1'b0), .dout(wrbuf_posted), .en(wrbuf_en), .clk(dma_bus_clk), .*); - rvdffs #(.WIDTH(DMA_BUS_TAG)) wrbuf_tagff(.din(dma_axi_awid[DMA_BUS_TAG-1:0]), .dout(wrbuf_tag[DMA_BUS_TAG-1:0]), .en(wrbuf_en), .clk(dma_bus_clk), .*); - rvdffs #(.WIDTH(3)) wrbuf_sizeff(.din(dma_axi_awsize[2:0]), .dout(wrbuf_size[2:0]), .en(wrbuf_en), .clk(dma_bus_clk), .*); - rvdffe #(.WIDTH(32)) wrbuf_addrff(.din(dma_axi_awaddr[31:0]), .dout(wrbuf_addr[31:0]), .en(wrbuf_en & dma_bus_clk_en), .*); - rvdffe #(.WIDTH(64)) wrbuf_dataff(.din(dma_axi_wdata[63:0]), .dout(wrbuf_data[63:0]), .en(wrbuf_data_en & dma_bus_clk_en), .*); - rvdffs #(.WIDTH(8)) wrbuf_byteenff(.din(dma_axi_wstrb[7:0]), .dout(wrbuf_byteen[7:0]), .en(wrbuf_data_en), .clk(dma_bus_clk), .*); // Read channel buffer assign rdbuf_en = dma_axi_arvalid & dma_axi_arready; assign rdbuf_cmd_sent = axi_mstr_valid & ~axi_mstr_write & dma_fifo_ready; assign rdbuf_rst = rdbuf_cmd_sent & ~rdbuf_en; - rvdffsc #(.WIDTH(1)) rdbuf_vldff(.din(1'b1), .dout(rdbuf_vld), .en(rdbuf_en), .clear(rdbuf_rst), .clk(dma_bus_clk), .*); - rvdffs #(.WIDTH(DMA_BUS_TAG)) rdbuf_tagff(.din(dma_axi_arid[DMA_BUS_TAG-1:0]), .dout(rdbuf_tag[DMA_BUS_TAG-1:0]), .en(rdbuf_en), .clk(dma_bus_clk), .*); - rvdffs #(.WIDTH(3)) rdbuf_sizeff(.din(dma_axi_arsize[2:0]), .dout(rdbuf_size[2:0]), .en(rdbuf_en), .clk(dma_bus_clk), .*); - rvdffe #(.WIDTH(32)) rdbuf_addrff(.din(dma_axi_araddr[31:0]), .dout(rdbuf_addr[31:0]), .en(rdbuf_en & dma_bus_clk_en), .*); assign dma_axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent); assign dma_axi_wready = ~(wrbuf_data_vld & ~wrbuf_cmd_sent); @@ -452,10 +464,6 @@ module dma_ctrl ( assign axi_mstr_sel = (wrbuf_vld & wrbuf_data_vld & rdbuf_vld) ? axi_mstr_priority : (wrbuf_vld & wrbuf_data_vld); assign axi_mstr_prty_in = ~axi_mstr_priority; assign axi_mstr_prty_en = axi_mstr_valid; - rvdffs #(.WIDTH(1)) mstr_prtyff(.din(axi_mstr_prty_in), .dout(axi_mstr_priority), .en(axi_mstr_prty_en), .clk(dma_bus_clk), .*); - - rvdff #(.WIDTH(1)) axi_mstr_validff (.din(axi_mstr_valid), .dout(axi_mstr_valid_q), .clk(dma_bus_clk), .*); - rvdff #(.WIDTH(1)) axi_slv_sentff (.din(axi_slv_sent), .dout(axi_slv_sent_q), .clk(dma_bus_clk), .*); //assign axi_slv_valid = fifo_valid[RspPtr] & ~fifo_rsp_done[RspPtr] & ~fifo_dbg[RspPtr] & // ((fifo_write[RspPtr] & fifo_done_bus[RspPtr]) | (~fifo_write[RspPtr] & fifo_data_bus_valid[RspPtr]) | fifo_error_bus[RspPtr]); diff --git a/design/exu/exu_mul_ctl.sv b/design/exu/exu_mul_ctl.sv index d6e74a1..41a86e8 100644 --- a/design/exu/exu_mul_ctl.sv +++ b/design/exu/exu_mul_ctl.sv @@ -59,20 +59,23 @@ module exu_mul_ctl assign mul_c1_e2_clken = (valid_e1 | clk_override) & ~freeze; assign mul_c1_e3_clken = (valid_e2 | clk_override) & ~freeze; +`ifndef RV_FPGA_OPTIMIZE // C1 - 1 clock pulse for data - rvclkhdr exu_mul_c1e1_cgc (.*, .en(mul_c1_e1_clken), .l1clk(exu_mul_c1_e1_clk)); - rvclkhdr exu_mul_c1e2_cgc (.*, .en(mul_c1_e2_clken), .l1clk(exu_mul_c1_e2_clk)); - rvclkhdr exu_mul_c1e3_cgc (.*, .en(mul_c1_e3_clken), .l1clk(exu_mul_c1_e3_clk)); + rvclkhdr exu_mul_c1e1_cgc (.*, .en(mul_c1_e1_clken), .l1clk(exu_mul_c1_e1_clk)); // ifndef FPGA_OPTIMIZE + rvclkhdr exu_mul_c1e2_cgc (.*, .en(mul_c1_e2_clken), .l1clk(exu_mul_c1_e2_clk)); // ifndef FPGA_OPTIMIZE + rvclkhdr exu_mul_c1e3_cgc (.*, .en(mul_c1_e3_clken), .l1clk(exu_mul_c1_e3_clk)); // ifndef FPGA_OPTIMIZE +`endif // --------------------------- Input flops ---------------------------------- - rvdffs #(1) valid_e1_ff (.*, .din(mp.valid), .dout(valid_e1), .clk(active_clk), .en(~freeze)); - rvdff #(1) rs1_sign_e1_ff (.*, .din(mp.rs1_sign), .dout(rs1_sign_e1), .clk(exu_mul_c1_e1_clk)); - rvdff #(1) rs2_sign_e1_ff (.*, .din(mp.rs2_sign), .dout(rs2_sign_e1), .clk(exu_mul_c1_e1_clk)); - rvdff #(1) low_e1_ff (.*, .din(mp.low), .dout(low_e1), .clk(exu_mul_c1_e1_clk)); - rvdff #(1) ld_rs1_byp_e1_ff (.*, .din(mp.load_mul_rs1_bypass_e1), .dout(load_mul_rs1_bypass_e1), .clk(exu_mul_c1_e1_clk)); - rvdff #(1) ld_rs2_byp_e1_ff (.*, .din(mp.load_mul_rs2_bypass_e1), .dout(load_mul_rs2_bypass_e1), .clk(exu_mul_c1_e1_clk)); + rvdffs #(1) valid_e1_ff (.*, .din(mp.valid), .dout(valid_e1), .clk(active_clk), .en(~freeze)); + + rvdff_fpga #(1) rs1_sign_e1_ff (.*, .din(mp.rs1_sign), .dout(rs1_sign_e1), .clk(exu_mul_c1_e1_clk), .clken(mul_c1_e1_clken), .rawclk(clk)); + rvdff_fpga #(1) rs2_sign_e1_ff (.*, .din(mp.rs2_sign), .dout(rs2_sign_e1), .clk(exu_mul_c1_e1_clk), .clken(mul_c1_e1_clken), .rawclk(clk)); + rvdff_fpga #(1) low_e1_ff (.*, .din(mp.low), .dout(low_e1), .clk(exu_mul_c1_e1_clk), .clken(mul_c1_e1_clken), .rawclk(clk)); + rvdff_fpga #(1) ld_rs1_byp_e1_ff (.*, .din(mp.load_mul_rs1_bypass_e1), .dout(load_mul_rs1_bypass_e1), .clk(exu_mul_c1_e1_clk), .clken(mul_c1_e1_clken), .rawclk(clk)); + rvdff_fpga #(1) ld_rs2_byp_e1_ff (.*, .din(mp.load_mul_rs2_bypass_e1), .dout(load_mul_rs2_bypass_e1), .clk(exu_mul_c1_e1_clk), .clken(mul_c1_e1_clken), .rawclk(clk)); rvdffe #(32) a_e1_ff (.*, .din(a[31:0]), .dout(a_ff_e1[31:0]), .en(mul_c1_e1_clken)); rvdffe #(32) b_e1_ff (.*, .din(b[31:0]), .dout(b_ff_e1[31:0]), .en(mul_c1_e1_clken)); @@ -88,8 +91,9 @@ module exu_mul_ctl assign rs2_neg_e1 = rs2_sign_e1 & b_e1[31]; - rvdffs #(1) valid_e2_ff (.*, .din(valid_e1), .dout(valid_e2), .clk(active_clk), .en(~freeze)); - rvdff #(1) low_e2_ff (.*, .din(low_e1), .dout(low_e2), .clk(exu_mul_c1_e2_clk)); + rvdffs #(1) valid_e2_ff (.*, .din(valid_e1), .dout(valid_e2), .clk(active_clk), .en(~freeze)); + + rvdff_fpga #(1) low_e2_ff (.*, .din(low_e1), .dout(low_e2), .clk(exu_mul_c1_e2_clk), .clken(mul_c1_e2_clken), .rawclk(clk)); rvdffe #(33) a_e2_ff (.*, .din({rs1_neg_e1, a_e1[31:0]}), .dout(a_ff_e2[32:0]), .en(mul_c1_e2_clken)); rvdffe #(33) b_e2_ff (.*, .din({rs2_neg_e1, b_e1[31:0]}), .dout(b_ff_e2[32:0]), .en(mul_c1_e2_clken)); @@ -103,9 +107,9 @@ module exu_mul_ctl assign prod_e2[65:0] = a_ff_e2 * b_ff_e2; + rvdff_fpga #(1) low_e3_ff (.*, .din(low_e2), .dout(low_e3), .clk(exu_mul_c1_e3_clk), .clken(mul_c1_e3_clken), .rawclk(clk)); - rvdff #(1) low_e3_ff (.*, .din(low_e2), .dout(low_e3), .clk(exu_mul_c1_e3_clk)); - rvdffe #(64) prod_e3_ff (.*, .din(prod_e2[63:0]), .dout(prod_e3[63:0]), .en(mul_c1_e3_clken)); + rvdffe #(64) prod_e3_ff (.*, .din(prod_e2[63:0]), .dout(prod_e3[63:0]), .en(mul_c1_e3_clken)); diff --git a/design/ifu/ifu.sv b/design/ifu/ifu.sv index d978439..4f9df76 100644 --- a/design/ifu/ifu.sv +++ b/design/ifu/ifu.sv @@ -123,7 +123,7 @@ module ifu output logic ifu_pmu_fetch_stall, // I$ & ITAG Ports - output logic [31:3] ic_rw_addr, // Read/Write addresss to the Icache. + output logic [31:2] ic_rw_addr, // Read/Write addresss to the Icache. output logic [3:0] ic_wr_en, // Icache write enable, when filling the Icache. output logic ic_rd_en, // Icache read enable. `ifdef RV_ICACHE_ECC @@ -358,7 +358,7 @@ module ifu assign mppc_ns[31:1] = `EXU.exu_i0_flush_upper_e1 ? `DEC.decode.i0_pc_e1[31:1] : (`EXU.exu_i1_flush_upper_e1 ? `DEC.decode.i1_pc_e1[31:1] : (`EXU.exu_i0_flush_lower_e4 ? `DEC.decode.i0_pc_e4[31:1] : `DEC.decode.i1_pc_e4[31:1])); assign mppc_ns[0] = 1'b0; logic [3:0] ic_rd_hit_f2; - rvdff #(36) mdseal_ff (.*, .din({mppc_ns[31:0], mem_ctl.ic_rd_hit[3:0]}), .dout({mppc[31:0],ic_rd_hit_f2[3:0]})); + rvdff #(36) junk_ff (.*, .clk(free_clk), .din({mppc_ns[31:0], mem_ctl.ic_rd_hit[3:0]}), .dout({mppc[31:0],ic_rd_hit_f2[3:0]})); logic [2:0] tmp_bnk; assign tmp_bnk[2:0] = encode8_3(bp.btb_sel_f2[7:0]); always @(negedge clk) begin diff --git a/design/ifu/ifu_bp_ctl.sv b/design/ifu/ifu_bp_ctl.sv index bc3687a..215b369 100644 --- a/design/ifu/ifu_bp_ctl.sv +++ b/design/ifu/ifu_bp_ctl.sv @@ -1289,7 +1289,7 @@ assign fgmask_f2[0] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] ({31{e1_rs_hold}} & e1_rets_out[i][31:1]) ); - rvdff #(31) e1_rets_ff (.*, .din(e1_rets_in[i][31:1]), .dout(e1_rets_out[i][31:1])); + rvdffe #(31) e1_rets_ff (.*, .en(~e1_rs_hold), .din(e1_rets_in[i][31:1]), .dout(e1_rets_out[i][31:1])); end : e1_retstack @@ -1346,7 +1346,7 @@ assign fgmask_f2[0] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] ({31{e4_rs_hold}} & e4_rets_out[i][31:1]) ); - rvdff #(31) e4_rets_ff (.*, .din(e4_rets_in[i][31:1]), .dout(e4_rets_out[i][31:1])); + rvdffe #(31) e4_rets_ff (.*, .en(~e4_rs_hold), .din(e4_rets_in[i][31:1]), .dout(e4_rets_out[i][31:1])); end : e4_retstack @@ -1628,7 +1628,11 @@ assign fgmask_f2[0] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2] (bht_wr_en1[i] & ((bht_wr_addr1[`RV_BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) | (bht_wr_en2[i] & ((bht_wr_addr2[`RV_BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)); - rvclkhdr bht_bank_grp_cgc ( .en(bht_bank_clken[i][k]), .l1clk(bht_bank_clk[i][k]), .* ); +`ifndef RV_FPGA_OPTIMIZE + rvclkhdr bht_bank_grp_cgc ( .en(bht_bank_clken[i][k]), .l1clk(bht_bank_clk[i][k]), .* ); // ifndef RV_FPGA_OPTIMIZE +`else + assign bht_bank_clk[i][k] = '0; +`endif for (j=0 ; j= 8) begin: genblock + if (WIDTH >= 8 || OVERRIDE==1) begin: genblock `endif `ifdef RV_FPGA_OPTIMIZE diff --git a/design/lsu/lsu.sv b/design/lsu/lsu.sv index 127dd06..3e34420 100644 --- a/design/lsu/lsu.sv +++ b/design/lsu/lsu.sv @@ -260,12 +260,13 @@ module lsu logic lsu_c2_dc3_clk, lsu_c2_dc4_clk, lsu_c2_dc5_clk; logic lsu_freeze_c1_dc2_clk, lsu_freeze_c1_dc3_clk; logic lsu_freeze_c1_dc1_clken, lsu_freeze_c1_dc2_clken, lsu_freeze_c1_dc3_clken; + logic lsu_freeze_c2_dc1_clken, lsu_freeze_c2_dc2_clken, lsu_freeze_c2_dc3_clken, lsu_freeze_c2_dc4_clken; logic lsu_store_c1_dc1_clken, lsu_store_c1_dc2_clken, lsu_store_c1_dc3_clken, lsu_store_c1_dc4_clk, lsu_store_c1_dc5_clk; logic lsu_freeze_c2_dc1_clk, lsu_freeze_c2_dc2_clk, lsu_freeze_c2_dc3_clk, lsu_freeze_c2_dc4_clk; logic lsu_stbuf_c1_clk; logic lsu_bus_ibuf_c1_clk, lsu_bus_obuf_c1_clk, lsu_bus_buf_c1_clk; - logic lsu_dccm_c1_dc3_clk, lsu_pic_c1_dc3_clken; + logic lsu_dccm_c1_dc3_clk, lsu_dccm_c1_dc3_clken, lsu_pic_c1_dc3_clken; logic lsu_busm_clk; logic lsu_free_c2_clk; @@ -359,10 +360,11 @@ module lsu //Flops //rvdffs #(1) lsu_i0_valid_dc1ff (.*, .din(dec_i0_lsu_decode_d), .dout(lsu_i0_valid_dc1), .en(~lsu_freeze_dc3)); - rvdff #(1) lsu_i0_valid_dc1ff (.*, .din(dec_i0_lsu_decode_d), .dout(lsu_i0_valid_dc1), .clk(lsu_freeze_c2_dc1_clk)); - rvdff #(1) lsu_i0_valid_dc2ff (.*, .din(lsu_i0_valid_dc1), .dout(lsu_i0_valid_dc2), .clk(lsu_freeze_c2_dc2_clk)); - rvdff #(1) lsu_i0_valid_dc3ff (.*, .din(lsu_i0_valid_dc2), .dout(lsu_i0_valid_dc3), .clk(lsu_freeze_c2_dc3_clk)); - rvdff #(1) lsu_i0_valid_dc4ff (.*, .din(lsu_i0_valid_dc3), .dout(lsu_i0_valid_dc4), .clk(lsu_freeze_c2_dc4_clk)); + rvdff_fpga #(1) lsu_i0_valid_dc1ff (.*, .din(dec_i0_lsu_decode_d), .dout(lsu_i0_valid_dc1), .clk(lsu_freeze_c2_dc1_clk), .clken(lsu_freeze_c2_dc1_clken), .rawclk(clk)); + rvdff_fpga #(1) lsu_i0_valid_dc2ff (.*, .din(lsu_i0_valid_dc1), .dout(lsu_i0_valid_dc2), .clk(lsu_freeze_c2_dc2_clk), .clken(lsu_freeze_c2_dc2_clken), .rawclk(clk)); + rvdff_fpga #(1) lsu_i0_valid_dc3ff (.*, .din(lsu_i0_valid_dc2), .dout(lsu_i0_valid_dc3), .clk(lsu_freeze_c2_dc3_clk), .clken(lsu_freeze_c2_dc3_clken), .rawclk(clk)); + rvdff_fpga #(1) lsu_i0_valid_dc4ff (.*, .din(lsu_i0_valid_dc3), .dout(lsu_i0_valid_dc4), .clk(lsu_freeze_c2_dc4_clk), .clken(lsu_freeze_c2_dc4_clken), .rawclk(clk)); + rvdff #(1) lsu_i0_valid_dc5ff (.*, .din(lsu_i0_valid_dc4), .dout(lsu_i0_valid_dc5), .clk(lsu_c2_dc5_clk)); rvdff #(1) lsu_single_ecc_err_dc4(.*, .din(lsu_single_ecc_error_dc3), .dout(lsu_single_ecc_error_dc4), .clk(lsu_c2_dc4_clk)); rvdff #(1) lsu_single_ecc_err_dc5(.*, .din(lsu_single_ecc_error_dc4), .dout(lsu_single_ecc_error_dc5), .clk(lsu_c2_dc5_clk)); diff --git a/design/lsu/lsu_addrcheck.sv b/design/lsu/lsu_addrcheck.sv index e6bdf10..fd771e2 100644 --- a/design/lsu/lsu_addrcheck.sv +++ b/design/lsu/lsu_addrcheck.sv @@ -27,7 +27,10 @@ module lsu_addrcheck ( input logic lsu_freeze_c2_dc2_clk, // clock input logic lsu_freeze_c2_dc3_clk, + input logic lsu_freeze_c2_dc2_clken, + input logic lsu_freeze_c2_dc3_clken, input logic rst_l, // reset + input logic clk, input logic [31:0] start_addr_dc1, // start address for lsu input logic [31:0] end_addr_dc1, // end address for lsu @@ -177,8 +180,9 @@ module lsu_addrcheck assign misaligned_fault_dc1 = ((start_addr_dc1[31:28] != end_addr_dc1[31:28]) | (is_sideeffects_dc1 & ~is_aligned_dc1)) & addr_external_dc1 & lsu_pkt_dc1.valid & ~lsu_pkt_dc1.dma; - rvdff #(.WIDTH(1)) is_sideeffects_dc2ff (.din(is_sideeffects_dc1), .dout(is_sideeffects_dc2), .clk(lsu_freeze_c2_dc2_clk), .*); - rvdff #(.WIDTH(1)) is_sideeffects_dc3ff (.din(is_sideeffects_dc2), .dout(is_sideeffects_dc3), .clk(lsu_freeze_c2_dc3_clk), .*); + rvdff_fpga #(.WIDTH(1)) is_sideeffects_dc2ff (.din(is_sideeffects_dc1), .dout(is_sideeffects_dc2), .clk(lsu_freeze_c2_dc2_clk), .clken(lsu_freeze_c2_dc2_clken), .rawclk(clk), .*); + rvdff_fpga #(.WIDTH(1)) is_sideeffects_dc3ff (.din(is_sideeffects_dc2), .dout(is_sideeffects_dc3), .clk(lsu_freeze_c2_dc3_clk), .clken(lsu_freeze_c2_dc3_clken), .rawclk(clk), .*); + endmodule // lsu_addrcheck diff --git a/design/lsu/lsu_bus_buffer.sv b/design/lsu/lsu_bus_buffer.sv index 6a6cceb..2bc4abf 100644 --- a/design/lsu/lsu_bus_buffer.sv +++ b/design/lsu/lsu_bus_buffer.sv @@ -58,6 +58,7 @@ module lsu_bus_buffer input logic lsu_freeze_c1_dc3_clk, input logic lsu_freeze_c2_dc2_clk, input logic lsu_freeze_c2_dc3_clk, + input logic lsu_freeze_c2_dc3_clken, input logic lsu_bus_ibuf_c1_clk, input logic lsu_bus_obuf_c1_clk, input logic lsu_bus_buf_c1_clk, @@ -527,10 +528,28 @@ module lsu_bus_buffer assign obuf_merge_en = (CmdPtr0 != CmdPtr1) & found_cmdptr0 & found_cmdptr1 & (buf_state[CmdPtr0] == CMD) & (buf_state[CmdPtr1] == CMD) & ~buf_cmd_state_bus_en[CmdPtr0] & ~buf_sideeffect[CmdPtr0] & (~buf_write[CmdPtr0] & buf_dual[CmdPtr0] & ~buf_dualhi[CmdPtr0] & buf_samedw[CmdPtr0]); // CmdPtr0/CmdPtr1 are for same load which is within a DW - rvdff #(.WIDTH(1)) obuf_wren_ff (.din(obuf_wr_en), .dout(obuf_wr_enQ), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(1)) obuf_cmd_done_ff (.din(obuf_cmd_done_in), .dout(obuf_cmd_done), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(1)) obuf_data_done_ff (.din(obuf_data_done_in), .dout(obuf_data_done), .clk(lsu_busm_clk), .*); - rvdffsc #(.WIDTH(1)) obuf_valid_ff (.din(1'b1), .dout(obuf_valid), .en(obuf_wr_en), .clear(obuf_rst), .clk(lsu_busm_clk), .*); + rvdff_fpga #(1) obuf_wren_ff (.din(obuf_wr_en), .dout(obuf_wr_enQ), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) obuf_cmd_done_ff (.din(obuf_cmd_done_in), .dout(obuf_cmd_done), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) obuf_data_done_ff (.din(obuf_data_done_in), .dout(obuf_data_done), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdffsc_fpga #(1) obuf_valid_ff (.din(1'b1), .dout(obuf_valid), .clear(obuf_rst), .en(obuf_wr_en), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(TIMER_LOG2) obuf_timerff (.din(obuf_wr_timer_in), .dout(obuf_wr_timer), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + + rvdff_fpga #(1) lsu_axi_awvalid_ff (.din(lsu_axi_awvalid), .dout(lsu_axi_awvalid_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) lsu_axi_awready_ff (.din(lsu_axi_awready), .dout(lsu_axi_awready_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) lsu_axi_wvalid_ff (.din(lsu_axi_wvalid), .dout(lsu_axi_wvalid_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) lsu_axi_wready_ff (.din(lsu_axi_wready), .dout(lsu_axi_wready_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) lsu_axi_arvalid_ff (.din(lsu_axi_arvalid), .dout(lsu_axi_arvalid_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) lsu_axi_arready_ff (.din(lsu_axi_arready), .dout(lsu_axi_arready_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) lsu_axi_bvalid_ff (.din(lsu_axi_bvalid), .dout(lsu_axi_bvalid_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) lsu_axi_bready_ff (.din(lsu_axi_bready), .dout(lsu_axi_bready_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(2) lsu_axi_bresp_ff (.din(lsu_axi_bresp[1:0]), .dout(lsu_axi_bresp_q[1:0]), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(LSU_BUS_TAG) lsu_axi_bid_ff (.din(lsu_axi_bid[LSU_BUS_TAG-1:0]), .dout(lsu_axi_bid_q[LSU_BUS_TAG-1:0]), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) lsu_axi_rvalid_ff (.din(lsu_axi_rvalid), .dout(lsu_axi_rvalid_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(1) lsu_axi_rready_ff (.din(lsu_axi_rready), .dout(lsu_axi_rready_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(2) lsu_axi_rresp_ff (.din(lsu_axi_rresp[1:0]), .dout(lsu_axi_rresp_q[1:0]), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + rvdff_fpga #(LSU_BUS_TAG) lsu_axi_rid_ff (.din(lsu_axi_rid[LSU_BUS_TAG-1:0]), .dout(lsu_axi_rid_q[LSU_BUS_TAG-1:0]), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*); + + rvdffs #(.WIDTH(LSU_BUS_TAG)) obuf_tag0ff (.din(obuf_tag0_in), .dout(obuf_tag0), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); rvdffs #(.WIDTH(LSU_BUS_TAG)) obuf_tag1ff (.din(obuf_tag1_in), .dout(obuf_tag1), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); rvdffs #(.WIDTH(1)) obuf_mergeff (.din(obuf_merge_in), .dout(obuf_merge), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); @@ -540,7 +559,6 @@ module lsu_bus_buffer rvdffe #(.WIDTH(32)) obuf_addrff (.din(obuf_addr_in[31:0]), .dout(obuf_addr), .en(obuf_wr_en), .*); rvdffs #(.WIDTH(8)) obuf_byteenff (.din(obuf_byteen_in[7:0]), .dout(obuf_byteen), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*); rvdffe #(.WIDTH(64)) obuf_dataff (.din(obuf_data_in[63:0]), .dout(obuf_data), .en(obuf_wr_en), .*); - rvdff #(.WIDTH(TIMER_LOG2)) obuf_timerff (.din(obuf_wr_timer_in), .dout(obuf_wr_timer), .clk(lsu_busm_clk), .*); //------------------------------------------------------------------------------ // Output buffer logic ends here @@ -872,24 +890,8 @@ module lsu_bus_buffer assign lsu_pmu_bus_error = ld_bus_error_dc3 | lsu_imprecise_error_load_any | lsu_imprecise_error_store_any; assign lsu_pmu_bus_busy = (lsu_axi_awvalid_q & ~lsu_axi_awready_q) | (lsu_axi_wvalid_q & ~lsu_axi_wready_q) | (lsu_axi_arvalid_q & ~lsu_axi_arready_q); - rvdff #(.WIDTH(1)) lsu_axi_awvalid_ff (.din(lsu_axi_awvalid), .dout(lsu_axi_awvalid_q), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(1)) lsu_axi_awready_ff (.din(lsu_axi_awready), .dout(lsu_axi_awready_q), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(1)) lsu_axi_wvalid_ff (.din(lsu_axi_wvalid), .dout(lsu_axi_wvalid_q), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(1)) lsu_axi_wready_ff (.din(lsu_axi_wready), .dout(lsu_axi_wready_q), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(1)) lsu_axi_arvalid_ff (.din(lsu_axi_arvalid), .dout(lsu_axi_arvalid_q), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(1)) lsu_axi_arready_ff (.din(lsu_axi_arready), .dout(lsu_axi_arready_q), .clk(lsu_busm_clk), .*); - - rvdff #(.WIDTH(1)) lsu_axi_bvalid_ff (.din(lsu_axi_bvalid), .dout(lsu_axi_bvalid_q), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(1)) lsu_axi_bready_ff (.din(lsu_axi_bready), .dout(lsu_axi_bready_q), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(2)) lsu_axi_bresp_ff (.din(lsu_axi_bresp[1:0]), .dout(lsu_axi_bresp_q[1:0]), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(LSU_BUS_TAG)) lsu_axi_bid_ff (.din(lsu_axi_bid[LSU_BUS_TAG-1:0]), .dout(lsu_axi_bid_q[LSU_BUS_TAG-1:0]), .clk(lsu_busm_clk), .*); rvdffe #(.WIDTH(64)) lsu_axi_rdata_ff (.din(lsu_axi_rdata[63:0]), .dout(lsu_axi_rdata_q[63:0]), .en(lsu_axi_rvalid & lsu_bus_clk_en), .*); - rvdff #(.WIDTH(1)) lsu_axi_rvalid_ff (.din(lsu_axi_rvalid), .dout(lsu_axi_rvalid_q), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(1)) lsu_axi_rready_ff (.din(lsu_axi_rready), .dout(lsu_axi_rready_q), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(2)) lsu_axi_rresp_ff (.din(lsu_axi_rresp[1:0]), .dout(lsu_axi_rresp_q[1:0]), .clk(lsu_busm_clk), .*); - rvdff #(.WIDTH(LSU_BUS_TAG)) lsu_axi_rid_ff (.din(lsu_axi_rid[LSU_BUS_TAG-1:0]), .dout(lsu_axi_rid_q[LSU_BUS_TAG-1:0]), .clk(lsu_busm_clk), .*); - // General flops rvdffsc #(.WIDTH(1)) ld_freezeff (.din(1'b1), .dout(ld_freeze_dc3), .en(ld_freeze_en), .clear(ld_freeze_rst), .clk(lsu_free_c2_clk), .*); rvdffs #(.WIDTH(DEPTH_LOG2)) lsu_FreezePtrff (.din(WrPtr0_dc3), .dout(FreezePtr), .en(FreezePtrEn), .clk(lsu_free_c2_clk), .*); @@ -905,7 +907,10 @@ module lsu_bus_buffer rvdff #(.WIDTH(1)) lsu_busreq_dc4ff (.din(lsu_busreq_dc3 & ~flush_dc4), .dout(lsu_busreq_dc4), .clk(lsu_c2_dc4_clk), .*); rvdff #(.WIDTH(1)) lsu_busreq_dc5ff (.din(lsu_busreq_dc4 & ~flush_dc5), .dout(lsu_busreq_dc5), .clk(lsu_c2_dc5_clk), .*); - rvdff #(.WIDTH(1)) dec_nonblock_load_freeze_dc3ff (.din(dec_nonblock_load_freeze_dc2), .dout(dec_nonblock_load_freeze_dc3), .clk(lsu_freeze_c2_dc3_clk), .*); + rvdff_fpga #(.WIDTH(1)) dec_nonblock_load_freeze_dc3ff (.din(dec_nonblock_load_freeze_dc2), .dout(dec_nonblock_load_freeze_dc3), .clk(lsu_freeze_c2_dc3_clk), .clken(lsu_freeze_c2_dc3_clken), .rawclk(clk), .*); + + + rvdff #(.WIDTH(1)) lsu_nonblock_load_valid_dc4ff (.din(lsu_nonblock_load_valid_dc3), .dout(lsu_nonblock_load_valid_dc4), .clk(lsu_c2_dc4_clk), .*); rvdff #(.WIDTH(1)) lsu_nonblock_load_valid_dc5ff (.din(lsu_nonblock_load_valid_dc4), .dout(lsu_nonblock_load_valid_dc5), .clk(lsu_c2_dc5_clk), .*); diff --git a/design/lsu/lsu_bus_intf.sv b/design/lsu/lsu_bus_intf.sv index 095f31a..4212f6b 100644 --- a/design/lsu/lsu_bus_intf.sv +++ b/design/lsu/lsu_bus_intf.sv @@ -44,6 +44,10 @@ module lsu_bus_intf input logic lsu_freeze_c1_dc3_clk, input logic lsu_freeze_c2_dc2_clk, input logic lsu_freeze_c2_dc3_clk, + input logic lsu_freeze_c1_dc2_clken, + input logic lsu_freeze_c1_dc3_clken, + input logic lsu_freeze_c2_dc2_clken, + input logic lsu_freeze_c2_dc3_clken, input logic lsu_bus_ibuf_c1_clk, input logic lsu_bus_obuf_c1_clk, input logic lsu_bus_buf_c1_clk, @@ -383,19 +387,21 @@ module lsu_bus_intf assign bus_read_data_dc3[31:0] = ld_full_hit_dc3 ? ld_fwddata_dc3[31:0] : ld_bus_data_dc3[31:0]; // Fifo flops - rvdff #(.WIDTH(1)) lsu_full_hit_dc3ff (.din(ld_full_hit_dc2), .dout(ld_full_hit_dc3), .clk(lsu_freeze_c2_dc3_clk), .*); - rvdff #(.WIDTH(32)) lsu_fwddata_dc3ff (.din(ld_fwddata_dc2[31:0]), .dout(ld_fwddata_dc3[31:0]), .clk(lsu_c1_dc3_clk), .*); rvdff #(.WIDTH(1)) clken_ff (.din(lsu_bus_clk_en), .dout(lsu_bus_clk_en_q), .clk(free_clk), .*); - rvdff #(.WIDTH(1)) ldst_dual_dc2ff (.din(ldst_dual_dc1), .dout(ldst_dual_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); - rvdff #(.WIDTH(1)) ldst_dual_dc3ff (.din(ldst_dual_dc2), .dout(ldst_dual_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); + rvdff_fpga #(.WIDTH(1)) ldst_dual_dc2ff (.din(ldst_dual_dc1), .dout(ldst_dual_dc2), .clk(lsu_freeze_c1_dc2_clk), .clken(lsu_freeze_c1_dc2_clken), .rawclk(clk), .*); + rvdff_fpga #(.WIDTH(1)) lsu_full_hit_dc3ff (.din(ld_full_hit_dc2), .dout(ld_full_hit_dc3), .clk(lsu_freeze_c2_dc3_clk), .clken(lsu_freeze_c2_dc3_clken), .rawclk(clk), .*); + rvdff_fpga #(.WIDTH(1)) ldst_dual_dc3ff (.din(ldst_dual_dc2), .dout(ldst_dual_dc3), .clk(lsu_freeze_c1_dc3_clk), .clken(lsu_freeze_c1_dc3_clken), .rawclk(clk), .*); + rvdff_fpga #(4) lsu_byten_dc3ff (.din(ldst_byteen_dc2[3:0]), .dout(ldst_byteen_dc3[3:0]), .clk(lsu_freeze_c1_dc3_clk), .clken(lsu_freeze_c1_dc3_clken), .rawclk(clk), .*); + + rvdff #(.WIDTH(32)) lsu_fwddata_dc3ff (.din(ld_fwddata_dc2[31:0]), .dout(ld_fwddata_dc3[31:0]), .clk(lsu_c1_dc3_clk), .*); + rvdff #(.WIDTH(1)) ldst_dual_dc4ff (.din(ldst_dual_dc3), .dout(ldst_dual_dc4), .clk(lsu_c1_dc4_clk), .*); rvdff #(.WIDTH(1)) ldst_dual_dc5ff (.din(ldst_dual_dc4), .dout(ldst_dual_dc5), .clk(lsu_c1_dc5_clk), .*); rvdff #(.WIDTH(1)) is_sideeffects_dc4ff (.din(is_sideeffects_dc3), .dout(is_sideeffects_dc4), .clk(lsu_c1_dc4_clk), .*); rvdff #(.WIDTH(1)) is_sideeffects_dc5ff (.din(is_sideeffects_dc4), .dout(is_sideeffects_dc5), .clk(lsu_c1_dc5_clk), .*); - rvdff #(4) lsu_byten_dc3ff (.*, .din(ldst_byteen_dc2[3:0]), .dout(ldst_byteen_dc3[3:0]), .clk(lsu_freeze_c1_dc3_clk)); rvdff #(4) lsu_byten_dc4ff (.*, .din(ldst_byteen_dc3[3:0]), .dout(ldst_byteen_dc4[3:0]), .clk(lsu_c1_dc4_clk)); rvdff #(4) lsu_byten_dc5ff (.*, .din(ldst_byteen_dc4[3:0]), .dout(ldst_byteen_dc5[3:0]), .clk(lsu_c1_dc5_clk)); diff --git a/design/lsu/lsu_clkdomain.sv b/design/lsu/lsu_clkdomain.sv index 00f7c3f..14bef2e 100644 --- a/design/lsu/lsu_clkdomain.sv +++ b/design/lsu/lsu_clkdomain.sv @@ -84,7 +84,14 @@ module lsu_clkdomain output logic lsu_freeze_c2_dc3_clk, output logic lsu_freeze_c2_dc4_clk, + output logic lsu_freeze_c2_dc1_clken, + output logic lsu_freeze_c2_dc2_clken, + output logic lsu_freeze_c2_dc3_clken, + output logic lsu_freeze_c2_dc4_clken, + output logic lsu_dccm_c1_dc3_clk, // dccm clock + output logic lsu_dccm_c1_dc3_clken, + output logic lsu_pic_c1_dc3_clken, // pic clock enable output logic lsu_stbuf_c1_clk, @@ -104,13 +111,12 @@ module lsu_clkdomain logic lsu_store_c1_dc4_clken, lsu_store_c1_dc5_clken; logic lsu_freeze_c1_dc4_clken; - logic lsu_freeze_c2_dc1_clken, lsu_freeze_c2_dc2_clken, lsu_freeze_c2_dc3_clken, lsu_freeze_c2_dc4_clken; + logic lsu_freeze_c1_dc1_clken_q, lsu_freeze_c1_dc2_clken_q, lsu_freeze_c1_dc3_clken_q, lsu_freeze_c1_dc4_clken_q; logic lsu_stbuf_c1_clken; logic lsu_bus_ibuf_c1_clken, lsu_bus_obuf_c1_clken, lsu_bus_buf_c1_clken; - logic lsu_dccm_c1_dc3_clken; logic lsu_free_c1_clken, lsu_free_c1_clken_q, lsu_free_c2_clken; logic lsu_bus_valid_clken; @@ -168,10 +174,10 @@ module lsu_clkdomain rvdff #(1) lsu_c1_dc4_clkenff (.din(lsu_c1_dc4_clken), .dout(lsu_c1_dc4_clken_q), .clk(lsu_free_c2_clk), .*); rvdff #(1) lsu_c1_dc5_clkenff (.din(lsu_c1_dc5_clken), .dout(lsu_c1_dc5_clken_q), .clk(lsu_free_c2_clk), .*); - rvdff #(1) lsu_freeze_c1_dc1_clkenff (.din(lsu_freeze_c1_dc1_clken), .dout(lsu_freeze_c1_dc1_clken_q), .clk(lsu_freeze_c2_dc1_clk), .*); - rvdff #(1) lsu_freeze_c1_dc2_clkenff (.din(lsu_freeze_c1_dc2_clken), .dout(lsu_freeze_c1_dc2_clken_q), .clk(lsu_freeze_c2_dc2_clk), .*); - rvdff #(1) lsu_freeze_c1_dc3_clkenff (.din(lsu_freeze_c1_dc3_clken), .dout(lsu_freeze_c1_dc3_clken_q), .clk(lsu_freeze_c2_dc3_clk), .*); - rvdff #(1) lsu_freeze_c1_dc4_clkenff (.din(lsu_freeze_c1_dc4_clken), .dout(lsu_freeze_c1_dc4_clken_q), .clk(lsu_freeze_c2_dc4_clk), .*); + rvdff_fpga #(1) lsu_freeze_c1_dc1_clkenff (.din(lsu_freeze_c1_dc1_clken), .dout(lsu_freeze_c1_dc1_clken_q), .clk(lsu_freeze_c2_dc1_clk), .clken(lsu_freeze_c2_dc1_clken), .rawclk(clk), .*); + rvdff_fpga #(1) lsu_freeze_c1_dc2_clkenff (.din(lsu_freeze_c1_dc2_clken), .dout(lsu_freeze_c1_dc2_clken_q), .clk(lsu_freeze_c2_dc2_clk), .clken(lsu_freeze_c2_dc2_clken), .rawclk(clk), .*); + rvdff_fpga #(1) lsu_freeze_c1_dc3_clkenff (.din(lsu_freeze_c1_dc3_clken), .dout(lsu_freeze_c1_dc3_clken_q), .clk(lsu_freeze_c2_dc3_clk), .clken(lsu_freeze_c2_dc3_clken), .rawclk(clk), .*); + rvdff_fpga #(1) lsu_freeze_c1_dc4_clkenff (.din(lsu_freeze_c1_dc4_clken), .dout(lsu_freeze_c1_dc4_clken_q), .clk(lsu_freeze_c2_dc4_clk), .clken(lsu_freeze_c2_dc4_clken), .rawclk(clk), .*); // Clock Headers rvoclkhdr lsu_c1dc3_cgc ( .en(lsu_c1_dc3_clken), .l1clk(lsu_c1_dc3_clk), .* ); @@ -182,32 +188,41 @@ module lsu_clkdomain rvoclkhdr lsu_c2dc4_cgc ( .en(lsu_c2_dc4_clken), .l1clk(lsu_c2_dc4_clk), .* ); rvoclkhdr lsu_c2dc5_cgc ( .en(lsu_c2_dc5_clken), .l1clk(lsu_c2_dc5_clk), .* ); -// rvclkhdr lsu_store_c1dc1_cgc (.en(lsu_store_c1_dc1_clken), .l1clk(lsu_store_c1_dc1_clk), .*); -// rvclkhdr lsu_store_c1dc2_cgc (.en(lsu_store_c1_dc2_clken), .l1clk(lsu_store_c1_dc2_clk), .*); -// rvclkhdr lsu_store_c1dc3_cgc (.en(lsu_store_c1_dc3_clken), .l1clk(lsu_store_c1_dc3_clk), .*); rvoclkhdr lsu_store_c1dc4_cgc (.en(lsu_store_c1_dc4_clken), .l1clk(lsu_store_c1_dc4_clk), .*); rvoclkhdr lsu_store_c1dc5_cgc (.en(lsu_store_c1_dc5_clken), .l1clk(lsu_store_c1_dc5_clk), .*); -// rvclkhdr lsu_freeze_c1dc1_cgc ( .en(lsu_freeze_c1_dc1_clken), .l1clk(lsu_freeze_c1_dc1_clk), .* ); - rvclkhdr lsu_freeze_c1dc2_cgc ( .en(lsu_freeze_c1_dc2_clken), .l1clk(lsu_freeze_c1_dc2_clk), .* ); - rvclkhdr lsu_freeze_c1dc3_cgc ( .en(lsu_freeze_c1_dc3_clken), .l1clk(lsu_freeze_c1_dc3_clk), .* ); +`ifdef RV_FPGA_OPTIMIZE + assign lsu_freeze_c1_dc2_clk = 1'b0; + assign lsu_freeze_c1_dc3_clk = 1'b0; + assign lsu_freeze_c2_dc1_clk = 1'b0; + assign lsu_freeze_c2_dc2_clk = 1'b0; + assign lsu_freeze_c2_dc3_clk = 1'b0; + assign lsu_freeze_c2_dc4_clk = 1'b0; - rvclkhdr lsu_freeze_c2dc1_cgc ( .en(lsu_freeze_c2_dc1_clken), .l1clk(lsu_freeze_c2_dc1_clk), .* ); - rvclkhdr lsu_freeze_c2dc2_cgc ( .en(lsu_freeze_c2_dc2_clken), .l1clk(lsu_freeze_c2_dc2_clk), .* ); - rvclkhdr lsu_freeze_c2dc3_cgc ( .en(lsu_freeze_c2_dc3_clken), .l1clk(lsu_freeze_c2_dc3_clk), .* ); - rvclkhdr lsu_freeze_c2dc4_cgc ( .en(lsu_freeze_c2_dc4_clken), .l1clk(lsu_freeze_c2_dc4_clk), .* ); + assign lsu_busm_clk = 1'b0; + assign lsu_dccm_c1_dc3_clk = 1'b0; + +`else + rvclkhdr lsu_freeze_c1dc2_cgc ( .en(lsu_freeze_c1_dc2_clken), .l1clk(lsu_freeze_c1_dc2_clk), .* ); // ifndef FPGA_OPTIMIZE + rvclkhdr lsu_freeze_c1dc3_cgc ( .en(lsu_freeze_c1_dc3_clken), .l1clk(lsu_freeze_c1_dc3_clk), .* ); // ifndef FPGA_OPTIMIZE + rvclkhdr lsu_freeze_c2dc1_cgc ( .en(lsu_freeze_c2_dc1_clken), .l1clk(lsu_freeze_c2_dc1_clk), .* ); // ifndef FPGA_OPTIMIZE + rvclkhdr lsu_freeze_c2dc2_cgc ( .en(lsu_freeze_c2_dc2_clken), .l1clk(lsu_freeze_c2_dc2_clk), .* ); // ifndef FPGA_OPTIMIZE + rvclkhdr lsu_freeze_c2dc3_cgc ( .en(lsu_freeze_c2_dc3_clken), .l1clk(lsu_freeze_c2_dc3_clk), .* ); // ifndef FPGA_OPTIMIZE + rvclkhdr lsu_freeze_c2dc4_cgc ( .en(lsu_freeze_c2_dc4_clken), .l1clk(lsu_freeze_c2_dc4_clk), .* ); // ifndef FPGA_OPTIMIZE + + rvclkhdr lsu_busm_cgc (.en(lsu_bus_clk_en), .l1clk(lsu_busm_clk), .*); // ifndef FPGA_OPTIMIZE + rvclkhdr lsu_dccm_c1dc3_cgc (.en(lsu_dccm_c1_dc3_clken), .l1clk(lsu_dccm_c1_dc3_clk), .*); // ifndef FPGA_OPTIMIZE + +`endif rvoclkhdr lsu_stbuf_c1_cgc ( .en(lsu_stbuf_c1_clken), .l1clk(lsu_stbuf_c1_clk), .* ); rvoclkhdr lsu_bus_ibuf_c1_cgc ( .en(lsu_bus_ibuf_c1_clken), .l1clk(lsu_bus_ibuf_c1_clk), .* ); rvoclkhdr lsu_bus_obuf_c1_cgc ( .en(lsu_bus_obuf_c1_clken), .l1clk(lsu_bus_obuf_c1_clk), .* ); rvoclkhdr lsu_bus_buf_c1_cgc ( .en(lsu_bus_buf_c1_clken), .l1clk(lsu_bus_buf_c1_clk), .* ); - rvclkhdr lsu_busm_cgc (.en(lsu_bus_clk_en), .l1clk(lsu_busm_clk), .*); - rvclkhdr lsu_dccm_c1dc3_cgc (.en(lsu_dccm_c1_dc3_clken), .l1clk(lsu_dccm_c1_dc3_clk), .*); -// rvclkhdr lsu_pic_c1dc3_cgc (.en(lsu_pic_c1_dc3_clken), .l1clk(lsu_pic_c1_dc3_clk), .*); - rvclkhdr lsu_free_cgc (.en(lsu_free_c2_clken), .l1clk(lsu_free_c2_clk), .*); + rvoclkhdr lsu_free_cgc (.en(lsu_free_c2_clken), .l1clk(lsu_free_c2_clk), .*); endmodule diff --git a/design/lsu/lsu_dccm_ctl.sv b/design/lsu/lsu_dccm_ctl.sv index 5122746..303a6fe 100644 --- a/design/lsu/lsu_dccm_ctl.sv +++ b/design/lsu/lsu_dccm_ctl.sv @@ -31,7 +31,10 @@ module lsu_dccm_ctl ( input logic lsu_freeze_c2_dc2_clk, // clocks input logic lsu_freeze_c2_dc3_clk, + input logic lsu_freeze_c2_dc2_clken, // clocks + input logic lsu_freeze_c2_dc3_clken, input logic lsu_dccm_c1_dc3_clk, + input logic lsu_dccm_c1_dc3_clken, input logic lsu_pic_c1_dc3_clken, input logic rst_l, @@ -178,14 +181,15 @@ module lsu_dccm_ctl assign picm_rd_data_dc3[63:0] = {picm_rd_data_lo_dc3[31:0], picm_rd_data_lo_dc3[31:0]} ; rvdffe #(32) picm_data_ff (.*, .din(picm_rd_data[31:0]), .dout(picm_rd_data_lo_dc3[31:0]), .en(lsu_pic_c1_dc3_clken)); if (DCCM_ENABLE == 1) begin: Gen_dccm_enable - rvdff #(1) dccm_rden_dc2ff (.*, .din(lsu_dccm_rden_dc1), .dout(lsu_dccm_rden_dc2), .clk(lsu_freeze_c2_dc2_clk)); - rvdff #(1) dccm_rden_dc3ff (.*, .din(lsu_dccm_rden_dc2), .dout(lsu_dccm_rden_dc3), .clk(lsu_freeze_c2_dc3_clk)); - rvdff #(DCCM_DATA_WIDTH) dccm_data_hi_ff (.*, .din(dccm_data_hi_dc2[DCCM_DATA_WIDTH-1:0]), .dout(dccm_data_hi_dc3[DCCM_DATA_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk)); - rvdff #(DCCM_DATA_WIDTH) dccm_data_lo_ff (.*, .din(dccm_data_lo_dc2[DCCM_DATA_WIDTH-1:0]), .dout(dccm_data_lo_dc3[DCCM_DATA_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk)); + rvdff_fpga #(1) dccm_rden_dc2ff (.*, .din(lsu_dccm_rden_dc1), .dout(lsu_dccm_rden_dc2), .clk(lsu_freeze_c2_dc2_clk), .clken(lsu_freeze_c2_dc2_clken), .rawclk(clk)); + rvdff_fpga #(1) dccm_rden_dc3ff (.*, .din(lsu_dccm_rden_dc2), .dout(lsu_dccm_rden_dc3), .clk(lsu_freeze_c2_dc3_clk), .clken(lsu_freeze_c2_dc3_clken), .rawclk(clk)); + + rvdff_fpga #(DCCM_DATA_WIDTH) dccm_data_hi_ff (.*, .din(dccm_data_hi_dc2[DCCM_DATA_WIDTH-1:0]), .dout(dccm_data_hi_dc3[DCCM_DATA_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk), .clken(lsu_dccm_c1_dc3_clken), .rawclk(clk)); + rvdff_fpga #(DCCM_DATA_WIDTH) dccm_data_lo_ff (.*, .din(dccm_data_lo_dc2[DCCM_DATA_WIDTH-1:0]), .dout(dccm_data_lo_dc3[DCCM_DATA_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk), .clken(lsu_dccm_c1_dc3_clken), .rawclk(clk)); + rvdff_fpga #(DCCM_ECC_WIDTH) dccm_data_ecc_hi_ff (.*, .din(dccm_data_ecc_hi_dc2[DCCM_ECC_WIDTH-1:0]), .dout(dccm_data_ecc_hi_dc3[DCCM_ECC_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk), .clken(lsu_dccm_c1_dc3_clken), .rawclk(clk)); + rvdff_fpga #(DCCM_ECC_WIDTH) dccm_data_ecc_lo_ff (.*, .din(dccm_data_ecc_lo_dc2[DCCM_ECC_WIDTH-1:0]), .dout(dccm_data_ecc_lo_dc3[DCCM_ECC_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk), .clken(lsu_dccm_c1_dc3_clken), .rawclk(clk)); - rvdff #(DCCM_ECC_WIDTH) dccm_data_ecc_hi_ff (.*, .din(dccm_data_ecc_hi_dc2[DCCM_ECC_WIDTH-1:0]), .dout(dccm_data_ecc_hi_dc3[DCCM_ECC_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk)); - rvdff #(DCCM_ECC_WIDTH) dccm_data_ecc_lo_ff (.*, .din(dccm_data_ecc_lo_dc2[DCCM_ECC_WIDTH-1:0]), .dout(dccm_data_ecc_lo_dc3[DCCM_ECC_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk)); end else begin: Gen_dccm_disable assign lsu_dccm_rden_dc2 = '0; assign lsu_dccm_rden_dc3 = '0; diff --git a/design/lsu/lsu_dccm_mem.sv b/design/lsu/lsu_dccm_mem.sv index 9d201a5..ed376ee 100644 --- a/design/lsu/lsu_dccm_mem.sv +++ b/design/lsu/lsu_dccm_mem.sv @@ -30,6 +30,7 @@ module lsu_dccm_mem import swerv_types::*; ( input logic clk, // clock + input logic free_clk, // clock input logic rst_l, input logic lsu_freeze_dc3, // freeze input logic clk_override, // clock override @@ -71,11 +72,24 @@ module lsu_dccm_mem logic [DCCM_NUM_BANKS-1:0] dccm_clk; logic [DCCM_NUM_BANKS-1:0] dccm_clken; + logic [DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo_q, dccm_rd_data_hi_q; + logic lsu_freeze_dc3_q; + assign rd_unaligned = (dccm_rd_addr_lo[DCCM_WIDTH_BITS+:DCCM_BANK_BITS] != dccm_rd_addr_hi[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]); +`ifdef RV_FPGA_OPTIMIZE + // Align the read data + assign dccm_rd_data_lo[DCCM_FDATA_WIDTH-1:0] = lsu_freeze_dc3_q ? dccm_rd_data_lo_q[DCCM_FDATA_WIDTH-1:0] : dccm_bank_dout[dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]][DCCM_FDATA_WIDTH-1:0]; + assign dccm_rd_data_hi[DCCM_FDATA_WIDTH-1:0] = lsu_freeze_dc3_q ? dccm_rd_data_hi_q[DCCM_FDATA_WIDTH-1:0] : dccm_bank_dout[dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]][DCCM_FDATA_WIDTH-1:0]; + + rvdff #(1) lsu_freeze_dc3ff(.din(lsu_freeze_dc3), .dout(lsu_freeze_dc3_q), .clk(free_clk), .*); + rvdffe #(DCCM_FDATA_WIDTH) dccm_rd_data_loff(.din(dccm_rd_data_lo), .dout(dccm_rd_data_lo_q), .en(~lsu_freeze_dc3_q), .*); + rvdffe #(DCCM_FDATA_WIDTH) dccm_rd_data_hiff(.din(dccm_rd_data_hi), .dout(dccm_rd_data_hi_q), .en(~lsu_freeze_dc3_q), .*); +`else // Align the read data assign dccm_rd_data_lo[DCCM_FDATA_WIDTH-1:0] = dccm_bank_dout[dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]][DCCM_FDATA_WIDTH-1:0]; assign dccm_rd_data_hi[DCCM_FDATA_WIDTH-1:0] = dccm_bank_dout[dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]][DCCM_FDATA_WIDTH-1:0]; +`endif // Generate even/odd address // assign rd_addr_even[(DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] = dccm_rd_addr_lo[2] ? dccm_rd_addr_hi[(DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] : @@ -103,7 +117,7 @@ module lsu_dccm_mem // clock gating section assign dccm_clken[i] = (wren_bank[i] | rden_bank[i] | clk_override) & ~lsu_freeze_dc3; - rvclkhdr lsu_dccm_cgc (.en(dccm_clken[i]), .l1clk(dccm_clk[i]), .*); + rvoclkhdr lsu_dccm_cgc (.en(dccm_clken[i]), .l1clk(dccm_clk[i]), .*); // end clock gating section `RV_DCCM_DATA_CELL dccm_bank ( @@ -119,8 +133,8 @@ module lsu_dccm_mem end : mem_bank // Flops - rvdffs #(DCCM_BANK_BITS) rd_addr_lo_ff (.*, .din(dccm_rd_addr_lo[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .dout(dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .en(~lsu_freeze_dc3)); - rvdffs #(DCCM_BANK_BITS) rd_addr_hi_ff (.*, .din(dccm_rd_addr_hi[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .dout(dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .en(~lsu_freeze_dc3)); + rvdffs #(DCCM_BANK_BITS) rd_addr_lo_ff (.*, .din(dccm_rd_addr_lo[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .dout(dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .en(~lsu_freeze_dc3), .clk(free_clk)); + rvdffs #(DCCM_BANK_BITS) rd_addr_hi_ff (.*, .din(dccm_rd_addr_hi[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .dout(dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .en(~lsu_freeze_dc3), .clk(free_clk)); endmodule // lsu_dccm_mem diff --git a/design/lsu/lsu_lsc_ctl.sv b/design/lsu/lsu_lsc_ctl.sv index 231e17a..4891a1a 100644 --- a/design/lsu/lsu_lsc_ctl.sv +++ b/design/lsu/lsu_lsc_ctl.sv @@ -45,6 +45,9 @@ module lsu_lsc_ctl input logic lsu_freeze_c2_dc1_clk, input logic lsu_freeze_c2_dc2_clk, input logic lsu_freeze_c2_dc3_clk, + input logic lsu_freeze_c2_dc1_clken, + input logic lsu_freeze_c2_dc2_clken, + input logic lsu_freeze_c2_dc3_clken, input logic lsu_store_c1_dc1_clken, input logic lsu_store_c1_dc2_clken, @@ -248,9 +251,6 @@ module lsu_lsc_ctl end // C2 clock for valid and C1 for other bits of packet - rvdff #(1) lsu_pkt_vlddc1ff (.*, .din(lsu_pkt_dc1_in.valid), .dout(lsu_pkt_dc1.valid), .clk(lsu_freeze_c2_dc1_clk)); - rvdff #(1) lsu_pkt_vlddc2ff (.*, .din(lsu_pkt_dc2_in.valid), .dout(lsu_pkt_dc2.valid), .clk(lsu_freeze_c2_dc2_clk)); - rvdff #(1) lsu_pkt_vlddc3ff (.*, .din(lsu_pkt_dc3_in.valid), .dout(lsu_pkt_dc3.valid), .clk(lsu_freeze_c2_dc3_clk)); rvdff #(1) lsu_pkt_vlddc4ff (.*, .din(lsu_pkt_dc4_in.valid), .dout(lsu_pkt_dc4.valid), .clk(lsu_c2_dc4_clk)); rvdff #(1) lsu_pkt_vlddc5ff (.*, .din(lsu_pkt_dc5_in.valid), .dout(lsu_pkt_dc5.valid), .clk(lsu_c2_dc5_clk)); @@ -321,19 +321,22 @@ module lsu_lsc_ctl rvdff #(32) end_addr_dc4ff (.*, .din(end_addr_dc3[31:0]), .dout(end_addr_dc4[31:0]), .clk(lsu_c1_dc4_clk)); rvdff #(32) end_addr_dc5ff (.*, .din(end_addr_dc4[31:0]), .dout(end_addr_dc5[31:0]), .clk(lsu_c1_dc5_clk)); - rvdff #(1) addr_in_dccm_dc2ff(.din(addr_in_dccm_dc1), .dout(addr_in_dccm_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); - rvdff #(1) addr_in_dccm_dc3ff(.din(addr_in_dccm_dc2), .dout(addr_in_dccm_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); - rvdff #(1) addr_in_pic_dc2ff(.din(addr_in_pic_dc1), .dout(addr_in_pic_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); - rvdff #(1) addr_in_pic_dc3ff(.din(addr_in_pic_dc2), .dout(addr_in_pic_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); + rvdff_fpga #(1) addr_in_dccm_dc2ff (.din(addr_in_dccm_dc1), .dout(addr_in_dccm_dc2), .clk(lsu_freeze_c1_dc2_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc2_clken), .*); + rvdff_fpga #(1) addr_in_dccm_dc3ff (.din(addr_in_dccm_dc2), .dout(addr_in_dccm_dc3), .clk(lsu_freeze_c1_dc3_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc3_clken), .*); + rvdff_fpga #(1) addr_in_pic_dc2ff (.din(addr_in_pic_dc1), .dout(addr_in_pic_dc2), .clk(lsu_freeze_c1_dc2_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc2_clken), .*); + rvdff_fpga #(1) addr_in_pic_dc3ff (.din(addr_in_pic_dc2), .dout(addr_in_pic_dc3), .clk(lsu_freeze_c1_dc3_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc3_clken), .*); + rvdff_fpga #(1) access_fault_dc2ff (.din(access_fault_dc1), .dout(access_fault_dc2), .clk(lsu_freeze_c1_dc2_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc2_clken), .*); + rvdff_fpga #(1) access_fault_dc3ff (.din(access_fault_dc2), .dout(access_fault_dc3), .clk(lsu_freeze_c1_dc3_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc3_clken), .*); + rvdff_fpga #(1) addr_external_dc2ff (.din(addr_external_dc1), .dout(addr_external_dc2), .clk(lsu_freeze_c1_dc2_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc2_clken), .*); + rvdff_fpga #(1) addr_external_dc3ff (.din(addr_external_dc2), .dout(addr_external_dc3), .clk(lsu_freeze_c1_dc3_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc3_clken), .*); + rvdff_fpga #(1) misaligned_fault_dc2ff (.din(misaligned_fault_dc1), .dout(misaligned_fault_dc2), .clk(lsu_freeze_c1_dc2_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc2_clken), .*); + rvdff_fpga #(1) misaligned_fault_dc3ff (.din(misaligned_fault_dc2), .dout(misaligned_fault_dc3), .clk(lsu_freeze_c1_dc3_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc3_clken), .*); + rvdff_fpga #(1) lsu_pkt_vlddc1ff (.din(lsu_pkt_dc1_in.valid), .dout(lsu_pkt_dc1.valid), .clk(lsu_freeze_c2_dc1_clk), .rawclk(clk), .clken(lsu_freeze_c2_dc1_clken), .*); + rvdff_fpga #(1) lsu_pkt_vlddc2ff (.din(lsu_pkt_dc2_in.valid), .dout(lsu_pkt_dc2.valid), .clk(lsu_freeze_c2_dc2_clk), .rawclk(clk), .clken(lsu_freeze_c2_dc2_clken), .*); + rvdff_fpga #(1) lsu_pkt_vlddc3ff (.din(lsu_pkt_dc3_in.valid), .dout(lsu_pkt_dc3.valid), .clk(lsu_freeze_c2_dc3_clk), .rawclk(clk), .clken(lsu_freeze_c2_dc3_clken), .*); - rvdff #(1) addr_external_dc2ff(.din(addr_external_dc1), .dout(addr_external_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); - rvdff #(1) addr_external_dc3ff(.din(addr_external_dc2), .dout(addr_external_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); rvdff #(1) addr_external_dc4ff(.din(addr_external_dc3), .dout(addr_external_dc4), .clk(lsu_c1_dc4_clk), .*); rvdff #(1) addr_external_dc5ff(.din(addr_external_dc4), .dout(addr_external_dc5), .clk(lsu_c1_dc5_clk), .*); - rvdff #(1) access_fault_dc2ff(.din(access_fault_dc1), .dout(access_fault_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); - rvdff #(1) access_fault_dc3ff(.din(access_fault_dc2), .dout(access_fault_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); - rvdff #(1) misaligned_fault_dc2ff(.din(misaligned_fault_dc1), .dout(misaligned_fault_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); - rvdff #(1) misaligned_fault_dc3ff(.din(misaligned_fault_dc2), .dout(misaligned_fault_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); endmodule diff --git a/design/lsu/lsu_stbuf.sv b/design/lsu/lsu_stbuf.sv index 318302f..dea32c4 100644 --- a/design/lsu/lsu_stbuf.sv +++ b/design/lsu/lsu_stbuf.sv @@ -38,6 +38,9 @@ module lsu_stbuf input logic lsu_freeze_c1_dc2_clk, // freeze clock input logic lsu_freeze_c1_dc3_clk, // freeze clock + input logic lsu_freeze_c2_dc2_clken, + input logic lsu_freeze_c2_dc3_clken, + input logic lsu_freeze_c1_dc2_clken, input logic lsu_freeze_c1_dc3_clken, input logic lsu_c1_dc4_clk, // lsu pipe clock @@ -232,8 +235,11 @@ module lsu_stbuf rvdff #(.WIDTH(DEPTH_LOG2)) WrPtr_dc4ff (.din(WrPtr_dc3[DEPTH_LOG2-1:0]), .dout(WrPtr_dc4[DEPTH_LOG2-1:0]), .clk(lsu_c1_dc4_clk), .*); rvdff #(.WIDTH(DEPTH_LOG2)) WrPtr_dc5ff (.din(WrPtr_dc4[DEPTH_LOG2-1:0]), .dout(WrPtr_dc5[DEPTH_LOG2-1:0]), .clk(lsu_c1_dc5_clk), .*); - rvdff #(.WIDTH(1)) ldst_dual_dc2ff (.din(ldst_dual_dc1), .dout(ldst_dual_dc2), .clk(lsu_freeze_c1_dc2_clk), .*); - rvdff #(.WIDTH(1)) ldst_dual_dc3ff (.din(ldst_dual_dc2), .dout(ldst_dual_dc3), .clk(lsu_freeze_c1_dc3_clk), .*); + rvdff_fpga #(.WIDTH(1)) ldst_dual_dc2ff (.din(ldst_dual_dc1), .dout(ldst_dual_dc2), .clk(lsu_freeze_c1_dc2_clk), .clken(lsu_freeze_c1_dc2_clken), .rawclk(clk), .*); + rvdff_fpga #(.WIDTH(1)) ldst_dual_dc3ff (.din(ldst_dual_dc2), .dout(ldst_dual_dc3), .clk(lsu_freeze_c1_dc3_clk), .clken(lsu_freeze_c1_dc3_clken), .rawclk(clk), .*); + rvdff_fpga #(.WIDTH(BYTE_WIDTH)) stbuf_fwdbyteen_hi_dc3ff (.din(stbuf_fwdbyteen_hi_fn_dc2[BYTE_WIDTH-1:0]), .dout(stbuf_fwdbyteen_hi_dc3[BYTE_WIDTH-1:0]), .clk(lsu_freeze_c1_dc3_clk), .clken(lsu_freeze_c1_dc3_clken), .rawclk(clk), .*); + rvdff_fpga #(.WIDTH(BYTE_WIDTH)) stbuf_fwdbyteen_lo_dc3ff (.din(stbuf_fwdbyteen_lo_fn_dc2[BYTE_WIDTH-1:0]), .dout(stbuf_fwdbyteen_lo_dc3[BYTE_WIDTH-1:0]), .clk(lsu_freeze_c1_dc3_clk), .clken(lsu_freeze_c1_dc3_clken), .rawclk(clk), .*); + rvdff #(.WIDTH(1)) ldst_dual_dc4ff (.din(ldst_dual_dc3), .dout(ldst_dual_dc4), .clk(lsu_c1_dc4_clk), .*); rvdff #(.WIDTH(1)) ldst_dual_dc5ff (.din(ldst_dual_dc4), .dout(ldst_dual_dc5), .clk(lsu_c1_dc5_clk), .*); @@ -387,8 +393,6 @@ module lsu_stbuf rvdffs #(.WIDTH(DEPTH_LOG2)) WrPtrff (.din(NxtWrPtr[DEPTH_LOG2-1:0]), .dout(WrPtr[DEPTH_LOG2-1:0]), .en(WrPtrEn), .clk(lsu_stbuf_c1_clk), .*); rvdffs #(.WIDTH(DEPTH_LOG2)) RdPtrff (.din(NxtRdPtr[DEPTH_LOG2-1:0]), .dout(RdPtr[DEPTH_LOG2-1:0]), .en(RdPtrEn), .clk(lsu_stbuf_c1_clk), .*); - rvdff #(.WIDTH(BYTE_WIDTH)) stbuf_fwdbyteen_hi_dc3ff (.din(stbuf_fwdbyteen_hi_fn_dc2[BYTE_WIDTH-1:0]), .dout(stbuf_fwdbyteen_hi_dc3[BYTE_WIDTH-1:0]), .clk(lsu_freeze_c1_dc3_clk), .*); - rvdff #(.WIDTH(BYTE_WIDTH)) stbuf_fwdbyteen_lo_dc3ff (.din(stbuf_fwdbyteen_lo_fn_dc2[BYTE_WIDTH-1:0]), .dout(stbuf_fwdbyteen_lo_dc3[BYTE_WIDTH-1:0]), .clk(lsu_freeze_c1_dc3_clk), .*); rvdffe #(.WIDTH(DATA_WIDTH)) stbuf_fwddata_hi_dc3ff (.din(stbuf_fwddata_hi_fn_dc2[DATA_WIDTH-1:0]), .dout(stbuf_fwddata_hi_dc3[DATA_WIDTH-1:0]), .en(lsu_freeze_c1_dc3_clken), .*); rvdffe #(.WIDTH(DATA_WIDTH)) stbuf_fwddata_lo_dc3ff (.din(stbuf_fwddata_lo_fn_dc2[DATA_WIDTH-1:0]), .dout(stbuf_fwddata_lo_dc3[DATA_WIDTH-1:0]), .en(lsu_freeze_c1_dc3_clken), .*); diff --git a/design/mem.sv b/design/mem.sv index f406c31..5cc4808 100644 --- a/design/mem.sv +++ b/design/mem.sv @@ -50,7 +50,7 @@ module mem `endif // Icache and Itag Ports `ifdef RV_ICACHE_ENABLE //temp - input logic [31:3] ic_rw_addr, + input logic [31:2] ic_rw_addr, input logic [3:0] ic_tag_valid, input logic [3:0] ic_wr_en, input logic ic_rd_en, @@ -98,6 +98,9 @@ module mem localparam DCCM_ENABLE = 1'b0; `endif + logic free_clk; + rvoclkhdr free_cg ( .en(1'b1), .l1clk(free_clk), .* ); + // DCCM Instantiation if (DCCM_ENABLE == 1) begin: Gen_dccm_enable lsu_dccm_mem dccm ( diff --git a/design/pic_ctrl.sv b/design/pic_ctrl.sv index fb033f6..53b2fe2 100644 --- a/design/pic_ctrl.sv +++ b/design/pic_ctrl.sv @@ -201,7 +201,7 @@ for (i=0; iTM 1.7 from Western Digital +## Release Notes + +* RV_FPGA_OPTIMIZE is now default build option. + * Use -fpga_optimize=0 to build for lower power (ASIC) flows. +* Fixed a couple of cases of clock enable qualification for power reduction +* Fixes for 4 debug compliance issues reported by Codasip +* Fixed some remaining clock gating issues for RV_FPGA_OPTIMIZE to improve fpga speed + + + # SweRV RISC-V CoreTM 1.6 from Western Digital ## Release Notes