diff --git a/README.md b/README.md
index fc8f36c..7e3161a 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-# EH1 SweRV RISC-V CoreTM 1.6 from Western Digital
+# EH1 SweRV RISC-V CoreTM 1.7 from Western Digital
-This repository contains the SweRV EH1 ver 1.6 CoreTM design RTL
+This repository contains the SweRV EH1 CoreTM design RTL
## License
@@ -28,7 +28,7 @@ Files under the [tools](tools/) directory may be available under a different lic
## Dependencies
-- Verilator **(4.020 or later)** must be installed on the system if running with verilator
+- Verilator **(4.030 or later)** must be installed on the system if running with verilator
- If adding/removing instructions, espresso must be installed (used by *tools/coredecode*)
- RISCV tool chain (based on gcc version 7.3 or higher) must be
installed so that it can be used to prepare RISCV binaries to run.
@@ -192,7 +192,12 @@ The `$RV_ROOT/testbench/hex` directory contains precompiled hex files of the tes
**Building an FPGA speed optimized model:**
-Use ``-fpga_optimize=1`` option to ``swerv.config`` to build a model that is removes clock gating logic from flop model so that the FPGA builds can run a higher speeds.
+Use ``-fpga_optimize=1`` option to ``swerv.config`` to build a model that removes clock gating logic from flop model so that the FPGA builds can run at higher speeds. **This is now the default option for
+targets other than ``default_pd``.**
+
+**Building a Power optimized model (ASIC flows):**
+Use ``-fpga_optimize=0`` option to ``swerv.config`` to build a model that **enables** clock gating logic into the flop model so that the ASIC flows get a better power footprint. **This is now the default option for
+target``default_pd``.**
----
Western Digital, the Western Digital logo, G-Technology, SanDisk, Tegile, Upthere, WD, SweRV Core, SweRV ISS,
diff --git a/configs/swerv.config b/configs/swerv.config
index 4ff6d4f..f4d6dc4 100755
--- a/configs/swerv.config
+++ b/configs/swerv.config
@@ -270,6 +270,7 @@ if ($target eq "default") {
if (!defined($pic_offset)) { $pic_offset="0xc0000"; } # 3*256*1024
if (!defined($pic_size)) { $pic_size=32; }
if (!defined($pic_total_int)) { $pic_total_int=8; }
+ if (!defined($fpga_optimize)) { $fpga_optimize=1; }
# default is AXI bus
}
@@ -295,6 +296,7 @@ elsif ($target eq "default_ahb") {
if (!defined($pic_offset)) { $pic_offset="0xc0000"; } # 3*256*1024
if (!defined($pic_size)) { $pic_size=32; }
if (!defined($pic_total_int)) { $pic_total_int=8; }
+ if (!defined($fpga_optimize)) { $fpga_optimize=1; }
$ahb_lite = 1;
} elsif ($target eq "default_pd") {
@@ -319,6 +321,7 @@ elsif ($target eq "default_ahb") {
if (!defined($pic_offset)) { $pic_offset="0xc0000"; } # 3*256*1024
if (!defined($pic_size)) { $pic_size=32; }
if (!defined($pic_total_int)) { $pic_total_int=8; }
+ if (!defined($fpga_optimize)) { $fpga_optimize=0; }
} elsif ($target eq "high_perf") {
if (!defined($ret_stack_size)) { $ret_stack_size=4; }
@@ -342,6 +345,7 @@ elsif ($target eq "default_ahb") {
if (!defined($pic_offset)) { $pic_offset="0xc0000"; } # 3*256*1024
if (!defined($pic_size)) { $pic_size=32; }
if (!defined($pic_total_int)) { $pic_total_int=8; }
+ if (!defined($fpga_optimize)) { $fpga_optimize=1; }
} else {
die "$self: ERROR! Unsupported target \"$target\". Supported targets are: \"default, default_ahb, default_pd, high_perf\"!\n";
@@ -413,7 +417,7 @@ our %csr = (#{{{
"exists" => "true",
},
"mimpid" => {
- "reset" => "0x3",
+ "reset" => "0x4",
"mask" => "0x0",
"exists" => "true",
},
@@ -890,7 +894,7 @@ our %config = (#{{{
"external_prog" => 'derived, overridable', # Testbench only
"debug_sb_mem" => 'derived, overridable', # Testbench only
"external_data_1" => 'derived, overridable', # Testbench only
- "external_mem_hole" => 'derived, overridable', # Testbench only
+ "external_mem_hole" => 'default disabled', # Testbench only
# "consoleio" => 'derived', # Part of serial io.
},
"bus" => {
@@ -1376,55 +1380,54 @@ if (hex($config{protection}{data_access_enable0}) > 0 ||
hex($config{protection}{inst_access_enable4}) > 0 ||
hex($config{protection}{inst_access_enable5}) > 0 ||
hex($config{protection}{inst_access_enable6}) > 0 ||
- hex($config{protection}{inst_access_enable7}) > 0) {
+ hex($config{protection}{inst_access_enable7}) > 0 ||
+ $config{memmap}{external_mem_hole} eq "default disabled"){
delete($config{memmap}{external_mem_hole}) ;
} else {
- # Unused region to create a memory map hole
- for (my $rgn = 15;$rgn >= 0; $rgn--) {
- if (!defined($regions_used{$rgn})) {
- $config{memmap}{external_mem_hole} = ($rgn << 28);
- $regions_used{$rgn} = 1;
- last;
+ # Unused region to create a memory map hole, if not already specified
+ if ($config{memmap}{external_mem_hole} eq "derived, overridable") {
+ for (my $rgn = 15;$rgn >= 0; $rgn--) {
+ if (!defined($regions_used{$rgn})) {
+ $config{memmap}{external_mem_hole} = ($rgn << 28);
+ $regions_used{$rgn} = 1;
+ last;
+ }
}
- }
- if ($config{memmap}{external_mem_hole} == 0) {
- $config{protection}{data_access_addr0} = "0x10000000";
- $config{protection}{data_access_mask0} = "0xffffffff";
- $config{protection}{data_access_enable0} = "1";
- } elsif (($config{memmap}{external_mem_hole}>>28) == 16) {
- $config{protection}{data_access_addr0} = "0x00000000";
- $config{protection}{data_access_mask0} = "0xefffffff";
- $config{protection}{data_access_enable0} = "1";
} else {
- my $hreg = $config{memmap}{external_mem_hole}>>28;
- $config{protection}{data_access_addr0} = sprintf("0x%x", (($hreg^8)&8)<<28);
- $config{protection}{data_access_mask0} = "0x7fffffff";
- $config{protection}{data_access_addr1} = sprintf("0x%x", ($hreg&8) << 28 |(($hreg^4)&4)<<28);
- $config{protection}{data_access_mask1} = "0x3fffffff";
- $config{protection}{data_access_addr2} = sprintf("0x%x", ($hreg&12) <<28 | (($hreg^2)&2) <<28);
- $config{protection}{data_access_mask2} = "0x1fffffff";
- $config{protection}{data_access_addr3} = sprintf("0x%x", ($hreg&14) << 28 |(($hreg^1)&1)<<28);
- $config{protection}{data_access_mask3} = "0x0fffffff";
- $config{protection}{data_access_enable0} = "1";
- $config{protection}{data_access_enable1} = "1";
- $config{protection}{data_access_enable2} = "1";
- $config{protection}{data_access_enable3} = "1";
- $config{protection}{inst_access_addr0} = sprintf("0x%x", (($hreg^8)&8)<<28);
- $config{protection}{inst_access_mask0} = "0x7fffffff";
- $config{protection}{inst_access_addr1} = sprintf("0x%x", ($hreg&8) << 28 |(($hreg^4)&4)<<28);
- $config{protection}{inst_access_mask1} = "0x3fffffff";
- $config{protection}{inst_access_addr2} = sprintf("0x%x", ($hreg&12) <<28 | (($hreg^2)&2) <<28);
- $config{protection}{inst_access_mask2} = "0x1fffffff";
- $config{protection}{inst_access_addr3} = sprintf("0x%x", ($hreg&14) << 28 |(($hreg^1)&1)<<28);
- $config{protection}{inst_access_mask3} = "0x0fffffff";
- $config{protection}{inst_access_enable0} = "1";
- $config{protection}{inst_access_enable1} = "1";
- $config{protection}{inst_access_enable2} = "1";
- $config{protection}{inst_access_enable3} = "1";
+ my $rgn = hex($config{memmap}{external_mem_hole})>>28;
+ $config{memmap}{external_mem_hole} = ($rgn << 28);
+ $regions_used{$rgn} =1;
}
+ my $hreg = $config{memmap}{external_mem_hole}>>28;
+ $config{protection}{data_access_addr0} = sprintf("0x%x", (($hreg^8)&8)<<28);
+ $config{protection}{data_access_mask0} = "0x7fffffff";
+ $config{protection}{data_access_addr1} = sprintf("0x%x", ($hreg&8) << 28 |(($hreg^4)&4)<<28);
+ $config{protection}{data_access_mask1} = "0x3fffffff";
+ $config{protection}{data_access_addr2} = sprintf("0x%x", ($hreg&12) <<28 | (($hreg^2)&2) <<28);
+ $config{protection}{data_access_mask2} = "0x1fffffff";
+ $config{protection}{data_access_addr3} = sprintf("0x%x", ($hreg&14) << 28 |(($hreg^1)&1)<<28);
+ $config{protection}{data_access_mask3} = "0x0fffffff";
+ $config{protection}{data_access_enable0} = "1";
+ $config{protection}{data_access_enable1} = "1";
+ $config{protection}{data_access_enable2} = "1";
+ $config{protection}{data_access_enable3} = "1";
+ $config{protection}{inst_access_addr0} = sprintf("0x%x", (($hreg^8)&8)<<28);
+ $config{protection}{inst_access_mask0} = "0x7fffffff";
+ $config{protection}{inst_access_addr1} = sprintf("0x%x", ($hreg&8) << 28 |(($hreg^4)&4)<<28);
+ $config{protection}{inst_access_mask1} = "0x3fffffff";
+ $config{protection}{inst_access_addr2} = sprintf("0x%x", ($hreg&12) <<28 | (($hreg^2)&2) <<28);
+ $config{protection}{inst_access_mask2} = "0x1fffffff";
+ $config{protection}{inst_access_addr3} = sprintf("0x%x", ($hreg&14) << 28 |(($hreg^1)&1)<<28);
+ $config{protection}{inst_access_mask3} = "0x0fffffff";
+ $config{protection}{inst_access_enable0} = "1";
+ $config{protection}{inst_access_enable1} = "1";
+ $config{protection}{inst_access_enable2} = "1";
+ $config{protection}{inst_access_enable3} = "1";
+
$config{memmap}{external_mem_hole} = sprintf("0x%08x", $config{memmap}{external_mem_hole});
}
+
#Define 5 unused regions for used in TG
foreach my $unr (reverse(0 .. 15)) {
diff --git a/design/dbg/dbg.sv b/design/dbg/dbg.sv
index 6e46423..0027eea 100644
--- a/design/dbg/dbg.sv
+++ b/design/dbg/dbg.sv
@@ -161,6 +161,8 @@ module dbg (
logic dmcontrol_wren, dmcontrol_wren_Q;
// command
logic command_wren;
+ logic command_transfer_din;
+ logic command_postexec_din;
logic [31:0] command_din;
// needed to send the read data back for dmi reads
logic [31:0] dmi_reg_rdata_din;
@@ -180,6 +182,7 @@ module dbg (
logic [2:0] sbcs_sberror_din;
logic sbcs_unaligned;
logic sbcs_illegal_size;
+ logic [19:15] sbcs_reg_int;
// data
logic sbdata0_reg_wren0;
@@ -239,7 +242,10 @@ module dbg (
rvoclkhdr dbg_free_cgc (.en(dbg_free_clken), .l1clk(dbg_free_clk), .*);
rvoclkhdr sb_free_cgc (.en(sb_free_clken), .l1clk(sb_free_clk), .*);
- rvclkhdr bus_cgc (.en(bus_clken), .l1clk(bus_clk), .*);
+
+`ifndef RV_FPGA_OPTIMIZE
+ rvclkhdr bus_cgc (.en(bus_clken), .l1clk(bus_clk), .*); // ifndef FPGA_OPTIMIZE
+`endif
// end clocking section
@@ -251,6 +257,7 @@ module dbg (
// sbcs[31:29], sbcs - [22]:sbbusyerror, [21]: sbbusy, [20]:sbreadonaddr, [19:17]:sbaccess, [16]:sbautoincrement, [15]:sbreadondata, [14:12]:sberror, sbsize=32, 128=0, 64/32/16/8 are legal
assign sbcs_reg[31:29] = 3'b1;
assign sbcs_reg[28:23] = '0;
+ assign sbcs_reg[19:15] = {sbcs_reg_int[19], ~sbcs_reg_int[18], sbcs_reg_int[17:15]};
assign sbcs_reg[11:5] = 7'h20;
assign sbcs_reg[4:0] = 5'b01111;
assign sbcs_wren = (dmi_reg_addr == 7'h38) & dmi_reg_en & dmi_reg_wr_en & (sb_state == SBIDLE); // & (sbcs_reg[14:12] == 3'b000);
@@ -261,7 +268,8 @@ module dbg (
rvdffs #(1) sbcs_sbbusyerror_reg (.din(sbcs_sbbusyerror_din), .dout(sbcs_reg[22]), .en(sbcs_sbbusyerror_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk));
rvdffs #(1) sbcs_sbbusy_reg (.din(sbcs_sbbusy_din), .dout(sbcs_reg[21]), .en(sbcs_sbbusy_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk));
rvdffs #(1) sbcs_sbreadonaddr_reg (.din(dmi_reg_wdata[20]), .dout(sbcs_reg[20]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk));
- rvdffs #(5) sbcs_misc_reg (.din(dmi_reg_wdata[19:15]), .dout(sbcs_reg[19:15]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk));
+ rvdffs #(5) sbcs_misc_reg (.din({dmi_reg_wdata[19],~dmi_reg_wdata[18],dmi_reg_wdata[17:15]}),
+ .dout(sbcs_reg_int[19:15]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk));
rvdffs #(3) sbcs_error_reg (.din(sbcs_sberror_din[2:0]), .dout(sbcs_reg[14:12]), .en(sbcs_sberror_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk));
assign sbcs_unaligned = ((sbcs_reg[19:17] == 3'b001) & sbaddress0_reg[0]) |
@@ -354,11 +362,14 @@ module dbg (
assign abstractcs_reg[7:4] = '0;
assign abstractcs_reg[3:0] = 4'h2; // One data register
assign abstractcs_error_sel0 = abstractcs_reg[12] & dmi_reg_en & ((dmi_reg_wr_en & ( (dmi_reg_addr == 7'h16) | (dmi_reg_addr == 7'h17))) | (dmi_reg_addr == 7'h4));
- assign abstractcs_error_sel1 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h17) & ~((dmi_reg_wdata[31:24] == 8'b0) | (dmi_reg_wdata[31:24] == 8'h2));
+ assign abstractcs_error_sel1 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h17) &
+ ((~((dmi_reg_wdata[31:24] == 8'b0) | (dmi_reg_wdata[31:24] == 8'h2))) | // Illegal command
+ ((dmi_reg_wdata[22:20] != 3'b010) & ((dmi_reg_wdata[31:24] == 8'h2) | ((dmi_reg_wdata[31:24] == 8'h0) & dmi_reg_wdata[17]))) | // Illegal size
+ ((dmi_reg_wdata[31:24] == 8'h0) & (dmi_reg_wdata[19] | dmi_reg_wdata[18])) | //aarpostincrement/postexec for abstract register access
+ ((dmi_reg_wdata[31:24] == 8'h2) & dmi_reg_wdata[19])); //aampostincrement for abstract memory access
assign abstractcs_error_sel2 = core_dbg_cmd_done & core_dbg_cmd_fail;
assign abstractcs_error_sel3 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h17) & (dbg_state != HALTED);
- assign abstractcs_error_sel4 = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en &
- ((dmi_reg_wdata[22:20] != 3'b010) | ((dmi_reg_wdata[31:24] == 8'h2) && (|data1_reg[1:0]))); // Only word size is allowed
+ assign abstractcs_error_sel4 = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en & ((dmi_reg_wdata[31:24] == 8'h2) && (|data1_reg[1:0])); //Unaligned address for abstract memory
assign abstractcs_error_sel5 = (dmi_reg_addr == 7'h16) & dmi_reg_en & dmi_reg_wr_en;
@@ -379,7 +390,9 @@ module dbg (
// command register - implemented all the bits in this register
// command[16] = 1: write, 0: read
assign command_wren = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en & (dbg_state == HALTED);
- assign command_din[31:0] = {dmi_reg_wdata[31:24],1'b0,dmi_reg_wdata[22:20],3'b0,dmi_reg_wdata[16:0]};
+ assign command_postexec_din = (dmi_reg_wdata[31:24] == 8'h0) & dmi_reg_wdata[18];
+ assign command_transfer_din = (dmi_reg_wdata[31:24] == 8'h0) & dmi_reg_wdata[17];
+ assign command_din[31:0] = {dmi_reg_wdata[31:24],1'b0,dmi_reg_wdata[22:19],command_postexec_din,command_transfer_din, dmi_reg_wdata[16:0]};
rvdffe #(32) dmcommand_reg (.*, .din(command_din[31:0]), .dout(command_reg[31:0]), .en(command_wren), .rst_l(dbg_dm_rst_l));
// data0 reg
@@ -388,7 +401,7 @@ module dbg (
assign data0_reg_wren = data0_reg_wren0 | data0_reg_wren1;
assign data0_din[31:0] = ({32{data0_reg_wren0}} & dmi_reg_wdata[31:0]) |
- ({32{data0_reg_wren1}} & core_dbg_rddata[31:0]);
+ ({32{data0_reg_wren1}} & core_dbg_rddata[31:0]);
rvdffe #(32) dbg_data0_reg (.*, .din(data0_din[31:0]), .dout(data0_reg[31:0]), .en(data0_reg_wren), .rst_l(dbg_dm_rst_l));
@@ -434,8 +447,9 @@ module dbg (
dbg_resume_req = dbg_state_en & (dbg_nxtstate == RESUMING); // single cycle pulse to core if resuming
end
CMD_START: begin
- dbg_nxtstate = dmcontrol_reg[1] ? IDLE : (|abstractcs_reg[10:8]) ? CMD_DONE : CMD_WAIT; // new command sent to the core
- dbg_state_en = dbg_cmd_valid | (|abstractcs_reg[10:8]) | dmcontrol_reg[1];
+ // Don't execute the command if cmderror or transfer=0 for abstract register access
+ dbg_nxtstate = dmcontrol_reg[1] ? IDLE : ((|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17])) ? CMD_DONE : CMD_WAIT; // new command sent to the core
+ dbg_state_en = dbg_cmd_valid | (|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17]) | dmcontrol_reg[1];
end
CMD_WAIT: begin
dbg_nxtstate = dmcontrol_reg[1] ? IDLE : CMD_DONE;
@@ -464,7 +478,7 @@ module dbg (
assign dmi_reg_rdata_din[31:0] = ({32{dmi_reg_addr == 7'h4}} & data0_reg[31:0]) |
({32{dmi_reg_addr == 7'h5}} & data1_reg[31:0]) |
- ({32{dmi_reg_addr == 7'h10}} & dmcontrol_reg[31:0]) |
+ ({32{dmi_reg_addr == 7'h10}} & {2'b0,dmcontrol_reg[29],1'b0,dmcontrol_reg[27:0]}) | // Read0 to Write only bits
({32{dmi_reg_addr == 7'h11}} & dmstatus_reg[31:0]) |
({32{dmi_reg_addr == 7'h16}} & abstractcs_reg[31:0]) |
({32{dmi_reg_addr == 7'h17}} & command_reg[31:0]) |
@@ -483,7 +497,7 @@ module dbg (
// interface for the core
assign dbg_cmd_addr[31:0] = (command_reg[31:24] == 8'h2) ? {data1_reg[31:2],2'b0} : {20'b0, command_reg[11:0]}; // Only word addresses for abstract memory
assign dbg_cmd_wrdata[31:0] = data0_reg[31:0];
- assign dbg_cmd_valid = (dbg_state == CMD_START) & ~(|abstractcs_reg[10:8]) & dma_dbg_ready;
+ assign dbg_cmd_valid = (dbg_state == CMD_START) & ~((|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17])) & dma_dbg_ready;
assign dbg_cmd_write = command_reg[16];
assign dbg_cmd_type[1:0] = (command_reg[31:24] == 8'h2) ? 2'b10 : {1'b0, (command_reg[15:12] == 4'b0)};
assign dbg_cmd_size[1:0] = command_reg[21:20];
@@ -554,7 +568,7 @@ module dbg (
sb_state_en = 1'b1;
sbcs_sbbusy_wren = 1'b1; // reset the single read
sbcs_sbbusy_din = 1'b0;
- sbaddress0_reg_wren1 = sbcs_reg[16]; // auto increment was set. Update to new address after completing the current command
+ sbaddress0_reg_wren1 = sbcs_reg[16] & (sbcs_reg[14:12] == 3'b0); // auto increment was set and no error. Update to new address after completing the current command
end
default : begin
sb_nxtstate = SBIDLE;
@@ -572,6 +586,9 @@ module dbg (
//rvdff #(.WIDTH(1)) bus_clken_ff (.din(dbg_bus_clk_en), .dout(dbg_bus_clk_en_q), .rst_l(dbg_dm_rst_l), .clk(dbg_sb_c2_free_clk), .*);
+ rvdff_fpga #(2) axi_bresp_ff (.din(sb_axi_bresp[1:0]), .dout(sb_axi_bresp_q[1:0]), .rst_l(dbg_dm_rst_l), .clk(bus_clk), .clken(bus_clken), .rawclk(clk), .*);
+ rvdff_fpga #(2) axi_rresp_ff (.din(sb_axi_rresp[1:0]), .dout(sb_axi_rresp_q[1:0]), .rst_l(dbg_dm_rst_l), .clk(bus_clk), .clken(bus_clken), .rawclk(clk), .*);
+
rvdffs #(.WIDTH(1)) axi_awvalid_ff (.din(sb_axi_awvalid), .dout(sb_axi_awvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*);
rvdffs #(.WIDTH(1)) axi_awready_ff (.din(sb_axi_awready), .dout(sb_axi_awready_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*);
rvdffs #(.WIDTH(1)) axi_wvalid_ff (.din(sb_axi_wvalid), .dout(sb_axi_wvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*);
@@ -581,10 +598,8 @@ module dbg (
rvdffs #(.WIDTH(1)) axi_bvalid_ff (.din(sb_axi_bvalid), .dout(sb_axi_bvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*);
rvdffs #(.WIDTH(1)) axi_bready_ff (.din(sb_axi_bready), .dout(sb_axi_bready_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*);
- rvdff #(.WIDTH(2)) axi_bresp_ff (.din(sb_axi_bresp[1:0]), .dout(sb_axi_bresp_q[1:0]), .rst_l(dbg_dm_rst_l), .clk(bus_clk), .*);
rvdffs #(.WIDTH(1)) axi_rvalid_ff (.din(sb_axi_rvalid), .dout(sb_axi_rvalid_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*);
rvdffs #(.WIDTH(1)) axi_rready_ff (.din(sb_axi_rready), .dout(sb_axi_rready_q), .en(dbg_bus_clk_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk), .*);
- rvdff #(.WIDTH(2)) axi_rresp_ff (.din(sb_axi_rresp[1:0]), .dout(sb_axi_rresp_q[1:0]), .rst_l(dbg_dm_rst_l), .clk(bus_clk), .*);
rvdffe #(.WIDTH(64)) axi_rdata_ff (.din(sb_axi_rdata[63:0]), .dout(sb_axi_rdata_q[63:0]), .rst_l(dbg_dm_rst_l), .en(bus_clken), .*);
// AXI Request signals
diff --git a/design/dec/dec_tlu_ctl.sv b/design/dec/dec_tlu_ctl.sv
index 18fcdb1..4ec04ac 100644
--- a/design/dec/dec_tlu_ctl.sv
+++ b/design/dec/dec_tlu_ctl.sv
@@ -2533,7 +2533,7 @@ assign dec_csr_legal_d = ( dec_csr_any_unq_d &
assign dec_csr_rddata_d[31:0] = ( ({32{csr_misa}} & 32'h40001104) |
({32{csr_mvendorid}} & 32'h00000045) |
({32{csr_marchid}} & 32'h0000000b) |
- ({32{csr_mimpid}} & 32'h3) |
+ ({32{csr_mimpid}} & 32'h4) |
({32{csr_mstatus}} & {19'b0, 2'b11, 3'b0, mstatus[1], 3'b0, mstatus[0], 3'b0}) |
({32{csr_mtvec}} & {mtvec[30:1], 1'b0, mtvec[0]}) |
({32{csr_mip}} & {1'b0, mip[5:3], 16'b0, mip[2], 3'b0, mip[1], 3'b0, mip[0], 3'b0}) |
diff --git a/design/dma_ctrl.sv b/design/dma_ctrl.sv
index 4238b28..8a4baac 100644
--- a/design/dma_ctrl.sv
+++ b/design/dma_ctrl.sv
@@ -396,9 +396,6 @@ module dma_ctrl (
// Inputs
rvdff #(1) ahbs_bus_clken_ff (.din(dma_bus_clk_en), .dout(dma_bus_clk_en_q), .clk(free_clk), .*);
- rvdff #(1) fifo_full_bus_ff (.din(fifo_full_spec), .dout(fifo_full_spec_bus), .clk(dma_bus_clk), .*);
- rvdff #(1) dbg_dma_bubble_ff (.din(dbg_dma_bubble), .dout(dbg_dma_bubble_bus), .clk(dma_bus_clk), .*);
- rvdff #(1) dec_tlu_stall_dma_ff (.din(dec_tlu_stall_dma), .dout(dec_tlu_stall_dma_bus), .clk(dma_bus_clk), .*);
rvdff #(1) dma_dbg_cmd_doneff (.din(dma_dbg_cmd_done), .dout(dma_dbg_cmd_done_q), .clk(free_clk), .*);
// Clock Gating logic
@@ -407,7 +404,34 @@ module dma_ctrl (
rvoclkhdr dma_buffer_c1cgc ( .en(dma_buffer_c1_clken), .l1clk(dma_buffer_c1_clk), .* );
rvoclkhdr dma_free_cgc (.en(dma_free_clken), .l1clk(dma_free_clk), .*);
- rvclkhdr dma_bus_cgc (.en(dma_bus_clk_en), .l1clk(dma_bus_clk), .*);
+
+`ifndef RV_FPGA_OPTIMIZE
+ rvclkhdr dma_bus_cgc (.en(dma_bus_clk_en), .l1clk(dma_bus_clk), .*); // ifndef FPGA_OPTIMIZE
+`endif
+
+ rvdffsc_fpga #(1) wrbuf_vldff (.din(1'b1), .clear(wrbuf_rst), .dout(wrbuf_vld), .en(wrbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+ rvdffsc_fpga #(1) wrbuf_data_vldff(.din(1'b1), .clear(wrbuf_data_rst), .dout(wrbuf_data_vld), .en(wrbuf_data_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+ rvdffsc_fpga #(1) rdbuf_vldff (.din(1'b1), .clear(rdbuf_rst), .dout(rdbuf_vld), .en(rdbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+
+
+ rvdff_fpga #(1) fifo_full_bus_ff (.din(fifo_full_spec), .dout(fifo_full_spec_bus), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) dbg_dma_bubble_ff (.din(dbg_dma_bubble), .dout(dbg_dma_bubble_bus), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) dec_tlu_stall_dma_ff (.din(dec_tlu_stall_dma), .dout(dec_tlu_stall_dma_bus), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+ rvdffs_fpga #(1) wrbuf_postedff (.din(1'b0), .dout(wrbuf_posted), .en(wrbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+ rvdffs_fpga #(DMA_BUS_TAG) wrbuf_tagff (.din(dma_axi_awid[DMA_BUS_TAG-1:0]), .dout(wrbuf_tag[DMA_BUS_TAG-1:0]), .en(wrbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+ rvdffs_fpga #(3) wrbuf_sizeff (.din(dma_axi_awsize[2:0]), .dout(wrbuf_size[2:0]), .en(wrbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+ rvdffs_fpga #(8) wrbuf_byteenff (.din(dma_axi_wstrb[7:0]), .dout(wrbuf_byteen[7:0]), .en(wrbuf_data_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+ rvdffs_fpga #(DMA_BUS_TAG) rdbuf_tagff (.din(dma_axi_arid[DMA_BUS_TAG-1:0]), .dout(rdbuf_tag[DMA_BUS_TAG-1:0]), .en(rdbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+ rvdffs_fpga #(3) rdbuf_sizeff (.din(dma_axi_arsize[2:0]), .dout(rdbuf_size[2:0]), .en(rdbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+ rvdffs_fpga #(1) mstr_prtyff (.din(axi_mstr_prty_in), .dout(axi_mstr_priority), .en(axi_mstr_prty_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+
+ rvdff_fpga #(1) axi_mstr_validff ( .din(axi_mstr_valid), .dout(axi_mstr_valid_q), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) axi_slv_sentff ( .din(axi_slv_sent), .dout(axi_slv_sent_q), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*);
+
+
+ rvdffe #(32) wrbuf_addrff(.din(dma_axi_awaddr[31:0]), .dout(wrbuf_addr[31:0]), .en(wrbuf_en & dma_bus_clk_en), .*);
+ rvdffe #(64) wrbuf_dataff(.din(dma_axi_wdata[63:0]), .dout(wrbuf_data[63:0]), .en(wrbuf_data_en & dma_bus_clk_en), .*);
+ rvdffe #(32) rdbuf_addrff(.din(dma_axi_araddr[31:0]), .dout(rdbuf_addr[31:0]), .en(rdbuf_en & dma_bus_clk_en), .*);
// Write channel buffer
assign wrbuf_en = dma_axi_awvalid & dma_axi_awready;
@@ -416,23 +440,11 @@ module dma_ctrl (
assign wrbuf_rst = wrbuf_cmd_sent & ~wrbuf_en;
assign wrbuf_data_rst = wrbuf_cmd_sent & ~wrbuf_data_en;
- rvdffsc #(.WIDTH(1)) wrbuf_vldff(.din(1'b1), .dout(wrbuf_vld), .en(wrbuf_en), .clear(wrbuf_rst), .clk(dma_bus_clk), .*);
- rvdffsc #(.WIDTH(1)) wrbuf_data_vldff(.din(1'b1), .dout(wrbuf_data_vld), .en(wrbuf_data_en), .clear(wrbuf_data_rst), .clk(dma_bus_clk), .*);
- rvdffs #(.WIDTH(1)) wrbuf_postedff(.din(1'b0), .dout(wrbuf_posted), .en(wrbuf_en), .clk(dma_bus_clk), .*);
- rvdffs #(.WIDTH(DMA_BUS_TAG)) wrbuf_tagff(.din(dma_axi_awid[DMA_BUS_TAG-1:0]), .dout(wrbuf_tag[DMA_BUS_TAG-1:0]), .en(wrbuf_en), .clk(dma_bus_clk), .*);
- rvdffs #(.WIDTH(3)) wrbuf_sizeff(.din(dma_axi_awsize[2:0]), .dout(wrbuf_size[2:0]), .en(wrbuf_en), .clk(dma_bus_clk), .*);
- rvdffe #(.WIDTH(32)) wrbuf_addrff(.din(dma_axi_awaddr[31:0]), .dout(wrbuf_addr[31:0]), .en(wrbuf_en & dma_bus_clk_en), .*);
- rvdffe #(.WIDTH(64)) wrbuf_dataff(.din(dma_axi_wdata[63:0]), .dout(wrbuf_data[63:0]), .en(wrbuf_data_en & dma_bus_clk_en), .*);
- rvdffs #(.WIDTH(8)) wrbuf_byteenff(.din(dma_axi_wstrb[7:0]), .dout(wrbuf_byteen[7:0]), .en(wrbuf_data_en), .clk(dma_bus_clk), .*);
// Read channel buffer
assign rdbuf_en = dma_axi_arvalid & dma_axi_arready;
assign rdbuf_cmd_sent = axi_mstr_valid & ~axi_mstr_write & dma_fifo_ready;
assign rdbuf_rst = rdbuf_cmd_sent & ~rdbuf_en;
- rvdffsc #(.WIDTH(1)) rdbuf_vldff(.din(1'b1), .dout(rdbuf_vld), .en(rdbuf_en), .clear(rdbuf_rst), .clk(dma_bus_clk), .*);
- rvdffs #(.WIDTH(DMA_BUS_TAG)) rdbuf_tagff(.din(dma_axi_arid[DMA_BUS_TAG-1:0]), .dout(rdbuf_tag[DMA_BUS_TAG-1:0]), .en(rdbuf_en), .clk(dma_bus_clk), .*);
- rvdffs #(.WIDTH(3)) rdbuf_sizeff(.din(dma_axi_arsize[2:0]), .dout(rdbuf_size[2:0]), .en(rdbuf_en), .clk(dma_bus_clk), .*);
- rvdffe #(.WIDTH(32)) rdbuf_addrff(.din(dma_axi_araddr[31:0]), .dout(rdbuf_addr[31:0]), .en(rdbuf_en & dma_bus_clk_en), .*);
assign dma_axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent);
assign dma_axi_wready = ~(wrbuf_data_vld & ~wrbuf_cmd_sent);
@@ -452,10 +464,6 @@ module dma_ctrl (
assign axi_mstr_sel = (wrbuf_vld & wrbuf_data_vld & rdbuf_vld) ? axi_mstr_priority : (wrbuf_vld & wrbuf_data_vld);
assign axi_mstr_prty_in = ~axi_mstr_priority;
assign axi_mstr_prty_en = axi_mstr_valid;
- rvdffs #(.WIDTH(1)) mstr_prtyff(.din(axi_mstr_prty_in), .dout(axi_mstr_priority), .en(axi_mstr_prty_en), .clk(dma_bus_clk), .*);
-
- rvdff #(.WIDTH(1)) axi_mstr_validff (.din(axi_mstr_valid), .dout(axi_mstr_valid_q), .clk(dma_bus_clk), .*);
- rvdff #(.WIDTH(1)) axi_slv_sentff (.din(axi_slv_sent), .dout(axi_slv_sent_q), .clk(dma_bus_clk), .*);
//assign axi_slv_valid = fifo_valid[RspPtr] & ~fifo_rsp_done[RspPtr] & ~fifo_dbg[RspPtr] &
// ((fifo_write[RspPtr] & fifo_done_bus[RspPtr]) | (~fifo_write[RspPtr] & fifo_data_bus_valid[RspPtr]) | fifo_error_bus[RspPtr]);
diff --git a/design/exu/exu_mul_ctl.sv b/design/exu/exu_mul_ctl.sv
index d6e74a1..41a86e8 100644
--- a/design/exu/exu_mul_ctl.sv
+++ b/design/exu/exu_mul_ctl.sv
@@ -59,20 +59,23 @@ module exu_mul_ctl
assign mul_c1_e2_clken = (valid_e1 | clk_override) & ~freeze;
assign mul_c1_e3_clken = (valid_e2 | clk_override) & ~freeze;
+`ifndef RV_FPGA_OPTIMIZE
// C1 - 1 clock pulse for data
- rvclkhdr exu_mul_c1e1_cgc (.*, .en(mul_c1_e1_clken), .l1clk(exu_mul_c1_e1_clk));
- rvclkhdr exu_mul_c1e2_cgc (.*, .en(mul_c1_e2_clken), .l1clk(exu_mul_c1_e2_clk));
- rvclkhdr exu_mul_c1e3_cgc (.*, .en(mul_c1_e3_clken), .l1clk(exu_mul_c1_e3_clk));
+ rvclkhdr exu_mul_c1e1_cgc (.*, .en(mul_c1_e1_clken), .l1clk(exu_mul_c1_e1_clk)); // ifndef FPGA_OPTIMIZE
+ rvclkhdr exu_mul_c1e2_cgc (.*, .en(mul_c1_e2_clken), .l1clk(exu_mul_c1_e2_clk)); // ifndef FPGA_OPTIMIZE
+ rvclkhdr exu_mul_c1e3_cgc (.*, .en(mul_c1_e3_clken), .l1clk(exu_mul_c1_e3_clk)); // ifndef FPGA_OPTIMIZE
+`endif
// --------------------------- Input flops ----------------------------------
- rvdffs #(1) valid_e1_ff (.*, .din(mp.valid), .dout(valid_e1), .clk(active_clk), .en(~freeze));
- rvdff #(1) rs1_sign_e1_ff (.*, .din(mp.rs1_sign), .dout(rs1_sign_e1), .clk(exu_mul_c1_e1_clk));
- rvdff #(1) rs2_sign_e1_ff (.*, .din(mp.rs2_sign), .dout(rs2_sign_e1), .clk(exu_mul_c1_e1_clk));
- rvdff #(1) low_e1_ff (.*, .din(mp.low), .dout(low_e1), .clk(exu_mul_c1_e1_clk));
- rvdff #(1) ld_rs1_byp_e1_ff (.*, .din(mp.load_mul_rs1_bypass_e1), .dout(load_mul_rs1_bypass_e1), .clk(exu_mul_c1_e1_clk));
- rvdff #(1) ld_rs2_byp_e1_ff (.*, .din(mp.load_mul_rs2_bypass_e1), .dout(load_mul_rs2_bypass_e1), .clk(exu_mul_c1_e1_clk));
+ rvdffs #(1) valid_e1_ff (.*, .din(mp.valid), .dout(valid_e1), .clk(active_clk), .en(~freeze));
+
+ rvdff_fpga #(1) rs1_sign_e1_ff (.*, .din(mp.rs1_sign), .dout(rs1_sign_e1), .clk(exu_mul_c1_e1_clk), .clken(mul_c1_e1_clken), .rawclk(clk));
+ rvdff_fpga #(1) rs2_sign_e1_ff (.*, .din(mp.rs2_sign), .dout(rs2_sign_e1), .clk(exu_mul_c1_e1_clk), .clken(mul_c1_e1_clken), .rawclk(clk));
+ rvdff_fpga #(1) low_e1_ff (.*, .din(mp.low), .dout(low_e1), .clk(exu_mul_c1_e1_clk), .clken(mul_c1_e1_clken), .rawclk(clk));
+ rvdff_fpga #(1) ld_rs1_byp_e1_ff (.*, .din(mp.load_mul_rs1_bypass_e1), .dout(load_mul_rs1_bypass_e1), .clk(exu_mul_c1_e1_clk), .clken(mul_c1_e1_clken), .rawclk(clk));
+ rvdff_fpga #(1) ld_rs2_byp_e1_ff (.*, .din(mp.load_mul_rs2_bypass_e1), .dout(load_mul_rs2_bypass_e1), .clk(exu_mul_c1_e1_clk), .clken(mul_c1_e1_clken), .rawclk(clk));
rvdffe #(32) a_e1_ff (.*, .din(a[31:0]), .dout(a_ff_e1[31:0]), .en(mul_c1_e1_clken));
rvdffe #(32) b_e1_ff (.*, .din(b[31:0]), .dout(b_ff_e1[31:0]), .en(mul_c1_e1_clken));
@@ -88,8 +91,9 @@ module exu_mul_ctl
assign rs2_neg_e1 = rs2_sign_e1 & b_e1[31];
- rvdffs #(1) valid_e2_ff (.*, .din(valid_e1), .dout(valid_e2), .clk(active_clk), .en(~freeze));
- rvdff #(1) low_e2_ff (.*, .din(low_e1), .dout(low_e2), .clk(exu_mul_c1_e2_clk));
+ rvdffs #(1) valid_e2_ff (.*, .din(valid_e1), .dout(valid_e2), .clk(active_clk), .en(~freeze));
+
+ rvdff_fpga #(1) low_e2_ff (.*, .din(low_e1), .dout(low_e2), .clk(exu_mul_c1_e2_clk), .clken(mul_c1_e2_clken), .rawclk(clk));
rvdffe #(33) a_e2_ff (.*, .din({rs1_neg_e1, a_e1[31:0]}), .dout(a_ff_e2[32:0]), .en(mul_c1_e2_clken));
rvdffe #(33) b_e2_ff (.*, .din({rs2_neg_e1, b_e1[31:0]}), .dout(b_ff_e2[32:0]), .en(mul_c1_e2_clken));
@@ -103,9 +107,9 @@ module exu_mul_ctl
assign prod_e2[65:0] = a_ff_e2 * b_ff_e2;
+ rvdff_fpga #(1) low_e3_ff (.*, .din(low_e2), .dout(low_e3), .clk(exu_mul_c1_e3_clk), .clken(mul_c1_e3_clken), .rawclk(clk));
- rvdff #(1) low_e3_ff (.*, .din(low_e2), .dout(low_e3), .clk(exu_mul_c1_e3_clk));
- rvdffe #(64) prod_e3_ff (.*, .din(prod_e2[63:0]), .dout(prod_e3[63:0]), .en(mul_c1_e3_clken));
+ rvdffe #(64) prod_e3_ff (.*, .din(prod_e2[63:0]), .dout(prod_e3[63:0]), .en(mul_c1_e3_clken));
diff --git a/design/ifu/ifu.sv b/design/ifu/ifu.sv
index d978439..4f9df76 100644
--- a/design/ifu/ifu.sv
+++ b/design/ifu/ifu.sv
@@ -123,7 +123,7 @@ module ifu
output logic ifu_pmu_fetch_stall,
// I$ & ITAG Ports
- output logic [31:3] ic_rw_addr, // Read/Write addresss to the Icache.
+ output logic [31:2] ic_rw_addr, // Read/Write addresss to the Icache.
output logic [3:0] ic_wr_en, // Icache write enable, when filling the Icache.
output logic ic_rd_en, // Icache read enable.
`ifdef RV_ICACHE_ECC
@@ -358,7 +358,7 @@ module ifu
assign mppc_ns[31:1] = `EXU.exu_i0_flush_upper_e1 ? `DEC.decode.i0_pc_e1[31:1] : (`EXU.exu_i1_flush_upper_e1 ? `DEC.decode.i1_pc_e1[31:1] : (`EXU.exu_i0_flush_lower_e4 ? `DEC.decode.i0_pc_e4[31:1] : `DEC.decode.i1_pc_e4[31:1]));
assign mppc_ns[0] = 1'b0;
logic [3:0] ic_rd_hit_f2;
- rvdff #(36) mdseal_ff (.*, .din({mppc_ns[31:0], mem_ctl.ic_rd_hit[3:0]}), .dout({mppc[31:0],ic_rd_hit_f2[3:0]}));
+ rvdff #(36) junk_ff (.*, .clk(free_clk), .din({mppc_ns[31:0], mem_ctl.ic_rd_hit[3:0]}), .dout({mppc[31:0],ic_rd_hit_f2[3:0]}));
logic [2:0] tmp_bnk;
assign tmp_bnk[2:0] = encode8_3(bp.btb_sel_f2[7:0]);
always @(negedge clk) begin
diff --git a/design/ifu/ifu_bp_ctl.sv b/design/ifu/ifu_bp_ctl.sv
index bc3687a..215b369 100644
--- a/design/ifu/ifu_bp_ctl.sv
+++ b/design/ifu/ifu_bp_ctl.sv
@@ -1289,7 +1289,7 @@ assign fgmask_f2[0] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2]
({31{e1_rs_hold}} & e1_rets_out[i][31:1]) );
- rvdff #(31) e1_rets_ff (.*, .din(e1_rets_in[i][31:1]), .dout(e1_rets_out[i][31:1]));
+ rvdffe #(31) e1_rets_ff (.*, .en(~e1_rs_hold), .din(e1_rets_in[i][31:1]), .dout(e1_rets_out[i][31:1]));
end : e1_retstack
@@ -1346,7 +1346,7 @@ assign fgmask_f2[0] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2]
({31{e4_rs_hold}} & e4_rets_out[i][31:1]) );
- rvdff #(31) e4_rets_ff (.*, .din(e4_rets_in[i][31:1]), .dout(e4_rets_out[i][31:1]));
+ rvdffe #(31) e4_rets_ff (.*, .en(~e4_rs_hold), .din(e4_rets_in[i][31:1]), .dout(e4_rets_out[i][31:1]));
end : e4_retstack
@@ -1628,7 +1628,11 @@ assign fgmask_f2[0] = (~ifc_fetch_addr_f2[3] & ~ifc_fetch_addr_f2[2]
(bht_wr_en1[i] & ((bht_wr_addr1[`RV_BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) |
(bht_wr_en2[i] & ((bht_wr_addr2[`RV_BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH));
- rvclkhdr bht_bank_grp_cgc ( .en(bht_bank_clken[i][k]), .l1clk(bht_bank_clk[i][k]), .* );
+`ifndef RV_FPGA_OPTIMIZE
+ rvclkhdr bht_bank_grp_cgc ( .en(bht_bank_clken[i][k]), .l1clk(bht_bank_clk[i][k]), .* ); // ifndef RV_FPGA_OPTIMIZE
+`else
+ assign bht_bank_clk[i][k] = '0;
+`endif
for (j=0 ; j= 8) begin: genblock
+ if (WIDTH >= 8 || OVERRIDE==1) begin: genblock
`endif
`ifdef RV_FPGA_OPTIMIZE
diff --git a/design/lsu/lsu.sv b/design/lsu/lsu.sv
index 127dd06..3e34420 100644
--- a/design/lsu/lsu.sv
+++ b/design/lsu/lsu.sv
@@ -260,12 +260,13 @@ module lsu
logic lsu_c2_dc3_clk, lsu_c2_dc4_clk, lsu_c2_dc5_clk;
logic lsu_freeze_c1_dc2_clk, lsu_freeze_c1_dc3_clk;
logic lsu_freeze_c1_dc1_clken, lsu_freeze_c1_dc2_clken, lsu_freeze_c1_dc3_clken;
+ logic lsu_freeze_c2_dc1_clken, lsu_freeze_c2_dc2_clken, lsu_freeze_c2_dc3_clken, lsu_freeze_c2_dc4_clken;
logic lsu_store_c1_dc1_clken, lsu_store_c1_dc2_clken, lsu_store_c1_dc3_clken, lsu_store_c1_dc4_clk, lsu_store_c1_dc5_clk;
logic lsu_freeze_c2_dc1_clk, lsu_freeze_c2_dc2_clk, lsu_freeze_c2_dc3_clk, lsu_freeze_c2_dc4_clk;
logic lsu_stbuf_c1_clk;
logic lsu_bus_ibuf_c1_clk, lsu_bus_obuf_c1_clk, lsu_bus_buf_c1_clk;
- logic lsu_dccm_c1_dc3_clk, lsu_pic_c1_dc3_clken;
+ logic lsu_dccm_c1_dc3_clk, lsu_dccm_c1_dc3_clken, lsu_pic_c1_dc3_clken;
logic lsu_busm_clk;
logic lsu_free_c2_clk;
@@ -359,10 +360,11 @@ module lsu
//Flops
//rvdffs #(1) lsu_i0_valid_dc1ff (.*, .din(dec_i0_lsu_decode_d), .dout(lsu_i0_valid_dc1), .en(~lsu_freeze_dc3));
- rvdff #(1) lsu_i0_valid_dc1ff (.*, .din(dec_i0_lsu_decode_d), .dout(lsu_i0_valid_dc1), .clk(lsu_freeze_c2_dc1_clk));
- rvdff #(1) lsu_i0_valid_dc2ff (.*, .din(lsu_i0_valid_dc1), .dout(lsu_i0_valid_dc2), .clk(lsu_freeze_c2_dc2_clk));
- rvdff #(1) lsu_i0_valid_dc3ff (.*, .din(lsu_i0_valid_dc2), .dout(lsu_i0_valid_dc3), .clk(lsu_freeze_c2_dc3_clk));
- rvdff #(1) lsu_i0_valid_dc4ff (.*, .din(lsu_i0_valid_dc3), .dout(lsu_i0_valid_dc4), .clk(lsu_freeze_c2_dc4_clk));
+ rvdff_fpga #(1) lsu_i0_valid_dc1ff (.*, .din(dec_i0_lsu_decode_d), .dout(lsu_i0_valid_dc1), .clk(lsu_freeze_c2_dc1_clk), .clken(lsu_freeze_c2_dc1_clken), .rawclk(clk));
+ rvdff_fpga #(1) lsu_i0_valid_dc2ff (.*, .din(lsu_i0_valid_dc1), .dout(lsu_i0_valid_dc2), .clk(lsu_freeze_c2_dc2_clk), .clken(lsu_freeze_c2_dc2_clken), .rawclk(clk));
+ rvdff_fpga #(1) lsu_i0_valid_dc3ff (.*, .din(lsu_i0_valid_dc2), .dout(lsu_i0_valid_dc3), .clk(lsu_freeze_c2_dc3_clk), .clken(lsu_freeze_c2_dc3_clken), .rawclk(clk));
+ rvdff_fpga #(1) lsu_i0_valid_dc4ff (.*, .din(lsu_i0_valid_dc3), .dout(lsu_i0_valid_dc4), .clk(lsu_freeze_c2_dc4_clk), .clken(lsu_freeze_c2_dc4_clken), .rawclk(clk));
+
rvdff #(1) lsu_i0_valid_dc5ff (.*, .din(lsu_i0_valid_dc4), .dout(lsu_i0_valid_dc5), .clk(lsu_c2_dc5_clk));
rvdff #(1) lsu_single_ecc_err_dc4(.*, .din(lsu_single_ecc_error_dc3), .dout(lsu_single_ecc_error_dc4), .clk(lsu_c2_dc4_clk));
rvdff #(1) lsu_single_ecc_err_dc5(.*, .din(lsu_single_ecc_error_dc4), .dout(lsu_single_ecc_error_dc5), .clk(lsu_c2_dc5_clk));
diff --git a/design/lsu/lsu_addrcheck.sv b/design/lsu/lsu_addrcheck.sv
index e6bdf10..fd771e2 100644
--- a/design/lsu/lsu_addrcheck.sv
+++ b/design/lsu/lsu_addrcheck.sv
@@ -27,7 +27,10 @@ module lsu_addrcheck
(
input logic lsu_freeze_c2_dc2_clk, // clock
input logic lsu_freeze_c2_dc3_clk,
+ input logic lsu_freeze_c2_dc2_clken,
+ input logic lsu_freeze_c2_dc3_clken,
input logic rst_l, // reset
+ input logic clk,
input logic [31:0] start_addr_dc1, // start address for lsu
input logic [31:0] end_addr_dc1, // end address for lsu
@@ -177,8 +180,9 @@ module lsu_addrcheck
assign misaligned_fault_dc1 = ((start_addr_dc1[31:28] != end_addr_dc1[31:28]) |
(is_sideeffects_dc1 & ~is_aligned_dc1)) & addr_external_dc1 & lsu_pkt_dc1.valid & ~lsu_pkt_dc1.dma;
- rvdff #(.WIDTH(1)) is_sideeffects_dc2ff (.din(is_sideeffects_dc1), .dout(is_sideeffects_dc2), .clk(lsu_freeze_c2_dc2_clk), .*);
- rvdff #(.WIDTH(1)) is_sideeffects_dc3ff (.din(is_sideeffects_dc2), .dout(is_sideeffects_dc3), .clk(lsu_freeze_c2_dc3_clk), .*);
+ rvdff_fpga #(.WIDTH(1)) is_sideeffects_dc2ff (.din(is_sideeffects_dc1), .dout(is_sideeffects_dc2), .clk(lsu_freeze_c2_dc2_clk), .clken(lsu_freeze_c2_dc2_clken), .rawclk(clk), .*);
+ rvdff_fpga #(.WIDTH(1)) is_sideeffects_dc3ff (.din(is_sideeffects_dc2), .dout(is_sideeffects_dc3), .clk(lsu_freeze_c2_dc3_clk), .clken(lsu_freeze_c2_dc3_clken), .rawclk(clk), .*);
+
endmodule // lsu_addrcheck
diff --git a/design/lsu/lsu_bus_buffer.sv b/design/lsu/lsu_bus_buffer.sv
index 6a6cceb..2bc4abf 100644
--- a/design/lsu/lsu_bus_buffer.sv
+++ b/design/lsu/lsu_bus_buffer.sv
@@ -58,6 +58,7 @@ module lsu_bus_buffer
input logic lsu_freeze_c1_dc3_clk,
input logic lsu_freeze_c2_dc2_clk,
input logic lsu_freeze_c2_dc3_clk,
+ input logic lsu_freeze_c2_dc3_clken,
input logic lsu_bus_ibuf_c1_clk,
input logic lsu_bus_obuf_c1_clk,
input logic lsu_bus_buf_c1_clk,
@@ -527,10 +528,28 @@ module lsu_bus_buffer
assign obuf_merge_en = (CmdPtr0 != CmdPtr1) & found_cmdptr0 & found_cmdptr1 & (buf_state[CmdPtr0] == CMD) & (buf_state[CmdPtr1] == CMD) & ~buf_cmd_state_bus_en[CmdPtr0] & ~buf_sideeffect[CmdPtr0] &
(~buf_write[CmdPtr0] & buf_dual[CmdPtr0] & ~buf_dualhi[CmdPtr0] & buf_samedw[CmdPtr0]); // CmdPtr0/CmdPtr1 are for same load which is within a DW
- rvdff #(.WIDTH(1)) obuf_wren_ff (.din(obuf_wr_en), .dout(obuf_wr_enQ), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(1)) obuf_cmd_done_ff (.din(obuf_cmd_done_in), .dout(obuf_cmd_done), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(1)) obuf_data_done_ff (.din(obuf_data_done_in), .dout(obuf_data_done), .clk(lsu_busm_clk), .*);
- rvdffsc #(.WIDTH(1)) obuf_valid_ff (.din(1'b1), .dout(obuf_valid), .en(obuf_wr_en), .clear(obuf_rst), .clk(lsu_busm_clk), .*);
+ rvdff_fpga #(1) obuf_wren_ff (.din(obuf_wr_en), .dout(obuf_wr_enQ), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) obuf_cmd_done_ff (.din(obuf_cmd_done_in), .dout(obuf_cmd_done), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) obuf_data_done_ff (.din(obuf_data_done_in), .dout(obuf_data_done), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdffsc_fpga #(1) obuf_valid_ff (.din(1'b1), .dout(obuf_valid), .clear(obuf_rst), .en(obuf_wr_en), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(TIMER_LOG2) obuf_timerff (.din(obuf_wr_timer_in), .dout(obuf_wr_timer), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+
+ rvdff_fpga #(1) lsu_axi_awvalid_ff (.din(lsu_axi_awvalid), .dout(lsu_axi_awvalid_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) lsu_axi_awready_ff (.din(lsu_axi_awready), .dout(lsu_axi_awready_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) lsu_axi_wvalid_ff (.din(lsu_axi_wvalid), .dout(lsu_axi_wvalid_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) lsu_axi_wready_ff (.din(lsu_axi_wready), .dout(lsu_axi_wready_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) lsu_axi_arvalid_ff (.din(lsu_axi_arvalid), .dout(lsu_axi_arvalid_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) lsu_axi_arready_ff (.din(lsu_axi_arready), .dout(lsu_axi_arready_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) lsu_axi_bvalid_ff (.din(lsu_axi_bvalid), .dout(lsu_axi_bvalid_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) lsu_axi_bready_ff (.din(lsu_axi_bready), .dout(lsu_axi_bready_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(2) lsu_axi_bresp_ff (.din(lsu_axi_bresp[1:0]), .dout(lsu_axi_bresp_q[1:0]), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(LSU_BUS_TAG) lsu_axi_bid_ff (.din(lsu_axi_bid[LSU_BUS_TAG-1:0]), .dout(lsu_axi_bid_q[LSU_BUS_TAG-1:0]), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) lsu_axi_rvalid_ff (.din(lsu_axi_rvalid), .dout(lsu_axi_rvalid_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(1) lsu_axi_rready_ff (.din(lsu_axi_rready), .dout(lsu_axi_rready_q), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(2) lsu_axi_rresp_ff (.din(lsu_axi_rresp[1:0]), .dout(lsu_axi_rresp_q[1:0]), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+ rvdff_fpga #(LSU_BUS_TAG) lsu_axi_rid_ff (.din(lsu_axi_rid[LSU_BUS_TAG-1:0]), .dout(lsu_axi_rid_q[LSU_BUS_TAG-1:0]), .clk(lsu_busm_clk), .clken(lsu_bus_clk_en), .rawclk(clk), .*);
+
+
rvdffs #(.WIDTH(LSU_BUS_TAG)) obuf_tag0ff (.din(obuf_tag0_in), .dout(obuf_tag0), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*);
rvdffs #(.WIDTH(LSU_BUS_TAG)) obuf_tag1ff (.din(obuf_tag1_in), .dout(obuf_tag1), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*);
rvdffs #(.WIDTH(1)) obuf_mergeff (.din(obuf_merge_in), .dout(obuf_merge), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*);
@@ -540,7 +559,6 @@ module lsu_bus_buffer
rvdffe #(.WIDTH(32)) obuf_addrff (.din(obuf_addr_in[31:0]), .dout(obuf_addr), .en(obuf_wr_en), .*);
rvdffs #(.WIDTH(8)) obuf_byteenff (.din(obuf_byteen_in[7:0]), .dout(obuf_byteen), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .*);
rvdffe #(.WIDTH(64)) obuf_dataff (.din(obuf_data_in[63:0]), .dout(obuf_data), .en(obuf_wr_en), .*);
- rvdff #(.WIDTH(TIMER_LOG2)) obuf_timerff (.din(obuf_wr_timer_in), .dout(obuf_wr_timer), .clk(lsu_busm_clk), .*);
//------------------------------------------------------------------------------
// Output buffer logic ends here
@@ -872,24 +890,8 @@ module lsu_bus_buffer
assign lsu_pmu_bus_error = ld_bus_error_dc3 | lsu_imprecise_error_load_any | lsu_imprecise_error_store_any;
assign lsu_pmu_bus_busy = (lsu_axi_awvalid_q & ~lsu_axi_awready_q) | (lsu_axi_wvalid_q & ~lsu_axi_wready_q) | (lsu_axi_arvalid_q & ~lsu_axi_arready_q);
- rvdff #(.WIDTH(1)) lsu_axi_awvalid_ff (.din(lsu_axi_awvalid), .dout(lsu_axi_awvalid_q), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(1)) lsu_axi_awready_ff (.din(lsu_axi_awready), .dout(lsu_axi_awready_q), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(1)) lsu_axi_wvalid_ff (.din(lsu_axi_wvalid), .dout(lsu_axi_wvalid_q), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(1)) lsu_axi_wready_ff (.din(lsu_axi_wready), .dout(lsu_axi_wready_q), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(1)) lsu_axi_arvalid_ff (.din(lsu_axi_arvalid), .dout(lsu_axi_arvalid_q), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(1)) lsu_axi_arready_ff (.din(lsu_axi_arready), .dout(lsu_axi_arready_q), .clk(lsu_busm_clk), .*);
-
- rvdff #(.WIDTH(1)) lsu_axi_bvalid_ff (.din(lsu_axi_bvalid), .dout(lsu_axi_bvalid_q), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(1)) lsu_axi_bready_ff (.din(lsu_axi_bready), .dout(lsu_axi_bready_q), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(2)) lsu_axi_bresp_ff (.din(lsu_axi_bresp[1:0]), .dout(lsu_axi_bresp_q[1:0]), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(LSU_BUS_TAG)) lsu_axi_bid_ff (.din(lsu_axi_bid[LSU_BUS_TAG-1:0]), .dout(lsu_axi_bid_q[LSU_BUS_TAG-1:0]), .clk(lsu_busm_clk), .*);
rvdffe #(.WIDTH(64)) lsu_axi_rdata_ff (.din(lsu_axi_rdata[63:0]), .dout(lsu_axi_rdata_q[63:0]), .en(lsu_axi_rvalid & lsu_bus_clk_en), .*);
- rvdff #(.WIDTH(1)) lsu_axi_rvalid_ff (.din(lsu_axi_rvalid), .dout(lsu_axi_rvalid_q), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(1)) lsu_axi_rready_ff (.din(lsu_axi_rready), .dout(lsu_axi_rready_q), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(2)) lsu_axi_rresp_ff (.din(lsu_axi_rresp[1:0]), .dout(lsu_axi_rresp_q[1:0]), .clk(lsu_busm_clk), .*);
- rvdff #(.WIDTH(LSU_BUS_TAG)) lsu_axi_rid_ff (.din(lsu_axi_rid[LSU_BUS_TAG-1:0]), .dout(lsu_axi_rid_q[LSU_BUS_TAG-1:0]), .clk(lsu_busm_clk), .*);
-
// General flops
rvdffsc #(.WIDTH(1)) ld_freezeff (.din(1'b1), .dout(ld_freeze_dc3), .en(ld_freeze_en), .clear(ld_freeze_rst), .clk(lsu_free_c2_clk), .*);
rvdffs #(.WIDTH(DEPTH_LOG2)) lsu_FreezePtrff (.din(WrPtr0_dc3), .dout(FreezePtr), .en(FreezePtrEn), .clk(lsu_free_c2_clk), .*);
@@ -905,7 +907,10 @@ module lsu_bus_buffer
rvdff #(.WIDTH(1)) lsu_busreq_dc4ff (.din(lsu_busreq_dc3 & ~flush_dc4), .dout(lsu_busreq_dc4), .clk(lsu_c2_dc4_clk), .*);
rvdff #(.WIDTH(1)) lsu_busreq_dc5ff (.din(lsu_busreq_dc4 & ~flush_dc5), .dout(lsu_busreq_dc5), .clk(lsu_c2_dc5_clk), .*);
- rvdff #(.WIDTH(1)) dec_nonblock_load_freeze_dc3ff (.din(dec_nonblock_load_freeze_dc2), .dout(dec_nonblock_load_freeze_dc3), .clk(lsu_freeze_c2_dc3_clk), .*);
+ rvdff_fpga #(.WIDTH(1)) dec_nonblock_load_freeze_dc3ff (.din(dec_nonblock_load_freeze_dc2), .dout(dec_nonblock_load_freeze_dc3), .clk(lsu_freeze_c2_dc3_clk), .clken(lsu_freeze_c2_dc3_clken), .rawclk(clk), .*);
+
+
+
rvdff #(.WIDTH(1)) lsu_nonblock_load_valid_dc4ff (.din(lsu_nonblock_load_valid_dc3), .dout(lsu_nonblock_load_valid_dc4), .clk(lsu_c2_dc4_clk), .*);
rvdff #(.WIDTH(1)) lsu_nonblock_load_valid_dc5ff (.din(lsu_nonblock_load_valid_dc4), .dout(lsu_nonblock_load_valid_dc5), .clk(lsu_c2_dc5_clk), .*);
diff --git a/design/lsu/lsu_bus_intf.sv b/design/lsu/lsu_bus_intf.sv
index 095f31a..4212f6b 100644
--- a/design/lsu/lsu_bus_intf.sv
+++ b/design/lsu/lsu_bus_intf.sv
@@ -44,6 +44,10 @@ module lsu_bus_intf
input logic lsu_freeze_c1_dc3_clk,
input logic lsu_freeze_c2_dc2_clk,
input logic lsu_freeze_c2_dc3_clk,
+ input logic lsu_freeze_c1_dc2_clken,
+ input logic lsu_freeze_c1_dc3_clken,
+ input logic lsu_freeze_c2_dc2_clken,
+ input logic lsu_freeze_c2_dc3_clken,
input logic lsu_bus_ibuf_c1_clk,
input logic lsu_bus_obuf_c1_clk,
input logic lsu_bus_buf_c1_clk,
@@ -383,19 +387,21 @@ module lsu_bus_intf
assign bus_read_data_dc3[31:0] = ld_full_hit_dc3 ? ld_fwddata_dc3[31:0] : ld_bus_data_dc3[31:0];
// Fifo flops
- rvdff #(.WIDTH(1)) lsu_full_hit_dc3ff (.din(ld_full_hit_dc2), .dout(ld_full_hit_dc3), .clk(lsu_freeze_c2_dc3_clk), .*);
- rvdff #(.WIDTH(32)) lsu_fwddata_dc3ff (.din(ld_fwddata_dc2[31:0]), .dout(ld_fwddata_dc3[31:0]), .clk(lsu_c1_dc3_clk), .*);
rvdff #(.WIDTH(1)) clken_ff (.din(lsu_bus_clk_en), .dout(lsu_bus_clk_en_q), .clk(free_clk), .*);
- rvdff #(.WIDTH(1)) ldst_dual_dc2ff (.din(ldst_dual_dc1), .dout(ldst_dual_dc2), .clk(lsu_freeze_c1_dc2_clk), .*);
- rvdff #(.WIDTH(1)) ldst_dual_dc3ff (.din(ldst_dual_dc2), .dout(ldst_dual_dc3), .clk(lsu_freeze_c1_dc3_clk), .*);
+ rvdff_fpga #(.WIDTH(1)) ldst_dual_dc2ff (.din(ldst_dual_dc1), .dout(ldst_dual_dc2), .clk(lsu_freeze_c1_dc2_clk), .clken(lsu_freeze_c1_dc2_clken), .rawclk(clk), .*);
+ rvdff_fpga #(.WIDTH(1)) lsu_full_hit_dc3ff (.din(ld_full_hit_dc2), .dout(ld_full_hit_dc3), .clk(lsu_freeze_c2_dc3_clk), .clken(lsu_freeze_c2_dc3_clken), .rawclk(clk), .*);
+ rvdff_fpga #(.WIDTH(1)) ldst_dual_dc3ff (.din(ldst_dual_dc2), .dout(ldst_dual_dc3), .clk(lsu_freeze_c1_dc3_clk), .clken(lsu_freeze_c1_dc3_clken), .rawclk(clk), .*);
+ rvdff_fpga #(4) lsu_byten_dc3ff (.din(ldst_byteen_dc2[3:0]), .dout(ldst_byteen_dc3[3:0]), .clk(lsu_freeze_c1_dc3_clk), .clken(lsu_freeze_c1_dc3_clken), .rawclk(clk), .*);
+
+ rvdff #(.WIDTH(32)) lsu_fwddata_dc3ff (.din(ld_fwddata_dc2[31:0]), .dout(ld_fwddata_dc3[31:0]), .clk(lsu_c1_dc3_clk), .*);
+
rvdff #(.WIDTH(1)) ldst_dual_dc4ff (.din(ldst_dual_dc3), .dout(ldst_dual_dc4), .clk(lsu_c1_dc4_clk), .*);
rvdff #(.WIDTH(1)) ldst_dual_dc5ff (.din(ldst_dual_dc4), .dout(ldst_dual_dc5), .clk(lsu_c1_dc5_clk), .*);
rvdff #(.WIDTH(1)) is_sideeffects_dc4ff (.din(is_sideeffects_dc3), .dout(is_sideeffects_dc4), .clk(lsu_c1_dc4_clk), .*);
rvdff #(.WIDTH(1)) is_sideeffects_dc5ff (.din(is_sideeffects_dc4), .dout(is_sideeffects_dc5), .clk(lsu_c1_dc5_clk), .*);
- rvdff #(4) lsu_byten_dc3ff (.*, .din(ldst_byteen_dc2[3:0]), .dout(ldst_byteen_dc3[3:0]), .clk(lsu_freeze_c1_dc3_clk));
rvdff #(4) lsu_byten_dc4ff (.*, .din(ldst_byteen_dc3[3:0]), .dout(ldst_byteen_dc4[3:0]), .clk(lsu_c1_dc4_clk));
rvdff #(4) lsu_byten_dc5ff (.*, .din(ldst_byteen_dc4[3:0]), .dout(ldst_byteen_dc5[3:0]), .clk(lsu_c1_dc5_clk));
diff --git a/design/lsu/lsu_clkdomain.sv b/design/lsu/lsu_clkdomain.sv
index 00f7c3f..14bef2e 100644
--- a/design/lsu/lsu_clkdomain.sv
+++ b/design/lsu/lsu_clkdomain.sv
@@ -84,7 +84,14 @@ module lsu_clkdomain
output logic lsu_freeze_c2_dc3_clk,
output logic lsu_freeze_c2_dc4_clk,
+ output logic lsu_freeze_c2_dc1_clken,
+ output logic lsu_freeze_c2_dc2_clken,
+ output logic lsu_freeze_c2_dc3_clken,
+ output logic lsu_freeze_c2_dc4_clken,
+
output logic lsu_dccm_c1_dc3_clk, // dccm clock
+ output logic lsu_dccm_c1_dc3_clken,
+
output logic lsu_pic_c1_dc3_clken, // pic clock enable
output logic lsu_stbuf_c1_clk,
@@ -104,13 +111,12 @@ module lsu_clkdomain
logic lsu_store_c1_dc4_clken, lsu_store_c1_dc5_clken;
logic lsu_freeze_c1_dc4_clken;
- logic lsu_freeze_c2_dc1_clken, lsu_freeze_c2_dc2_clken, lsu_freeze_c2_dc3_clken, lsu_freeze_c2_dc4_clken;
+
logic lsu_freeze_c1_dc1_clken_q, lsu_freeze_c1_dc2_clken_q, lsu_freeze_c1_dc3_clken_q, lsu_freeze_c1_dc4_clken_q;
logic lsu_stbuf_c1_clken;
logic lsu_bus_ibuf_c1_clken, lsu_bus_obuf_c1_clken, lsu_bus_buf_c1_clken;
- logic lsu_dccm_c1_dc3_clken;
logic lsu_free_c1_clken, lsu_free_c1_clken_q, lsu_free_c2_clken;
logic lsu_bus_valid_clken;
@@ -168,10 +174,10 @@ module lsu_clkdomain
rvdff #(1) lsu_c1_dc4_clkenff (.din(lsu_c1_dc4_clken), .dout(lsu_c1_dc4_clken_q), .clk(lsu_free_c2_clk), .*);
rvdff #(1) lsu_c1_dc5_clkenff (.din(lsu_c1_dc5_clken), .dout(lsu_c1_dc5_clken_q), .clk(lsu_free_c2_clk), .*);
- rvdff #(1) lsu_freeze_c1_dc1_clkenff (.din(lsu_freeze_c1_dc1_clken), .dout(lsu_freeze_c1_dc1_clken_q), .clk(lsu_freeze_c2_dc1_clk), .*);
- rvdff #(1) lsu_freeze_c1_dc2_clkenff (.din(lsu_freeze_c1_dc2_clken), .dout(lsu_freeze_c1_dc2_clken_q), .clk(lsu_freeze_c2_dc2_clk), .*);
- rvdff #(1) lsu_freeze_c1_dc3_clkenff (.din(lsu_freeze_c1_dc3_clken), .dout(lsu_freeze_c1_dc3_clken_q), .clk(lsu_freeze_c2_dc3_clk), .*);
- rvdff #(1) lsu_freeze_c1_dc4_clkenff (.din(lsu_freeze_c1_dc4_clken), .dout(lsu_freeze_c1_dc4_clken_q), .clk(lsu_freeze_c2_dc4_clk), .*);
+ rvdff_fpga #(1) lsu_freeze_c1_dc1_clkenff (.din(lsu_freeze_c1_dc1_clken), .dout(lsu_freeze_c1_dc1_clken_q), .clk(lsu_freeze_c2_dc1_clk), .clken(lsu_freeze_c2_dc1_clken), .rawclk(clk), .*);
+ rvdff_fpga #(1) lsu_freeze_c1_dc2_clkenff (.din(lsu_freeze_c1_dc2_clken), .dout(lsu_freeze_c1_dc2_clken_q), .clk(lsu_freeze_c2_dc2_clk), .clken(lsu_freeze_c2_dc2_clken), .rawclk(clk), .*);
+ rvdff_fpga #(1) lsu_freeze_c1_dc3_clkenff (.din(lsu_freeze_c1_dc3_clken), .dout(lsu_freeze_c1_dc3_clken_q), .clk(lsu_freeze_c2_dc3_clk), .clken(lsu_freeze_c2_dc3_clken), .rawclk(clk), .*);
+ rvdff_fpga #(1) lsu_freeze_c1_dc4_clkenff (.din(lsu_freeze_c1_dc4_clken), .dout(lsu_freeze_c1_dc4_clken_q), .clk(lsu_freeze_c2_dc4_clk), .clken(lsu_freeze_c2_dc4_clken), .rawclk(clk), .*);
// Clock Headers
rvoclkhdr lsu_c1dc3_cgc ( .en(lsu_c1_dc3_clken), .l1clk(lsu_c1_dc3_clk), .* );
@@ -182,32 +188,41 @@ module lsu_clkdomain
rvoclkhdr lsu_c2dc4_cgc ( .en(lsu_c2_dc4_clken), .l1clk(lsu_c2_dc4_clk), .* );
rvoclkhdr lsu_c2dc5_cgc ( .en(lsu_c2_dc5_clken), .l1clk(lsu_c2_dc5_clk), .* );
-// rvclkhdr lsu_store_c1dc1_cgc (.en(lsu_store_c1_dc1_clken), .l1clk(lsu_store_c1_dc1_clk), .*);
-// rvclkhdr lsu_store_c1dc2_cgc (.en(lsu_store_c1_dc2_clken), .l1clk(lsu_store_c1_dc2_clk), .*);
-// rvclkhdr lsu_store_c1dc3_cgc (.en(lsu_store_c1_dc3_clken), .l1clk(lsu_store_c1_dc3_clk), .*);
rvoclkhdr lsu_store_c1dc4_cgc (.en(lsu_store_c1_dc4_clken), .l1clk(lsu_store_c1_dc4_clk), .*);
rvoclkhdr lsu_store_c1dc5_cgc (.en(lsu_store_c1_dc5_clken), .l1clk(lsu_store_c1_dc5_clk), .*);
-// rvclkhdr lsu_freeze_c1dc1_cgc ( .en(lsu_freeze_c1_dc1_clken), .l1clk(lsu_freeze_c1_dc1_clk), .* );
- rvclkhdr lsu_freeze_c1dc2_cgc ( .en(lsu_freeze_c1_dc2_clken), .l1clk(lsu_freeze_c1_dc2_clk), .* );
- rvclkhdr lsu_freeze_c1dc3_cgc ( .en(lsu_freeze_c1_dc3_clken), .l1clk(lsu_freeze_c1_dc3_clk), .* );
+`ifdef RV_FPGA_OPTIMIZE
+ assign lsu_freeze_c1_dc2_clk = 1'b0;
+ assign lsu_freeze_c1_dc3_clk = 1'b0;
+ assign lsu_freeze_c2_dc1_clk = 1'b0;
+ assign lsu_freeze_c2_dc2_clk = 1'b0;
+ assign lsu_freeze_c2_dc3_clk = 1'b0;
+ assign lsu_freeze_c2_dc4_clk = 1'b0;
- rvclkhdr lsu_freeze_c2dc1_cgc ( .en(lsu_freeze_c2_dc1_clken), .l1clk(lsu_freeze_c2_dc1_clk), .* );
- rvclkhdr lsu_freeze_c2dc2_cgc ( .en(lsu_freeze_c2_dc2_clken), .l1clk(lsu_freeze_c2_dc2_clk), .* );
- rvclkhdr lsu_freeze_c2dc3_cgc ( .en(lsu_freeze_c2_dc3_clken), .l1clk(lsu_freeze_c2_dc3_clk), .* );
- rvclkhdr lsu_freeze_c2dc4_cgc ( .en(lsu_freeze_c2_dc4_clken), .l1clk(lsu_freeze_c2_dc4_clk), .* );
+ assign lsu_busm_clk = 1'b0;
+ assign lsu_dccm_c1_dc3_clk = 1'b0;
+
+`else
+ rvclkhdr lsu_freeze_c1dc2_cgc ( .en(lsu_freeze_c1_dc2_clken), .l1clk(lsu_freeze_c1_dc2_clk), .* ); // ifndef FPGA_OPTIMIZE
+ rvclkhdr lsu_freeze_c1dc3_cgc ( .en(lsu_freeze_c1_dc3_clken), .l1clk(lsu_freeze_c1_dc3_clk), .* ); // ifndef FPGA_OPTIMIZE
+ rvclkhdr lsu_freeze_c2dc1_cgc ( .en(lsu_freeze_c2_dc1_clken), .l1clk(lsu_freeze_c2_dc1_clk), .* ); // ifndef FPGA_OPTIMIZE
+ rvclkhdr lsu_freeze_c2dc2_cgc ( .en(lsu_freeze_c2_dc2_clken), .l1clk(lsu_freeze_c2_dc2_clk), .* ); // ifndef FPGA_OPTIMIZE
+ rvclkhdr lsu_freeze_c2dc3_cgc ( .en(lsu_freeze_c2_dc3_clken), .l1clk(lsu_freeze_c2_dc3_clk), .* ); // ifndef FPGA_OPTIMIZE
+ rvclkhdr lsu_freeze_c2dc4_cgc ( .en(lsu_freeze_c2_dc4_clken), .l1clk(lsu_freeze_c2_dc4_clk), .* ); // ifndef FPGA_OPTIMIZE
+
+ rvclkhdr lsu_busm_cgc (.en(lsu_bus_clk_en), .l1clk(lsu_busm_clk), .*); // ifndef FPGA_OPTIMIZE
+ rvclkhdr lsu_dccm_c1dc3_cgc (.en(lsu_dccm_c1_dc3_clken), .l1clk(lsu_dccm_c1_dc3_clk), .*); // ifndef FPGA_OPTIMIZE
+
+`endif
rvoclkhdr lsu_stbuf_c1_cgc ( .en(lsu_stbuf_c1_clken), .l1clk(lsu_stbuf_c1_clk), .* );
rvoclkhdr lsu_bus_ibuf_c1_cgc ( .en(lsu_bus_ibuf_c1_clken), .l1clk(lsu_bus_ibuf_c1_clk), .* );
rvoclkhdr lsu_bus_obuf_c1_cgc ( .en(lsu_bus_obuf_c1_clken), .l1clk(lsu_bus_obuf_c1_clk), .* );
rvoclkhdr lsu_bus_buf_c1_cgc ( .en(lsu_bus_buf_c1_clken), .l1clk(lsu_bus_buf_c1_clk), .* );
- rvclkhdr lsu_busm_cgc (.en(lsu_bus_clk_en), .l1clk(lsu_busm_clk), .*);
- rvclkhdr lsu_dccm_c1dc3_cgc (.en(lsu_dccm_c1_dc3_clken), .l1clk(lsu_dccm_c1_dc3_clk), .*);
-// rvclkhdr lsu_pic_c1dc3_cgc (.en(lsu_pic_c1_dc3_clken), .l1clk(lsu_pic_c1_dc3_clk), .*);
- rvclkhdr lsu_free_cgc (.en(lsu_free_c2_clken), .l1clk(lsu_free_c2_clk), .*);
+ rvoclkhdr lsu_free_cgc (.en(lsu_free_c2_clken), .l1clk(lsu_free_c2_clk), .*);
endmodule
diff --git a/design/lsu/lsu_dccm_ctl.sv b/design/lsu/lsu_dccm_ctl.sv
index 5122746..303a6fe 100644
--- a/design/lsu/lsu_dccm_ctl.sv
+++ b/design/lsu/lsu_dccm_ctl.sv
@@ -31,7 +31,10 @@ module lsu_dccm_ctl
(
input logic lsu_freeze_c2_dc2_clk, // clocks
input logic lsu_freeze_c2_dc3_clk,
+ input logic lsu_freeze_c2_dc2_clken, // clocks
+ input logic lsu_freeze_c2_dc3_clken,
input logic lsu_dccm_c1_dc3_clk,
+ input logic lsu_dccm_c1_dc3_clken,
input logic lsu_pic_c1_dc3_clken,
input logic rst_l,
@@ -178,14 +181,15 @@ module lsu_dccm_ctl
assign picm_rd_data_dc3[63:0] = {picm_rd_data_lo_dc3[31:0], picm_rd_data_lo_dc3[31:0]} ;
rvdffe #(32) picm_data_ff (.*, .din(picm_rd_data[31:0]), .dout(picm_rd_data_lo_dc3[31:0]), .en(lsu_pic_c1_dc3_clken));
if (DCCM_ENABLE == 1) begin: Gen_dccm_enable
- rvdff #(1) dccm_rden_dc2ff (.*, .din(lsu_dccm_rden_dc1), .dout(lsu_dccm_rden_dc2), .clk(lsu_freeze_c2_dc2_clk));
- rvdff #(1) dccm_rden_dc3ff (.*, .din(lsu_dccm_rden_dc2), .dout(lsu_dccm_rden_dc3), .clk(lsu_freeze_c2_dc3_clk));
- rvdff #(DCCM_DATA_WIDTH) dccm_data_hi_ff (.*, .din(dccm_data_hi_dc2[DCCM_DATA_WIDTH-1:0]), .dout(dccm_data_hi_dc3[DCCM_DATA_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk));
- rvdff #(DCCM_DATA_WIDTH) dccm_data_lo_ff (.*, .din(dccm_data_lo_dc2[DCCM_DATA_WIDTH-1:0]), .dout(dccm_data_lo_dc3[DCCM_DATA_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk));
+ rvdff_fpga #(1) dccm_rden_dc2ff (.*, .din(lsu_dccm_rden_dc1), .dout(lsu_dccm_rden_dc2), .clk(lsu_freeze_c2_dc2_clk), .clken(lsu_freeze_c2_dc2_clken), .rawclk(clk));
+ rvdff_fpga #(1) dccm_rden_dc3ff (.*, .din(lsu_dccm_rden_dc2), .dout(lsu_dccm_rden_dc3), .clk(lsu_freeze_c2_dc3_clk), .clken(lsu_freeze_c2_dc3_clken), .rawclk(clk));
+
+ rvdff_fpga #(DCCM_DATA_WIDTH) dccm_data_hi_ff (.*, .din(dccm_data_hi_dc2[DCCM_DATA_WIDTH-1:0]), .dout(dccm_data_hi_dc3[DCCM_DATA_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk), .clken(lsu_dccm_c1_dc3_clken), .rawclk(clk));
+ rvdff_fpga #(DCCM_DATA_WIDTH) dccm_data_lo_ff (.*, .din(dccm_data_lo_dc2[DCCM_DATA_WIDTH-1:0]), .dout(dccm_data_lo_dc3[DCCM_DATA_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk), .clken(lsu_dccm_c1_dc3_clken), .rawclk(clk));
+ rvdff_fpga #(DCCM_ECC_WIDTH) dccm_data_ecc_hi_ff (.*, .din(dccm_data_ecc_hi_dc2[DCCM_ECC_WIDTH-1:0]), .dout(dccm_data_ecc_hi_dc3[DCCM_ECC_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk), .clken(lsu_dccm_c1_dc3_clken), .rawclk(clk));
+ rvdff_fpga #(DCCM_ECC_WIDTH) dccm_data_ecc_lo_ff (.*, .din(dccm_data_ecc_lo_dc2[DCCM_ECC_WIDTH-1:0]), .dout(dccm_data_ecc_lo_dc3[DCCM_ECC_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk), .clken(lsu_dccm_c1_dc3_clken), .rawclk(clk));
- rvdff #(DCCM_ECC_WIDTH) dccm_data_ecc_hi_ff (.*, .din(dccm_data_ecc_hi_dc2[DCCM_ECC_WIDTH-1:0]), .dout(dccm_data_ecc_hi_dc3[DCCM_ECC_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk));
- rvdff #(DCCM_ECC_WIDTH) dccm_data_ecc_lo_ff (.*, .din(dccm_data_ecc_lo_dc2[DCCM_ECC_WIDTH-1:0]), .dout(dccm_data_ecc_lo_dc3[DCCM_ECC_WIDTH-1:0]), .clk(lsu_dccm_c1_dc3_clk));
end else begin: Gen_dccm_disable
assign lsu_dccm_rden_dc2 = '0;
assign lsu_dccm_rden_dc3 = '0;
diff --git a/design/lsu/lsu_dccm_mem.sv b/design/lsu/lsu_dccm_mem.sv
index 9d201a5..ed376ee 100644
--- a/design/lsu/lsu_dccm_mem.sv
+++ b/design/lsu/lsu_dccm_mem.sv
@@ -30,6 +30,7 @@ module lsu_dccm_mem
import swerv_types::*;
(
input logic clk, // clock
+ input logic free_clk, // clock
input logic rst_l,
input logic lsu_freeze_dc3, // freeze
input logic clk_override, // clock override
@@ -71,11 +72,24 @@ module lsu_dccm_mem
logic [DCCM_NUM_BANKS-1:0] dccm_clk;
logic [DCCM_NUM_BANKS-1:0] dccm_clken;
+ logic [DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo_q, dccm_rd_data_hi_q;
+ logic lsu_freeze_dc3_q;
+
assign rd_unaligned = (dccm_rd_addr_lo[DCCM_WIDTH_BITS+:DCCM_BANK_BITS] != dccm_rd_addr_hi[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]);
+`ifdef RV_FPGA_OPTIMIZE
+ // Align the read data
+ assign dccm_rd_data_lo[DCCM_FDATA_WIDTH-1:0] = lsu_freeze_dc3_q ? dccm_rd_data_lo_q[DCCM_FDATA_WIDTH-1:0] : dccm_bank_dout[dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]][DCCM_FDATA_WIDTH-1:0];
+ assign dccm_rd_data_hi[DCCM_FDATA_WIDTH-1:0] = lsu_freeze_dc3_q ? dccm_rd_data_hi_q[DCCM_FDATA_WIDTH-1:0] : dccm_bank_dout[dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]][DCCM_FDATA_WIDTH-1:0];
+
+ rvdff #(1) lsu_freeze_dc3ff(.din(lsu_freeze_dc3), .dout(lsu_freeze_dc3_q), .clk(free_clk), .*);
+ rvdffe #(DCCM_FDATA_WIDTH) dccm_rd_data_loff(.din(dccm_rd_data_lo), .dout(dccm_rd_data_lo_q), .en(~lsu_freeze_dc3_q), .*);
+ rvdffe #(DCCM_FDATA_WIDTH) dccm_rd_data_hiff(.din(dccm_rd_data_hi), .dout(dccm_rd_data_hi_q), .en(~lsu_freeze_dc3_q), .*);
+`else
// Align the read data
assign dccm_rd_data_lo[DCCM_FDATA_WIDTH-1:0] = dccm_bank_dout[dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]][DCCM_FDATA_WIDTH-1:0];
assign dccm_rd_data_hi[DCCM_FDATA_WIDTH-1:0] = dccm_bank_dout[dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]][DCCM_FDATA_WIDTH-1:0];
+`endif
// Generate even/odd address
// assign rd_addr_even[(DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] = dccm_rd_addr_lo[2] ? dccm_rd_addr_hi[(DCCM_BANK_BITS+DCCM_WIDTH_BITS)+:DCCM_INDEX_BITS] :
@@ -103,7 +117,7 @@ module lsu_dccm_mem
// clock gating section
assign dccm_clken[i] = (wren_bank[i] | rden_bank[i] | clk_override) & ~lsu_freeze_dc3;
- rvclkhdr lsu_dccm_cgc (.en(dccm_clken[i]), .l1clk(dccm_clk[i]), .*);
+ rvoclkhdr lsu_dccm_cgc (.en(dccm_clken[i]), .l1clk(dccm_clk[i]), .*);
// end clock gating section
`RV_DCCM_DATA_CELL dccm_bank (
@@ -119,8 +133,8 @@ module lsu_dccm_mem
end : mem_bank
// Flops
- rvdffs #(DCCM_BANK_BITS) rd_addr_lo_ff (.*, .din(dccm_rd_addr_lo[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .dout(dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .en(~lsu_freeze_dc3));
- rvdffs #(DCCM_BANK_BITS) rd_addr_hi_ff (.*, .din(dccm_rd_addr_hi[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .dout(dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .en(~lsu_freeze_dc3));
+ rvdffs #(DCCM_BANK_BITS) rd_addr_lo_ff (.*, .din(dccm_rd_addr_lo[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .dout(dccm_rd_addr_lo_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .en(~lsu_freeze_dc3), .clk(free_clk));
+ rvdffs #(DCCM_BANK_BITS) rd_addr_hi_ff (.*, .din(dccm_rd_addr_hi[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .dout(dccm_rd_addr_hi_q[DCCM_WIDTH_BITS+:DCCM_BANK_BITS]), .en(~lsu_freeze_dc3), .clk(free_clk));
endmodule // lsu_dccm_mem
diff --git a/design/lsu/lsu_lsc_ctl.sv b/design/lsu/lsu_lsc_ctl.sv
index 231e17a..4891a1a 100644
--- a/design/lsu/lsu_lsc_ctl.sv
+++ b/design/lsu/lsu_lsc_ctl.sv
@@ -45,6 +45,9 @@ module lsu_lsc_ctl
input logic lsu_freeze_c2_dc1_clk,
input logic lsu_freeze_c2_dc2_clk,
input logic lsu_freeze_c2_dc3_clk,
+ input logic lsu_freeze_c2_dc1_clken,
+ input logic lsu_freeze_c2_dc2_clken,
+ input logic lsu_freeze_c2_dc3_clken,
input logic lsu_store_c1_dc1_clken,
input logic lsu_store_c1_dc2_clken,
@@ -248,9 +251,6 @@ module lsu_lsc_ctl
end
// C2 clock for valid and C1 for other bits of packet
- rvdff #(1) lsu_pkt_vlddc1ff (.*, .din(lsu_pkt_dc1_in.valid), .dout(lsu_pkt_dc1.valid), .clk(lsu_freeze_c2_dc1_clk));
- rvdff #(1) lsu_pkt_vlddc2ff (.*, .din(lsu_pkt_dc2_in.valid), .dout(lsu_pkt_dc2.valid), .clk(lsu_freeze_c2_dc2_clk));
- rvdff #(1) lsu_pkt_vlddc3ff (.*, .din(lsu_pkt_dc3_in.valid), .dout(lsu_pkt_dc3.valid), .clk(lsu_freeze_c2_dc3_clk));
rvdff #(1) lsu_pkt_vlddc4ff (.*, .din(lsu_pkt_dc4_in.valid), .dout(lsu_pkt_dc4.valid), .clk(lsu_c2_dc4_clk));
rvdff #(1) lsu_pkt_vlddc5ff (.*, .din(lsu_pkt_dc5_in.valid), .dout(lsu_pkt_dc5.valid), .clk(lsu_c2_dc5_clk));
@@ -321,19 +321,22 @@ module lsu_lsc_ctl
rvdff #(32) end_addr_dc4ff (.*, .din(end_addr_dc3[31:0]), .dout(end_addr_dc4[31:0]), .clk(lsu_c1_dc4_clk));
rvdff #(32) end_addr_dc5ff (.*, .din(end_addr_dc4[31:0]), .dout(end_addr_dc5[31:0]), .clk(lsu_c1_dc5_clk));
- rvdff #(1) addr_in_dccm_dc2ff(.din(addr_in_dccm_dc1), .dout(addr_in_dccm_dc2), .clk(lsu_freeze_c1_dc2_clk), .*);
- rvdff #(1) addr_in_dccm_dc3ff(.din(addr_in_dccm_dc2), .dout(addr_in_dccm_dc3), .clk(lsu_freeze_c1_dc3_clk), .*);
- rvdff #(1) addr_in_pic_dc2ff(.din(addr_in_pic_dc1), .dout(addr_in_pic_dc2), .clk(lsu_freeze_c1_dc2_clk), .*);
- rvdff #(1) addr_in_pic_dc3ff(.din(addr_in_pic_dc2), .dout(addr_in_pic_dc3), .clk(lsu_freeze_c1_dc3_clk), .*);
+ rvdff_fpga #(1) addr_in_dccm_dc2ff (.din(addr_in_dccm_dc1), .dout(addr_in_dccm_dc2), .clk(lsu_freeze_c1_dc2_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc2_clken), .*);
+ rvdff_fpga #(1) addr_in_dccm_dc3ff (.din(addr_in_dccm_dc2), .dout(addr_in_dccm_dc3), .clk(lsu_freeze_c1_dc3_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc3_clken), .*);
+ rvdff_fpga #(1) addr_in_pic_dc2ff (.din(addr_in_pic_dc1), .dout(addr_in_pic_dc2), .clk(lsu_freeze_c1_dc2_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc2_clken), .*);
+ rvdff_fpga #(1) addr_in_pic_dc3ff (.din(addr_in_pic_dc2), .dout(addr_in_pic_dc3), .clk(lsu_freeze_c1_dc3_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc3_clken), .*);
+ rvdff_fpga #(1) access_fault_dc2ff (.din(access_fault_dc1), .dout(access_fault_dc2), .clk(lsu_freeze_c1_dc2_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc2_clken), .*);
+ rvdff_fpga #(1) access_fault_dc3ff (.din(access_fault_dc2), .dout(access_fault_dc3), .clk(lsu_freeze_c1_dc3_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc3_clken), .*);
+ rvdff_fpga #(1) addr_external_dc2ff (.din(addr_external_dc1), .dout(addr_external_dc2), .clk(lsu_freeze_c1_dc2_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc2_clken), .*);
+ rvdff_fpga #(1) addr_external_dc3ff (.din(addr_external_dc2), .dout(addr_external_dc3), .clk(lsu_freeze_c1_dc3_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc3_clken), .*);
+ rvdff_fpga #(1) misaligned_fault_dc2ff (.din(misaligned_fault_dc1), .dout(misaligned_fault_dc2), .clk(lsu_freeze_c1_dc2_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc2_clken), .*);
+ rvdff_fpga #(1) misaligned_fault_dc3ff (.din(misaligned_fault_dc2), .dout(misaligned_fault_dc3), .clk(lsu_freeze_c1_dc3_clk), .rawclk(clk), .clken(lsu_freeze_c1_dc3_clken), .*);
+ rvdff_fpga #(1) lsu_pkt_vlddc1ff (.din(lsu_pkt_dc1_in.valid), .dout(lsu_pkt_dc1.valid), .clk(lsu_freeze_c2_dc1_clk), .rawclk(clk), .clken(lsu_freeze_c2_dc1_clken), .*);
+ rvdff_fpga #(1) lsu_pkt_vlddc2ff (.din(lsu_pkt_dc2_in.valid), .dout(lsu_pkt_dc2.valid), .clk(lsu_freeze_c2_dc2_clk), .rawclk(clk), .clken(lsu_freeze_c2_dc2_clken), .*);
+ rvdff_fpga #(1) lsu_pkt_vlddc3ff (.din(lsu_pkt_dc3_in.valid), .dout(lsu_pkt_dc3.valid), .clk(lsu_freeze_c2_dc3_clk), .rawclk(clk), .clken(lsu_freeze_c2_dc3_clken), .*);
- rvdff #(1) addr_external_dc2ff(.din(addr_external_dc1), .dout(addr_external_dc2), .clk(lsu_freeze_c1_dc2_clk), .*);
- rvdff #(1) addr_external_dc3ff(.din(addr_external_dc2), .dout(addr_external_dc3), .clk(lsu_freeze_c1_dc3_clk), .*);
rvdff #(1) addr_external_dc4ff(.din(addr_external_dc3), .dout(addr_external_dc4), .clk(lsu_c1_dc4_clk), .*);
rvdff #(1) addr_external_dc5ff(.din(addr_external_dc4), .dout(addr_external_dc5), .clk(lsu_c1_dc5_clk), .*);
- rvdff #(1) access_fault_dc2ff(.din(access_fault_dc1), .dout(access_fault_dc2), .clk(lsu_freeze_c1_dc2_clk), .*);
- rvdff #(1) access_fault_dc3ff(.din(access_fault_dc2), .dout(access_fault_dc3), .clk(lsu_freeze_c1_dc3_clk), .*);
- rvdff #(1) misaligned_fault_dc2ff(.din(misaligned_fault_dc1), .dout(misaligned_fault_dc2), .clk(lsu_freeze_c1_dc2_clk), .*);
- rvdff #(1) misaligned_fault_dc3ff(.din(misaligned_fault_dc2), .dout(misaligned_fault_dc3), .clk(lsu_freeze_c1_dc3_clk), .*);
endmodule
diff --git a/design/lsu/lsu_stbuf.sv b/design/lsu/lsu_stbuf.sv
index 318302f..dea32c4 100644
--- a/design/lsu/lsu_stbuf.sv
+++ b/design/lsu/lsu_stbuf.sv
@@ -38,6 +38,9 @@ module lsu_stbuf
input logic lsu_freeze_c1_dc2_clk, // freeze clock
input logic lsu_freeze_c1_dc3_clk, // freeze clock
+ input logic lsu_freeze_c2_dc2_clken,
+ input logic lsu_freeze_c2_dc3_clken,
+ input logic lsu_freeze_c1_dc2_clken,
input logic lsu_freeze_c1_dc3_clken,
input logic lsu_c1_dc4_clk, // lsu pipe clock
@@ -232,8 +235,11 @@ module lsu_stbuf
rvdff #(.WIDTH(DEPTH_LOG2)) WrPtr_dc4ff (.din(WrPtr_dc3[DEPTH_LOG2-1:0]), .dout(WrPtr_dc4[DEPTH_LOG2-1:0]), .clk(lsu_c1_dc4_clk), .*);
rvdff #(.WIDTH(DEPTH_LOG2)) WrPtr_dc5ff (.din(WrPtr_dc4[DEPTH_LOG2-1:0]), .dout(WrPtr_dc5[DEPTH_LOG2-1:0]), .clk(lsu_c1_dc5_clk), .*);
- rvdff #(.WIDTH(1)) ldst_dual_dc2ff (.din(ldst_dual_dc1), .dout(ldst_dual_dc2), .clk(lsu_freeze_c1_dc2_clk), .*);
- rvdff #(.WIDTH(1)) ldst_dual_dc3ff (.din(ldst_dual_dc2), .dout(ldst_dual_dc3), .clk(lsu_freeze_c1_dc3_clk), .*);
+ rvdff_fpga #(.WIDTH(1)) ldst_dual_dc2ff (.din(ldst_dual_dc1), .dout(ldst_dual_dc2), .clk(lsu_freeze_c1_dc2_clk), .clken(lsu_freeze_c1_dc2_clken), .rawclk(clk), .*);
+ rvdff_fpga #(.WIDTH(1)) ldst_dual_dc3ff (.din(ldst_dual_dc2), .dout(ldst_dual_dc3), .clk(lsu_freeze_c1_dc3_clk), .clken(lsu_freeze_c1_dc3_clken), .rawclk(clk), .*);
+ rvdff_fpga #(.WIDTH(BYTE_WIDTH)) stbuf_fwdbyteen_hi_dc3ff (.din(stbuf_fwdbyteen_hi_fn_dc2[BYTE_WIDTH-1:0]), .dout(stbuf_fwdbyteen_hi_dc3[BYTE_WIDTH-1:0]), .clk(lsu_freeze_c1_dc3_clk), .clken(lsu_freeze_c1_dc3_clken), .rawclk(clk), .*);
+ rvdff_fpga #(.WIDTH(BYTE_WIDTH)) stbuf_fwdbyteen_lo_dc3ff (.din(stbuf_fwdbyteen_lo_fn_dc2[BYTE_WIDTH-1:0]), .dout(stbuf_fwdbyteen_lo_dc3[BYTE_WIDTH-1:0]), .clk(lsu_freeze_c1_dc3_clk), .clken(lsu_freeze_c1_dc3_clken), .rawclk(clk), .*);
+
rvdff #(.WIDTH(1)) ldst_dual_dc4ff (.din(ldst_dual_dc3), .dout(ldst_dual_dc4), .clk(lsu_c1_dc4_clk), .*);
rvdff #(.WIDTH(1)) ldst_dual_dc5ff (.din(ldst_dual_dc4), .dout(ldst_dual_dc5), .clk(lsu_c1_dc5_clk), .*);
@@ -387,8 +393,6 @@ module lsu_stbuf
rvdffs #(.WIDTH(DEPTH_LOG2)) WrPtrff (.din(NxtWrPtr[DEPTH_LOG2-1:0]), .dout(WrPtr[DEPTH_LOG2-1:0]), .en(WrPtrEn), .clk(lsu_stbuf_c1_clk), .*);
rvdffs #(.WIDTH(DEPTH_LOG2)) RdPtrff (.din(NxtRdPtr[DEPTH_LOG2-1:0]), .dout(RdPtr[DEPTH_LOG2-1:0]), .en(RdPtrEn), .clk(lsu_stbuf_c1_clk), .*);
- rvdff #(.WIDTH(BYTE_WIDTH)) stbuf_fwdbyteen_hi_dc3ff (.din(stbuf_fwdbyteen_hi_fn_dc2[BYTE_WIDTH-1:0]), .dout(stbuf_fwdbyteen_hi_dc3[BYTE_WIDTH-1:0]), .clk(lsu_freeze_c1_dc3_clk), .*);
- rvdff #(.WIDTH(BYTE_WIDTH)) stbuf_fwdbyteen_lo_dc3ff (.din(stbuf_fwdbyteen_lo_fn_dc2[BYTE_WIDTH-1:0]), .dout(stbuf_fwdbyteen_lo_dc3[BYTE_WIDTH-1:0]), .clk(lsu_freeze_c1_dc3_clk), .*);
rvdffe #(.WIDTH(DATA_WIDTH)) stbuf_fwddata_hi_dc3ff (.din(stbuf_fwddata_hi_fn_dc2[DATA_WIDTH-1:0]), .dout(stbuf_fwddata_hi_dc3[DATA_WIDTH-1:0]), .en(lsu_freeze_c1_dc3_clken), .*);
rvdffe #(.WIDTH(DATA_WIDTH)) stbuf_fwddata_lo_dc3ff (.din(stbuf_fwddata_lo_fn_dc2[DATA_WIDTH-1:0]), .dout(stbuf_fwddata_lo_dc3[DATA_WIDTH-1:0]), .en(lsu_freeze_c1_dc3_clken), .*);
diff --git a/design/mem.sv b/design/mem.sv
index f406c31..5cc4808 100644
--- a/design/mem.sv
+++ b/design/mem.sv
@@ -50,7 +50,7 @@ module mem
`endif
// Icache and Itag Ports
`ifdef RV_ICACHE_ENABLE //temp
- input logic [31:3] ic_rw_addr,
+ input logic [31:2] ic_rw_addr,
input logic [3:0] ic_tag_valid,
input logic [3:0] ic_wr_en,
input logic ic_rd_en,
@@ -98,6 +98,9 @@ module mem
localparam DCCM_ENABLE = 1'b0;
`endif
+ logic free_clk;
+ rvoclkhdr free_cg ( .en(1'b1), .l1clk(free_clk), .* );
+
// DCCM Instantiation
if (DCCM_ENABLE == 1) begin: Gen_dccm_enable
lsu_dccm_mem dccm (
diff --git a/design/pic_ctrl.sv b/design/pic_ctrl.sv
index fb033f6..53b2fe2 100644
--- a/design/pic_ctrl.sv
+++ b/design/pic_ctrl.sv
@@ -201,7 +201,7 @@ for (i=0; iTM 1.7 from Western Digital
+## Release Notes
+
+* RV_FPGA_OPTIMIZE is now default build option.
+ * Use -fpga_optimize=0 to build for lower power (ASIC) flows.
+* Fixed a couple of cases of clock enable qualification for power reduction
+* Fixes for 4 debug compliance issues reported by Codasip
+* Fixed some remaining clock gating issues for RV_FPGA_OPTIMIZE to improve fpga speed
+
+
+
# SweRV RISC-V CoreTM 1.6 from Western Digital
## Release Notes