diff --git a/README.md b/README.md index a3c34ed..237042e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# EL2 SweRV RISC-V CoreTM 1.0 from Western Digital +# EL2 SweRV RISC-V CoreTM 1.1 from Western Digital This repository contains the SweRV EL2 CoreTM design RTL @@ -141,12 +141,12 @@ The simulation run/build command has following generic form: make -f $RV_ROOT/tools/Makefile [] [debug=1] [snapshot=mybuild] [target=] [TEST=] [TEST_DIR=] where: -``` - - can be 'verilator' (by default) 'irun' - Cadence xrun, 'vcs' - Synopsys VCS +``` + - can be 'verilator' (by default) 'irun' - Cadence xrun, 'vcs' - Synopsys VCS, 'vlog' Mentor Questa if not provided, 'make' cleans work directory, builds verilator executable and runs a test. debug=1 - allows VCD generation for verilator and VCS and SHM waves for irun option. - predefined CPU configurations 'default' ( by default), 'default_ahb', 'typical_pd', 'high_perf' -TEST - allows to run a C (.c) or assembly (.s) test, hello_world2 is run by default +TEST - allows to run a C (.c) or assembly (.s) test, hello_world is run by default TEST_DIR - alternative to test source directory testbench/asm - run and build executable model of custom CPU configuration, remember to provide 'snapshot' argument for runs on custom configurations. @@ -164,28 +164,33 @@ If you want to compile a test only, you can run: make -f $RV_ROOT/tools/Makefile program.hex TEST= [TEST_DIR=/path/to/dir] -For the cmark test, the script in `$RV_ROOT/tools/calc_cmarks.pl` can be used -to extract the core-marks score by invoking that script in the run -directory. - -The Makefile uses `$RV_ROOT/testbench/linker.ld` file by default to build test executable. +The Makefile uses `$RV_ROOT/testbench/link.ld` file by default to build test executable. User can provide test specific linker file in form `.ld` to build the test executable, in the same directory with the test source. 
User also can create a test specific makefile in form `.makefile`, containing building instructions how to create `program.hex` and `data.hex` files used by simulation. The private makefile should be in the same directory as the test source. -*(`program.hex` file is loaded to instruction bus memory slave and 'data.hex' file is loaded to LSU bus memory slave and +*(`program.hex` file is loaded to instruction bus memory slave and `data.hex` file is loaded to LSU bus memory slave and optionally to DCCM at the beginning of simulation)*. +Note: You may need to delete the `program.hex` file from the work directory when running a new test. + The `$RV_ROOT/testbench/asm` directory contains following tests ready to simulate: ``` -hello_world2 - default tes to run, prints Hello World message to screen and console.log +hello_world - default test to run, prints Hello World message to screen and console.log hello_world_dccm - the same as above, but takes the string from preloaded DCCM. +hello_world_iccm - the same as hello_world, but loads the test code to ICCM via LSU to DMA bridge and then executes + it from there. Runs on EL2 with AXI4 buses only. cmark - coremark benchmark running with code and data in external memories cmark_dccm - the same as above, running data and stack from DCCM (faster) +cmark_iccm - the same as above with preloaded code to ICCM. ``` +The `$RV_ROOT/testbench/hex` directory contains precompiled hex files of the tests, ready for simulation in case RISCV SW tools are not installed. + +**Note**: The testbench has a simple synthesizable bridge that allows you to load the ICCM via load/store instructions. This is only supported for AXI4 builds. 
+ ---- Western Digital, the Western Digital logo, G-Technology, SanDisk, Tegile, Upthere, WD, SweRV Core, SweRV ISS, and OmniXtend are registered trademarks or trademarks of Western Digital Corporation or its affiliates in the US diff --git a/configs/swerv.config b/configs/swerv.config index ab3dda3..20379e0 100755 --- a/configs/swerv.config +++ b/configs/swerv.config @@ -81,7 +81,7 @@ This script can be run stand-alone by processes not running vsim User options: - -target = {default, typical_pd, high_perf, default_ahb} + -target = {default, typical_pd, high_perf, default_ahb, lsu2dma_axi} use default settings for one of the targets -set=var=value @@ -191,6 +191,8 @@ my $pic_total_int; my $top_align_iccm = 0; +my $lsu2dma = 0; + my $target = "default"; my $snapshot ; my $build_path ; @@ -319,7 +321,10 @@ my $opensource=0; # IDEA: is ghr at 5b the right size for el2 core -if ($target eq "default") { +if ($target eq "default") { } +elsif ($target eq "lsu2dma_axi") { + $lsu2dma = 1; + $iccm_enable = 1; } elsif ($target eq "typical_pd") { print "$self: Using target \"typical_pd\"\n"; @@ -339,7 +344,7 @@ elsif ($target eq "default_ahb") { print "$self: Using target \"default_ahb\"\n"; } else { - die "$self: ERROR! Unsupported target \"$target\". Supported are 'default', 'default_ahb', 'typical_pd', 'high_perf'\n" ; + die "$self: ERROR! Unsupported target \"$target\". 
Supported are 'default', 'default_ahb', 'typical_pd', 'high_perf', 'lsu2dma_axi\n" ; } @@ -734,7 +739,8 @@ our %config = (#{{{ "iccm_only" => 'derived', # Used by design "icache_only" => 'derived', # Used by design "no_iccm_no_icache" => 'derived', # Used by design - "fast_interrupt_redirect" => "$fast_interrupt_redirect" # Design Parm, Overridable + "fast_interrupt_redirect" => "$fast_interrupt_redirect", # Design Parm, Overridable + "lsu2dma" => $lsu2dma, # used by design/TB for LSU to DMA bridge }, "dccm" => { @@ -825,8 +831,23 @@ our %config = (#{{{ "pic_mpiccfg_offset" => '0x3000', # Testbench only: Offset of mpiccfg relative to pic_base_addr "pic_meipt_offset" => '0x3004', # Testbench only: Offset of meipt relative to pic_base_addr -- deprecated "pic_meigwctrl_offset" => '0x4000', # Testbench only: gateway control regs relative to pic_base_addr - "pic_meigwclr_offset" => '0x5000' # Testbench only: gateway clear regs relative to pic_base_addr + "pic_meigwclr_offset" => '0x5000', # Testbench only: gateway clear regs relative to pic_base_addr + "pic_meipl_mask" => '0xf', + "pic_meip_mask" => '0x0', + "pic_meie_mask" => '0x1', + "pic_mpiccfg_mask" => '0x1', + "pic_meipt_mask" => '0x0', + "pic_meigwctrl_mask" => '0x3', + "pic_meigwclr_mask" => '0x0', + + "pic_meipl_count" => $pic_total_int, + "pic_meip_count" => 4, + "pic_meie_count" => $pic_total_int, + "pic_mpiccfg_count" => 1, + "pic_meipt_count" => $pic_total_int, + "pic_meigwctrl_count" => $pic_total_int, + "pic_meigwclr_count" => $pic_total_int }, "testbench" => { "TOP" => "tb_top", @@ -927,6 +948,7 @@ our %config = (#{{{ # need to have this be width in binary # for now autosize to the data our %verilog_parms = ( + "lsu2dma" => '1', "fast_interrupt_redirect" => '1', "inst_access_enable0" => '1', "inst_access_addr0" => '32', @@ -1987,6 +2009,32 @@ sub collect_mem_protection { } +# Collect the memory mapped registers associated with the pic (platform +# interrupt controller) to include in the whisper.json file. 
+sub collect_mem_mapped_regs { + my ($pic, $results) = @_; + my $default_mask = 0; + $results->{default_mask} = $default_mask; + my $addr = hex($pic->{pic_region})*256*1024*1024 + hex($pic->{pic_offset}); + $results->{address} = sprintf("0x%x", $addr); + $results->{size} = sprintf("0x%x", $pic->{pic_size}*1024); + + my @names = qw ( mpiccfg meipl meip meie meigwctrl meigwclr meidels ); + $results->{registers} = {}; + foreach my $name (@names) { + my $tag = "pic_${name}_offset"; + next unless exists $pic->{$tag}; + my %item; + my $offset = hex($pic->{$tag}); + $offset += 4 if ($name ne 'mpiccfg' and $name ne 'meip'); + $item{address} = sprintf("0x%x", $addr + $offset); + $item{mask} = $pic->{"pic_${name}_mask"}; + $item{count} = $pic->{"pic_${name}_count"}; + $results->{registers}{$name} = \%item; + } +} + + sub dump_whisper_config{#{{{ my ($config, $path) = @_; @@ -2063,6 +2111,11 @@ sub dump_whisper_config{#{{{ # Collect pic configs. if (exists $config{pic}) { + my %mem_mapped; + collect_mem_mapped_regs($config{pic}, \%mem_mapped); + $jh{'memory_mapped_registers'} = \%mem_mapped; + + # This is now deprecated. To be removed soon. 
while (my ($k, $v) = each %{$config{pic}}) { next if $k eq 'pic_base_addr'; # derived from region and offset if ($k eq 'pic_size') { diff --git a/design/dbg/el2_dbg.sv b/design/dbg/el2_dbg.sv index 5cfa548..63b3eb6 100644 --- a/design/dbg/el2_dbg.sv +++ b/design/dbg/el2_dbg.sv @@ -109,6 +109,7 @@ import el2_pkg::*; // general inputs input logic clk, input logic rst_l, + input logic dbg_rst_l, input logic clk_override, input logic scan_mode ); @@ -147,6 +148,8 @@ import el2_pkg::*; logic dmstatus_havereset_wren; logic dmstatus_havereset_rst; logic dmstatus_resumeack; + logic dmstatus_unavail; + logic dmstatus_running; logic dmstatus_halted; logic dmstatus_havereset; @@ -227,7 +230,7 @@ import el2_pkg::*; // end clocking section // Reset logic - assign dbg_dm_rst_l = rst_l & (dmcontrol_reg[0] | scan_mode); + assign dbg_dm_rst_l = dbg_rst_l & (dmcontrol_reg[0] | scan_mode); assign dbg_core_rst_l = ~dmcontrol_reg[1]; // system bus register @@ -253,10 +256,10 @@ import el2_pkg::*; assign sbcs_illegal_size = sbcs_reg[19]; // Anything bigger than 64 bits is illegal - assign sbaddress0_incr[3:0] = ({4{(sbcs_reg[19:17] == 3'b000)}} & 4'b0001) | - ({4{(sbcs_reg[19:17] == 3'b001)}} & 4'b0010) | - ({4{(sbcs_reg[19:17] == 3'b010)}} & 4'b0100) | - ({4{(sbcs_reg[19:17] == 3'b100)}} & 4'b1000); + assign sbaddress0_incr[3:0] = ({4{(sbcs_reg[19:17] == 3'h0)}} & 4'b0001) | + ({4{(sbcs_reg[19:17] == 3'h1)}} & 4'b0010) | + ({4{(sbcs_reg[19:17] == 3'h2)}} & 4'b0100) | + ({4{(sbcs_reg[19:17] == 3'h3)}} & 4'b1000); // sbdata assign sbdata0_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3c); // write data only when single read is 0 @@ -287,13 +290,14 @@ import el2_pkg::*; assign sbdata0wr_access = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3c); // write to sbdata0 will start write command to system bus // memory mapped registers - // dmcontrol register has only 6 bits implemented. 31: haltreq, 30: resumereq, 29: haltreset, 28: ackhavereset, 1: ndmreset, 0: dmactive. 
+ // dmcontrol register has only 5 bits implemented. 31: haltreq, 30: resumereq, 28: ackhavereset, 1: ndmreset, 0: dmactive. // rest all the bits are zeroed out // dmactive flop is reset based on core rst_l, all other flops use dm_rst_l assign dmcontrol_wren = (dmi_reg_addr == 7'h10) & dmi_reg_en & dmi_reg_wr_en; + assign dmcontrol_reg[29] = '0; assign dmcontrol_reg[27:2] = '0; - rvdffs #(5) dmcontrolff (.din({dmi_reg_wdata[31:28],dmi_reg_wdata[1]}), .dout({dmcontrol_reg[31:28], dmcontrol_reg[1]}), .en(dmcontrol_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); - rvdffs #(1) dmcontrol_dmactive_ff (.din(dmi_reg_wdata[0]), .dout(dmcontrol_reg[0]), .en(dmcontrol_wren), .rst_l(rst_l), .clk(dbg_free_clk)); + rvdffs #(4) dmcontrolff (.din({dmi_reg_wdata[31:30],dmi_reg_wdata[28],dmi_reg_wdata[1]}), .dout({dmcontrol_reg[31:30], dmcontrol_reg[28], dmcontrol_reg[1]}), .en(dmcontrol_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + rvdffs #(1) dmcontrol_dmactive_ff (.din(dmi_reg_wdata[0]), .dout(dmcontrol_reg[0]), .en(dmcontrol_wren), .rst_l(dbg_rst_l), .clk(dbg_free_clk)); rvdff #(1) dmcontrol_wrenff(.din(dmcontrol_wren), .dout(dmcontrol_wren_Q), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); // dmstatus register bits that are implemented @@ -301,10 +305,12 @@ import el2_pkg::*; // rest all the bits are zeroed out assign dmstatus_reg[31:20] = '0; assign dmstatus_reg[19:18] = {2{dmstatus_havereset}}; - assign dmstatus_reg[15:10] = '0; + assign dmstatus_reg[15:14] = '0; assign dmstatus_reg[7] = '1; assign dmstatus_reg[6:4] = '0; assign dmstatus_reg[17:16] = {2{dmstatus_resumeack}}; + assign dmstatus_reg[13:12] = {2{dmstatus_unavail}}; + assign dmstatus_reg[11:10] = {2{dmstatus_running}}; assign dmstatus_reg[9:8] = {2{dmstatus_halted}}; assign dmstatus_reg[3:0] = 4'h2; @@ -314,6 +320,9 @@ import el2_pkg::*; assign dmstatus_havereset_wren = (dmi_reg_addr == 7'h10) & dmi_reg_wdata[1] & dmi_reg_en & dmi_reg_wr_en; assign dmstatus_havereset_rst = (dmi_reg_addr == 7'h10) & 
dmi_reg_wdata[28] & dmi_reg_en & dmi_reg_wr_en; + assign dmstatus_unavail = dmcontrol_reg[1] | ~rst_l; + assign dmstatus_running = ~(dmstatus_unavail | dmstatus_halted); + rvdffs #(1) dmstatus_resumeack_reg (.din(dmstatus_resumeack_din), .dout(dmstatus_resumeack), .en(dmstatus_resumeack_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); rvdff #(1) dmstatus_halted_reg (.din(dec_tlu_dbg_halted & ~dec_tlu_mpc_halted_only), .dout(dmstatus_halted), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); rvdffsc #(1) dmstatus_havereset_reg (.din(1'b1), .dout(dmstatus_havereset), .en(dmstatus_havereset_wren), .clear(dmstatus_havereset_rst), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); @@ -332,11 +341,8 @@ import el2_pkg::*; assign abstractcs_error_sel1 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h17) & ~((dmi_reg_wdata[31:24] == 8'b0) | (dmi_reg_wdata[31:24] == 8'h2)); assign abstractcs_error_sel2 = core_dbg_cmd_done & core_dbg_cmd_fail; assign abstractcs_error_sel3 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h17) & ~dmstatus_reg[9]; //(dbg_state != HALTED); - assign abstractcs_error_sel4 = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en & (dmi_reg_wdata[31:24] == 8'h2) & - ( ((dmi_reg_wdata[22:20] == 3'b001) & data1_reg[0]) | - ((dmi_reg_wdata[22:20] == 3'b010) & (|data1_reg[1:0])) | - dmi_reg_wdata[22] | (dmi_reg_wdata[22:20] == 3'b011) - ); + assign abstractcs_error_sel4 = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en & + ((dmi_reg_wdata[22:20] != 3'b010) | ((dmi_reg_wdata[31:24] == 8'h2) && (|data1_reg[1:0]))); // Only word size is allowed assign abstractcs_error_sel5 = (dmi_reg_addr == 7'h16) & dmi_reg_en & dmi_reg_wr_en; @@ -358,7 +364,7 @@ import el2_pkg::*; // command[16] = 1: write, 0: read // Size - 2, Bits Not implemented: 23 (aamvirtual), 19-autoincrement, 18-postexec, 17-transfer assign command_wren = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en & (dbg_state == HALTED); - assign command_din[31:0] = 
{dmi_reg_wdata[31:24],1'b0,3'b010,3'b0,dmi_reg_wdata[16:0]}; + assign command_din[31:0] = {dmi_reg_wdata[31:24],1'b0,dmi_reg_wdata[22:20],3'b0,dmi_reg_wdata[16:0]}; rvdffe #(32) dmcommand_reg (.*, .din(command_din[31:0]), .dout(command_reg[31:0]), .en(command_wren), .rst_l(dbg_dm_rst_l)); // data0 reg @@ -386,18 +392,18 @@ import el2_pkg::*; dbg_state_en = 1'b0; abstractcs_busy_wren = 1'b0; abstractcs_busy_din = 1'b0; - dbg_halt_req = dmcontrol_wren_Q & dmcontrol_reg[31]; // single pulse output to the core. Need to drive every time this register is written since core might be halted due to MPC + dbg_halt_req = dmcontrol_wren_Q & dmcontrol_reg[31] & ~dmcontrol_reg[1]; // single pulse output to the core. Need to drive every time this register is written since core might be halted due to MPC dbg_resume_req = 1'b0; // single pulse output to the core case (dbg_state) IDLE: begin dbg_nxtstate = (dmstatus_reg[9] | dec_tlu_mpc_halted_only) ? HALTED : HALTING; // initiate the halt command to the core dbg_state_en = ((dmcontrol_reg[31] & ~dec_tlu_debug_mode) | dmstatus_reg[9] | dec_tlu_mpc_halted_only) & ~dmcontrol_reg[1]; // when the jtag writes the halt bit in the DM register, OR when the status indicates H - dbg_halt_req = dmcontrol_reg[31]; // only when jtag has written the halt_req bit in the control. Removed debug mode qualification during MPC changes + dbg_halt_req = dmcontrol_reg[31] & ~dmcontrol_reg[1]; // only when jtag has written the halt_req bit in the control. Removed debug mode qualification during MPC changes end HALTING : begin - dbg_nxtstate = HALTED; // Goto HALTED once the core sends an ACK - dbg_state_en = dmstatus_reg[9]; // core indicates halted + dbg_nxtstate = dmcontrol_reg[1] ? IDLE : HALTED; // Goto HALTED once the core sends an ACK + dbg_state_en = dmstatus_reg[9] | dmcontrol_reg[1]; // core indicates halted end HALTED: begin // wait for halted to go away before send to resume. 
Else start of new command @@ -409,22 +415,22 @@ import el2_pkg::*; dbg_resume_req = dbg_state_en & (dbg_nxtstate == RESUMING); // single cycle pulse to core if resuming end CMD_START: begin - dbg_nxtstate = (|abstractcs_reg[10:8]) ? CMD_DONE : CMD_WAIT; // new command sent to the core - dbg_state_en = dbg_cmd_valid | (|abstractcs_reg[10:8]); + dbg_nxtstate = dmcontrol_reg[1] ? IDLE : (|abstractcs_reg[10:8]) ? CMD_DONE : CMD_WAIT; // new command sent to the core + dbg_state_en = dbg_cmd_valid | (|abstractcs_reg[10:8]) | dmcontrol_reg[1]; end CMD_WAIT: begin - dbg_nxtstate = CMD_DONE; - dbg_state_en = core_dbg_cmd_done; // go to done state for one cycle after completing current command + dbg_nxtstate = dmcontrol_reg[1] ? IDLE : CMD_DONE; + dbg_state_en = core_dbg_cmd_done | dmcontrol_reg[1]; // go to done state for one cycle after completing current command end CMD_DONE: begin - dbg_nxtstate = HALTED; + dbg_nxtstate = dmcontrol_reg[1] ? IDLE : HALTED; dbg_state_en = 1'b1; abstractcs_busy_wren = dbg_state_en; // remove the busy bit from the abstracts ( bit 12 ) abstractcs_busy_din = 1'b0; end RESUMING : begin dbg_nxtstate = IDLE; - dbg_state_en = dmstatus_reg[17]; // resume ack has been updated in the dmstatus register + dbg_state_en = dmstatus_reg[17] | dmcontrol_reg[1]; // resume ack has been updated in the dmstatus register end default : begin dbg_nxtstate = IDLE; @@ -450,7 +456,7 @@ import el2_pkg::*; ({32{dmi_reg_addr == 7'h3d}} & sbdata1_reg[31:0]); - rvdffs #($bits(state_t)) dbg_state_reg (.din(dbg_nxtstate), .dout({dbg_state}), .en(dbg_state_en), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + rvdffs #($bits(state_t)) dbg_state_reg (.din(dbg_nxtstate), .dout({dbg_state}), .en(dbg_state_en), .rst_l(dbg_dm_rst_l & rst_l), .clk(dbg_free_clk)); // Ack will use the power on reset only otherwise there won't be any ack until dmactive is 1 rvdffs #(32) dmi_rddata_reg (.din(dmi_reg_rdata_din[31:0]), .dout(dmi_reg_rdata[31:0]), .en(dmi_reg_en), .rst_l(dbg_dm_rst_l), 
.clk(dbg_free_clk)); diff --git a/design/dec/el2_dec.sv b/design/dec/el2_dec.sv index 2b50c55..de2659a 100644 --- a/design/dec/el2_dec.sv +++ b/design/dec/el2_dec.sv @@ -425,7 +425,7 @@ import el2_pkg::*; dec_tlu_i0_valid_wb1 | dec_tlu_i0_exc_valid_wb1}; assign rv_trace_pkt.rv_i_exception_ip = {dec_tlu_int_valid_wb1, dec_tlu_i0_exc_valid_wb1}; assign rv_trace_pkt.rv_i_ecause_ip = dec_tlu_exc_cause_wb1[4:0]; // replicate across ports - assign rv_trace_pkt.rv_i_interrupt_ip = {dec_tlu_int_valid_wb1,2'b0}; + assign rv_trace_pkt.rv_i_interrupt_ip = {dec_tlu_int_valid_wb1,1'b0}; assign rv_trace_pkt.rv_i_tval_ip = dec_tlu_mtval_wb1[31:0]; // replicate across ports // end trace diff --git a/design/el2_swerv.sv b/design/el2_swerv.sv index dd1dccd..b6943b0 100644 --- a/design/el2_swerv.sv +++ b/design/el2_swerv.sv @@ -28,6 +28,7 @@ import el2_pkg::*; ( input logic clk, input logic rst_l, + input logic dbg_rst_l, input logic [31:1] rst_vec, input logic nmi_int, input logic [31:1] nmi_vec, @@ -38,7 +39,7 @@ import el2_pkg::*; output logic [1:0] trace_rv_i_valid_ip, output logic [1:0] trace_rv_i_exception_ip, output logic [4:0] trace_rv_i_ecause_ip, - output logic [2:0] trace_rv_i_interrupt_ip, + output logic [1:0] trace_rv_i_interrupt_ip, output logic [31:0] trace_rv_i_tval_ip, output logic dccm_clk_override, @@ -825,10 +826,8 @@ import el2_pkg::*; assert_fetch_indbghalt: assert #0 (~(ifu.ifc_fetch_req_f & dec.tlu.dbg_tlu_halted_f & ~dec.tlu.dcsr_single_step_running)) else $display("ERROR: Fetching in dBG halt!"); `endif - - // ----------------- DEBUG END ----------------------------- - assign core_rst_l = rst_l & (dbg_core_rst_l | scan_mode); + // fetch el2_ifu #(.pt(pt)) ifu ( .rst_l(core_rst_l), @@ -1250,7 +1249,7 @@ if (pt.BUILD_AHB_LITE == 1) begin assign trace_rv_i_valid_ip[1:0] = rv_trace_pkt.rv_i_valid_ip[1:0]; assign trace_rv_i_exception_ip[1:0] = rv_trace_pkt.rv_i_exception_ip[1:0]; assign trace_rv_i_ecause_ip[4:0] = rv_trace_pkt.rv_i_ecause_ip[4:0]; - assign 
trace_rv_i_interrupt_ip[2:0] = rv_trace_pkt.rv_i_interrupt_ip[2:0]; + assign trace_rv_i_interrupt_ip[1:0] = rv_trace_pkt.rv_i_interrupt_ip[1:0]; assign trace_rv_i_tval_ip[31:0] = rv_trace_pkt.rv_i_tval_ip[31:0]; diff --git a/design/el2_swerv_wrapper.sv b/design/el2_swerv_wrapper.sv index b52194e..eddf515 100644 --- a/design/el2_swerv_wrapper.sv +++ b/design/el2_swerv_wrapper.sv @@ -28,6 +28,7 @@ import el2_pkg::*; ( input logic clk, input logic rst_l, + input logic dbg_rst_l, input logic [31:1] rst_vec, input logic nmi_int, input logic [31:1] nmi_vec, @@ -39,7 +40,7 @@ import el2_pkg::*; output logic [1:0] trace_rv_i_valid_ip, output logic [1:0] trace_rv_i_exception_ip, output logic [4:0] trace_rv_i_ecause_ip, - output logic [2:0] trace_rv_i_interrupt_ip, + output logic [1:0] trace_rv_i_interrupt_ip, output logic [31:0] trace_rv_i_tval_ip, // Bus signals @@ -695,7 +696,7 @@ import el2_pkg::*; .tdoEnable (), // Test Data Output enable // Processor Signals - .core_rst_n (core_rst_l), // Core reset, active low + .core_rst_n (dbg_rst_l), // Primary reset active low .core_clk (clk), // Core clock .jtag_id (jtag_id), // 32 bit JTAG ID .rd_data (dmi_reg_rdata), // 32 bit Read data from Processor diff --git a/design/ifu/el2_ifu_iccm_mem.sv b/design/ifu/el2_ifu_iccm_mem.sv index dd25ddb..846c974 100644 --- a/design/ifu/el2_ifu_iccm_mem.sv +++ b/design/ifu/el2_ifu_iccm_mem.sv @@ -89,6 +89,19 @@ import el2_pkg::*; ((addr_bank_inc[pt.ICCM_BANK_HI:2] == i) ? 
addr_bank_inc[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] : iccm_rw_addr[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO]); + `ifdef VERILATOR + + el2_ram #(.depth(1<> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores diff --git a/design/lsu/el2_lsu_dccm_ctl.sv b/design/lsu/el2_lsu_dccm_ctl.sv index 0857631..1d44f69 100644 --- a/design/lsu/el2_lsu_dccm_ctl.sv +++ b/design/lsu/el2_lsu_dccm_ctl.sv @@ -100,7 +100,10 @@ import el2_pkg::*; input logic [31:0] store_data_m, input logic dma_dccm_wen, + input logic dma_pic_wen, input logic [2:0] dma_mem_tag_m, + input logic [31:0] dma_mem_addr, // DMA address + input logic [63:0] dma_mem_wdata, // DMA write data input logic [31:0] dma_dccm_wdata_lo, input logic [31:0] dma_dccm_wdata_hi, input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata @@ -365,14 +368,14 @@ import el2_pkg::*; assign dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_hi[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH]; // PIC signals. PIC ignores the lower 2 bits of address since PIC memory registers are 32-bits - assign picm_wren = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_pic_r & lsu_commit_r; + assign picm_wren = (lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_pic_r & lsu_commit_r) | dma_pic_wen; assign picm_rden = lsu_pkt_d.valid & lsu_pkt_d.load & addr_in_pic_d; assign picm_mken = lsu_pkt_d.valid & lsu_pkt_d.store & addr_in_pic_d; // Get the mask for stores - assign picm_rdaddr[31:0] = pt.PIC_BASE_ADDR | {17'b0,lsu_addr_d[14:0]}; + assign picm_rdaddr[31:0] = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},lsu_addr_d[pt.PIC_BITS-1:0]}; - assign picm_wraddr[31:0] = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},lsu_addr_r[pt.PIC_BITS-1:0]}; + assign picm_wraddr[31:0] = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},(dma_pic_wen ? dma_mem_addr[pt.PIC_BITS-1:0] : lsu_addr_r[pt.PIC_BITS-1:0])}; - assign picm_wr_data[31:0] = store_datafn_lo_r[31:0]; + assign picm_wr_data[31:0] = dma_pic_wen ? 
dma_mem_wdata[31:0] : store_datafn_lo_r[31:0]; assign picm_mask_data_m[31:0] = picm_rd_data_m[31:0]; assign picm_rd_data_m[63:0] = {picm_rd_data[31:0],picm_rd_data[31:0]}; diff --git a/docs/RISC-V SweRV EL2 PRM.pdf b/docs/RISC-V SweRV EL2 PRM.pdf index 779d3c5..95730d9 100755 Binary files a/docs/RISC-V SweRV EL2 PRM.pdf and b/docs/RISC-V SweRV EL2 PRM.pdf differ diff --git a/testbench/asm/cmark.c b/testbench/asm/cmark.c index dab4304..4e7a9b7 100644 --- a/testbench/asm/cmark.c +++ b/testbench/asm/cmark.c @@ -1,5 +1,6 @@ #include "defines.h" +#define ITERATIONS 1 extern int STACK; void main(); @@ -1198,7 +1199,9 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) { #else ee_printf("Total time (secs): %d\n",time_in_secs(total_time)); if (time_in_secs(total_time) > 0) - ee_printf("Iterations/Sec : %d\n",default_num_contexts*results[0].iterations/time_in_secs(total_time)); +// ee_printf("Iterations/Sec : %d\n",default_num_contexts*results[0].iterations/time_in_secs(total_time)); + ee_printf("Iterat/Sec/MHz : %d.%d\n",1000*default_num_contexts*results[0].iterations/time_in_secs(total_time), + 100000*default_num_contexts*results[0].iterations/time_in_secs(total_time) % 100); #endif if (time_in_secs(total_time) < 10) { ee_printf("ERROR! Must execute for at least 10 secs for a valid result!\n"); @@ -2080,8 +2083,7 @@ ee_u8 check_data_types() { volatile ee_s32 seed2_volatile=0x8; volatile ee_s32 seed3_volatile=0x8; #endif -// volatile ee_s32 seed4_volatile=ITERATIONS; - volatile ee_s32 seed4_volatile=1; + volatile ee_s32 seed4_volatile=ITERATIONS; volatile ee_s32 seed5_volatile=0; /* Porting : Timing functions How to capture time and convert to seconds must be ported to whatever is supported by the platform. 
@@ -2102,7 +2104,9 @@ ee_u8 check_data_types() { #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) #define TIMER_RES_DIVIDER 1 #define SAMPLE_TIME_IMPLEMENTATION 1 -#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) +//#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) + +#define EE_TICKS_PER_SEC 1000 /** Define Host specific (POSIX), or target specific global time variables. */ static CORETIMETYPE start_time_val, stop_time_val; diff --git a/testbench/asm/hello_world.s b/testbench/asm/hello_world.s index 5f2c90a..0097c63 100644 --- a/testbench/asm/hello_world.s +++ b/testbench/asm/hello_world.s @@ -1,67 +1,71 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +// Assembly code for Hello World +// Not using only ALU ops for creating the string +#include "defines.h" + +#define STDOUT 0xd0580000 + + +// Code to execute +.section .text .global _start _start: - csrrw x2, 0xb02, x3 + + // Clear minstret + csrw minstret, zero + csrw minstreth, zero + + // Set up MTVEC - not expecting to use it though + li x1, RV_ICCM_SADR + csrw mtvec, x1 - lui x5, 974848 - ori x5, x5, 0 - csrrw x2, 0x305, x5 + // Enable Caches in MRAC + li x1, 0x5f555555 + csrw 0x7c0, x1 + // Load string from hw_data + // and write to stdout address - lui x6, 382293 - ori x6, x6, 1365 - csrrw x1, 0x7c0, x6 + li x3, STDOUT + la x4, hw_data +loop: + lb x5, 0(x4) + sb x5, 0(x3) + addi x4, x4, 1 + bnez x5, loop +// Write 0xff to STDOUT for TB to terminate test. +_finish: + li x3, STDOUT + addi x5, x0, 0xff + sb x5, 0(x3) + beq x0, x0, _finish +.rept 100 + nop +.endr - - lui x5, 0 - ori x5, x5, 0 - csrrw x2, 0x7f8, x5 - - - - - lui x5, 0 - ori x5, x5, 0 - csrrw x2, 0x7f9, x5 - - - addi x0, x0, 0 - lui x11, 853376 - ori x9, x0, 'H' - sw x9, 0 (x11) - ori x9, x0, 'E' - sw x9, 0 (x11) - ori x9, x0, 'L' - sw x9, 0 (x11) - sw x9, 0 (x11) - ori x9, x0, 'O' - sw x9, 0 (x11) - ori x9, x0, ' ' - sw x9, 0 (x11) - addi x9, x0, 'W' - sw x9, 0 (x11) - ori x9, x0, 'O' - sw x9, 0 (x11) - ori x9, x0, 'R' - sw x9, 0 (x11) - ori x9, x0, 'L' - sw x9, 0 (x11) - ori x9, x0, 'D' - sw x9, 0 (x11) - ori x9, x0, '!' 
- sw x9, 0 (x11) - ori x9, x0, 255 - sw x9, 0 (x11) - addi x1,x0,0 - -finish: - addi x1,x1,1 - jal x0, finish; - addi x0,x0,0 - addi x0,x0,0 - addi x0,x0,0 - addi x0,x0,0 +.data +hw_data: +.ascii "----------------------------------\n" +.ascii "Hello World from SweRV EL2 @WDC !!\n" +.ascii "----------------------------------\n" +.byte 0 diff --git a/testbench/asm/hello_world_dccm.ld b/testbench/asm/hello_world_dccm.ld index eea3cbe..64e9c37 100644 --- a/testbench/asm/hello_world_dccm.ld +++ b/testbench/asm/hello_world_dccm.ld @@ -5,7 +5,7 @@ ENTRY(_start) SECTIONS { .text : { *(.text*) } _end = .; - . = 0xfff8; + . = 0x1fff8; .data.ctl : { LONG(0xf0040000); LONG(STACK) } . = 0xf0040000; .data : AT(0x10000) { *(.*data) *(.rodata*) STACK = ALIGN(16) + 0x8000;} diff --git a/testbench/asm/hello_world_dccm.s b/testbench/asm/hello_world_dccm.s index 2c91c93..3418f77 120000 --- a/testbench/asm/hello_world_dccm.s +++ b/testbench/asm/hello_world_dccm.s @@ -1 +1 @@ -hello_world2.s \ No newline at end of file +hello_world.s \ No newline at end of file diff --git a/testbench/flist b/testbench/flist index fb1cd8f..e948cd0 100644 --- a/testbench/flist +++ b/testbench/flist @@ -1,8 +1,4 @@ +libext+.v+.sv -//-incdir $RV_ROOT/design/lib -//-incdir $RV_ROOT/design/include -//-incdir $RV_ROOT/design/dmi -//-incdir $SYNOPSYS_SYN_ROOT/dw/sim_ver //-y $SYNOPSYS_SYN_ROOT/dw/sim_ver $RV_ROOT/design/el2_swerv_wrapper.sv $RV_ROOT/design/el2_mem.sv @@ -24,7 +20,6 @@ $RV_ROOT/design/dec/el2_dec_tlu_ctl.sv $RV_ROOT/design/dec/el2_dec_trigger.sv $RV_ROOT/design/dec/el2_dec.sv $RV_ROOT/design/exu/el2_exu_alu_ctl.sv -//$RV_ROOT/design/exu/el2_exu_br_ctl.sv $RV_ROOT/design/exu/el2_exu_mul_ctl.sv $RV_ROOT/design/exu/el2_exu_div_ctl.sv $RV_ROOT/design/exu/el2_exu.sv @@ -47,6 +42,4 @@ $RV_ROOT/design/lib/el2_lib.sv -v $RV_ROOT/design/lib/beh_lib.sv -v $RV_ROOT/design/lib/mem_lib.sv -y $RV_ROOT/design/lib -//$RV_ROOT/design/lib/ahb_to_svci.sv -//$RV_ROOT/design/lib/ahb_to_axi4.sv 
-//$RV_ROOT/design/lib/axi4_to_ahb.sv +-v $RV_ROOT/testbench/axi_lsu_dma_bridge.sv diff --git a/testbench/tb_top.sv b/testbench/tb_top.sv index d62c800..478b7f7 100644 --- a/testbench/tb_top.sv +++ b/testbench/tb_top.sv @@ -13,64 +13,62 @@ // See the License for the specific language governing permissions and // limitations under the License. // -`ifndef VERILATOR -module tb_top; -`else +`ifdef VERILATOR module tb_top ( input bit core_clk ); -`endif - -`ifndef VERILATOR +`else +module tb_top; bit core_clk; `endif logic rst_l; + logic porst_l; logic nmi_int; logic [31:0] reset_vector; logic [31:0] nmi_vector; logic [31:1] jtag_id; - logic [31:0] ic_haddr ; - logic [2:0] ic_hburst ; - logic ic_hmastlock ; - logic [3:0] ic_hprot ; - logic [2:0] ic_hsize ; - logic [1:0] ic_htrans ; - logic ic_hwrite ; - logic [63:0] ic_hrdata ; - logic ic_hready ; - logic ic_hresp ; + logic [31:0] ic_haddr; + logic [2:0] ic_hburst; + logic ic_hmastlock; + logic [3:0] ic_hprot; + logic [2:0] ic_hsize; + logic [1:0] ic_htrans; + logic ic_hwrite; + logic [63:0] ic_hrdata; + logic ic_hready; + logic ic_hresp; - logic [31:0] lsu_haddr ; - logic [2:0] lsu_hburst ; - logic lsu_hmastlock ; - logic [3:0] lsu_hprot ; - logic [2:0] lsu_hsize ; - logic [1:0] lsu_htrans ; - logic lsu_hwrite ; - logic [63:0] lsu_hrdata ; - logic [63:0] lsu_hwdata ; - logic lsu_hready ; - logic lsu_hresp ; + logic [31:0] lsu_haddr; + logic [2:0] lsu_hburst; + logic lsu_hmastlock; + logic [3:0] lsu_hprot; + logic [2:0] lsu_hsize; + logic [1:0] lsu_htrans; + logic lsu_hwrite; + logic [63:0] lsu_hrdata; + logic [63:0] lsu_hwdata; + logic lsu_hready; + logic lsu_hresp; - logic [31:0] sb_haddr ; - logic [2:0] sb_hburst ; - logic sb_hmastlock ; - logic [3:0] sb_hprot ; - logic [2:0] sb_hsize ; - logic [1:0] sb_htrans ; - logic sb_hwrite ; + logic [31:0] sb_haddr; + logic [2:0] sb_hburst; + logic sb_hmastlock; + logic [3:0] sb_hprot; + logic [2:0] sb_hsize; + logic [1:0] sb_htrans; + logic sb_hwrite; - logic [63:0] 
sb_hrdata ; - logic [63:0] sb_hwdata ; - logic sb_hready ; - logic sb_hresp ; + logic [63:0] sb_hrdata; + logic [63:0] sb_hwdata; + logic sb_hready; + logic sb_hresp; logic [31:0] trace_rv_i_insn_ip; logic [31:0] trace_rv_i_address_ip; logic [1:0] trace_rv_i_valid_ip; logic [1:0] trace_rv_i_exception_ip; logic [4:0] trace_rv_i_ecause_ip; - logic [2:0] trace_rv_i_interrupt_ip; + logic [1:0] trace_rv_i_interrupt_ip; logic [31:0] trace_rv_i_tval_ip; logic o_debug_mode_status; @@ -82,10 +80,10 @@ module tb_top ( input bit core_clk ); logic o_cpu_run_ack; logic mailbox_write; - logic [63:0] dma_hrdata ; - logic [63:0] dma_hwdata ; - logic dma_hready ; - logic dma_hresp ; + logic [63:0] dma_hrdata; + logic [63:0] dma_hwdata; + logic dma_hready; + logic dma_hresp; logic mpc_debug_halt_req; logic mpc_debug_run_req; @@ -94,7 +92,7 @@ module tb_top ( input bit core_clk ); logic mpc_debug_run_ack; logic debug_brkpt_status; - bit [31:0] cycleCnt ; + bit [31:0] cycleCnt; logic mailbox_data_val; wire dma_hready_out; @@ -286,6 +284,27 @@ module tb_top ( input bit core_clk ); wire [1:0] dma_axi_rresp; wire dma_axi_rlast; + wire lmem_axi_arvalid; + wire lmem_axi_arready; + + wire lmem_axi_rvalid; + wire [`RV_LSU_BUS_TAG-1:0] lmem_axi_rid; + wire [1:0] lmem_axi_rresp; + wire [63:0] lmem_axi_rdata; + wire lmem_axi_rlast; + wire lmem_axi_rready; + + wire lmem_axi_awvalid; + wire lmem_axi_awready; + + wire lmem_axi_wvalid; + wire lmem_axi_wready; + + wire [1:0] lmem_axi_bresp; + wire lmem_axi_bvalid; + wire [`RV_LSU_BUS_TAG-1:0] lmem_axi_bid; + wire lmem_axi_bready; + `endif wire[63:0] WriteData; @@ -329,18 +348,18 @@ module tb_top ( input bit core_clk ); wb_valid[0] <= rvtop.swerv.dec.dec_i0_wen_r; wb_dest[0] <= rvtop.swerv.dec.dec_i0_waddr_r; wb_data[0] <= rvtop.swerv.dec.dec_i0_wdata_r; - if (rvtop.trace_rv_i_valid_ip !== 0) begin - $fwrite(tp,"%b,%h,%h,%0h,%0h,3,%b,%h,%h,%b\n", rvtop.trace_rv_i_valid_ip, 0, trace_rv_i_address_ip, + if (trace_rv_i_valid_ip !== 0) begin + 
$fwrite(tp,"%b,%h,%h,%0h,%0h,3,%b,%h,%h,%b\n", trace_rv_i_valid_ip, 0, trace_rv_i_address_ip, 0, trace_rv_i_insn_ip,trace_rv_i_exception_ip,trace_rv_i_ecause_ip, trace_rv_i_tval_ip,trace_rv_i_interrupt_ip); // Basic trace - no exception register updates // #1 0 ee000000 b0201073 c 0b02 00000000 for (int i=0; i<1; i++) - if (rvtop.trace_rv_i_valid_ip[i]==1) begin + if (trace_rv_i_valid_ip[i]==1) begin commit_count++; $fwrite (el, "%10d : %6s 0 %h %h %s\n", cycleCnt, $sformatf("#%0d",commit_count), trace_rv_i_address_ip[31+i*32 -:32], trace_rv_i_insn_ip[31+i*32-:32], - wb_dest[i] !=0 ? $sformatf("r%0d=%h", wb_dest[i], wb_data[i]) : ""); + (wb_dest[i] !=0 && wb_data[0])? $sformatf("r%0d=%h", wb_dest[i], wb_data[i]) : ""); end end end @@ -363,6 +382,7 @@ module tb_top ( input bit core_clk ); fd = $fopen("console.log","w"); commit_count = 0; preload_dccm(); + preload_iccm(); `ifndef VERILATOR if($test$plusargs("dumpon")) $dumpvars; @@ -372,12 +392,14 @@ module tb_top ( input bit core_clk ); assign rst_l = cycleCnt > 5; + assign porst_l = cycleCnt > 2; //=========================================================================- // RTL instance //=========================================================================- el2_swerv_wrapper rvtop ( .rst_l ( rst_l ), + .dbg_rst_l ( porst_l ), .clk ( core_clk ), .rst_vec ( reset_vector[31:1]), .nmi_int ( nmi_int ), @@ -590,40 +612,40 @@ el2_swerv_wrapper rvtop ( //-------------------------- DMA AXI signals-------------------------- // AXI Write Channels - .dma_axi_awvalid (1'b0), + .dma_axi_awvalid (dma_axi_awvalid), .dma_axi_awready (dma_axi_awready), - .dma_axi_awid (dma_axi_awid), - .dma_axi_awaddr (dma_axi_awaddr), - .dma_axi_awsize (dma_axi_awsize), - .dma_axi_awprot (dma_axi_awprot), - .dma_axi_awlen (dma_axi_awlen), - .dma_axi_awburst (dma_axi_awburst), + .dma_axi_awid ('0), + .dma_axi_awaddr (lsu_axi_awaddr), + .dma_axi_awsize (lsu_axi_awsize), + .dma_axi_awprot (lsu_axi_awprot), + .dma_axi_awlen (lsu_axi_awlen), + 
.dma_axi_awburst (lsu_axi_awburst), - .dma_axi_wvalid (1'b0), + .dma_axi_wvalid (dma_axi_wvalid), .dma_axi_wready (dma_axi_wready), - .dma_axi_wdata (dma_axi_wdata), - .dma_axi_wstrb (dma_axi_wstrb), - .dma_axi_wlast (dma_axi_wlast), + .dma_axi_wdata (lsu_axi_wdata), + .dma_axi_wstrb (lsu_axi_wstrb), + .dma_axi_wlast (lsu_axi_wlast), .dma_axi_bvalid (dma_axi_bvalid), - .dma_axi_bready (1'b0), + .dma_axi_bready (dma_axi_bready), .dma_axi_bresp (dma_axi_bresp), - .dma_axi_bid (dma_axi_bid), + .dma_axi_bid (), - .dma_axi_arvalid (1'b0), + .dma_axi_arvalid (dma_axi_arvalid), .dma_axi_arready (dma_axi_arready), - .dma_axi_arid (dma_axi_arid), - .dma_axi_araddr (dma_axi_araddr), - .dma_axi_arsize (dma_axi_arsize), - .dma_axi_arprot (dma_axi_arprot), - .dma_axi_arlen (dma_axi_arlen), - .dma_axi_arburst (dma_axi_arburst), + .dma_axi_arid ('0), + .dma_axi_araddr (lsu_axi_araddr), + .dma_axi_arsize (lsu_axi_arsize), + .dma_axi_arprot (lsu_axi_arprot), + .dma_axi_arlen (lsu_axi_arlen), + .dma_axi_arburst (lsu_axi_arburst), .dma_axi_rvalid (dma_axi_rvalid), - .dma_axi_rready (1'b0), - .dma_axi_rid (dma_axi_rid), + .dma_axi_rready (dma_axi_rready), + .dma_axi_rid (), .dma_axi_rdata (dma_axi_rdata), .dma_axi_rresp (dma_axi_rresp), .dma_axi_rlast (dma_axi_rlast), @@ -768,23 +790,23 @@ defparam lmem.TAGW =`RV_LSU_BUS_TAG; axi_slv lmem( .aclk(core_clk), .rst_l(rst_l), - .arvalid(lsu_axi_arvalid), - .arready(lsu_axi_arready), + .arvalid(lmem_axi_arvalid), + .arready(lmem_axi_arready), .araddr(lsu_axi_araddr), .arid(lsu_axi_arid), .arlen(lsu_axi_arlen), .arburst(lsu_axi_arburst), .arsize(lsu_axi_arsize), - .rvalid(lsu_axi_rvalid), - .rready(lsu_axi_rready), - .rdata(lsu_axi_rdata), - .rresp(lsu_axi_rresp), - .rid(lsu_axi_rid), - .rlast(lsu_axi_rlast), + .rvalid(lmem_axi_rvalid), + .rready(lmem_axi_rready), + .rdata(lmem_axi_rdata), + .rresp(lmem_axi_rresp), + .rid(lmem_axi_rid), + .rlast(lmem_axi_rlast), - .awvalid(lsu_axi_awvalid), - .awready(lsu_axi_awready), + 
.awvalid(lmem_axi_awvalid), + .awready(lmem_axi_awready), .awaddr(lsu_axi_awaddr), .awid(lsu_axi_awid), .awlen(lsu_axi_awlen), @@ -793,25 +815,145 @@ axi_slv lmem( .wdata(lsu_axi_wdata), .wstrb(lsu_axi_wstrb), - .wvalid(lsu_axi_wvalid), - .wready(lsu_axi_wready), + .wvalid(lmem_axi_wvalid), + .wready(lmem_axi_wready), - .bvalid(lsu_axi_bvalid), - .bready(lsu_axi_bready), - .bresp(lsu_axi_bresp), - .bid(lsu_axi_bid) + .bvalid(lmem_axi_bvalid), + .bready(lmem_axi_bready), + .bresp(lmem_axi_bresp), + .bid(lmem_axi_bid) ); + +axi_lsu_dma_bridge # (`RV_LSU_BUS_TAG,`RV_LSU_BUS_TAG ) bridge( + .clk(core_clk), + .reset_l(rst_l), + + .m_arvalid(lsu_axi_arvalid), + .m_arid(lsu_axi_arid), + .m_araddr(lsu_axi_araddr), + .m_arready(lsu_axi_arready), + + .m_rvalid(lsu_axi_rvalid), + .m_rready(lsu_axi_rready), + .m_rdata(lsu_axi_rdata), + .m_rid(lsu_axi_rid), + .m_rresp(lsu_axi_rresp), + .m_rlast(lsu_axi_rlast), + + .m_awvalid(lsu_axi_awvalid), + .m_awid(lsu_axi_awid), + .m_awaddr(lsu_axi_awaddr), + .m_awready(lsu_axi_awready), + + .m_wvalid(lsu_axi_wvalid), + .m_wready(lsu_axi_wready), + + .m_bresp(lsu_axi_bresp), + .m_bvalid(lsu_axi_bvalid), + .m_bid(lsu_axi_bid), + .m_bready(lsu_axi_bready), + + .s0_arvalid(lmem_axi_arvalid), + .s0_arready(lmem_axi_arready), + + .s0_rvalid(lmem_axi_rvalid), + .s0_rid(lmem_axi_rid), + .s0_rresp(lmem_axi_rresp), + .s0_rdata(lmem_axi_rdata), + .s0_rlast(lmem_axi_rlast), + .s0_rready(lmem_axi_rready), + + .s0_awvalid(lmem_axi_awvalid), + .s0_awready(lmem_axi_awready), + + .s0_wvalid(lmem_axi_wvalid), + .s0_wready(lmem_axi_wready), + + .s0_bresp(lmem_axi_bresp), + .s0_bvalid(lmem_axi_bvalid), + .s0_bid(lmem_axi_bid), + .s0_bready(lmem_axi_bready), + + + .s1_arvalid(dma_axi_arvalid), + .s1_arready(dma_axi_arready), + + .s1_rvalid(dma_axi_rvalid), + .s1_rresp(dma_axi_rresp), + .s1_rdata(dma_axi_rdata), + .s1_rlast(dma_axi_rlast), + .s1_rready(dma_axi_rready), + + .s1_awvalid(dma_axi_awvalid), + .s1_awready(dma_axi_awready), + + 
.s1_wvalid(dma_axi_wvalid), + .s1_wready(dma_axi_wready), + + .s1_bresp(dma_axi_bresp), + .s1_bvalid(dma_axi_bvalid), + .s1_bready(dma_axi_bready) +); + + `endif +task preload_iccm; +bit[31:0] data; +bit[31:0] addr, eaddr, saddr, faddr; +int adr; +/* +addresses: + 0xffec - ICCM start address to load + 0xfff0 - ICCM end address to load + 0xfff4 - imem start address +*/ + +addr = 'hffec; +saddr = {lmem.mem[addr+3],lmem.mem[addr+2],lmem.mem[addr+1],lmem.mem[addr]}; +if ( (saddr < `RV_ICCM_SADR) || (saddr > `RV_ICCM_EADR)) return; +`ifndef RV_ICCM_ENABLE + $display("********************************************************"); + $display("ICCM preload: there is no ICCM in SweRV, terminating !!!"); + $display("********************************************************"); + $finish; +`endif +init_iccm; +addr = 'hfff0; +eaddr = {lmem.mem[addr+3],lmem.mem[addr+2],lmem.mem[addr+1],lmem.mem[addr]}; +addr = 'hfff4; +faddr = {lmem.mem[addr+3],lmem.mem[addr+2],lmem.mem[addr+1],lmem.mem[addr]}; +$display("ICCM pre-load from %h to %h", saddr, eaddr); + +for(addr= saddr; addr <= eaddr; addr+=4) begin + adr = faddr & 'hffff; + data = {imem.mem[adr+3],imem.mem[adr+2],imem.mem[adr+1],imem.mem[adr]}; + slam_iccm_ram(addr, data == 0 ? 
0 : {riscv_ecc32(data),data}); + faddr+=4; +end + +endtask task preload_dccm; bit[31:0] data; bit[31:0] addr, eaddr; int adr; +/* +addresses: + 0xfff8 - DCCM start address to load + 0xfffc - DCCM end address to load + 0x0 - lmem start address to load from +*/ addr = 'hfff8; eaddr = {lmem.mem[addr+3],lmem.mem[addr+2],lmem.mem[addr+1],lmem.mem[addr]}; if (eaddr != `RV_DCCM_SADR) return; +`ifndef RV_DCCM_ENABLE + $display("********************************************************"); + $display("DCCM preload: there is no DCCM in SweRV, terminating !!!"); + $display("********************************************************"); + $finish; +`endif addr = 'hfffc; eaddr = {lmem.mem[addr+3],lmem.mem[addr+2],lmem.mem[addr+1],lmem.mem[addr]}; $display("DCCM pre-load from %h to %h", `RV_DCCM_SADR, eaddr); @@ -824,16 +966,20 @@ end endtask +`define ICCM_PATH `RV_TOP.mem.iccm.iccm `ifdef VERILATOR -`define DRAM(bank) rvtop.mem.Gen_dccm_enable.dccm.mem_bank[bank].ram.ram_core +`define DRAM(bk) rvtop.mem.Gen_dccm_enable.dccm.mem_bank[bk].ram.ram_core +`define IRAM(bk) `ICCM_PATH.mem_bank[bk].iccm_bank.ram_core `else -`define DRAM(bank) rvtop.mem.Gen_dccm_enable.dccm.mem_bank[bank].dccm.dccm_bank.ram_core +`define DRAM(bk) rvtop.mem.Gen_dccm_enable.dccm.mem_bank[bk].dccm.dccm_bank.ram_core +`define IRAM(bk) `ICCM_PATH.mem_bank[bk].iccm.iccm_bank.ram_core `endif + task slam_dccm_ram(input [31:0] addr, input[38:0] data); int bank, indx; bank = get_dccm_bank(addr, indx); -//`ifndef VERILATOR +`ifdef RV_DCCM_ENABLE case(bank) 0: `DRAM(0)[indx] = data; 1: `DRAM(1)[indx] = data; @@ -850,7 +996,82 @@ case(bank) 7: `DRAM(7)[indx] = data; `endif endcase -//`endif +`endif +//$display("Writing bank %0d indx=%0d A=%h, D=%h",bank, indx, addr, data); +endtask + + +task slam_iccm_ram( input[31:0] addr, input[38:0] data); +int bank, idx; + +bank = get_iccm_bank(addr, idx); +`ifdef RV_ICCM_ENABLE +case(bank) // { + 0: `IRAM(0)[idx] = data; + 1: `IRAM(1)[idx] = data; + `ifdef RV_ICCM_NUM_BANKS_4 + 2:
`IRAM(2)[idx] = data; + 3: `IRAM(3)[idx] = data; + `endif + `ifdef RV_ICCM_NUM_BANKS_8 + 2: `IRAM(2)[idx] = data; + 3: `IRAM(3)[idx] = data; + 4: `IRAM(4)[idx] = data; + 5: `IRAM(5)[idx] = data; + 6: `IRAM(6)[idx] = data; + 7: `IRAM(7)[idx] = data; + `endif + + `ifdef RV_ICCM_NUM_BANKS_16 + 2: `IRAM(2)[idx] = data; + 3: `IRAM(3)[idx] = data; + 4: `IRAM(4)[idx] = data; + 5: `IRAM(5)[idx] = data; + 6: `IRAM(6)[idx] = data; + 7: `IRAM(7)[idx] = data; + 8: `IRAM(8)[idx] = data; + 9: `IRAM(9)[idx] = data; + 10: `IRAM(10)[idx] = data; + 11: `IRAM(11)[idx] = data; + 12: `IRAM(12)[idx] = data; + 13: `IRAM(13)[idx] = data; + 14: `IRAM(14)[idx] = data; + 15: `IRAM(15)[idx] = data; + `endif +endcase // } +`endif +endtask + +task init_iccm; +`ifdef RV_ICCM_ENABLE + `IRAM(0) = '{default:39'h0}; + `IRAM(1) = '{default:39'h0}; +`ifdef RV_ICCM_NUM_BANKS_4 + `IRAM(2) = '{default:39'h0}; + `IRAM(3) = '{default:39'h0}; +`endif +`ifdef RV_ICCM_NUM_BANKS_8 + `IRAM(4) = '{default:39'h0}; + `IRAM(5) = '{default:39'h0}; + `IRAM(6) = '{default:39'h0}; + `IRAM(7) = '{default:39'h0}; +`endif + +`ifdef RV_ICCM_NUM_BANKS_16 + `IRAM(4) = '{default:39'h0}; + `IRAM(5) = '{default:39'h0}; + `IRAM(6) = '{default:39'h0}; + `IRAM(7) = '{default:39'h0}; + `IRAM(8) = '{default:39'h0}; + `IRAM(9) = '{default:39'h0}; + `IRAM(10) = '{default:39'h0}; + `IRAM(11) = '{default:39'h0}; + `IRAM(12) = '{default:39'h0}; + `IRAM(13) = '{default:39'h0}; + `IRAM(14) = '{default:39'h0}; + `IRAM(15) = '{default:39'h0}; + `endif +`endif endtask @@ -866,7 +1087,7 @@ synd[6] = ^{data, synd[5:0]}; return synd; endfunction -function int get_dccm_bank(input int addr, output int bank_idx); +function int get_dccm_bank(input[31:0] addr, output int bank_idx); `ifdef RV_DCCM_NUM_BANKS_2 bank_idx = int'(addr[`RV_DCCM_BITS-1:3]); return int'( addr[2]); @@ -879,4 +1100,21 @@ function int get_dccm_bank(input int addr, output int bank_idx); `endif endfunction +function int get_iccm_bank(input[31:0] addr, output int bank_idx); +`ifdef 
RV_DCCM_NUM_BANKS_2 + bank_idx = int'(addr[`RV_DCCM_BITS-1:3]); + return int'( addr[2]); +`elsif RV_ICCM_NUM_BANKS_4 + bank_idx = int'(addr[`RV_ICCM_BITS-1:4]); + return int'(addr[3:2]); +`elsif RV_ICCM_NUM_BANKS_8 + bank_idx = int'(addr[`RV_ICCM_BITS-1:5]); + return int'( addr[4:2]); +`elsif RV_ICCM_NUM_BANKS_16 + bank_idx = int'(addr[`RV_ICCM_BITS-1:6]); + return int'( addr[5:2]); +`endif +endfunction + + endmodule diff --git a/testbench/test_tb_top.cpp b/testbench/test_tb_top.cpp index c88bb2b..899caf1 100644 --- a/testbench/test_tb_top.cpp +++ b/testbench/test_tb_top.cpp @@ -32,38 +32,32 @@ double sc_time_stamp () { int main(int argc, char** argv) { std::cout << "\nVerilatorTB: Start of sim\n" << std::endl; - // Check for +dumpon and remove it from argv - bool dumpWaves = false; - int newArgc = 0; - for (int i = 0; i < argc; ++i) - if (strcmp(argv[i], "+dumpon") == 0) - dumpWaves = true; - else - argv[newArgc++] = argv[i]; - argc = newArgc; - Verilated::commandArgs(argc, argv); Vtb_top* tb = new Vtb_top; // init trace dump - Verilated::traceEverOn(true); - VerilatedVcdC* tfp = new VerilatedVcdC; - tb->trace (tfp, 24); - if (dumpWaves) - tfp->open ("sim.vcd"); + VerilatedVcdC* tfp = NULL; +#if VM_TRACE + Verilated::traceEverOn(true); + tfp = new VerilatedVcdC; + tb->trace (tfp, 24); + tfp->open ("sim.vcd"); +#endif // Simulate while(!Verilated::gotFinish()){ - if (dumpWaves) - tfp->dump (main_time); +#if VM_TRACE + tfp->dump (main_time); +#endif main_time += 5; tb->core_clk = !tb->core_clk; tb->eval(); } - if (dumpWaves) - tfp->close(); +#if VM_TRACE + tfp->close(); +#endif std::cout << "\nVerilatorTB: End of sim" << std::endl; exit(EXIT_SUCCESS); diff --git a/tools/Makefile b/tools/Makefile index c968134..766fd1d 100755 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright 2019 Western Digital Corporation or its affiliates. +# Copyright 2020 Western Digital Corporation or its affiliates. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,21 +27,24 @@ snapshot = $(target) SWERV_CONFIG = ${RV_ROOT}/configs/swerv.config IRUN = xrun VCS = vcs +VLOG = qverilog VERILATOR = verilator GCC_PREFIX = riscv64-unknown-elf BUILD_DIR = snapshots/${snapshot} TBDIR = ${RV_ROOT}/testbench # Define test name -TEST = hello_world2 +TEST = hello_world # Define test name -TEST_DIR = ${RV_ROOT}/testbench/asm +TEST_DIR = ${TBDIR}/asm +HEX_DIR = ${TBDIR}/hex ifdef debug DEBUG_PLUS = +dumpon IRUN_DEBUG = -access +rc IRUN_DEBUG_RUN = -input ${RV_ROOT}/testbench/input.tcl + VERILATOR_DEBUG = --trace endif # provide specific link file @@ -73,18 +76,18 @@ all: clean verilator clean: rm -rf *.log *.s *.hex *.dis *.tbl irun* vcs* simv* snapshots swerv* \ - verilator* *.exe obj* *.o ucli.key vc_hdrs.h csrc *.csv + verilator* *.exe obj* *.o ucli.key vc_hdrs.h csrc *.csv work # If define files do not exist, then run swerv.config. 
${BUILD_DIR}/defines.h : - BUILD_PATH=${BUILD_DIR} ${RV_ROOT}/configs/swerv.config -target=$(target) + BUILD_PATH=${BUILD_DIR} ${RV_ROOT}/configs/swerv.config -target=$(target) $(CONF_PARAMS) verilator-build: ${TBFILES} ${BUILD_DIR}/defines.h test_tb_top.cpp echo '`undef ASSERT_ON' >> ${BUILD_DIR}/common_defines.vh $(VERILATOR) '-UASSERT_ON' --cc -CFLAGS ${CFLAGS} $(defines) \ $(includes) -I${RV_ROOT}/testbench -f ${RV_ROOT}/testbench/flist \ -Wno-WIDTH -Wno-UNOPTFLAT ${TBFILES} --top-module tb_top \ - -exe test_tb_top.cpp --trace --autoflush + -exe test_tb_top.cpp --autoflush $(VERILATOR_DEBUG) cp ${RV_ROOT}/testbench/test_tb_top.cpp obj_dir/ $(MAKE) -C obj_dir/ -f Vtb_top.mk $(VERILATOR_MAKE_FLAGS) touch verilator-build @@ -107,7 +110,7 @@ irun-build: ${TBFILES} ${BUILD_DIR}/defines.h touch irun-build verilator: program.hex verilator-build - ./obj_dir/Vtb_top ${DEBUG_PLUS} + ./obj_dir/Vtb_top irun: program.hex irun-build $(IRUN) -64bit -abvglobalfailurelimit 1 +lic_queue -licqueue \ @@ -117,23 +120,30 @@ irun: program.hex irun-build vcs: program.hex vcs-build ./simv $(DEBUG_PLUS) +vcs+lic+wait -l vcs.log +vlog: program.hex ${TBFILES} ${BUILD_DIR}/defines.h + $(VLOG) -l vlog.log -sv -mfcu +incdir+${BUILD_DIR}+${RV_ROOT}/design/include+${RV_ROOT}/design/lib\ + $(defines) -f ${RV_ROOT}/testbench/flist ${TBFILES} -R ${DEBUG_PLUS} + + +ifeq ($(shell which $(GCC_PREFIX)-gcc 2> /dev/null),) +program.hex: ${BUILD_DIR}/defines.h + @echo " !!! No $(GCC_PREFIX)-gcc in path, using canned hex files !!" + cp ${HEX_DIR}/$(TEST).program.hex program.hex + cp ${HEX_DIR}/$(TEST).data.hex data.hex +else +ifneq (,$(wildcard $(TEST_DIR)/$(TEST).makefile)) +program.hex: + $(MAKE) -f $(TEST_DIR)/$(TEST).makefile +else program.hex: $(TEST).o $(LINK) @echo Building $(TEST) -ifeq ($(shell which $(GCC_PREFIX)-as),) - @echo " !!! No $(GCC_PREFIX)-as in path, using canned hex files !!" - cp ${RV_ROOT}/testbench/hex/*.hex . 
-else -ifeq (,$(wildcard $(TEST_DIR)/$(TEST).makefile)) $(GCC_PREFIX)-ld -m elf32lriscv --discard-none -T$(LINK) -o $(TEST).exe $(TEST).o - $(GCC_PREFIX)-objcopy -O verilog --only-section ".data*" --change-section-lma .data=0 $(TEST).exe data.hex - $(GCC_PREFIX)-objcopy -O verilog --only-section ".text" $(TEST).exe program.hex + $(GCC_PREFIX)-objcopy -O verilog --only-section ".data*" --change-section-lma .data*-0x10000 $(TEST).exe data.hex + $(GCC_PREFIX)-objcopy -O verilog --only-section ".text*" $(TEST).exe program.hex $(GCC_PREFIX)-objdump -S $(TEST).exe > $(TEST).dis $(GCC_PREFIX)-nm -f posix -C $(TEST).exe > $(TEST).tbl @echo Completed building $(TEST) -else - $(MAKE) -f $(TEST_DIR)/$(TEST).makefile -endif -endif + %.o : %.s ${BUILD_DIR}/defines.h $(GCC_PREFIX)-cpp -I${BUILD_DIR} $< > $(TEST).cpp.s @@ -144,9 +154,12 @@ ABI = -mabi=ilp32 -march=rv32imc %.o : %.c ${BUILD_DIR}/defines.h $(GCC_PREFIX)-gcc -I${BUILD_DIR} ${TEST_CFLAGS} ${ABI} -nostdlib -c $< -o $@ +endif +endif + help: @echo Make sure the environment variable RV_ROOT is set. - @echo Possible targets: verilator vcs irun help clean all verilator-build irun-build vcs-build program.hex + @echo Possible targets: verilator vcs irun vlog help clean all verilator-build irun-build vcs-build program.hex -.PHONY: help clean verilator vcs irun +.PHONY: help clean verilator vcs irun vlog