From ec254f54915fbd38d1878fca9247bec12db2fcaf Mon Sep 17 00:00:00 2001 From: Joseph Rahmeh Date: Wed, 27 Jan 2021 09:36:43 -0800 Subject: [PATCH] Version 1.9. --- README.md | 18 +- configs/swerv.config | 123 +- configs/swerv_config_gen.py | 20 +- design/dbg/dbg.sv | 19 +- design/dec/csrdecode | 2 +- design/dec/dec.sv | 3 + design/dec/dec_tlu_ctl.sv | 95 +- design/dma_ctrl.sv | 27 +- design/dmi/dmi_wrapper.v | 1 - design/lsu/lsu.sv | 2 +- design/lsu/lsu_addrcheck.sv | 2 +- design/lsu/lsu_bus_buffer.sv | 14 +- design/lsu/lsu_bus_intf.sv | 2 +- design/swerv.sv | 8 + design/swerv_wrapper.sv | 4 +- release-notes.md | 17 + testbench/asm/cmark.c | 238 +- testbench/asm/cmark.mki | 1 + testbench/asm/cmark_dccm.mki | 1 + testbench/asm/cmark_iccm.ld | 4 +- testbench/asm/cmark_iccm.mki | 1 + testbench/asm/crt0.s | 48 + testbench/asm/hello_world_dccm.ld | 5 +- testbench/asm/hello_world_iccm.ld | 13 + testbench/asm/hello_world_iccm.s | 85 + testbench/asm/printf.c | 312 ++ testbench/axi_lsu_dma_bridge.sv | 201 + testbench/dasm.svi | 395 ++ testbench/flist | 1 + testbench/hex/cmark.hex | 4832 ++++++++++---------- testbench/hex/cmark_dccm.hex | 4847 ++++++++++----------- testbench/hex/cmark_iccm.hex | 4847 ++++++++++----------- testbench/hex/dhry.hex | 312 ++ testbench/hex/hello_world.hex | 48 +- testbench/hex/hello_world_dccm.hex | 50 +- testbench/hex/hello_world_iccm.hex | 32 + testbench/link.ld | 3 + testbench/tb_top.sv | 265 +- testbench/tests/cmark_dccm/Makefile | 6 + testbench/tests/cmark_dccm/cmark_dccm.c | 2167 +++++++++ testbench/tests/cmark_dccm/cmark_dccm.ld | 12 + testbench/tests/cmark_dccm/crt0.c | 29 + testbench/tests/cmark_dccm/printf.c | 191 + testbench/tests/dhry/README | 7 + testbench/tests/dhry/crt0.s | 1 + testbench/tests/dhry/dhry.h | 437 ++ testbench/tests/dhry/dhry.ld | 1 + testbench/tests/dhry/dhry.mki | 2 + testbench/tests/dhry/dhry_1.c | 452 ++ testbench/tests/dhry/dhry_2.c | 214 + testbench/tests/dhry/printf.c | 1 + testbench/tests/hello_world/Makefile | 
6 + testbench/tests/hello_world/hello_world.s | 72 + tools/Makefile | 47 +- tools/vivado.tcl | 1 + 55 files changed, 12276 insertions(+), 8268 deletions(-) create mode 100644 testbench/asm/cmark.mki create mode 120000 testbench/asm/cmark_dccm.mki create mode 120000 testbench/asm/cmark_iccm.mki create mode 100644 testbench/asm/crt0.s create mode 100644 testbench/asm/hello_world_iccm.ld create mode 100644 testbench/asm/hello_world_iccm.s create mode 100644 testbench/asm/printf.c create mode 100644 testbench/axi_lsu_dma_bridge.sv create mode 100644 testbench/dasm.svi create mode 100644 testbench/hex/dhry.hex create mode 100644 testbench/hex/hello_world_iccm.hex create mode 100644 testbench/tests/cmark_dccm/Makefile create mode 100644 testbench/tests/cmark_dccm/cmark_dccm.c create mode 100644 testbench/tests/cmark_dccm/cmark_dccm.ld create mode 100644 testbench/tests/cmark_dccm/crt0.c create mode 100644 testbench/tests/cmark_dccm/printf.c create mode 100644 testbench/tests/dhry/README create mode 120000 testbench/tests/dhry/crt0.s create mode 100644 testbench/tests/dhry/dhry.h create mode 120000 testbench/tests/dhry/dhry.ld create mode 100644 testbench/tests/dhry/dhry.mki create mode 100644 testbench/tests/dhry/dhry_1.c create mode 100644 testbench/tests/dhry/dhry_2.c create mode 120000 testbench/tests/dhry/printf.c create mode 100644 testbench/tests/hello_world/Makefile create mode 100644 testbench/tests/hello_world/hello_world.s diff --git a/README.md b/README.md index ba4d47a..cf97092 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# EH1 SweRV RISC-V CoreTM 1.8 from Western Digital +# EH1 RISC-V SweRV CoreTM 1.9 from Western Digital -This repository contains the SweRV EH1 CoreTM design RTL +This repository contains the EH1 SweRV CoreTM design RTL ## License @@ -16,7 +16,7 @@ Files under the [tools](tools/) directory may be available under a different lic │   ├── dec # Decode, Registers and Exceptions │   ├── dmi # DMI block │   ├── exu # EXU (ALU/MUL/DIV) - 
│   ├── ifu # Fetch & Branch Prediction + │   ├── ifu # Fetch & Branch Predictor │   ├── include │   ├── lib │   └── lsu # Load/Store @@ -28,7 +28,7 @@ Files under the [tools](tools/) directory may be available under a different lic ## Dependencies -- Verilator **(4.030 or later)** must be installed on the system if running with verilator +- Verilator **(4.102 or later)** must be installed on the system if running with verilator - If adding/removing instructions, espresso must be installed (used by *tools/coredecode*) - RISCV tool chain (based on gcc version 7.3 or higher) must be installed so that it can be used to prepare RISCV binaries to run. @@ -61,7 +61,7 @@ the `-target=name` option to swerv.config. This script derives the following consistent set of include files : - $RV_ROOT/configs/snapshots/default + snapshots/default ├── common_defines.vh # `defines for testbench or design ├── defines.h # #defines for C/assembly headers ├── pd_defines.vh # `defines for physical design @@ -87,13 +87,10 @@ Example for csh or its derivatives: *(Name your snapshot to distinguish it from the default. Without an explicit name, it will update/override the __default__ snapshot)* For example if `mybuild` is the name for the snapshot: - set BUILD_PATH environment variable: - - `setenv BUILD_PATH snapshots/mybuild` `$RV_ROOT/configs/swerv.config [configuration options..] -snapshot=mybuild` - Snapshots are placed in `$BUILD_PATH` directory + Snapshots are placed in ./snapshots directory **Building an FPGA speed optimized model:** Use ``-fpga_optimize=1`` option to ``swerv.config`` to build a model that removes clock gating logic from flop model so that the FPGA builds can run at higher speeds. 
**This is now the default option for @@ -191,10 +188,13 @@ The `$RV_ROOT/testbench/asm` directory contains following tests ready to simula ``` hello_world - default test to run, prints Hello World message to screen and console.log hello_world_dccm - the same as above, but takes the string from preloaded DCCM. +hello_world_iccm - the same as above, but CPU copies the code from external memory to ICCM via AXI LSU to DMA bridge + and then jumps there. The test runs only on CPU configurations with ICCM and AXI bus. cmark - coremark benchmark running with code and data in external memories cmark_dccm - the same as above, running data and stack from DCCM (faster) cmark_iccm - the same as above, but with code preloaded to iccm - runs only on CPU with ICCM use CONF_PARAMS=-set=iccm_enable argument to `make` to build CPU with ICCM +dhry - dhrystone benchmark - example of multi source files program ``` The `$RV_ROOT/testbench/hex` directory contains precompiled hex files of the tests, ready for simulation in case RISCV SW tools are not installed. 
diff --git a/configs/swerv.config b/configs/swerv.config index d4f4dc5..798eb14 100755 --- a/configs/swerv.config +++ b/configs/swerv.config @@ -243,6 +243,9 @@ my $pdfile = "$build_path/pd_defines.vh"; # Whisper config file path my $whisperfile = "$build_path/whisper.json"; +# Default linker file +my $linkerfile = "$build_path/link.ld"; + # Perl defines file path my $perlfile = "$build_path/perl_configs.pl"; @@ -368,8 +371,8 @@ our @triggers = (#{{{ }, { "reset" => ["0x23e00000", "0x00000000", "0x00000000"], - "mask" => ["0x081818c7", "0xffffffff", "0x00000000"], - "poke_mask" => ["0x081818c7", "0xffffffff", "0x00000000"] + "mask" => ["0x081810c7", "0xffffffff", "0x00000000"], + "poke_mask" => ["0x081810c7", "0xffffffff", "0x00000000"] }, { "reset" => ["0x23e00000", "0x00000000", "0x00000000"], @@ -378,8 +381,8 @@ our @triggers = (#{{{ }, { "reset" => ["0x23e00000", "0x00000000", "0x00000000"], - "mask" => ["0x081818c7", "0xffffffff", "0x00000000"], - "poke_mask" => ["0x081818c7", "0xffffffff", "0x00000000"] + "mask" => ["0x081810c7", "0xffffffff", "0x00000000"], + "poke_mask" => ["0x081810c7", "0xffffffff", "0x00000000"] }, );#}}} @@ -417,7 +420,7 @@ our %csr = (#{{{ "exists" => "true", }, "mimpid" => { - "reset" => "0x5", + "reset" => "0x6", "mask" => "0x0", "exists" => "true", }, @@ -621,7 +624,7 @@ our %csr = (#{{{ "mfdc" => { "number" => "0x7f9", "reset" => "0x00070000", - "mask" => "0x000707ff", + "mask" => "0x000727ff", "exists" => "true", }, "dmst" => { @@ -1153,57 +1156,6 @@ if($config{bht}{bht_size}==2048){ $config{bht}{bht_addr_hi} = 5; $config{bht}{bht_array_depth}= 4; } -#if($config{bht}{bht_size}==2048){ -# $config{bht}{bht_ghr_size}= 8; -# $config{bht}{bht_ghr_range}= "7:0"; -# $config{bht}{bht_ghr_pad}= "fghr[7:4],3'b0"; -# $config{bht}{bht_ghr_pad2}= "fghr[7:3],2'b0"; -# $config{bht}{bht_array_depth}= 256; -# $config{bht}{bht_addr_hi}= 11; -#} elsif($config{bht}{bht_size}==1024){ -# $config{bht}{bht_ghr_size}= 7; -# $config{bht}{bht_ghr_range}= 
"6:0"; -# $config{bht}{bht_ghr_pad}= "fghr[6:4],3'b0"; -# $config{bht}{bht_ghr_pad2}= "fghr[6:3],2'b0"; -# $config{bht}{bht_array_depth}= 128; -# $config{bht}{bht_addr_hi}= 10; -#} elsif($config{bht}{bht_size}==512){ -# $config{bht}{bht_ghr_size}= 6; -# $config{bht}{bht_ghr_range}= "5:0"; -# $config{bht}{bht_ghr_pad}= "fghr[5:4],3'b0"; -# $config{bht}{bht_ghr_pad2}= "fghr[5:3],2'b0"; -# $config{bht}{bht_array_depth}= 64; -# $config{bht}{bht_addr_hi}= 9; -#} elsif($config{bht}{bht_size}==256){ -# $config{bht}{bht_ghr_size}= 5; -# $config{bht}{bht_ghr_range}= "4:0"; -# $config{bht}{bht_ghr_pad}= "fghr[4],3'b0"; -# $config{bht}{bht_ghr_pad2}= "fghr[4:3],2'b0"; -# $config{bht}{bht_addr_hi} = 8; -# $config{bht}{bht_array_depth}= 32; -#} elsif($config{bht}{bht_size}==128){ -# $config{bht}{bht_ghr_size}= 5; -# $config{bht}{bht_ghr_range}= "4:0"; -# $config{bht}{bht_ghr_pad}= "fghr[4],3'b0"; -# $config{bht}{bht_ghr_pad2}= "fghr[4:3],2'b0"; -# $config{bht}{bht_addr_hi} = 7; -# $config{bht}{bht_array_depth}= 16; -#} elsif($config{bht}{bht_size}==64){ -# $config{bht}{bht_ghr_size}= 4; -# $config{bht}{bht_ghr_range}= "3:0"; -# $config{bht}{bht_ghr_pad}= "3'b0 "; -# $config{bht}{bht_ghr_pad2}= "fghr[4],2'b0"; -# $config{bht}{bht_addr_hi} = 6; -# $config{bht}{bht_array_depth}= 8; -#} elsif($config{bht}{bht_size}==32){ -# $config{bht}{bht_ghr_size}= 3; -# $config{bht}{bht_ghr_range}= "2:0"; -# $config{bht}{bht_ghr_pad}= "2'b0 "; -# $config{bht}{bht_ghr_pad2}= "2'b0"; -# $config{bht}{bht_addr_hi} = 5; -# $config{bht}{bht_array_depth}= 4; -# $config{bht}{bht_ghr_size_2} = 1; -#} $config{bht}{bht_hash_string} = &ghrhash($config{btb}{btb_index1_hi}, $config{bht}{bht_ghr_size}-1); @@ -1603,6 +1555,8 @@ print FILE Data::Dumper->Dump([\%config], [ qw(*config) ]); print FILE "1;\n"; close FILE; +# Default linker script +gen_default_linker_script(); # Done ################################################################## # exit(0); @@ -1960,7 +1914,7 @@ sub dump_whisper_config{#{{{ 
$jh{memmap}{inst} = [@inst_mem_prot] if @inst_mem_prot; $jh{memmap}{data} = [@data_mem_prot] if @data_mem_prot; $config{memmap}{consoleio} = $config{memmap}{serialio} if exists $config{memmap}{serialio}; - foreach my $tag (qw (size page_size serialio consoleio)) { + foreach my $tag (qw ( size page_size serialio consoleio)) { $jh{memmap}{$tag} = $config{memmap}{$tag} if exists $config{memmap}{$tag}; } @@ -2048,3 +2002,56 @@ sub log2 { my ($n) = @_; return log($n)/log(2); } + +sub gen_default_linker_script {#{{{ + + open (FILE, ">$linkerfile") || die "Cannot open $linkerfile for writing $!"; + print "$self: Writing $linkerfile\n"; + print FILE "/*\n"; + print_header(); + + my $io = "0xd0580000"; + $io = $config{memmap}{serialio} if exists $config{memmap}{serialio}; + + my $iccm = ""; my $iccm_ctl = ""; + if (exists $config{iccm} and $config{iccm}{iccm_enable}) { + my $sa = $config{iccm}{iccm_sadr}; my $ea = $config{iccm}{iccm_eadr}; + $iccm = " . = $sa ;"; + $iccm_ctl = " . = 0xfffffff0; .iccm.ctl . : { LONG($sa); LONG($ea) }" ; + } + + my $sa = $config{memmap}{external_data}; my $dccm_ctl = ""; + if (exists $config{dccm} and $config{dccm}{dccm_enable}) { + $sa = $config{dccm}{dccm_sadr}; + $dccm_ctl = " . = 0xfffffff8; .data.ctl : { LONG($sa); LONG(STACK) }" ; + } + my $data_loc = " . = $sa ;"; + + print FILE < PIC offset cross when DCCM/PIC in same region (PIC access are always word aligned so no cross possible from PIC->DCCM) // 4. Ld/St access to picm are not word aligned // 5. 
Address not in protected space or dccm/pic region - if (DCCM_REGION == PIC_REGION) begin + if (DCCM_ENABLE & (DCCM_REGION == PIC_REGION)) begin assign access_fault_dc1 = ((start_addr_in_dccm_region_dc1 & ~(start_addr_in_dccm_dc1 | start_addr_in_pic_dc1)) | (end_addr_in_dccm_region_dc1 & ~(end_addr_in_dccm_dc1 | end_addr_in_pic_dc1)) | (start_addr_in_dccm_dc1 & end_addr_in_pic_dc1) | diff --git a/design/lsu/lsu_bus_buffer.sv b/design/lsu/lsu_bus_buffer.sv index 2bc4abf..7a0e807 100644 --- a/design/lsu/lsu_bus_buffer.sv +++ b/design/lsu/lsu_bus_buffer.sv @@ -42,7 +42,7 @@ module lsu_bus_buffer input logic clk, input logic rst_l, input logic scan_mode, - input logic dec_tlu_non_blocking_disable, // disable non block + input logic dec_tlu_dccm_nonblock_dma_disable, // disable dma nonblock input logic dec_tlu_wb_coalescing_disable, // disable write buffer coalescing input logic dec_tlu_ld_miss_byp_wb_disable, // disable ld miss bypass of the write buffer input logic dec_tlu_sideeffect_posted_disable, // disable posted writes to sideeffect addr to the bus @@ -752,7 +752,7 @@ module lsu_bus_buffer // Freeze logic assign FreezePtrEn = lsu_busreq_dc3 & lsu_pkt_dc3.load & ld_freeze_dc3; - assign ld_freeze_en = (is_sideeffects_dc2 | dec_nonblock_load_freeze_dc2 | dec_tlu_non_blocking_disable) & lsu_busreq_dc2 & lsu_pkt_dc2.load & ~lsu_freeze_dc3 & ~flush_dc2_up & ~ld_full_hit_dc2; + assign ld_freeze_en = (dec_nonblock_load_freeze_dc2 | (dec_tlu_dccm_nonblock_dma_disable & is_sideeffects_dc2)) & lsu_busreq_dc2 & lsu_pkt_dc2.load & ~lsu_freeze_dc3 & ~flush_dc2_up & ~ld_full_hit_dc2; always_comb begin ld_freeze_rst = flush_dc3 | (dec_tlu_cancel_e4 & ld_freeze_dc3); for (int i=0; i AHB Gasket for LSU axi4_to_ahb #(.TAG(LSU_BUS_TAG)) lsu_axi4_to_ahb ( + .rst_l(core_rst_l), .clk_override(dec_tlu_bus_clk_override), .bus_clk_en(lsu_bus_clk_en), @@ -1089,6 +1094,7 @@ module swerv // AXI4 -> AHB Gasket for System Bus axi4_to_ahb #(.TAG(SB_BUS_TAG)) sb_axi4_to_ahb ( + .rst_l(dbg_rst_l), 
.clk_override(dec_tlu_bus_clk_override), .bus_clk_en(dbg_bus_clk_en), @@ -1144,6 +1150,7 @@ module swerv axi4_to_ahb #(.TAG(IFU_BUS_TAG)) ifu_axi4_to_ahb ( .clk(clk), + .rst_l(core_rst_l), .clk_override(dec_tlu_bus_clk_override), .bus_clk_en(ifu_bus_clk_en), @@ -1199,6 +1206,7 @@ module swerv //AHB -> AXI4 Gasket for DMA ahb_to_axi4 #(.TAG(DMA_BUS_TAG)) dma_ahb_to_axi4 ( + .rst_l(core_rst_l), .clk_override(dec_tlu_bus_clk_override), .bus_clk_en(dma_bus_clk_en), diff --git a/design/swerv_wrapper.sv b/design/swerv_wrapper.sv index 83b5326..ed9097e 100644 --- a/design/swerv_wrapper.sv +++ b/design/swerv_wrapper.sv @@ -401,6 +401,7 @@ module swerv_wrapper logic [31:0] dmi_reg_rdata; logic dmi_hard_reset; + // Instantiate the swerv core swerv swerv ( .* @@ -414,7 +415,6 @@ module swerv_wrapper // Instantiate the JTAG/DMI dmi_wrapper dmi_wrapper ( - // JTAG signals .trst_n(jtag_trst_n), // JTAG reset .tck (jtag_tck), // JTAG clock @@ -433,7 +433,7 @@ module swerv_wrapper .reg_en (dmi_reg_en), // 1 bit Write interface bit to Processor .reg_wr_en (dmi_reg_wr_en), // 1 bit Write enable to Processor .dmi_hard_reset (dmi_hard_reset) //a hard reset of the DTM, causing the DTM to forget about any outstanding DMI transactions -); + ); endmodule diff --git a/release-notes.md b/release-notes.md index efc9f48..65401fd 100644 --- a/release-notes.md +++ b/release-notes.md @@ -1,3 +1,20 @@ +# SweRV RISC-V CoreTM 1.9 from Western Digital +## Release Notes + +* Removed unused scan_mode input from dmi_wrapper (PR#89) +* Enhanced DMA/Side-Effect-load interlock to conditionally allow Side-Effect loads to be non-blocking + * See PRM for new enable bit in MFDC[13] +* Bug fixes for NMI, MPC, PMU corner cases, MPC ack timing fixes +* Trigger chaining compliance fixes for 0.13.2 missing cases +* Fixed qualification in DCCM access fault equation +* Updated reset hookup for AHB gasket +* Demo TB updates: + * added AXI LSU/DMA bridge and ICCM preload by CPU test, + * dhrystone test, + * exec.log 
shows instruction mnemonics + + + # SweRV RISC-V CoreTM 1.8 from Western Digital ## Release Notes diff --git a/testbench/asm/cmark.c b/testbench/asm/cmark.c index 4b41bfb..b366c80 100644 --- a/testbench/asm/cmark.c +++ b/testbench/asm/cmark.c @@ -1,35 +1,6 @@ #include "defines.h" #define ITERATIONS 1 -extern int STACK; -void main(); - - -#define STDOUT 0xd0580000 - -__asm (".section .text"); -__asm (".global _start"); -__asm ("_start:"); - -// Enable Caches in MRAC -__asm ("li t0, 0x5f555555"); -__asm ("csrw 0x7c0, t0"); - -// Set stack pointer. -__asm ("la sp, STACK"); - -__asm ("jal main"); - -// Write 0xff to STDOUT for TB to termiate test. -__asm (".global _finish"); -__asm ("_finish:"); -__asm ("li t0, 0xd0580000"); -__asm ("addi t1, zero, 0xff"); -__asm ("sb t1, 0(t0)"); -__asm ("beq x0, x0, _finish"); -__asm (".rept 10"); -__asm ("nop"); -__asm (".endr"); /* @@ -1200,7 +1171,7 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) { ee_printf("Total time (secs): %d\n",time_in_secs(total_time)); if (time_in_secs(total_time) > 0) // ee_printf("Iterations/Sec : %d\n",default_num_contexts*results[0].iterations/time_in_secs(total_time)); - ee_printf("Iterat/Sec/MHz : %d.%d\n",1000*default_num_contexts*results[0].iterations/time_in_secs(total_time), + ee_printf("Iterat/Sec/MHz : %d.%02d\n",1000*default_num_contexts*results[0].iterations/time_in_secs(total_time), 100000*default_num_contexts*results[0].iterations/time_in_secs(total_time) % 100); #endif if (time_in_secs(total_time) < 10) { @@ -2182,213 +2153,6 @@ void portable_fini(core_portable *p) } -#include - -// Special address. Writing (store byte instruction) to this address -// causes the simulator to write to the console. 
-volatile char __whisper_console_io = 0; - - -static int -whisperPutc(char c) -{ -// __whisper_console_io = c; -// __whisper_console_io = c; - *(volatile char*)(STDOUT) = c; - return c; -} - - -static int -whisperPuts(const char* s) -{ - while (*s) - whisperPutc(*s++); - return 1; -} - - -static int -whisperPrintDecimal(int value) -{ - char buffer[20]; - int charCount = 0; - - unsigned neg = value < 0; - if (neg) - { - value = -value; - whisperPutc('-'); - } - - do - { - char c = '0' + (value % 10); - value = value / 10; - buffer[charCount++] = c; - } - while (value); - - char* p = buffer + charCount - 1; - for (unsigned i = 0; i < charCount; ++i) - whisperPutc(*p--); - - if (neg) - charCount++; - - return charCount; -} - - -static int -whisperPrintInt(int value, int base) -{ - if (base == 10) - return whisperPrintDecimal(value); - - char buffer[20]; - int charCount = 0; - - unsigned uu = value; - - if (base == 8) - { - do - { - char c = '0' + (uu & 7); - buffer[charCount++] = c; - uu >>= 3; - } - while (uu); - } - else if (base == 16) - { - do - { - int digit = uu & 0xf; - char c = digit < 10 ? '0' + digit : 'a' + digit - 10; - buffer[charCount++] = c; - uu >>= 4; - } - while (uu); - } - else - return -1; - - char* p = buffer + charCount - 1; - for (unsigned i = 0; i < charCount; ++i) - whisperPutc(*p--); - - return charCount; -} - - -int -whisperPrintfImpl(const char* format, va_list ap) -{ - int count = 0; // Printed character count - - for (const char* fp = format; *fp; fp++) - { - if (*fp != '%') - { - whisperPutc(*fp); - ++count; - continue; - } - - ++fp; // Skip % - - if (*fp == 0) - break; - - if (*fp == '%') - { - whisperPutc('%'); - continue; - } - - if (*fp == '-') - { - fp++; // Pad right not yet implemented. - } - - while (*fp == '0') - { - fp++; // Pad zero not yet implented. - } - - if (*fp == '*') - { - int width = va_arg(ap, int); - fp++; // Width not yet implemented. 
- } - else - { - while (*fp >= '0' && *fp <= '9') - ++fp; // Width not yet implemented. - } - - switch (*fp) - { - case 'd': - count += whisperPrintDecimal(va_arg(ap, int)); - break; - - case 'u': - count += whisperPrintDecimal((unsigned) va_arg(ap, unsigned)); - break; - - case 'x': - case 'X': - count += whisperPrintInt(va_arg(ap, int), 16); - break; - - case 'o': - count += whisperPrintInt(va_arg(ap, int), 8); - break; - - case 'c': - whisperPutc(va_arg(ap, int)); - ++count; - break; - - case 's': - count += whisperPuts(va_arg(ap, char*)); - break; - } - } - - return count; -} - - -int -whisperPrintf(const char* format, ...) -{ - va_list ap; - - va_start(ap, format); - int code = whisperPrintfImpl(format, ap); - va_end(ap); - - return code; -} - - -int -printf(const char* format, ...) -{ - va_list ap; - - va_start(ap, format); - int code = whisperPrintfImpl(format, ap); - va_end(ap); - - return code; -} - - void* memset(void* s, int c, size_t n) { asm("mv t0, a0"); diff --git a/testbench/asm/cmark.mki b/testbench/asm/cmark.mki new file mode 100644 index 0000000..8fc681d --- /dev/null +++ b/testbench/asm/cmark.mki @@ -0,0 +1 @@ +OFILES = crt0.o printf.o cmark.o diff --git a/testbench/asm/cmark_dccm.mki b/testbench/asm/cmark_dccm.mki new file mode 120000 index 0000000..e4bd4bc --- /dev/null +++ b/testbench/asm/cmark_dccm.mki @@ -0,0 +1 @@ +cmark.mki \ No newline at end of file diff --git a/testbench/asm/cmark_iccm.ld b/testbench/asm/cmark_iccm.ld index e7a80a7..6e3161a 100644 --- a/testbench/asm/cmark_iccm.ld +++ b/testbench/asm/cmark_iccm.ld @@ -7,10 +7,12 @@ MEMORY { ICCM : ORIGIN = 0xee000000, LENGTH = 0x80000 DCCM : ORIGIN = 0xf0040000, LENGTH = 0x10000 CTL : ORIGIN = 0xfffffff0, LENGTH = 16 + IO : ORIGIN = 0xd0580000, LENGTH = 0x1000 } SECTIONS { - .text_init : {*(.text_init)} > EXTCODE + .text.init : {*(.text.init)} > EXTCODE init_end = .; + .data.io : { *(.data.io) } > IO .text : { *(.text) *(.text.startup)} > ICCM text_end = .; .data : { *(.*data) 
*(.rodata*) *(.sbss) STACK = ALIGN(16) + 0x1000;} > DCCM diff --git a/testbench/asm/cmark_iccm.mki b/testbench/asm/cmark_iccm.mki new file mode 120000 index 0000000..e4bd4bc --- /dev/null +++ b/testbench/asm/cmark_iccm.mki @@ -0,0 +1 @@ +cmark.mki \ No newline at end of file diff --git a/testbench/asm/crt0.s b/testbench/asm/crt0.s new file mode 100644 index 0000000..17878a9 --- /dev/null +++ b/testbench/asm/crt0.s @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright 2020 Western Digital Corporation or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +// startup code to support HLL programs + +#include "defines.h" + +.section .text.init +.global _start +_start: + +// enable caching, except region 0xd + li t0, 0x59555555 + csrw 0x7c0, t0 + + la sp, STACK + + call main + + +.global _finish +_finish: + la t0, tohost + li t1, 0xff + sb t1, 0(t0) // DemoTB test termination + li t1, 1 + sw t1, 0(t0) // Whisper test termination + beq x0, x0, _finish + .rept 10 + nop + .endr + +.section .data.io +.global tohost +tohost: .word 0 + diff --git a/testbench/asm/hello_world_dccm.ld b/testbench/asm/hello_world_dccm.ld index 3481080..56efa1c 100644 --- a/testbench/asm/hello_world_dccm.ld +++ b/testbench/asm/hello_world_dccm.ld @@ -5,9 +5,10 @@ ENTRY(_start) SECTIONS { .text : { *(.text*) } _end = .; + . = 0xd0580000; + .data.io : { *(.data.io) } . 
= 0xf0040000; - .data : { *(.*data) *(.rodata*) *(.sbss) STACK = ALIGN(16) + 0x1000;} - .bss : { *(.bss) } + .data : { *(.*data) *(.rodata*) *(.*bss) STACK = ALIGN(16) + 0x1000;} . = 0xfffffff8; .data.ctl : { LONG(0xf0040000); LONG(STACK) } } diff --git a/testbench/asm/hello_world_iccm.ld b/testbench/asm/hello_world_iccm.ld new file mode 100644 index 0000000..0692d8c --- /dev/null +++ b/testbench/asm/hello_world_iccm.ld @@ -0,0 +1,13 @@ +OUTPUT_ARCH( "riscv" ) +ENTRY(_start) + +SECTIONS { + .text : { *(.text*) } + . = 0x10000; + .data : { *(.*data) *(.rodata*)} + . = ALIGN(4); + printf_start = .; + . = 0xee000000; + .data_load : AT(printf_start) {*(.data_text)} + printf_end = printf_start + SIZEOF(.data_load); +} diff --git a/testbench/asm/hello_world_iccm.s b/testbench/asm/hello_world_iccm.s new file mode 100644 index 0000000..8f9cdd0 --- /dev/null +++ b/testbench/asm/hello_world_iccm.s @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Assembly code for Hello World +// Not using only ALU ops for creating the string + + +#include "defines.h" + +#define STDOUT 0xd0580000 + + .set mfdc, 0x7f9 +.extern printf_start, printf_end +// Code to execute +.section .text +.global _start +_start: + + + + // Enable Caches in MRAC + li x1, 0x5f555555 + csrw 0x7c0, x1 + li x3, 4 + (1<<13) // unblock DMA be stalled by fences mfdc[13]!!! 
+ csrw mfdc, x3 // disable store merging mfdc[2] + li x3, RV_ICCM_SADR + la x4, printf_start + la x5, printf_end + + +load: + lw x6, 0 (x4) + sw x6, 0 (x3) + addi x4,x4,4 + addi x3,x3,4 + bltu x4, x5, load + + fence.i + call printf + +// Write 0xff to STDOUT for TB to termiate test. +_finish: + li x3, STDOUT + addi x5, x0, 0xff + sb x5, 0(x3) + beq x0, x0, _finish +.rept 100 + nop +.endr + +.data +hw_data: +.ascii "----------------------------------------\n" +.ascii "Hello World from SweRV EL2 ICCM @WDC !!\n" +.ascii "----------------------------------------\n" +.byte 0 + +.section .data_text, "ax" + // Load string from hw_data + // and write to stdout address + +printf: + li x3, STDOUT + la x4, hw_data + +loop: + lb x5, 0(x4) + sb x5, 0(x3) + addi x4, x4, 1 + bnez x5, loop + ret +.long 0,1,2,3,4 diff --git a/testbench/asm/printf.c b/testbench/asm/printf.c new file mode 100644 index 0000000..c25f4bc --- /dev/null +++ b/testbench/asm/printf.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2020 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include +#include +extern volatile char tohost; + +static int +whisperPutc(char c) +{ + tohost = c; + return c; +} + + +static int +whisperPuts(const char* s) +{ + while (*s) + whisperPutc(*s++); + whisperPutc('\n'); +// whisperPutc(0xd); + return 1; +} + + +static int +whisperPrintUnsigned(unsigned value, int width, char pad) +{ + char buffer[20]; + int charCount = 0; + + do + { + char c = '0' + (value % 10); + value = value / 10; + buffer[charCount++] = c; + } + while (value); + + for (int i = charCount; i < width; ++i) + whisperPutc(pad); + + char* p = buffer + charCount - 1; + for (int i = 0; i < charCount; ++i) + whisperPutc(*p--); + + return charCount; +} + + +static int +whisperPrintDecimal(int value, int width, char pad) +{ + char buffer[20]; + int charCount = 0; + + unsigned neg = value < 0; + if (neg) + { + value = -value; + whisperPutc('-'); + width--; + } + + do + { + char c = '0' + (value % 10); + value = value / 10; + buffer[charCount++] = c; + } + while (value); + + for (int i = charCount; i < width; ++i) + whisperPutc(pad); + + char* p = buffer + charCount - 1; + for (int i = 0; i < charCount; ++i) + whisperPutc(*p--); + + if (neg) + charCount++; + + return charCount; +} + + +static int +whisperPrintInt(int value, int width, int pad, int base) +{ + if (base == 10) + return whisperPrintDecimal(value, width, pad); + + char buffer[20]; + int charCount = 0; + + unsigned uu = value; + + if (base == 8) + { + do + { + char c = '0' + (uu & 7); + buffer[charCount++] = c; + uu >>= 3; + } + while (uu); + } + else if (base == 16) + { + do + { + int digit = uu & 0xf; + char c = digit < 10 ? 
'0' + digit : 'a' + digit - 10; + buffer[charCount++] = c; + uu >>= 4; + } + while (uu); + } + else + return -1; + + char* p = buffer + charCount - 1; + for (unsigned i = 0; i < charCount; ++i) + whisperPutc(*p--); + + return charCount; +} + +/* +// Print with g format +static int +whisperPrintDoubleG(double value) +{ + return 0; +} + + +// Print with f format +static int +whisperPrintDoubleF(double value) +{ + return 0; +} +*/ + +int +whisperPrintfImpl(const char* format, va_list ap) +{ + int count = 0; // Printed character count + + for (const char* fp = format; *fp; fp++) + { + char pad = ' '; + int width = 0; // Field width + + if (*fp != '%') + { + whisperPutc(*fp); + ++count; + continue; + } + + ++fp; // Skip % + + if (*fp == 0) + break; + + if (*fp == '%') + { + whisperPutc('%'); + continue; + } + + while (*fp == '0') + { + pad = '0'; + fp++; // Pad zero not yet implented. + } + + if (*fp == '-') + { + fp++; // Pad right not yet implemented. + } + + if (*fp == '*') + { + int outWidth = va_arg(ap, int); + fp++; // Width not yet implemented. + } + else if (*fp >= '0' && *fp <= '9') + { // Width not yet implemented. + while (*fp >= '0' && *fp <= '9') + width = width * 10 + (*fp++ - '0'); + } + + switch (*fp) + { + case 'd': + count += whisperPrintDecimal(va_arg(ap, int), width, pad); + break; + + case 'u': + count += whisperPrintUnsigned((unsigned) va_arg(ap, unsigned), width, pad); + break; + + case 'x': + case 'X': + count += whisperPrintInt(va_arg(ap, int), width, pad, 16); + break; + + case 'o': + count += whisperPrintInt(va_arg(ap, int), width, pad, 8); + break; + + case 'c': + whisperPutc(va_arg(ap, int)); + ++count; + break; + + case 's': + count += whisperPuts(va_arg(ap, char*)); + break; +/* + case 'g': + count += whisperPrintDoubleG(va_arg(ap, double)); + break; + + case 'f': + count += whisperPrintDoubleF(va_arg(ap, double)); +*/ + } + } + + return count; +} + + +int +whisperPrintf(const char* format, ...) 
+{ + va_list ap; + + va_start(ap, format); + int code = whisperPrintfImpl(format, ap); + va_end(ap); + + return code; +} + +int +putchar(int c) +{ + return whisperPutc(c); +} + +struct FILE; + +int +putc(int c, struct FILE* f) +{ + return whisperPutc(c); +} + + +int +puts(const char* s) +{ + return whisperPuts(s); +} + +int +printf(const char* format, ...) +{ + va_list ap; + + va_start(ap, format); + int code = whisperPrintfImpl(format, ap); + va_end(ap); + + return code; +} + +// function to read cpu mcycle csr for performance measurements +// simplified version +uint64_t get_mcycle(){ +unsigned int mcyclel; +unsigned int mcycleh0 = 0, mcycleh1=1; +uint64_t cycles; + +while(mcycleh0 != mcycleh1) { + asm volatile ("csrr %0,mcycleh" : "=r" (mcycleh0) ); + asm volatile ("csrr %0,mcycle" : "=r" (mcyclel) ); + asm volatile ("csrr %0,mcycleh" : "=r" (mcycleh1) ); +} +cycles = mcycleh1; +return (cycles << 32) | mcyclel; + +} diff --git a/testbench/axi_lsu_dma_bridge.sv b/testbench/axi_lsu_dma_bridge.sv new file mode 100644 index 0000000..2c3b844 --- /dev/null +++ b/testbench/axi_lsu_dma_bridge.sv @@ -0,0 +1,201 @@ + +// connects LSI master to external AXI slave and DMA slave +module axi_lsu_dma_bridge +#( +parameter M_ID_WIDTH = 8, +parameter S0_ID_WIDTH = 8 +) +( +input clk, +input reset_l, + +// master read bus +input m_arvalid, +input [M_ID_WIDTH-1:0] m_arid, +input[31:0] m_araddr, +output m_arready, + +output m_rvalid, +input m_rready, +output [63:0] m_rdata, +output [M_ID_WIDTH-1:0] m_rid, +output [1:0] m_rresp, +output m_rlast, + +// master write bus +input m_awvalid, +input [M_ID_WIDTH-1:0] m_awid, +input[31:0] m_awaddr, +output m_awready, + +input m_wvalid, +output m_wready, + +output[1:0] m_bresp, +output m_bvalid, +output[M_ID_WIDTH-1:0] m_bid, +input m_bready, + +// slave 0 if general ext memory +output s0_arvalid, +input s0_arready, + +input s0_rvalid, +input[S0_ID_WIDTH-1:0] s0_rid, +input[1:0] s0_rresp, +input[63:0] s0_rdata, +input s0_rlast, +output 
s0_rready, + +output s0_awvalid, +input s0_awready, + +output s0_wvalid, +input s0_wready, + +input[1:0] s0_bresp, +input s0_bvalid, +input[S0_ID_WIDTH-1:0] s0_bid, +output s0_bready, + +// slave 1 if DMA port +output s1_arvalid, +input s1_arready, + +input s1_rvalid, +input[1:0] s1_rresp, +input[63:0] s1_rdata, +input s1_rlast, +output s1_rready, + +output s1_awvalid, +input s1_awready, + +output s1_wvalid, +input s1_wready, + +input[1:0] s1_bresp, +input s1_bvalid, +output s1_bready +); + +parameter ICCM_BASE = `RV_ICCM_BITS; // in LSBs +localparam IDFIFOSZ = $clog2(`RV_DMA_BUF_DEPTH); +bit[31:0] iccm_real_base_addr = `RV_ICCM_SADR ; + +wire ar_slave_select; +wire aw_slave_select; +wire w_slave_select; + +wire rresp_select; +wire bresp_select; +wire ar_iccm_select; +wire aw_iccm_select; + +reg [1:0] wsel_iptr, wsel_optr; +reg [2:0] wsel_count; +reg [3:0] wsel; + + +reg [M_ID_WIDTH-1:0] arid [1< +typedef clock_t CORE_TICKS; + +/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION + Initialize these strings per platform +*/ +#ifndef COMPILER_VERSION + #ifdef __GNUC__ + #define COMPILER_VERSION "GCC"__VERSION__ + #else + #define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" + #endif +#endif +#ifndef COMPILER_FLAGS + #define COMPILER_FLAGS "-O2" +#endif + +#ifndef MEM_LOCATION +// #define MEM_LOCATION "STACK" + #define MEM_LOCATION "STATIC" +#endif + +/* Data Types : + To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in . + + *Imprtant* : + ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!! +*/ +typedef signed short ee_s16; +typedef unsigned short ee_u16; +typedef signed int ee_s32; +typedef double ee_f32; +typedef unsigned char ee_u8; +typedef unsigned int ee_u32; +typedef ee_u32 ee_ptr_int; +typedef size_t ee_size_t; +/* align_mem : + This macro is used to align an offset to point to a 32b value. 
It is used in the Matrix algorithm to initialize the input memory blocks. +*/ +#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) + +/* Configuration : SEED_METHOD + Defines method to get seed values that cannot be computed at compile time. + + Valid values : + SEED_ARG - from command line. + SEED_FUNC - from a system function. + SEED_VOLATILE - from volatile variables. +*/ +#ifndef SEED_METHOD +#define SEED_METHOD SEED_VOLATILE +#endif + +/* Configuration : MEM_METHOD + Defines method to get a block of memry. + + Valid values : + MEM_MALLOC - for platforms that implement malloc and have malloc.h. + MEM_STATIC - to use a static memory array. + MEM_STACK - to allocate the data block on the stack (NYI). +*/ +#ifndef MEM_METHOD +//#define MEM_METHOD MEM_STACK +#define MEM_METHOD MEM_STATIC +#endif + +/* Configuration : MULTITHREAD + Define for parallel execution + + Valid values : + 1 - only one context (default). + N>1 - will execute N copies in parallel. + + Note : + If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined. + + Two sample implementations are provided. Use or to enable them. + + It is valid to have a different implementation of and in , + to fit a particular architecture. +*/ +#ifndef MULTITHREAD +#define MULTITHREAD 1 +#define USE_PTHREAD 0 +#define USE_FORK 0 +#define USE_SOCKET 0 +#endif + +/* Configuration : MAIN_HAS_NOARGC + Needed if platform does not support getting arguments to main. + + Valid values : + 0 - argc/argv to main is supported + 1 - argc/argv to main is not supported + + Note : + This flag only matters if MULTITHREAD has been defined to a value greater then 1. +*/ +#ifndef MAIN_HAS_NOARGC +#define MAIN_HAS_NOARGC 1 +#endif + +/* Configuration : MAIN_HAS_NORETURN + Needed if platform does not support returning a value from main. + + Valid values : + 0 - main returns an int, and return value will be 0. 
+ 1 - platform does not support returning a value from main +*/ +#ifndef MAIN_HAS_NORETURN +#define MAIN_HAS_NORETURN 1 +#endif + +/* Variable : default_num_contexts + Not used for this simple port, must cintain the value 1. +*/ +extern ee_u32 default_num_contexts; + +typedef struct CORE_PORTABLE_S { + ee_u8 portable_id; +} core_portable; + +/* target specific init/fini */ +void portable_init(core_portable *p, int *argc, char *argv[]); +void portable_fini(core_portable *p); + +#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) && !defined(VALIDATION_RUN) +#if (TOTAL_DATA_SIZE==1200) +#define PROFILE_RUN 1 +#elif (TOTAL_DATA_SIZE==2000) +#define PERFORMANCE_RUN 1 +#else +#define VALIDATION_RUN 1 +#endif +#endif + +#endif /* CORE_PORTME_H */ + + +#if HAS_STDIO +#include +#endif +#if HAS_PRINTF +#ifndef ee_printf +#define ee_printf printf +#endif +#endif + +/* Actual benchmark execution in iterate */ +void *iterate(void *pres); + +/* Typedef: secs_ret + For machines that have floating point support, get number of seconds as a double. + Otherwise an unsigned int. 
+*/ +#if HAS_FLOAT +typedef double secs_ret; +#else +typedef ee_u32 secs_ret; +#endif + +#if MAIN_HAS_NORETURN +#define MAIN_RETURN_VAL +#define MAIN_RETURN_TYPE void +#else +#define MAIN_RETURN_VAL 0 +#define MAIN_RETURN_TYPE int +#endif + +void start_time(void); +void stop_time(void); +CORE_TICKS get_time(void); +secs_ret time_in_secs(CORE_TICKS ticks); + +/* Misc useful functions */ +ee_u16 crcu8(ee_u8 data, ee_u16 crc); +ee_u16 crc16(ee_s16 newval, ee_u16 crc); +ee_u16 crcu16(ee_u16 newval, ee_u16 crc); +ee_u16 crcu32(ee_u32 newval, ee_u16 crc); +ee_u8 check_data_types(); +void *portable_malloc(ee_size_t size); +void portable_free(void *p); +ee_s32 parseval(char *valstring); + +/* Algorithm IDS */ +#define ID_LIST (1<<0) +#define ID_MATRIX (1<<1) +#define ID_STATE (1<<2) +#define ALL_ALGORITHMS_MASK (ID_LIST|ID_MATRIX|ID_STATE) +#define NUM_ALGORITHMS 3 + +/* list data structures */ +typedef struct list_data_s { + ee_s16 data16; + ee_s16 idx; +} list_data; + +typedef struct list_head_s { + struct list_head_s *next; + struct list_data_s *info; +} list_head; + + +/*matrix benchmark related stuff */ +#define MATDAT_INT 1 +#if MATDAT_INT +typedef ee_s16 MATDAT; +typedef ee_s32 MATRES; +#else +typedef ee_f16 MATDAT; +typedef ee_f32 MATRES; +#endif + +typedef struct MAT_PARAMS_S { + int N; + MATDAT *A; + MATDAT *B; + MATRES *C; +} mat_params; + +/* state machine related stuff */ +/* List of all the possible states for the FSM */ +typedef enum CORE_STATE { + CORE_START=0, + CORE_INVALID, + CORE_S1, + CORE_S2, + CORE_INT, + CORE_FLOAT, + CORE_EXPONENT, + CORE_SCIENTIFIC, + NUM_CORE_STATES +} core_state_e ; + + +/* Helper structure to hold results */ +typedef struct RESULTS_S { + /* inputs */ + ee_s16 seed1; /* Initializing seed */ + ee_s16 seed2; /* Initializing seed */ + ee_s16 seed3; /* Initializing seed */ + void *memblock[4]; /* Pointer to safe memory location */ + ee_u32 size; /* Size of the data */ + ee_u32 iterations; /* Number of iterations to execute */ + 
ee_u32 execs; /* Bitmask of operations to execute */ + struct list_head_s *list; + mat_params mat; + /* outputs */ + ee_u16 crc; + ee_u16 crclist; + ee_u16 crcmatrix; + ee_u16 crcstate; + ee_s16 err; + /* ultithread specific */ + core_portable port; +} core_results; + +/* Multicore execution handling */ +#if (MULTITHREAD>1) +ee_u8 core_start_parallel(core_results *res); +ee_u8 core_stop_parallel(core_results *res); +#endif + +/* list benchmark functions */ +list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed); +ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx); + +/* state benchmark functions */ +void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p); +ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock, + ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc); + +/* matrix benchmark functions */ +ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p); +ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc); + + + + + +/* +Topic: Description + Benchmark using a linked list. + + Linked list is a common data structure used in many applications. + + For our purposes, this will excercise the memory units of the processor. + In particular, usage of the list pointers to find and alter data. + + We are not using Malloc since some platforms do not support this library. + + Instead, the memory block being passed in is used to create a list, + and the benchmark takes care not to add more items then can be + accomodated by the memory block. The porting layer will make sure + that we have a valid memory block. + + All operations are done in place, without using any extra memory. + + The list itself contains list pointers and pointers to data items. + Data items contain the following: + + idx - An index that captures the initial order of the list. + data - Variable data initialized based on the input parameters. The 16b are divided as follows: + o Upper 8b are backup of original data. 
+        o Bit 7 indicates if the lower 7 bits are to be used as is or calculated.
+        o Bits 0-2 indicate type of operation to perform to get a 7b value.
+        o Bits 3-6 provide input for the operation.
+
+*/
+
+/* local functions */
+
+list_head *core_list_find(list_head *list,list_data *info);
+list_head *core_list_reverse(list_head *list);
+list_head *core_list_remove(list_head *item);
+list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified);
+list_head *core_list_insert_new(list_head *insert_point
+    , list_data *info, list_head **memblock, list_data **datablock
+    , list_head *memblock_end, list_data *datablock_end);
+typedef ee_s32(*list_cmp)(list_data *a, list_data *b, core_results *res);
+list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res);
+
+/* Function: calc_func
+    Return the 7b value of a list data item, calculating and caching it on first use.
+
+    Operation:
+    If bit 7 of *pdata is set, the lower 7 bits are returned as is.
+    Otherwise bits 0-2 select the operation (0 - core_bench_state,
+    1 - core_bench_matrix, any other value - the data word itself) and
+    bits 3-6, replicated into both nibbles of a byte, are its input.
+    The result is folded into res->crc, cut down to 7 bits and written back
+    to the low byte of *pdata with bit 7 set; the upper byte is preserved.
+    The first state and matrix results are also stored in res->crcstate and
+    res->crcmatrix while those fields are still 0.
+
+    Parameters:
+    pdata - data word of the list item (updated in place when not yet cached).
+    res - benchmark results/context; supplies the inputs and receives the crc.
+
+    Returns:
+    The 7b value for the item.
+*/
+ee_s16 calc_func(ee_s16 *pdata, core_results *res) {
+    ee_s16 data=*pdata;
+    ee_s16 retval;
+    ee_u8 optype=(data>>7) & 1; /* bit 7 indicates if the function result has been cached */
+    if (optype) /* if cached, use cache */
+        return (data & 0x007f);
+    else { /* otherwise calculate and cache the result */
+        ee_s16 flag=data & 0x7; /* bits 0-2 is type of function to perform */
+        ee_s16 dtype=((data>>3) & 0xf); /* bits 3-6 is specific data for the operation */
+        dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
+        switch (flag) {
+            case 0:
+                if (dtype<0x22) /* set min period for bit corruption */
+                    dtype=0x22;
+                retval=core_bench_state(res->size,res->memblock[3],res->seed1,res->seed2,dtype,res->crc);
+                if (res->crcstate==0)
+                    res->crcstate=retval;
+                break;
+            case 1:
+                retval=core_bench_matrix(&(res->mat),dtype,res->crc);
+                if (res->crcmatrix==0)
+                    res->crcmatrix=retval;
+                break;
+            default:
+                retval=data;
+                break;
+        }
+        res->crc=crcu16(retval,res->crc);
+        retval &= 0x007f;
+        *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
+        return retval;
+    }
+}
+/* Function: cmp_complex
+    Compare the data item in a list cell.
+ + Can be used by mergesort. +*/ +ee_s32 cmp_complex(list_data *a, list_data *b, core_results *res) { + ee_s16 val1=calc_func(&(a->data16),res); + ee_s16 val2=calc_func(&(b->data16),res); + return val1 - val2; +} + +/* Function: cmp_idx + Compare the idx item in a list cell, and regen the data. + + Can be used by mergesort. +*/ +ee_s32 cmp_idx(list_data *a, list_data *b, core_results *res) { + if (res==NULL) { + a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16>>8)); + b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16>>8)); + } + return a->idx - b->idx; +} + +void copy_info(list_data *to,list_data *from) { + to->data16=from->data16; + to->idx=from->idx; +} + +/* Benchmark for linked list: + - Try to find multiple data items. + - List sort + - Operate on data from list (crc) + - Single remove/reinsert + * At the end of this function, the list is back to original state +*/ +ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) { + ee_u16 retval=0; + ee_u16 found=0,missed=0; + list_head *list=res->list; + ee_s16 find_num=res->seed3; + list_head *this_find; + list_head *finder, *remover; + list_data info; + ee_s16 i; + + info.idx=finder_idx; + /* find values in the list, and change the list each time (reverse and cache if value found) */ + for (i=0; inext->info->data16 >> 8) & 1; + } + else { + found++; + if (this_find->info->data16 & 0x1) /* use found value */ + retval+=(this_find->info->data16 >> 9) & 1; + /* and cache next item at the head of the list (if any) */ + if (this_find->next != NULL) { + finder = this_find->next; + this_find->next = finder->next; + finder->next=list->next; + list->next=finder; + } + } + if (info.idx>=0) + info.idx++; +#if CORE_DEBUG + ee_printf("List find %d: [%d,%d,%d]\n",i,retval,missed,found); +#endif + } + retval+=found*4-missed; + /* sort the list by data content and remove one item*/ + if (finder_idx>0) + list=core_list_mergesort(list,cmp_complex,res); + remover=core_list_remove(list->next); + /* CRC data content 
of list from location of index N forward, and then undo remove */ + finder=core_list_find(list,&info); + if (!finder) + finder=list->next; + while (finder) { + retval=crc16(list->info->data16,retval); + finder=finder->next; + } +#if CORE_DEBUG + ee_printf("List sort 1: %04x\n",retval); +#endif + remover=core_list_undo_remove(remover,list->next); + /* sort the list by index, in effect returning the list to original state */ + list=core_list_mergesort(list,cmp_idx,NULL); + /* CRC data content of list */ + finder=list->next; + while (finder) { + retval=crc16(list->info->data16,retval); + finder=finder->next; + } +#if CORE_DEBUG + ee_printf("List sort 2: %04x\n",retval); +#endif + return retval; +} +/* Function: core_list_init + Initialize list with data. + + Parameters: + blksize - Size of memory to be initialized. + memblock - Pointer to memory block. + seed - Actual values chosen depend on the seed parameter. + The seed parameter MUST be supplied from a source that cannot be determined at compile time + + Returns: + Pointer to the head of the list. 
+ +*/ +list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed) { + /* calculated pointers for the list */ + ee_u32 per_item=16+sizeof(struct list_data_s); + ee_u32 size=(blksize/per_item)-2; /* to accomodate systems with 64b pointers, and make sure same code is executed, set max list elements */ + list_head *memblock_end=memblock+size; + list_data *datablock=(list_data *)(memblock_end); + list_data *datablock_end=datablock+size; + /* some useful variables */ + ee_u32 i; + list_head *finder,*list=memblock; + list_data info; + + /* create a fake items for the list head and tail */ + list->next=NULL; + list->info=datablock; + list->info->idx=0x0000; + list->info->data16=(ee_s16)0x8080; + memblock++; + datablock++; + info.idx=0x7fff; + info.data16=(ee_s16)0xffff; + core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end); + + /* then insert size items */ + for (i=0; inext; + i=1; + while (finder->next!=NULL) { + if (iinfo->idx=i++; + else { + ee_u16 pat=(ee_u16)(i++ ^ seed); /* get a pseudo random number */ + finder->info->idx=0x3fff & (((i & 0x07) << 8) | pat); /* make sure the mixed items end up after the ones in sequence */ + } + finder=finder->next; + } + list = core_list_mergesort(list,cmp_idx,NULL); +#if CORE_DEBUG + ee_printf("Initialized list:\n"); + finder=list; + while (finder) { + ee_printf("[%04x,%04x]",finder->info->idx,(ee_u16)finder->info->data16); + finder=finder->next; + } + ee_printf("\n"); +#endif + return list; +} + +/* Function: core_list_insert + Insert an item to the list + + Parameters: + insert_point - where to insert the item. + info - data for the cell. + memblock - pointer for the list header + datablock - pointer for the list data + memblock_end - end of region for list headers + datablock_end - end of region for list data + + Returns: + Pointer to new item. 
+*/
+list_head *core_list_insert_new(list_head *insert_point, list_data *info, list_head **memblock, list_data **datablock
+    , list_head *memblock_end, list_data *datablock_end) {
+    list_head *newitem;
+
+    /* refuse the insert when taking one more cell would reach the end of either region */
+    if ((*memblock+1) >= memblock_end)
+        return NULL;
+    if ((*datablock+1) >= datablock_end)
+        return NULL;
+
+    /* take the next free list header and link it in right after insert_point */
+    newitem=*memblock;
+    (*memblock)++;
+    newitem->next=insert_point->next;
+    insert_point->next=newitem;
+
+    /* take the next free data cell and fill it with a copy of info */
+    newitem->info=*datablock;
+    (*datablock)++;
+    copy_info(newitem->info,info);
+
+    return newitem;
+}
+
+/* Function: core_list_remove
+    Remove an item from the list.
+
+    Operation:
+    For a singly linked list, remove by swapping the info pointers of the
+    given item and the item that follows it, and then unlinking that
+    following item (which now carries the given item's data).
+
+    Note:
+    since there is always a fake item at the end of the list, no need to check for NULL.
+
+    Parameters:
+    item - List item whose data is to be taken out of the list.
+
+    Returns:
+    Removed item (the unlinked cell, with its next pointer cleared).
+*/
+list_head *core_list_remove(list_head *item) {
+    list_data *tmp;
+    list_head *ret=item->next;
+    /* swap data pointers */
+    tmp=item->info;
+    item->info=ret->info;
+    ret->info=tmp;
+    /* and eliminate item */
+    item->next=item->next->next;
+    ret->next=NULL;
+    return ret;
+}
+
+/* Function: core_list_undo_remove
+    Undo a remove operation.
+
+    Operation:
+    Since we want each iteration of the benchmark to be exactly the same,
+    we need to be able to undo a remove.
+    Link the removed item back into the list, and switch the info items.
+
+    Parameters:
+    item_removed - Return value from core_list_remove
+    item_modified - List item that was modified during core_list_remove
+
+    Returns:
+    The item that was linked back to the list.
+
+*/
+list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified) {
+    list_data *tmp;
+    /* swap data pointers */
+    tmp=item_removed->info;
+    item_removed->info=item_modified->info;
+    item_modified->info=tmp;
+    /* and insert item */
+    item_removed->next=item_modified->next;
+    item_modified->next=item_removed;
+    return item_removed;
+}
+
+/* Function: core_list_find
+    Find an item in the list
+
+    Operation:
+    Find an item by idx (if info->idx is not negative) or, otherwise,
+    by comparing the low 8 bits of each item's data16 with info->data16.
+
+    Parameters:
+    list - list head
+    info - idx or data to find
+
+    Returns:
+    Found item, or NULL if not found.
+*/
+list_head *core_list_find(list_head *list,list_data *info) {
+    if (info->idx>=0) {
+        while (list && (list->info->idx != info->idx))
+            list=list->next;
+        return list;
+    } else {
+        while (list && ((list->info->data16 & 0xff) != info->data16))
+            list=list->next;
+        return list;
+    }
+}
+/* Function: core_list_reverse
+    Reverse a list
+
+    Operation:
+    Rearrange the pointers so the list is reversed.
+
+    Parameters:
+    list - list head
+
+    Returns:
+    New head of the list (the former last item), or NULL if the list was empty.
+*/
+
+list_head *core_list_reverse(list_head *list) {
+    list_head *next=NULL, *tmp;
+    while (list) {
+        tmp=list->next;
+        list->next=next;
+        next=list;
+        list=tmp;
+    }
+    return next;
+}
+/* Function: core_list_mergesort
+    Sort the list in place without recursion.
+
+    Description:
+    Use mergesort, as for linked list this is a realistic solution.
+    Also, since this is aimed at embedded, care was taken to use iterative rather than recursive algorithm.
+    The sort can either return the list to original order (by idx),
+    or use the data item to invoke other algorithms and change the order of the list.
+
+    Parameters:
+    list - list to be sorted.
+    cmp - cmp function to use
+    res - passed unchanged to every cmp call (callers in this file pass NULL together with cmp_idx).
+
+    Returns:
+    New head of the list.
+
+    Note:
+    We have a special header for the list that will always be first,
+    but the algorithm could theoretically modify where the list starts.
+
+ */
+list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res) {
+    list_head *p, *q, *e, *tail;
+    ee_s32 insize, nmerges, psize, qsize, i;
+
+    /* length of the already sorted runs; doubled after every pass */
+    insize = 1;
+
+    while (1) {
+        p = list;
+        list = NULL;
+        tail = NULL;
+
+        nmerges = 0;  /* count number of merges we do in this pass */
+
+        while (p) {
+            nmerges++;  /* there exists a merge to be done */
+            /* step `insize' places along from p */
+            q = p;
+            psize = 0;
+            for (i = 0; i < insize; i++) {
+                psize++;
+                q = q->next;
+                if (!q) break;
+            }
+
+            /* if q hasn't fallen off end, we have two lists to merge */
+            qsize = insize;
+
+            /* now we have two lists; merge them */
+            while (psize > 0 || (qsize > 0 && q)) {
+
+                /* decide whether next element of merge comes from p or q */
+                if (psize == 0) {
+                    /* p is empty; e must come from q. */
+                    e = q; q = q->next; qsize--;
+                } else if (qsize == 0 || !q) {
+                    /* q is empty; e must come from p. */
+                    e = p; p = p->next; psize--;
+                } else if (cmp(p->info,q->info,res) <= 0) {
+                    /* First element of p is lower (or same); e must come from p. */
+                    e = p; p = p->next; psize--;
+                } else {
+                    /* First element of q is lower; e must come from q. */
+                    e = q; q = q->next; qsize--;
+                }
+
+                /* add the next element to the merged list */
+                if (tail) {
+                    tail->next = e;
+                } else {
+                    list = e;
+                }
+                tail = e;
+            }
+
+            /* now p has stepped `insize' places along, and q has too */
+            p = q;
+        }
+
+        /* NOTE(review): tail is dereferenced unconditionally here, so a NULL
+           (empty) list would fault before the nmerges==0 case below is
+           reached - confirm that no caller passes an empty list. */
+        tail->next = NULL;
+
+        /* If we have done only one merge, we're finished. */
+        if (nmerges <= 1)   /* allow for nmerges==0, the empty list case */
+            return list;
+
+        /* Otherwise repeat, merging lists twice the size */
+        insize *= 2;
+    }
+#if COMPILER_REQUIRES_SORT_RETURN
+    return list;
+#endif
+}
+/*
+Author : Shay Gal-On, EEMBC
+
+This file is part of  EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009
+All rights reserved.
+ +EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the +CoreMark License that is distributed with the official EEMBC COREMARK Software release. +If you received this EEMBC CoreMark Software without the accompanying CoreMark License, +you must discontinue use and download the official release from www.coremark.org. + +Also, if you are publicly displaying scores generated from the EEMBC CoreMark software, +make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file. + +EEMBC +4354 Town Center Blvd. Suite 114-200 +El Dorado Hills, CA, 95762 +*/ +/* File: core_main.c + This file contains the framework to acquire a block of memory, seed initial parameters, tun t he benchmark and report the results. +*/ +//#include "coremark.h" + +/* Function: iterate + Run the benchmark for a specified number of iterations. + + Operation: + For each type of benchmarked algorithm: + a - Initialize the data block for the algorithm. + b - Execute the algorithm N times. + + Returns: + NULL. 
+*/ +static ee_u16 list_known_crc[] = {(ee_u16)0xd4b0,(ee_u16)0x3340,(ee_u16)0x6a79,(ee_u16)0xe714,(ee_u16)0xe3c1}; +static ee_u16 matrix_known_crc[] = {(ee_u16)0xbe52,(ee_u16)0x1199,(ee_u16)0x5608,(ee_u16)0x1fd7,(ee_u16)0x0747}; +static ee_u16 state_known_crc[] = {(ee_u16)0x5e47,(ee_u16)0x39bf,(ee_u16)0xe5a4,(ee_u16)0x8e3a,(ee_u16)0x8d84}; +void *iterate(void *pres) { + ee_u32 i; + ee_u16 crc; + core_results *res=(core_results *)pres; + ee_u32 iterations=res->iterations; + res->crc=0; + res->crclist=0; + res->crcmatrix=0; + res->crcstate=0; + + for (i=0; icrc=crcu16(crc,res->crc); + crc=core_bench_list(res,-1); + res->crc=crcu16(crc,res->crc); + if (i==0) res->crclist=res->crc; + } + return NULL; +} + +#if (SEED_METHOD==SEED_ARG) +ee_s32 get_seed_args(int i, int argc, char *argv[]); +#define get_seed(x) (ee_s16)get_seed_args(x,argc,argv) +#define get_seed_32(x) get_seed_args(x,argc,argv) +#else /* via function or volatile */ +ee_s32 get_seed_32(int i); +#define get_seed(x) (ee_s16)get_seed_32(x) +#endif + +#if (MEM_METHOD==MEM_STATIC) +ee_u8 static_memblk[TOTAL_DATA_SIZE]; +#endif +char *mem_name[3] = {"Static","Heap","Stack"}; +/* Function: main + Main entry routine for the benchmark. + This function is responsible for the following steps: + + 1 - Initialize input seeds from a source that cannot be determined at compile time. + 2 - Initialize memory block for use. + 3 - Run and time the benchmark. + 4 - Report results, testing the validity of the output if the seeds are known. + + Arguments: + 1 - first seed : Any value + 2 - second seed : Must be identical to first for iterations to be identical + 3 - third seed : Any value, should be at least an order of magnitude less then the input size, but bigger then 32. 
+ 4 - Iterations : Special, if set to 0, iterations will be automatically determined such that the benchmark will run between 10 to 100 secs + +*/ + +#if MAIN_HAS_NOARGC +MAIN_RETURN_TYPE main(void) { + int argc=0; + char *argv[1]; +#else +MAIN_RETURN_TYPE main(int argc, char *argv[]) { +#endif + ee_u16 i,j=0,num_algorithms=0; + ee_s16 known_id=-1,total_errors=0; + ee_u16 seedcrc=0; + CORE_TICKS total_time; + core_results results[MULTITHREAD]; +#if (MEM_METHOD==MEM_STACK) + ee_u8 stack_memblock[TOTAL_DATA_SIZE*MULTITHREAD]; +#endif + /* first call any initializations needed */ + portable_init(&(results[0].port), &argc, argv); + /* First some checks to make sure benchmark will run ok */ + if (sizeof(struct list_head_s)>128) { + ee_printf("list_head structure too big for comparable data!\n"); + return MAIN_RETURN_VAL; + } + results[0].seed1=get_seed(1); + results[0].seed2=get_seed(2); + results[0].seed3=get_seed(3); + results[0].iterations=get_seed_32(4); +#if CORE_DEBUG + results[0].iterations=1; +#endif + results[0].execs=get_seed_32(5); + if (results[0].execs==0) { /* if not supplied, execute all algorithms */ + results[0].execs=ALL_ALGORITHMS_MASK; + } + /* put in some default values based on one seed only for easy testing */ + if ((results[0].seed1==0) && (results[0].seed2==0) && (results[0].seed3==0)) { /* validation run */ + results[0].seed1=0; + results[0].seed2=0; + results[0].seed3=0x66; + } + if ((results[0].seed1==1) && (results[0].seed2==0) && (results[0].seed3==0)) { /* perfromance run */ + results[0].seed1=0x3415; + results[0].seed2=0x3415; + results[0].seed3=0x66; + } +#if (MEM_METHOD==MEM_STATIC) + results[0].memblock[0]=(void *)static_memblk; + results[0].size=TOTAL_DATA_SIZE; + results[0].err=0; + #if (MULTITHREAD>1) + #error "Cannot use a static data area with multiple contexts!" 
+ #endif +#elif (MEM_METHOD==MEM_MALLOC) + for (i=0 ; i1) + if (default_num_contexts>MULTITHREAD) { + default_num_contexts=MULTITHREAD; + } + for (i=0 ; i=0) { + for (i=0 ; i 0) + ee_printf("Iterations/Sec : %f\n",default_num_contexts*results[0].iterations/time_in_secs(total_time)); +#else + ee_printf("Total time (secs): %d\n",time_in_secs(total_time)); + if (time_in_secs(total_time) > 0) +// ee_printf("Iterations/Sec : %d\n",default_num_contexts*results[0].iterations/time_in_secs(total_time)); + ee_printf("Iterat/Sec/MHz : %d.%d\n",1000*default_num_contexts*results[0].iterations/time_in_secs(total_time), + 100000*default_num_contexts*results[0].iterations/time_in_secs(total_time) % 100); +#endif + if (time_in_secs(total_time) < 10) { + ee_printf("ERROR! Must execute for at least 10 secs for a valid result!\n"); + total_errors++; + } + + ee_printf("Iterations : %u\n",(ee_u32)default_num_contexts*results[0].iterations); + ee_printf("Compiler version : %s\n",COMPILER_VERSION); + ee_printf("Compiler flags : %s\n",COMPILER_FLAGS); +#if (MULTITHREAD>1) + ee_printf("Parallel %s : %d\n",PARALLEL_METHOD,default_num_contexts); +#endif + ee_printf("Memory location : %s\n",MEM_LOCATION); + /* output for verification */ + ee_printf("seedcrc : 0x%04x\n",seedcrc); + if (results[0].execs & ID_LIST) + for (i=0 ; i1) + ee_printf(" / %d:%s",default_num_contexts,PARALLEL_METHOD); +#endif + ee_printf("\n"); + } +#endif + } + if (total_errors>0) + ee_printf("Errors detected\n"); + if (total_errors<0) + ee_printf("Cannot validate operation for these seed values, please compare with results on a known platform.\n"); + +#if (MEM_METHOD==MEM_MALLOC) + for (i=0 ; i>(from)) & (~(0xffffffff << (to)))) + +#if CORE_DEBUG +void printmat(MATDAT *A, ee_u32 N, char *name) { + ee_u32 i,j; + ee_printf("Matrix %s [%dx%d]:\n",name,N,N); + for (i=0; i N times, + changing the matrix values slightly by a constant amount each time. 
+*/ +ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc) { + ee_u32 N=p->N; + MATRES *C=p->C; + MATDAT *A=p->A; + MATDAT *B=p->B; + MATDAT val=(MATDAT)seed; + + crc=crc16(matrix_test(N,C,A,B,val),crc); + + return crc; +} + +/* Function: matrix_test + Perform matrix manipulation. + + Parameters: + N - Dimensions of the matrix. + C - memory for result matrix. + A - input matrix + B - operator matrix (not changed during operations) + + Returns: + A CRC value that captures all results calculated in the function. + In particular, crc of the value calculated on the result matrix + after each step by . + + Operation: + + 1 - Add a constant value to all elements of a matrix. + 2 - Multiply a matrix by a constant. + 3 - Multiply a matrix by a vector. + 4 - Multiply a matrix by a matrix. + 5 - Add a constant value to all elements of a matrix. + + After the last step, matrix A is back to original contents. +*/ +ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val) { + ee_u16 crc=0; + MATDAT clipval=matrix_big(val); + + matrix_add_const(N,A,val); /* make sure data changes */ +#if CORE_DEBUG + printmat(A,N,"matrix_add_const"); +#endif + matrix_mul_const(N,C,A,val); + crc=crc16(matrix_sum(N,C,clipval),crc); +#if CORE_DEBUG + printmatC(C,N,"matrix_mul_const"); +#endif + matrix_mul_vect(N,C,A,B); + crc=crc16(matrix_sum(N,C,clipval),crc); +#if CORE_DEBUG + printmatC(C,N,"matrix_mul_vect"); +#endif + matrix_mul_matrix(N,C,A,B); + crc=crc16(matrix_sum(N,C,clipval),crc); +#if CORE_DEBUG + printmatC(C,N,"matrix_mul_matrix"); +#endif + matrix_mul_matrix_bitextract(N,C,A,B); + crc=crc16(matrix_sum(N,C,clipval),crc); +#if CORE_DEBUG + printmatC(C,N,"matrix_mul_matrix_bitextract"); +#endif + + matrix_add_const(N,A,-val); /* return matrix to initial value */ + return crc; +} + +/* Function : matrix_init + Initialize the memory block for matrix benchmarking. + + Parameters: + blksize - Size of memory to be initialized. + memblk - Pointer to memory block. 
+ seed - Actual values chosen depend on the seed parameter. + p - pointers to containing initialized matrixes. + + Returns: + Matrix dimensions. + + Note: + The seed parameter MUST be supplied from a source that cannot be determined at compile time +*/ +ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p) { + ee_u32 N=0; + MATDAT *A; + MATDAT *B; + ee_s32 order=1; + MATDAT val; + ee_u32 i=0,j=0; + if (seed==0) + seed=1; + while (jA=A; + p->B=B; + p->C=(MATRES *)align_mem(B+N*N); + p->N=N; +#if CORE_DEBUG + printmat(A,N,"A"); + printmat(B,N,"B"); +#endif + return N; +} + +/* Function: matrix_sum + Calculate a function that depends on the values of elements in the matrix. + + For each element, accumulate into a temporary variable. + + As long as this value is under the parameter clipval, + add 1 to the result if the element is bigger then the previous. + + Otherwise, reset the accumulator and add 10 to the result. +*/ +ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval) { + MATRES tmp=0,prev=0,cur=0; + ee_s16 ret=0; + ee_u32 i,j; + for (i=0; iclipval) { + ret+=10; + tmp=0; + } else { + ret += (cur>prev) ? 1 : 0; + } + prev=cur; + } + } + return ret; +} + +/* Function: matrix_mul_const + Multiply a matrix by a constant. + This could be used as a scaler for instance. +*/ +void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val) { + ee_u32 i,j; + for (i=0; i0) { + for(i=0;i>3) & 0x3]; + next=4; + break; + case 3: /* float */ + case 4: /* float */ + buf=floatpat[(seed>>3) & 0x3]; + next=8; + break; + case 5: /* scientific */ + case 6: /* scientific */ + buf=scipat[(seed>>3) & 0x3]; + next=8; + break; + case 7: /* invalid */ + buf=errpat[(seed>>3) & 0x3]; + next=8; + break; + default: /* Never happen, just to make some compilers happy */ + break; + } + } + size++; + while (total='0') & (c<='9')) ? 1 : 0; + return retval; +} + +/* Function: core_state_transition + Actual state machine. 
+
+    The state machine will continue scanning until either:
+    1 - an invalid input is detected.
+    2 - a valid number has been detected.
+
+    The input pointer is updated to point to the end of the token, and the end state is returned (either specific format determined or invalid).
+
+    Scanning also stops at the terminating 0 byte or at a ',' separator;
+    a ',' is consumed, so *instr is left pointing just past it.
+
+    transition_count is indexed by state: most entries are incremented when
+    the machine leaves the state of that index.  Entry CORE_START is
+    incremented for every symbol examined in the start state, and entry
+    CORE_INVALID is incremented when the start state or the scientific
+    state rejects a symbol.
+*/
+
+enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count) {
+    ee_u8 *str=*instr;
+    ee_u8 NEXT_SYMBOL;
+    enum CORE_STATE state=CORE_START;
+    for( ; *str && state != CORE_INVALID; str++ ) {
+        NEXT_SYMBOL = *str;
+        if (NEXT_SYMBOL==',') /* end of this input */ {
+            str++;
+            break;
+        }
+        switch(state) {
+        case CORE_START:
+            if(ee_isdigit(NEXT_SYMBOL)) {
+                state = CORE_INT;
+            }
+            else if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) {
+                state = CORE_S1;
+            }
+            else if( NEXT_SYMBOL == '.' ) {
+                state = CORE_FLOAT;
+            }
+            else {
+                state = CORE_INVALID;
+                transition_count[CORE_INVALID]++;
+            }
+            transition_count[CORE_START]++;
+            break;
+        case CORE_S1: /* a sign has been seen */
+            if(ee_isdigit(NEXT_SYMBOL)) {
+                state = CORE_INT;
+                transition_count[CORE_S1]++;
+            }
+            else if( NEXT_SYMBOL == '.' ) {
+                state = CORE_FLOAT;
+                transition_count[CORE_S1]++;
+            }
+            else {
+                state = CORE_INVALID;
+                transition_count[CORE_S1]++;
+            }
+            break;
+        case CORE_INT: /* further digits keep the machine in this state */
+            if( NEXT_SYMBOL == '.' ) {
+                state = CORE_FLOAT;
+                transition_count[CORE_INT]++;
+            }
+            else if(!ee_isdigit(NEXT_SYMBOL)) {
+                state = CORE_INVALID;
+                transition_count[CORE_INT]++;
+            }
+            break;
+        case CORE_FLOAT: /* further digits keep the machine in this state */
+            if( NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e' ) {
+                state = CORE_S2;
+                transition_count[CORE_FLOAT]++;
+            }
+            else if(!ee_isdigit(NEXT_SYMBOL)) {
+                state = CORE_INVALID;
+                transition_count[CORE_FLOAT]++;
+            }
+            break;
+        case CORE_S2: /* exponent marker seen; a sign must follow */
+            if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) {
+                state = CORE_EXPONENT;
+                transition_count[CORE_S2]++;
+            }
+            else {
+                state = CORE_INVALID;
+                transition_count[CORE_S2]++;
+            }
+            break;
+        case CORE_EXPONENT: /* exponent sign seen; a digit must follow */
+            if(ee_isdigit(NEXT_SYMBOL)) {
+                state = CORE_SCIENTIFIC;
+                transition_count[CORE_EXPONENT]++;
+            }
+            else {
+                state = CORE_INVALID;
+                transition_count[CORE_EXPONENT]++;
+            }
+            break;
+        case CORE_SCIENTIFIC: /* further digits keep the machine in this state */
+            if(!ee_isdigit(NEXT_SYMBOL)) {
+                state = CORE_INVALID;
+                transition_count[CORE_INVALID]++;
+            }
+            break;
+        default:
+            break;
+        }
+    }
+    *instr=str;
+    return state;
+}
+/*
+Author : Shay Gal-On, EEMBC
+
+This file is part of  EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009
+All rights reserved.
+
+EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the
+CoreMark License that is distributed with the official EEMBC COREMARK Software release.
+If you received this EEMBC CoreMark Software without the accompanying CoreMark License,
+you must discontinue use and download the official release from www.coremark.org.
+
+Also, if you are publicly displaying scores generated from the EEMBC CoreMark software,
+make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file.
+
+EEMBC
+4354 Town Center Blvd. Suite 114-200
+El Dorado Hills, CA, 95762
+*/
+//#include "coremark.h"
+/* Function: get_seed
+    Get values that cannot be determined at compile time.
+ + Since different embedded systems and compilers are used, 3 different methods are provided: + 1 - Using a volatile variable. This method is only valid if the compiler is forced to generate code that + reads the value of a volatile variable from memory at run time. + Please note, if using this method, you would need to modify core_portme.c to generate training profile. + 2 - Command line arguments. This is the preferred method if command line arguments are supported. + 3 - System function. If none of the first 2 methods is available on the platform, + a system function which is not a stub can be used. + + e.g. read the value on GPIO pins connected to switches, or invoke special simulator functions. +*/ +#if (SEED_METHOD==SEED_VOLATILE) + extern volatile ee_s32 seed1_volatile; + extern volatile ee_s32 seed2_volatile; + extern volatile ee_s32 seed3_volatile; + extern volatile ee_s32 seed4_volatile; + extern volatile ee_s32 seed5_volatile; + ee_s32 get_seed_32(int i) { + ee_s32 retval; + switch (i) { + case 1: + retval=seed1_volatile; + break; + case 2: + retval=seed2_volatile; + break; + case 3: + retval=seed3_volatile; + break; + case 4: + retval=seed4_volatile; + break; + case 5: + retval=seed5_volatile; + break; + default: + retval=0; + break; + } + return retval; + } +#elif (SEED_METHOD==SEED_ARG) +ee_s32 parseval(char *valstring) { + ee_s32 retval=0; + ee_s32 neg=1; + int hexmode=0; + if (*valstring == '-') { + neg=-1; + valstring++; + } + if ((valstring[0] == '0') && (valstring[1] == 'x')) { + hexmode=1; + valstring+=2; + } + /* first look for digits */ + if (hexmode) { + while (((*valstring >= '0') && (*valstring <= '9')) || ((*valstring >= 'a') && (*valstring <= 'f'))) { + ee_s32 digit=*valstring-'0'; + if (digit>9) + digit=10+*valstring-'a'; + retval*=16; + retval+=digit; + valstring++; + } + } else { + while ((*valstring >= '0') && (*valstring <= '9')) { + ee_s32 digit=*valstring-'0'; + retval*=10; + retval+=digit; + valstring++; + } + } + /* now add 
qualifiers */ + if (*valstring=='K') + retval*=1024; + if (*valstring=='M') + retval*=1024*1024; + + retval*=neg; + return retval; +} + +ee_s32 get_seed_args(int i, int argc, char *argv[]) { + if (argc>i) + return parseval(argv[i]); + return 0; +} + +#elif (SEED_METHOD==SEED_FUNC) +/* If using OS based function, you must define and implement the functions below in core_portme.h and core_portme.c ! */ +ee_s32 get_seed_32(int i) { + ee_s32 retval; + switch (i) { + case 1: + retval=portme_sys1(); + break; + case 2: + retval=portme_sys2(); + break; + case 3: + retval=portme_sys3(); + break; + case 4: + retval=portme_sys4(); + break; + case 5: + retval=portme_sys5(); + break; + default: + retval=0; + break; + } + return retval; +} +#endif + +/* Function: crc* + Service functions to calculate 16b CRC code. + +*/ +ee_u16 crcu8(ee_u8 data, ee_u16 crc ) +{ + ee_u8 i=0,x16=0,carry=0; + + for (i = 0; i < 8; i++) + { + x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1)); + data >>= 1; + + if (x16 == 1) + { + crc ^= 0x4002; + carry = 1; + } + else + carry = 0; + crc >>= 1; + if (carry) + crc |= 0x8000; + else + crc &= 0x7fff; + } + return crc; +} +ee_u16 crcu16(ee_u16 newval, ee_u16 crc) { + crc=crcu8( (ee_u8) (newval) ,crc); + crc=crcu8( (ee_u8) ((newval)>>8) ,crc); + return crc; +} +ee_u16 crcu32(ee_u32 newval, ee_u16 crc) { + crc=crc16((ee_s16) newval ,crc); + crc=crc16((ee_s16) (newval>>16) ,crc); + return crc; +} +ee_u16 crc16(ee_s16 newval, ee_u16 crc) { + return crcu16((ee_u16)newval, crc); +} + +ee_u8 check_data_types() { + ee_u8 retval=0; + if (sizeof(ee_u8) != 1) { + ee_printf("ERROR: ee_u8 is not an 8b datatype!\n"); + retval++; + } + if (sizeof(ee_u16) != 2) { + ee_printf("ERROR: ee_u16 is not a 16b datatype!\n"); + retval++; + } + if (sizeof(ee_s16) != 2) { + ee_printf("ERROR: ee_s16 is not a 16b datatype!\n"); + retval++; + } + if (sizeof(ee_s32) != 4) { + ee_printf("ERROR: ee_s32 is not a 32b datatype!\n"); + retval++; + } + if (sizeof(ee_u32) != 4) { + 
ee_printf("ERROR: ee_u32 is not a 32b datatype!\n"); + retval++; + } + if (sizeof(ee_ptr_int) != sizeof(int *)) { + ee_printf("ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n"); + retval++; + } + if (retval>0) { + ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n"); + } + return retval; +} +/* + File : core_portme.c +*/ +/* + Author : Shay Gal-On, EEMBC + Legal : TODO! +*/ +#include +#include +//#include "coremark.h" + +#if VALIDATION_RUN + volatile ee_s32 seed1_volatile=0x3415; + volatile ee_s32 seed2_volatile=0x3415; + volatile ee_s32 seed3_volatile=0x66; +#endif +#if PERFORMANCE_RUN + volatile ee_s32 seed1_volatile=0x0; + volatile ee_s32 seed2_volatile=0x0; + volatile ee_s32 seed3_volatile=0x66; +#endif +#if PROFILE_RUN + volatile ee_s32 seed1_volatile=0x8; + volatile ee_s32 seed2_volatile=0x8; + volatile ee_s32 seed3_volatile=0x8; +#endif + volatile ee_s32 seed4_volatile=ITERATIONS; + volatile ee_s32 seed5_volatile=0; +/* Porting : Timing functions + How to capture time and convert to seconds must be ported to whatever is supported by the platform. + e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc. + Sample implementation for standard time.h and windows.h definitions included. +*/ +/* Define : TIMER_RES_DIVIDER + Divider to trade off timer resolution and total time that can be measured. + + Use lower values to increase resolution, but make sure that overflow does not occur. + If there are issues with the return value overflowing, increase this value. 
+ */ +//#define NSECS_PER_SEC CLOCKS_PER_SEC +#define NSECS_PER_SEC 1000000000 +#define CORETIMETYPE clock_t +//#define GETMYTIME(_t) (*_t=clock()) +#define GETMYTIME(_t) (*_t=0) +#define MYTIMEDIFF(fin,ini) ((fin)-(ini)) +#define TIMER_RES_DIVIDER 1 +#define SAMPLE_TIME_IMPLEMENTATION 1 +//#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) + +#define EE_TICKS_PER_SEC 1000 + +/** Define Host specific (POSIX), or target specific global time variables. */ +static CORETIMETYPE start_time_val, stop_time_val; + +/* Function : start_time + This function will be called right before starting the timed portion of the benchmark. + + Implementation may be capturing a system timer (as implemented in the example code) + or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. +*/ +void start_time(void) { +uint32_t mcyclel; + asm volatile ("csrr %0,mcycle" : "=r" (mcyclel) ); + start_time_val = mcyclel; +} +/* Function : stop_time + This function will be called right after ending the timed portion of the benchmark. + + Implementation may be capturing a system timer (as implemented in the example code) + or other system parameters - e.g. reading the current value of cpu cycles counter. +*/ +void stop_time(void) { +uint32_t mcyclel; + asm volatile ("csrr %0,mcycle" : "=r" (mcyclel) ); + stop_time_val = mcyclel; +} +/* Function : get_time + Return an abstract "ticks" number that signifies time on the system. + + Actual value returned may be cpu cycles, milliseconds or any other value, + as long as it can be converted to seconds by . + This methodology is taken to accomodate any hardware or simulated platform. + The sample implementation returns millisecs by default, + and the resolution is controlled by +*/ +CORE_TICKS get_time(void) { + CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); + return elapsed; +} +/* Function : time_in_secs + Convert the value returned by get_time to seconds. 
+ + The type is used to accomodate systems with no support for floating point. + Default implementation implemented by the EE_TICKS_PER_SEC macro above. +*/ +secs_ret time_in_secs(CORE_TICKS ticks) { + secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; + return retval; +} + +ee_u32 default_num_contexts=1; + +/* Function : portable_init + Target specific initialization code + Test for some common mistakes. +*/ +void portable_init(core_portable *p, int *argc, char *argv[]) +{ + if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) { + ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n"); + } + if (sizeof(ee_u32) != 4) { + ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); + } + p->portable_id=1; +} +/* Function : portable_fini + Target specific final code +*/ +void portable_fini(core_portable *p) +{ + p->portable_id=0; +} + + +void* memset(void* s, int c, size_t n) +{ + asm("mv t0, a0"); + asm("add a2, a2, a0"); // end = s + n + asm(".memset_loop: bge a0, a2, .memset_end"); + asm("sb a1, 0(a0)"); + asm("addi a0, a0, 1"); + asm("j .memset_loop"); + asm(".memset_end:"); + asm("mv a0, t0"); + asm("jr ra"); +} diff --git a/testbench/tests/cmark_dccm/cmark_dccm.ld b/testbench/tests/cmark_dccm/cmark_dccm.ld new file mode 100644 index 0000000..f088a41 --- /dev/null +++ b/testbench/tests/cmark_dccm/cmark_dccm.ld @@ -0,0 +1,12 @@ +OUTPUT_ARCH( "riscv" ) +ENTRY(_start) + +SECTIONS { + .text : { *(.text*) } + _end = .; + . = 0xf0040000; + .data : { *(.*data) *(.rodata*) *(.sbss) STACK = ALIGN(16) + 0x1000;} + .bss : { *(.bss) } + . 
= 0xfffffff8; + .data.ctl : { LONG(0xf0040000); LONG(STACK) } +} diff --git a/testbench/tests/cmark_dccm/crt0.c b/testbench/tests/cmark_dccm/crt0.c new file mode 100644 index 0000000..c4bb3a0 --- /dev/null +++ b/testbench/tests/cmark_dccm/crt0.c @@ -0,0 +1,29 @@ +extern int STACK; +void main(); + + +#define STDOUT 0xd0580000 + +__asm (".section .text"); +__asm (".global _start"); +__asm ("_start:"); + +// Enable Caches in MRAC +__asm ("li t0, 0x5f555555"); +__asm ("csrw 0x7c0, t0"); + +// Set stack pointer. +__asm ("la sp, STACK"); + +__asm ("jal main"); + +// Write 0xff to STDOUT for TB to termiate test. +__asm (".global _finish"); +__asm ("_finish:"); +__asm ("li t0, 0xd0580000"); +__asm ("addi t1, zero, 0xff"); +__asm ("sb t1, 0(t0)"); +__asm ("beq x0, x0, _finish"); +__asm (".rept 10"); +__asm ("nop"); +__asm (".endr"); diff --git a/testbench/tests/cmark_dccm/printf.c b/testbench/tests/cmark_dccm/printf.c new file mode 100644 index 0000000..d93a583 --- /dev/null +++ b/testbench/tests/cmark_dccm/printf.c @@ -0,0 +1,191 @@ +#include + +// This should be in some .h file. 
+#define STDOUT 0xd0580000 + +static int +whisperPutc(char c) +{ +// __whisper_console_io = c; +// __whisper_console_io = c; + *(volatile char*)(STDOUT) = c; + return c; +} + + +static int +whisperPuts(const char* s) +{ + while (*s) + whisperPutc(*s++); + return 1; +} + + +static int +whisperPrintDecimal(int value) +{ + char buffer[20]; + int charCount = 0; + + unsigned neg = value < 0; + if (neg) + { + value = -value; + whisperPutc('-'); + } + + do + { + char c = '0' + (value % 10); + value = value / 10; + buffer[charCount++] = c; + } + while (value); + + char* p = buffer + charCount - 1; + for (unsigned i = 0; i < charCount; ++i) + whisperPutc(*p--); + + if (neg) + charCount++; + + return charCount; +} + + +static int +whisperPrintInt(int value, int base) +{ + if (base == 10) + return whisperPrintDecimal(value); + + char buffer[20]; + int charCount = 0; + + unsigned uu = value; + + if (base == 8) + { + do + { + char c = '0' + (uu & 7); + buffer[charCount++] = c; + uu >>= 3; + } + while (uu); + } + else if (base == 16) + { + do + { + int digit = uu & 0xf; + char c = digit < 10 ? '0' + digit : 'a' + digit - 10; + buffer[charCount++] = c; + uu >>= 4; + } + while (uu); + } + else + return -1; + + char* p = buffer + charCount - 1; + for (unsigned i = 0; i < charCount; ++i) + whisperPutc(*p--); + + return charCount; +} + + +int +whisperPrintfImpl(const char* format, va_list ap) +{ + int count = 0; // Printed character count + + for (const char* fp = format; *fp; fp++) + { + if (*fp != '%') + { + whisperPutc(*fp); + ++count; + continue; + } + + ++fp; // Skip % + + if (*fp == 0) + break; + + if (*fp == '%') + { + whisperPutc('%'); + continue; + } + + if (*fp == '-') + { + fp++; // Pad right not yet implemented. + } + + while (*fp == '0') + { + fp++; // Pad zero not yet implented. + } + + if (*fp == '*') + { + int width = va_arg(ap, int); + fp++; // Width not yet implemented. + } + else + { + while (*fp >= '0' && *fp <= '9') + ++fp; // Width not yet implemented. 
+ } + + switch (*fp) + { + case 'd': + count += whisperPrintDecimal(va_arg(ap, int)); + break; + + case 'u': + count += whisperPrintDecimal((unsigned) va_arg(ap, unsigned)); + break; + + case 'x': + case 'X': + count += whisperPrintInt(va_arg(ap, int), 16); + break; + + case 'o': + count += whisperPrintInt(va_arg(ap, int), 8); + break; + + case 'c': + whisperPutc(va_arg(ap, int)); + ++count; + break; + + case 's': + count += whisperPuts(va_arg(ap, char*)); + break; + } + } + + return count; +} + + +int +whisperPrintf(const char* format, ...) +{ + va_list ap; + + va_start(ap, format); + int code = whisperPrintfImpl(format, ap); + va_end(ap); + + return code; +} + diff --git a/testbench/tests/dhry/README b/testbench/tests/dhry/README new file mode 100644 index 0000000..9e7b668 --- /dev/null +++ b/testbench/tests/dhry/README @@ -0,0 +1,7 @@ +This is dhrystone, compiled according to the spec: + 1. Files dhry_1.c and dhry2_.c compiled separately. + 2. No inlining. + to run in demo TB: + + make -f $RV_ROOT/tools/Makefile [] TEST=dhry + diff --git a/testbench/tests/dhry/crt0.s b/testbench/tests/dhry/crt0.s new file mode 120000 index 0000000..d09de58 --- /dev/null +++ b/testbench/tests/dhry/crt0.s @@ -0,0 +1 @@ +../../asm/crt0.s \ No newline at end of file diff --git a/testbench/tests/dhry/dhry.h b/testbench/tests/dhry/dhry.h new file mode 100644 index 0000000..d894ba1 --- /dev/null +++ b/testbench/tests/dhry/dhry.h @@ -0,0 +1,437 @@ +#pragma once + +/* + **************************************************************************** + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry.h (part 1 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. 
Weicker + * Siemens AG, E STE 35 + * Postfach 3240 + * 8520 Erlangen + * Germany (West) + * Phone: [xxx-49]-9131-7-20330 + * (8-17 Central European Time) + * Usenet: ..!mcvax!unido!estevax!weicker + * + * Original Version (in Ada) published in + * "Communications of the ACM" vol. 27., no. 10 (Oct. 1984), + * pp. 1013 - 1030, together with the statistics + * on which the distribution of statements etc. is based. + * + * In this C version, the following C library functions are used: + * - strcpy, strcmp (inside the measurement loop) + * - printf, scanf (outside the measurement loop) + * In addition, Berkeley UNIX system calls "times ()" or "time ()" + * are used for execution time measurement. For measurements + * on other systems, these calls have to be changed. + * + * Collection of Results: + * Reinhold Weicker (address see above) and + * + * Rick Richardson + * PC Research. Inc. + * 94 Apple Orchard Drive + * Tinton Falls, NJ 07724 + * Phone: (201) 389-8963 (9-17 EST) + * Usenet: ...!uunet!pcrat!rick + * + * Please send results to Rick Richardson and/or Reinhold Weicker. + * Complete information should be given on hardware and software used. + * Hardware information includes: Machine type, CPU, type and size + * of caches; for microprocessors: clock frequency, memory speed + * (number of wait states). + * Software information includes: Compiler (and runtime library) + * manufacturer and version, compilation switches, OS version. + * The Operating System version may give an indication about the + * compiler; Dhrystone itself performs no OS calls in the measurement loop. + * + * The complete output generated by the program should be mailed + * such that at least some checks for correctness can be made. 
+ * + *************************************************************************** + * + * History: This version C/2.1 has been made for two reasons: + * + * 1) There is an obvious need for a common C version of + * Dhrystone, since C is at present the most popular system + * programming language for the class of processors + * (microcomputers, minicomputers) where Dhrystone is used most. + * There should be, as far as possible, only one C version of + * Dhrystone such that results can be compared without + * restrictions. In the past, the C versions distributed + * by Rick Richardson (Version 1.1) and by Reinhold Weicker + * had small (though not significant) differences. + * + * 2) As far as it is possible without changes to the Dhrystone + * statistics, optimizing compilers should be prevented from + * removing significant statements. + * + * This C version has been developed in cooperation with + * Rick Richardson (Tinton Falls, NJ), it incorporates many + * ideas from the "Version 1.1" distributed previously by + * him over the UNIX network Usenet. + * I also thank Chaim Benedelac (National Semiconductor), + * David Ditzel (SUN), Earl Killian and John Mashey (MIPS), + * Alan Smith and Rafael Saavedra-Barrera (UC at Berkeley) + * for their help with comments on earlier versions of the + * benchmark. + * + * Changes: In the initialization part, this version follows mostly + * Rick Richardson's version distributed via Usenet, not the + * version distributed earlier via floppy disk by Reinhold Weicker. + * As a concession to older compilers, names have been made + * unique within the first 8 characters. + * Inside the measurement loop, this version follows the + * version previously distributed by Reinhold Weicker. + * + * At several places in the benchmark, code has been added, + * but within the measurement loop only in branches that + * are not executed. 
The intention is that optimizing compilers + * should be prevented from moving code out of the measurement + * loop, or from removing code altogether. Since the statements + * that are executed within the measurement loop have NOT been + * changed, the numbers defining the "Dhrystone distribution" + * (distribution of statements, operand types and locality) + * still hold. Except for sophisticated optimizing compilers, + * execution times for this version should be the same as + * for previous versions. + * + * Since it has proven difficult to subtract the time for the + * measurement loop overhead in a correct way, the loop check + * has been made a part of the benchmark. This does have + * an impact - though a very minor one - on the distribution + * statistics which have been updated for this version. + * + * All changes within the measurement loop are described + * and discussed in the companion paper "Rationale for + * Dhrystone version 2". + * + * Because of the self-imposed limitation that the order and + * distribution of the executed statements should not be + * changed, there are still cases where optimizing compilers + * may not generate code for some statements. To a certain + * degree, this is unavoidable for small synthetic benchmarks. + * Users of the benchmark are advised to check code listings + * whether code is generated for all statements of Dhrystone. + * + * Version 2.1 is identical to version 2.0 distributed via + * the UNIX network Usenet in March 1988 except that it corrects + * some minor deficiencies that were found by users of version 2.0. + * The only change within the measurement loop is that a + * non-executed "else" part was added to the "if" statement in + * Func_3, and a non-executed "else" part removed from Proc_3. 
+ * + *************************************************************************** + * + * Defines: The following "Defines" are possible: + * -DREG=register (default: Not defined) + * As an approximation to what an average C programmer + * might do, the "register" storage class is applied + * (if enabled by -DREG=register) + * - for local variables, if they are used (dynamically) + * five or more times + * - for parameters if they are used (dynamically) + * six or more times + * Note that an optimal "register" strategy is + * compiler-dependent, and that "register" declarations + * do not necessarily lead to faster execution. + * -DNOSTRUCTASSIGN (default: Not defined) + * Define if the C compiler does not support + * assignment of structures. + * -DNOENUMS (default: Not defined) + * Define if the C compiler does not support + * enumeration types. + * -DTIMES (default) + * -DTIME + * The "times" function of UNIX (returning process times) + * or the "time" function (returning wallclock time) + * is used for measurement. + * For single user machines, "time ()" is adequate. For + * multi-user machines where you cannot get single-user + * access, use the "times ()" function. If you have + * neither, use a stopwatch in the dead of night. + * "printf"s are provided marking the points "Start Timer" + * and "Stop Timer". DO NOT use the UNIX "time(1)" + * command, as this will measure the total time to + * run this program, which will (erroneously) include + * the time to allocate storage (malloc) and to perform + * the initialization. + * -DHZ=nnn + * In Berkeley UNIX, the function "times" returns process + * time in 1/HZ seconds, with HZ = 60 for most systems. + * CHECK YOUR SYSTEM DESCRIPTION BEFORE YOU JUST APPLY + * A VALUE. 
+ * + *************************************************************************** + * + * Compilation model and measurement (IMPORTANT): + * + * This C version of Dhrystone consists of three files: + * - dhry.h (this file, containing global definitions and comments) + * - dhry_1.c (containing the code corresponding to Ada package Pack_1) + * - dhry_2.c (containing the code corresponding to Ada package Pack_2) + * + * The following "ground rules" apply for measurements: + * - Separate compilation + * - No procedure merging + * - Otherwise, compiler optimizations are allowed but should be indicated + * - Default results are those without register declarations + * See the companion paper "Rationale for Dhrystone Version 2" for a more + * detailed discussion of these ground rules. + * + * For 16-Bit processors (e.g. 80186, 80286), times for all compilation + * models ("small", "medium", "large" etc.) should be given if possible, + * together with a definition of these models for the compiler system used. + * + ************************************************************************** + * + * Dhrystone (C version) statistics: + * + * [Comment from the first distribution, updated for version 2. + * Note that because of language differences, the numbers are slightly + * different from the Ada version.] + * + * The following program contains statements of a high level programming + * language (here: C) in a distribution considered representative: + * + * assignments 52 (51.0 %) + * control statements 33 (32.4 %) + * procedure, function calls 17 (16.7 %) + * + * 103 statements are dynamically executed. The program is balanced with + * respect to the three aspects: + * + * - statement type + * - operand type + * - operand locality + * operand global, local, parameter, or constant. + * + * The combination of these three aspects is balanced only approximately. + * + * 1. Statement Type: + * ----------------- number + * + * V1 = V2 9 + * (incl. V1 = F(..) 
+ * V = Constant 12 + * Assignment, 7 + * with array element + * Assignment, 6 + * with record component + * -- + * 34 34 + * + * X = Y +|-|"&&"|"|" Z 5 + * X = Y +|-|"==" Constant 6 + * X = X +|- 1 3 + * X = Y *|/ Z 2 + * X = Expression, 1 + * two operators + * X = Expression, 1 + * three operators + * -- + * 18 18 + * + * if .... 14 + * with "else" 7 + * without "else" 7 + * executed 3 + * not executed 4 + * for ... 7 | counted every time + * while ... 4 | the loop condition + * do ... while 1 | is evaluated + * switch ... 1 + * break 1 + * declaration with 1 + * initialization + * -- + * 34 34 + * + * P (...) procedure call 11 + * user procedure 10 + * library procedure 1 + * X = F (...) + * function call 6 + * user function 5 + * library function 1 + * -- + * 17 17 + * --- + * 103 + * + * The average number of parameters in procedure or function calls + * is 1.82 (not counting the function values aX * + * + * 2. Operators + * ------------ + * number approximate + * percentage + * + * Arithmetic 32 50.8 + * + * + 21 33.3 + * - 7 11.1 + * * 3 4.8 + * / (int div) 1 1.6 + * + * Comparison 27 42.8 + * + * == 9 14.3 + * /= 4 6.3 + * > 1 1.6 + * < 3 4.8 + * >= 1 1.6 + * <= 9 14.3 + * + * Logic 4 6.3 + * + * && (AND-THEN) 1 1.6 + * | (OR) 1 1.6 + * ! (NOT) 2 3.2 + * + * -- ----- + * 63 100.1 + * + * + * 3. Operand Type (counted once per operand reference): + * --------------- + * number approximate + * percentage + * + * Integer 175 72.3 % + * Character 45 18.6 % + * Pointer 12 5.0 % + * String30 6 2.5 % + * Array 2 0.8 % + * Record 2 0.8 % + * --- ------- + * 242 100.0 % + * + * When there is an access path leading to the final operand (e.g. a record + * component), only the final data type on the access path is counted. + * + * + * 4. 
Operand Locality: + * ------------------- + * number approximate + * percentage + * + * local variable 114 47.1 % + * global variable 22 9.1 % + * parameter 45 18.6 % + * value 23 9.5 % + * reference 22 9.1 % + * function result 6 2.5 % + * constant 55 22.7 % + * --- ------- + * 242 100.0 % + * + * + * The program does not compute anything meaningful, but it is syntactically + * and semantically correct. All variables have a value assigned to them + * before they are used as a source operand. + * + * There has been no explicit effort to account for the effects of a + * cache, or to balance the use of long or short displacements for code or + * data. + * + *************************************************************************** + */ + +/* Compiler and system dependent definitions: */ + +#ifndef TIME +#undef TIMES +#define TIMES +#endif + /* Use times(2) time function unless */ + /* explicitly defined otherwise */ + +#ifdef MSC_CLOCK +#undef HZ +#undef TIMES +#include +#define HZ CLK_TCK +#endif + /* Use Microsoft C hi-res clock */ + +#ifdef TIMES +#include +#include + +#ifndef HZ +#define HZ 100 +#endif + /* for "times" */ +#endif + +#define Mic_secs_Per_Second 1000000.0 + /* Berkeley UNIX C returns process times in seconds/HZ */ + +#ifdef NOSTRUCTASSIGN +#define structassign(d, s) memcpy(&(d), &(s), sizeof(d)) +#else +#define structassign(d, s) d = s +#endif + +#ifdef NOENUM +#define Ident_1 0 +#define Ident_2 1 +#define Ident_3 2 +#define Ident_4 3 +#define Ident_5 4 + typedef int Enumeration; +#else + typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5} + Enumeration; +#endif + /* for boolean and enumeration types in Ada, Pascal */ + +/* General definitions: */ + +//#include + /* for strcpy, strcmp */ + +#define Null 0 + /* Value of a Null pointer */ +#define true 1 +#define false 0 + +typedef int One_Thirty; +typedef int One_Fifty; +typedef char Capital_Letter; +typedef int Boolean; +typedef char Str_30 [31]; +typedef int Arr_1_Dim [50]; +typedef int 
Arr_2_Dim [50] [50]; + +typedef struct record + { + struct record *Ptr_Comp; + Enumeration Discr; + union { + struct { + Enumeration Enum_Comp; + int Int_Comp; + char Str_Comp [31]; + } var_1; + struct { + Enumeration E_Comp_2; + char Str_2_Comp [31]; + } var_2; + struct { + char Ch_1_Comp; + char Ch_2_Comp; + } var_3; + } variant; + } Rec_Type, *Rec_Pointer; + + diff --git a/testbench/tests/dhry/dhry.ld b/testbench/tests/dhry/dhry.ld new file mode 120000 index 0000000..d70ba74 --- /dev/null +++ b/testbench/tests/dhry/dhry.ld @@ -0,0 +1 @@ +../../asm/hello_world_dccm.ld \ No newline at end of file diff --git a/testbench/tests/dhry/dhry.mki b/testbench/tests/dhry/dhry.mki new file mode 100644 index 0000000..aa1d63c --- /dev/null +++ b/testbench/tests/dhry/dhry.mki @@ -0,0 +1,2 @@ +OFILES = crt0.o dhry_1.o dhry_2.o printf.o +TEST_CFLAGS = -g -O3 diff --git a/testbench/tests/dhry/dhry_1.c b/testbench/tests/dhry/dhry_1.c new file mode 100644 index 0000000..6ebbd9c --- /dev/null +++ b/testbench/tests/dhry/dhry_1.c @@ -0,0 +1,452 @@ +#define SWERV +/* + **************************************************************************** + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry_1.c (part 2 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. Weicker + * + **************************************************************************** + */ + +#ifdef SWERV +#include +#include +extern uint64_t get_mcycle(); +#endif + +#include "dhry.h" + +/* Global Variables: */ + +Rec_Pointer Ptr_Glob, + Next_Ptr_Glob; +int Int_Glob; +Boolean Bool_Glob; +char Ch_1_Glob, + Ch_2_Glob; +int Arr_1_Glob [50]; +int Arr_2_Glob [50] [50]; + +Enumeration Func_1 (); + /* forward declaration necessary since Enumeration may not simply be int */ + +#ifndef REG + Boolean Reg = false; +#define REG + /* REG becomes defined as empty */ + /* i.e. 
no register variables */ +#else + Boolean Reg = true; +#endif + +/* variables for time measurement: */ + +#ifdef TIMES +struct tms time_info; +#define Too_Small_Time (2*HZ) + /* Measurements should last at least about 2 seconds */ +#endif +#ifdef TIME +extern long time(); + /* see library function "time" */ +#define Too_Small_Time 2 + /* Measurements should last at least 2 seconds */ +#endif +#ifdef MSC_CLOCK +extern clock_t clock(); +#define Too_Small_Time (2*HZ) +#endif + +long + Begin_Time, + End_Time, + User_Time; + +float Microseconds, + Dhrystones_Per_Second; + +/* end of variables for time measurement */ + + +extern char* strcpy(char*, const char*); + +extern Boolean Func_2 (Str_30, Str_30); +extern void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, + One_Fifty *Int_Par_Ref); + +extern void Proc_8 (Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, + int Int_1_Par_Val, int Int_2_Par_Val); + +extern void Proc_6 (Enumeration Enum_Val_Par, + Enumeration *Enum_Ref_Par); + +void Proc_5(); +void Proc_4(); + +void Proc_1(Rec_Pointer Ptr_Val_Par); +void Proc_2(One_Fifty *Int_Par_Ref); +void Proc_3(Rec_Pointer *Ptr_Ref_Par); + + +int +main () +/*****/ + + /* main program, corresponds to procedures */ + /* Main and Proc_0 in the Ada version */ +{ + One_Fifty Int_1_Loc; + REG One_Fifty Int_2_Loc; + One_Fifty Int_3_Loc; + REG char Ch_Index; + Enumeration Enum_Loc; + Str_30 Str_1_Loc; + Str_30 Str_2_Loc; + REG int Run_Index; + REG int Number_Of_Runs; + + /* Initializations */ + + Rec_Type rec0; + Rec_Type rec1; + + Next_Ptr_Glob = &rec0; + Ptr_Glob = &rec1; + + Ptr_Glob->Ptr_Comp = Next_Ptr_Glob; + Ptr_Glob->Discr = Ident_1; + Ptr_Glob->variant.var_1.Enum_Comp = Ident_3; + Ptr_Glob->variant.var_1.Int_Comp = 40; + strcpy (Ptr_Glob->variant.var_1.Str_Comp, + "DHRYSTONE PROGRAM, SOME STRING"); + strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING"); + + Arr_2_Glob [8][7] = 10; + /* Was missing in published program. 
Without this statement, */ + /* Arr_2_Glob [8][7] would have an undefined value. */ + /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */ + /* overflow may occur for this array element. */ + + printf ("Dhrystone Benchmark, Version 2.1 (Language: C)\n"); + if (Reg) + { + printf ("Program compiled with 'register' attribute\n"); + } + else + { + printf ("Program compiled without 'register' attribute\n"); + } + + #ifndef SWERV + printf ("Please give the number of runs through the benchmark: "); + { + int n = 1000; + scanf ("%d", &n); + Number_Of_Runs = n; + } + printf ("\n"); + #else + // We do not have scanf. Hardwire number of runs. + Number_Of_Runs = 1000; + #endif + + printf ("Execution starts, %d runs through Dhrystone\n", Number_Of_Runs); + + /***************/ + /* Start timer */ + /***************/ + +#ifdef SWERV + Begin_Time = get_mcycle(); +#else + +#ifdef TIMES + times (&time_info); + Begin_Time = (long) time_info.tms_utime; +#endif +#ifdef TIME + Begin_Time = time ( (long *) 0); +#endif +#ifdef MSC_CLOCK + Begin_Time = clock(); +#endif + +#endif + + __asm("__perf_start:"); + + for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index) + { + __asm("__loop_start:"); + + Proc_5(); + Proc_4(); + /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */ + Int_1_Loc = 2; + Int_2_Loc = 3; + strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING"); + Enum_Loc = Ident_2; + Bool_Glob = ! 
Func_2 (Str_1_Loc, Str_2_Loc); + /* Bool_Glob == 1 */ + while (Int_1_Loc < Int_2_Loc) /* loop body executed once */ + { + Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc; + /* Int_3_Loc == 7 */ + Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc); + /* Int_3_Loc == 7 */ + Int_1_Loc += 1; + } /* while */ + /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ + Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc); + /* Int_Glob == 5 */ + Proc_1 (Ptr_Glob); + for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index) + /* loop body executed twice */ + { + if (Enum_Loc == Func_1 (Ch_Index, 'C')) + /* then, not executed */ + { + Proc_6 (Ident_1, &Enum_Loc); + strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING"); + Int_2_Loc = Run_Index; + Int_Glob = Run_Index; + } + } + /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ + Int_2_Loc = Int_2_Loc * Int_1_Loc; + Int_1_Loc = Int_2_Loc / Int_3_Loc; + Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc; + /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */ + Proc_2 (&Int_1_Loc); + /* Int_1_Loc == 5 */ + + } /* loop "for Run_Index" */ + + __asm("__perf_end:"); + + /**************/ + /* Stop timer */ + /**************/ + +#ifdef SWERV + End_Time = get_mcycle(); + printf("End_time=%d\n", (int) End_Time); +#else +#ifdef TIMES + times (&time_info); + End_Time = (long) time_info.tms_utime; +#endif +#ifdef TIME + End_Time = time ( (long *) 0); +#endif +#ifdef MSC_CLOCK + End_Time = clock(); +#endif + +#endif + + printf ("Final values of the variables used in the benchmark:\n\n"); + printf ("Int_Glob: %d\n", Int_Glob); + printf (" should be: %d\n", 5); + printf ("Bool_Glob: %d\n", Bool_Glob); + printf (" should be: %d\n", 1); + printf ("Ch_1_Glob: %c\n", Ch_1_Glob); + printf (" should be: %c\n", 'A'); + printf ("Ch_2_Glob: %c\n", Ch_2_Glob); + printf (" should be: %c\n", 'B'); + printf ("Arr_1_Glob[8]: %d\n", Arr_1_Glob[8]); + printf (" should be: %d\n", 7); + printf ("Arr_2_Glob[8][7]: %d\n", Arr_2_Glob[8][7]); + printf (" should be: Number_Of_Runs + 
10\n"); + printf ("Ptr_Glob->Ptr_Comp: %x\n", (int) Ptr_Glob->Ptr_Comp); + printf (" should be: (implementation-dependent)\n"); + printf (" Discr: %d\n", Ptr_Glob->Discr); + printf (" should be: %d\n", 0); + printf (" Enum_Comp: %d\n", Ptr_Glob->variant.var_1.Enum_Comp); + printf (" should be: %d\n", 2); + printf (" Int_Comp: %d\n", Ptr_Glob->variant.var_1.Int_Comp); + printf (" should be: %d\n", 17); + printf (" Str_Comp: %s", Ptr_Glob->variant.var_1.Str_Comp); + printf (" should be: DHRYSTONE PROGRAM, SOME STRING\n"); + printf ("Next_Ptr_Glob->Ptr_Comp:%x\n", (int) Next_Ptr_Glob->Ptr_Comp); + printf (" should be: (implementation-dependent), same as above\n"); + printf (" Discr: %d\n", Next_Ptr_Glob->Discr); + printf (" should be: %d\n", 0); + printf (" Enum_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp); + printf (" should be: %d\n", 1); + printf (" Int_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Int_Comp); + printf (" should be: %d\n", 18); + printf (" Str_Comp: %s", Next_Ptr_Glob->variant.var_1.Str_Comp); + printf (" should be: DHRYSTONE PROGRAM, SOME STRING\n"); + printf ("Int_1_Loc: %d\n", Int_1_Loc); + printf (" should be: %d\n", 5); + printf ("Int_2_Loc: %d\n", Int_2_Loc); + printf (" should be: %d\n", 13); + printf ("Int_3_Loc: %d\n", Int_3_Loc); + printf (" should be: %d\n", 7); + printf ("Enum_Loc: %d\n", Enum_Loc); + printf (" should be: %d\n", 1); + printf ("Str_1_Loc: %s", Str_1_Loc); + printf (" should be: DHRYSTONE PROGRAM, 1'ST STRING\n"); + printf ("Str_2_Loc: %s", Str_2_Loc); + printf (" should be: DHRYSTONE PROGRAM, 2'ND STRING\n"); + printf ("\n"); + + User_Time = End_Time - Begin_Time; + + if (User_Time < Too_Small_Time) + { + printf ("User time %d\n", User_Time); + printf ("Measured time too small to obtain meaningful results\n"); + printf ("Please increase number of runs\n"); + printf ("\n"); + } + else + { +#ifdef SWERV + printf ("Run time = %d clocks for %d Dhrystones\n", User_Time, Number_Of_Runs ); + printf ("Dhrystones per Second 
per MHz: "); + printf ("%d.%02d", (int) (1000000LL*Number_Of_Runs/User_Time), (int) ((100000000LL*Number_Of_Runs/User_Time) % 100)); /* 64-bit math: 100000000*Number_Of_Runs (1e11 for 1000 runs) overflows a 32-bit int */ +#else +#ifdef TIME + Microseconds = (float) User_Time * Mic_secs_Per_Second + / (float) Number_Of_Runs; + Dhrystones_Per_Second = (float) Number_Of_Runs / (float) User_Time; +#else + Microseconds = (float) User_Time * Mic_secs_Per_Second + / ((float) HZ * ((float) Number_Of_Runs)); + Dhrystones_Per_Second = ((float) HZ * (float) Number_Of_Runs) + / (float) User_Time; +#endif + printf ("Microseconds for one run through Dhrystone: "); + printf ("%6.1f \n", Microseconds); + printf ("Dhrystones per Second: "); + printf ("%6.1f \n", Dhrystones_Per_Second); + +#endif + + printf ("\n"); + } + +} + + +void +Proc_1 (Ptr_Val_Par) +/******************/ + +REG Rec_Pointer Ptr_Val_Par; + /* executed once */ +{ + REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp; + /* == Ptr_Glob_Next */ + /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */ + /* corresponds to "rename" in Ada, "with" in Pascal */ + + structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob); + Ptr_Val_Par->variant.var_1.Int_Comp = 5; + Next_Record->variant.var_1.Int_Comp + = Ptr_Val_Par->variant.var_1.Int_Comp; + Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp; + Proc_3 (&Next_Record->Ptr_Comp); + /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp + == Ptr_Glob->Ptr_Comp */ + if (Next_Record->Discr == Ident_1) + /* then, executed */ + { + Next_Record->variant.var_1.Int_Comp = 6; + Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp, + &Next_Record->variant.var_1.Enum_Comp); + Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp; + Proc_7 (Next_Record->variant.var_1.Int_Comp, 10, + &Next_Record->variant.var_1.Int_Comp); + } + else /* not executed */ + structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp); +} /* Proc_1 */ + + +void +Proc_2 (Int_Par_Ref) +/******************/ + /* executed once */ + /* *Int_Par_Ref == 1, becomes 4 */ + +One_Fifty *Int_Par_Ref; +{ + One_Fifty Int_Loc; + Enumeration Enum_Loc; + + Int_Loc = *Int_Par_Ref + 
10; + do /* executed once */ + if (Ch_1_Glob == 'A') + /* then, executed */ + { + Int_Loc -= 1; + *Int_Par_Ref = Int_Loc - Int_Glob; + Enum_Loc = Ident_1; + } /* if */ + while (Enum_Loc != Ident_1); /* true */ +} /* Proc_2 */ + + +void +Proc_3 (Ptr_Ref_Par) +/******************/ + /* executed once */ + /* Ptr_Ref_Par becomes Ptr_Glob */ + +Rec_Pointer *Ptr_Ref_Par; + +{ + if (Ptr_Glob != Null) + /* then, executed */ + *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp; + Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp); +} /* Proc_3 */ + + +void +Proc_4 () /* without parameters */ +/*******/ + /* executed once */ +{ + Boolean Bool_Loc; + + Bool_Loc = Ch_1_Glob == 'A'; + Bool_Glob = Bool_Loc | Bool_Glob; + Ch_2_Glob = 'B'; +} /* Proc_4 */ + + +void +Proc_5 () /* without parameters */ +/*******/ + /* executed once */ +{ + Ch_1_Glob = 'A'; + Bool_Glob = false; +} /* Proc_5 */ + + + /* Procedure for the assignment of structures, */ + /* if the C compiler doesn't support this feature */ +#ifdef NOSTRUCTASSIGN +memcpy (d, s, l) +register char *d; +register char *s; +register int l; +{ + while (l--) *d++ = *s++; +} +#endif + + diff --git a/testbench/tests/dhry/dhry_2.c b/testbench/tests/dhry/dhry_2.c new file mode 100644 index 0000000..19ee89a --- /dev/null +++ b/testbench/tests/dhry/dhry_2.c @@ -0,0 +1,214 @@ +/* + **************************************************************************** + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry_2.c (part 3 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. Weicker + * + **************************************************************************** + */ + +#include "dhry.h" + +#ifndef REG +#define REG + /* REG becomes defined as empty */ + /* i.e. 
no register variables */ +#endif + +extern int Int_Glob; +extern char Ch_1_Glob; + +#if 0 +int +strcmp(const char* s1, const char* s2) +{ + while (*s1 && *s1 == *s2) + { + s1++; + s2++; + } + if (*s1 == *s2) + return 0; + return *s1 > *s2? 1 : -1; +} +#else +extern int strcmp( char* s1, char* s2); +#endif + +Boolean Func_3 (Enumeration Enum_Par_Val); + + +void +Proc_6 (Enum_Val_Par, Enum_Ref_Par) +/*********************************/ + /* executed once */ + /* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */ + +Enumeration Enum_Val_Par; +Enumeration *Enum_Ref_Par; +{ + *Enum_Ref_Par = Enum_Val_Par; + if (! Func_3 (Enum_Val_Par)) + /* then, not executed */ + *Enum_Ref_Par = Ident_4; + switch (Enum_Val_Par) + { + case Ident_1: + *Enum_Ref_Par = Ident_1; + break; + case Ident_2: + if (Int_Glob > 100) + /* then */ + *Enum_Ref_Par = Ident_1; + else *Enum_Ref_Par = Ident_4; + break; + case Ident_3: /* executed */ + *Enum_Ref_Par = Ident_2; + break; + case Ident_4: break; + case Ident_5: + *Enum_Ref_Par = Ident_3; + break; + } /* switch */ +} /* Proc_6 */ + + +void +Proc_7 (Int_1_Par_Val, Int_2_Par_Val, Int_Par_Ref) +/**********************************************/ + /* executed three times */ + /* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */ + /* Int_Par_Ref becomes 7 */ + /* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */ + /* Int_Par_Ref becomes 17 */ + /* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */ + /* Int_Par_Ref becomes 18 */ +One_Fifty Int_1_Par_Val; +One_Fifty Int_2_Par_Val; +One_Fifty *Int_Par_Ref; +{ + One_Fifty Int_Loc; + + Int_Loc = Int_1_Par_Val + 2; + *Int_Par_Ref = Int_2_Par_Val + Int_Loc; +} /* Proc_7 */ + + +void +Proc_8 (Arr_1_Par_Ref, Arr_2_Par_Ref, Int_1_Par_Val, Int_2_Par_Val) +/*********************************************************************/ + /* executed once */ + /* Int_Par_Val_1 == 3 */ + /* Int_Par_Val_2 == 7 */ +Arr_1_Dim Arr_1_Par_Ref; +Arr_2_Dim Arr_2_Par_Ref; +int Int_1_Par_Val; +int Int_2_Par_Val; 
+{ + REG One_Fifty Int_Index; + REG One_Fifty Int_Loc; + + Int_Loc = Int_1_Par_Val + 5; + Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val; + Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc]; + Arr_1_Par_Ref [Int_Loc+30] = Int_Loc; + for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index) + Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc; + Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1; + Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc]; + Int_Glob = 5; +} /* Proc_8 */ + + +Enumeration Func_1 (Ch_1_Par_Val, Ch_2_Par_Val) +/*************************************************/ + /* executed three times */ + /* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */ + /* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */ + /* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */ + +Capital_Letter Ch_1_Par_Val; +Capital_Letter Ch_2_Par_Val; +{ + Capital_Letter Ch_1_Loc; + Capital_Letter Ch_2_Loc; + + Ch_1_Loc = Ch_1_Par_Val; + Ch_2_Loc = Ch_1_Loc; + if (Ch_2_Loc != Ch_2_Par_Val) + /* then, executed */ + return (Ident_1); + else /* not executed */ + { + Ch_1_Glob = Ch_1_Loc; + return (Ident_2); + } +} /* Func_1 */ + + +Boolean Func_2 (Str_1_Par_Ref, Str_2_Par_Ref) +/*************************************************/ + /* executed once */ + /* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */ + /* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */ + +Str_30 Str_1_Par_Ref; +Str_30 Str_2_Par_Ref; +{ + REG One_Thirty Int_Loc; + Capital_Letter Ch_Loc; + + Int_Loc = 2; + while (Int_Loc <= 2) /* loop body executed once */ + if (Func_1 (Str_1_Par_Ref[Int_Loc], + Str_2_Par_Ref[Int_Loc+1]) == Ident_1) + /* then, executed */ + { + Ch_Loc = 'A'; + Int_Loc += 1; + } /* if, while */ + if (Ch_Loc >= 'W' && Ch_Loc < 'Z') + /* then, not executed */ + Int_Loc = 7; + if (Ch_Loc == 'R') + /* then, not executed */ + return (true); + else /* executed */ + { + if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0) + /* then, not executed */ + { + Int_Loc += 7; + Int_Glob = Int_Loc; 
+ return (true); + } + else /* executed */ + return (false); + } /* if Ch_Loc */ +} /* Func_2 */ + + +Boolean Func_3 (Enum_Par_Val) +/***************************/ + /* executed once */ + /* Enum_Par_Val == Ident_3 */ +Enumeration Enum_Par_Val; +{ + Enumeration Enum_Loc; + + Enum_Loc = Enum_Par_Val; + if (Enum_Loc == Ident_3) + /* then, executed */ + return (true); + else /* not executed */ + return (false); +} /* Func_3 */ + diff --git a/testbench/tests/dhry/printf.c b/testbench/tests/dhry/printf.c new file mode 120000 index 0000000..430ba5d --- /dev/null +++ b/testbench/tests/dhry/printf.c @@ -0,0 +1 @@ +../../asm/printf.c \ No newline at end of file diff --git a/testbench/tests/hello_world/Makefile b/testbench/tests/hello_world/Makefile new file mode 100644 index 0000000..b1e6717 --- /dev/null +++ b/testbench/tests/hello_world/Makefile @@ -0,0 +1,6 @@ +export TEST = hello_world +export OFILES = hello_world.o +export BUILD_DIR = ../snapshots/default + +clean .DEFAULT: + $(MAKE) -e -f $(RV_ROOT)/tools/MakeHex $@ diff --git a/testbench/tests/hello_world/hello_world.s b/testbench/tests/hello_world/hello_world.s new file mode 100644 index 0000000..4a45444 --- /dev/null +++ b/testbench/tests/hello_world/hello_world.s @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2019 Western Digital Corporation or its affiliates. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +// Assembly code for Hello World +// Not using only ALU ops for creating the string + + +#include "defines.h" + +#define STDOUT 0xd0580000 + + +// Code to execute +.section .text +.global _start +_start: + + // Clear minstret + csrw minstret, zero + csrw minstreth, zero + + // Set up MTVEC - not expecting to use it though + li x1, RV_ICCM_SADR + csrw mtvec, x1 + + + // Enable Caches in MRAC + li x1, 0x5f555555 + csrw 0x7c0, x1 + + // Load string from hw_data + // and write to stdout address + + li x3, STDOUT + la x4, hw_data + +loop: + lb x5, 0(x4) + sb x5, 0(x3) + addi x4, x4, 1 + bnez x5, loop + +// Write 0xff to STDOUT for TB to terminate test. +_finish: + li x3, STDOUT + addi x5, x0, 0xff + sb x5, 0(x3) + beq x0, x0, _finish +.rept 100 + nop +.endr + +.global hw_data +.data +hw_data: +.ascii "----------------------------------\n" +.ascii "Hello World from SweRV EH1 @WDC !!\n" +.ascii "----------------------------------\n" +.byte 0 diff --git a/tools/Makefile b/tools/Makefile index db5ae22..ef3db6b 100755 --- a/tools/Makefile +++ b/tools/Makefile @@ -13,12 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # - +CONF_PARAMS = -set iccm_enable # Check for RV_ROOT ifeq (,$(wildcard ${RV_ROOT}/configs/swerv.config)) $(error env var RV_ROOT does not point to a valid dir! Exiting!) 
endif +TEST_CFLAGS = -g -O3 -funroll-all-loops +ABI = -mabi=ilp32 -march=rv32imc + # Allow snapshot override target = default snapshot = $(target) @@ -40,6 +43,9 @@ TEST = hello_world # Define default test directory TEST_DIR = $(TBDIR)/asm HEX_DIR = $(TBDIR)/hex +ifneq (,$(wildcard $(TBDIR)/tests/$(TEST))) + TEST_DIR = $(TBDIR)/tests/$(TEST) +endif ifdef debug DEBUG_PLUS = +dumpon @@ -57,6 +63,10 @@ else LINK = $(TEST_DIR)/$(TEST).ld endif +OFILES = $(TEST).o + +-include $(TEST_DIR)/$(TEST).mki + VPATH = $(TEST_DIR) $(BUILD_DIR) $(TBDIR) TBFILES = $(TBDIR)/tb_top.sv $(TBDIR)/ahb_sif.sv @@ -67,13 +77,13 @@ includes = -I${RV_ROOT}/design/include -I${RV_ROOT}/design/lib -I${BUILD_DIR} CFLAGS += "-std=c++11" # Optimization for better performance; alternative is nothing for slower runtime (faster compiles) # -O2 for faster runtime (slower compiles), or -O for balance. -VERILATOR_MAKE_FLAGS = OPT_FAST="-O2" +VERILATOR_MAKE_FLAGS = OPT_FAST="-Os" # Targets all: clean verilator clean: - rm -rf *.log *.s *.hex *.dis *.tbl irun* vcs* simv* snapshots swerv* \ + rm -rf *.log *.s *.hex *.dis *.tbl irun* vcs* simv* *.map snapshots swerv* \ verilator* *.exe obj* *.o ucli.key vc_hdrs.h csrc *.csv \ work dataset.asdb library.cfg @@ -81,16 +91,18 @@ clean: ${BUILD_DIR}/defines.h : BUILD_PATH=${BUILD_DIR} ${SWERV_CONFIG} -target=$(target) $(CONF_PARAMS) +##################### Verilog Builds ##################################### + verilator-build: ${TBFILES} ${BUILD_DIR}/defines.h test_tb_top.cpp echo '`undef ASSERT_ON' >> ${BUILD_DIR}/common_defines.vh - $(VERILATOR) '-UASSERT_ON' --cc -CFLAGS ${CFLAGS} $(defines) $(includes) \ + $(VERILATOR) --cc -CFLAGS ${CFLAGS} $(defines) $(includes) \ -Wno-UNOPTFLAT \ -I${RV_ROOT}/testbench \ -f ${RV_ROOT}/testbench/flist \ ${TBFILES} \ --top-module tb_top -exe test_tb_top.cpp --autoflush $(VERILATOR_DEBUG) cp ${RV_ROOT}/testbench/test_tb_top.cpp obj_dir - $(MAKE) -C obj_dir/ -f Vtb_top.mk $(VERILATOR_MAKE_FLAGS) + $(MAKE) -j -C obj_dir/ -f 
Vtb_top.mk $(VERILATOR_MAKE_FLAGS) touch verilator-build vcs-build: ${TBFILES} ${BUILD_DIR}/defines.h @@ -108,7 +120,7 @@ irun-build: ${TBFILES} ${BUILD_DIR}/defines.h -incdir ${RV_ROOT}/design/lib -incdir ${RV_ROOT}/design/include -incdir ${BUILD_DIR} -vlog_ext +.vh+.h\ $(defines) -f ${RV_ROOT}/testbench/flist\ -top tb_top ${TBFILES} -I${RV_ROOT}/testbench \ - -elaborate -snapshot $(snapshot) + -elaborate -snapshot $(snapshot) $(profile) touch irun-build riviera-build: ${TBFILES} ${BUILD_DIR}/defines.h @@ -121,23 +133,30 @@ riviera-build: ${TBFILES} ${BUILD_DIR}/defines.h ${TBFILES} touch riviera-build +##################### Simulation Runs ##################################### + verilator: program.hex verilator-build ./obj_dir/Vtb_top ${DEBUG_PLUS} irun: program.hex irun-build - $(IRUN) -64bit -abvglobalfailurelimit 1 +lic_queue -licqueue -status -xmlibdirpath . -xmlibdirname swerv.build \ - -snapshot ${snapshot} -r ${snapshot} $(IRUN_DEBUG_RUN) + $(IRUN) -64bit +lic_queue -licqueue -status -xmlibdirpath . -xmlibdirname swerv.build \ + -snapshot ${snapshot} -r ${snapshot} $(IRUN_DEBUG_RUN) $(profile) vcs: program.hex vcs-build ./simv $(DEBUG_PLUS) +vcs+lic+wait -l vcs.log vlog: program.hex ${TBFILES} ${BUILD_DIR}/defines.h $(VLOG) -l vlog.log -sv -mfcu +incdir+${BUILD_DIR}+${RV_ROOT}/design/include+${RV_ROOT}/design/lib\ - $(defines) -f ${RV_ROOT}/testbench/flist ${TBFILES} -R ${DEBUG_PLUS} + $(defines) -f ${RV_ROOT}/testbench/flist ${TBFILES} -R +nowarn3829 ${DEBUG_PLUS} riviera: program.hex riviera-build vsim -c -lib work ${DEBUG_PLUS} ${RIVIERA_DEBUG} tb_top -do "run -all; exit" -l riviera.log + + +##################### Test Build ##################################### + + ifeq ($(shell which $(GCC_PREFIX)-gcc 2> /dev/null),) program.hex: ${BUILD_DIR}/defines.h @echo " !!! No $(GCC_PREFIX)-gcc in path, using canned hex files !!" 
@@ -147,19 +166,18 @@ ifneq (,$(wildcard $(TEST_DIR)/$(TEST).makefile)) program.hex: $(MAKE) -f $(TEST_DIR)/$(TEST).makefile else -program.hex: $(TEST).o $(LINK) +# Libraries are searched in command-line order, so -lgcc must follow $(OFILES) to resolve their references. +program.hex: $(OFILES) $(LINK) @echo Building $(TEST) - $(GCC_PREFIX)-ld -m elf32lriscv --discard-none -T$(LINK) -o $(TEST).exe $(TEST).o + $(GCC_PREFIX)-gcc $(ABI) -Wl,-Map=$(TEST).map -T$(LINK) -o $(TEST).exe $(OFILES) -lgcc -nostartfiles $(TEST_LIBS) $(GCC_PREFIX)-objcopy -O verilog $(TEST).exe program.hex $(GCC_PREFIX)-objdump -S $(TEST).exe > $(TEST).dis @echo Completed building $(TEST) %.o : %.s ${BUILD_DIR}/defines.h - $(GCC_PREFIX)-cpp -I${BUILD_DIR} $< > $(TEST).cpp.s - $(GCC_PREFIX)-as -march=rv32gc $(TEST).cpp.s -o $(TEST).o + $(GCC_PREFIX)-cpp -I${BUILD_DIR} $< > $*.cpp.s + $(GCC_PREFIX)-as $(ABI) $*.cpp.s -o $@ -TEST_CFLAGS = -g -O3 -funroll-all-loops -ABI = -mabi=ilp32 -march=rv32imc %.o : %.c ${BUILD_DIR}/defines.h $(GCC_PREFIX)-gcc -I${BUILD_DIR} ${TEST_CFLAGS} ${ABI} -nostdlib -c $< -o $@ diff --git a/tools/vivado.tcl b/tools/vivado.tcl index 3e8dab0..869d2d0 100644 --- a/tools/vivado.tcl +++ b/tools/vivado.tcl @@ -1 +1,2 @@ set_property is_global_include true [get_files config/common_defines.vh] +set_property IS_GLOBAL_INCLUDE 1 [get_files include/def.sv]