Using newlib libc from riscv-tools in dhrystone benchmark

This commit is contained in:
Clifford Wolf 2016-06-07 17:09:26 +02:00
parent bf062e39ac
commit b99610fee3
7 changed files with 258 additions and 223 deletions

View File

@ -326,9 +326,9 @@ any other ALU operation.
The following dhrystone benchmark results are for a core with enabled The following dhrystone benchmark results are for a core with enabled
`ENABLE_MUL`, `ENABLE_DIV`, and `BARREL_SHIFTER` options. `ENABLE_MUL`, `ENABLE_DIV`, and `BARREL_SHIFTER` options.
Dhrystone benchmark results: 0.406 DMIPS/MHz (715 Dhrystones/Second/MHz) Dhrystone benchmark results: 0.505 DMIPS/MHz (888 Dhrystones/Second/MHz)
For the Dhrystone benchmark the average CPI is 4.072. For the Dhrystone benchmark the average CPI is 4.208.
PicoRV32 Native Memory Interface PicoRV32 Native Memory Interface

View File

@ -1,7 +1,14 @@
USE_MYSTDLIB = 0
OBJS = dhry_1.o dhry_2.o stdlib.o
CFLAGS = -MD -O3 -m32 -march=RV32IM -DTIME -DRISCV
TOOLCHAIN_PREFIX = /opt/riscv32im/bin/riscv32-unknown-elf-
OBJS = start.o dhry_1.o dhry_2.o stdlib.o ifeq ($(USE_MYSTDLIB),1)
CFLAGS = -MD -O3 -m32 -march=RV32IM -ffreestanding -nostdlib -DTIME -DRISCV CFLAGS += -DUSE_MYSTDLIB -ffreestanding -nostdlib
TOOLCHAIN_PREFIX = riscv32-unknown-elf- OBJS += start.o
else
OBJS += syscalls.o
endif
test: testbench.vvp dhry.hex test: testbench.vvp dhry.hex
vvp -N testbench.vvp vvp -N testbench.vvp
@ -21,16 +28,18 @@ timing.vvp: testbench.v ../picorv32.v
iverilog -o timing.vvp -DTIMING testbench.v ../picorv32.v iverilog -o timing.vvp -DTIMING testbench.v ../picorv32.v
chmod -x timing.vvp chmod -x timing.vvp
dhry.hex: dhry.bin ../firmware/makehex.py dhry.hex: dhry.elf
python3 ../firmware/makehex.py $< 16384 > $@ riscv32-unknown-elf-objcopy -O verilog $< $@
dhry.bin: dhry.elf ifeq ($(USE_MYSTDLIB),1)
$(TOOLCHAIN_PREFIX)objcopy -O binary $< $@ dhry.elf: $(OBJS) sections.lds
$(TOOLCHAIN_PREFIX)gcc $(CFLAGS) -Wl,-Bstatic,-T,sections.lds,-Map,dhry.map,--strip-debug -o $@ $(OBJS) -lgcc
chmod -x $@ chmod -x $@
else
dhry.elf: $(OBJS) ../firmware/sections.lds dhry.elf: $(OBJS)
$(TOOLCHAIN_PREFIX)gcc $(CFLAGS) -Wl,-Bstatic,-T,../firmware/sections.lds,-Map,dhry.map,--strip-debug -o $@ $(OBJS) -lgcc $(TOOLCHAIN_PREFIX)gcc $(CFLAGS) -Wl,-Bstatic,-Map,dhry.map,--strip-debug -o $@ $(OBJS) -lgcc -lc
chmod -x $@ chmod -x $@
endif
%.o: %.c %.o: %.c
$(TOOLCHAIN_PREFIX)gcc -c $(CFLAGS) $< $(TOOLCHAIN_PREFIX)gcc -c $(CFLAGS) $<

View File

@ -17,6 +17,13 @@
#include "dhry.h" #include "dhry.h"
#ifdef USE_MYSTDLIB
extern char *malloc ();
#else
# include <stdlib.h>
# include <string.h>
#endif
/* Global Variables: */ /* Global Variables: */
Rec_Pointer Ptr_Glob, Rec_Pointer Ptr_Glob,
@ -28,7 +35,6 @@ char Ch_1_Glob,
int Arr_1_Glob [50]; int Arr_1_Glob [50];
int Arr_2_Glob [50] [50]; int Arr_2_Glob [50] [50];
extern char *malloc ();
Enumeration Func_1 (); Enumeration Func_1 ();
/* forward declaration necessary since Enumeration may not simply be int */ /* forward declaration necessary since Enumeration may not simply be int */
@ -43,7 +49,7 @@ Enumeration Func_1 ();
/* variables for time measurement: */ /* variables for time measurement: */
#ifdef TIMES #ifdef IGN_TIMES
struct tms time_info; struct tms time_info;
extern int times (); extern int times ();
/* see library function "times" */ /* see library function "times" */
@ -124,9 +130,9 @@ main ()
} }
printf ("Please give the number of runs through the benchmark: "); printf ("Please give the number of runs through the benchmark: ");
{ {
int n; // int n;
scanf ("%d", &n); // scanf ("%d", &n);
Number_Of_Runs = n; Number_Of_Runs = 100;
} }
printf ("\n"); printf ("\n");
@ -136,7 +142,7 @@ main ()
/* Start timer */ /* Start timer */
/***************/ /***************/
#ifdef TIMES #ifdef IGN_TIMES
times (&time_info); times (&time_info);
Begin_Time = (long) time_info.tms_utime; Begin_Time = (long) time_info.tms_utime;
#endif #endif
@ -197,7 +203,7 @@ main ()
/* Stop timer */ /* Stop timer */
/**************/ /**************/
#ifdef TIMES #ifdef IGN_TIMES
times (&time_info); times (&time_info);
End_Time = (long) time_info.tms_utime; End_Time = (long) time_info.tms_utime;
#endif #endif

18
dhrystone/sections.lds Normal file
View File

@ -0,0 +1,18 @@
/*
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
*/
/* Layout for the freestanding build: one output section holds the whole image. */
SECTIONS {
.memory : {
/* Move the location counter to 0x10000 before placing any input. Inside an
   output section description `.` is an offset from the section start. */
. = 0x10000;
/* .text of input files whose name matches start* is placed first. */
start*(.text);
/* Then .text of every remaining input file. */
*(.text);
/* Then all other input sections of all files. */
*(*);
/* `end` is set to the address just past everything placed above. */
end = .;
}
}

View File

@ -1,18 +1,27 @@
// This is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
#include <stdarg.h> #include <stdarg.h>
#include <stdint.h>
extern long time(); extern long time();
extern long insn(); extern long insn();
#ifdef USE_MYSTDLIB
extern char *malloc(); extern char *malloc();
extern int printf(const char *format, ...); extern int printf(const char *format, ...);
extern int scanf(const char *format, ...);
// implementations are copy&paste from riscv newlib
extern void *memcpy(void *dest, const void *src, long n); extern void *memcpy(void *dest, const void *src, long n);
extern char *strcpy(char *dest, const char *src); extern char *strcpy(char *dest, const char *src);
extern int strcmp(const char *s1, const char *s2); extern int strcmp(const char *s1, const char *s2);
char heap_memory[1024]; char heap_memory[1024];
int heap_memory_used = 0; int heap_memory_used = 0;
#endif
long time() long time()
{ {
@ -30,6 +39,7 @@ long insn()
return insns; return insns;
} }
#ifdef USE_MYSTDLIB
char *malloc(int size) char *malloc(int size)
{ {
char *p = heap_memory + heap_memory_used; char *p = heap_memory + heap_memory_used;
@ -96,213 +106,105 @@ int printf(const char *format, ...)
va_end(ap); va_end(ap);
} }
int scanf(const char *format, ...) void *memcpy(void *aa, const void *bb, long n)
{ {
// printf("[scanf(\"%s\")]\n", format); // printf("**MEMCPY**\n");
va_list ap; char *a = aa;
va_start(ap, format); const char *b = bb;
*va_arg(ap,int*) = 100; while (n--) *(a++) = *(b++);
va_end(ap); return aa;
}
// Copy the NUL-terminated string at src, terminator included, to dst.
// Returns the original dst pointer.
char *strcpy(char* dst, const char* src)
{
    char *const result = dst;

    // Byte phase: runs for as long as either pointer is not 4-byte aligned.
    while (((uint32_t)dst | (uint32_t)src) & 3) {
        char ch = *src++;
        *dst++ = ch;
        if (ch == 0)
            return result;
    }

    // Word phase: both pointers are 4-byte aligned from here on.
    for (;;) {
        uint32_t word = *(uint32_t*)src;

        // The expression is non-zero exactly when some byte of `word` is zero.
        if (__builtin_expect((word - 0x01010101UL) & ~word & 0x80808080UL, 0)) {
            int i;

            // Store the bytes lowest first and stop right after the NUL.
            for (i = 0; i < 3; i++) {
                dst[i] = word & 0xff;
                if ((word & 0xff) == 0)
                    return result;
                word >>= 8;
            }
            // The first three bytes were non-zero, so this one is the NUL.
            dst[3] = word & 0xff;
            return result;
        }

        // No terminator in this word: copy it whole and advance.
        *(uint32_t*)dst = word;
        dst += 4;
        src += 4;
    }
}
int strcmp(const char *s1, const char *s2)
{
while ((((uint32_t)s1 | (uint32_t)s2) & 3) != 0)
{
char c1 = *(s1++);
char c2 = *(s2++);
if (c1 != c2)
return c1 < c2 ? -1 : +1;
else if (!c1)
return 0; return 0;
}
while (1)
{
uint32_t v1 = *(uint32_t*)s1;
uint32_t v2 = *(uint32_t*)s2;
if (__builtin_expect(v1 != v2, 0))
{
char c1, c2;
c1 = v1 & 0xff, c2 = v2 & 0xff;
if (c1 != c2) return c1 < c2 ? -1 : +1;
if (!c1) return 0;
v1 = v1 >> 8, v2 = v2 >> 8;
c1 = v1 & 0xff, c2 = v2 & 0xff;
if (c1 != c2) return c1 < c2 ? -1 : +1;
if (!c1) return 0;
v1 = v1 >> 8, v2 = v2 >> 8;
c1 = v1 & 0xff, c2 = v2 & 0xff;
if (c1 != c2) return c1 < c2 ? -1 : +1;
if (!c1) return 0;
v1 = v1 >> 8, v2 = v2 >> 8;
c1 = v1 & 0xff, c2 = v2 & 0xff;
if (c1 != c2) return c1 < c2 ? -1 : +1;
return 0;
}
if (__builtin_expect((((v1) - 0x01010101UL) & ~(v1) & 0x80808080UL), 0))
return 0;
s1 += 4;
s2 += 4;
}
} }
#endif
// -------------------------------------------------------
// Copy&paste from RISC-V newlib:
// Copy n bytes from bb to aa and return aa.
// Uses long-sized transfers when both pointers share the same offset within a
// long and at least sizeof(long) bytes are requested; otherwise copies bytes.
void* memcpy(void* aa, const void* bb, long n)
{
// BODY(a, b, t): load one element of type t from b, advance both pointers,
// then store the loaded value to the slot a pointed at before the increment.
#define BODY(a, b, t) { \
t tt = *b; \
a++, b++; \
*(a-1) = tt; \
}
char* a = (char*)aa;
const char* b = (const char*)bb;
char* end = a+n;
// Mask selecting the offset of an address within a long.
unsigned long msk = sizeof(long)-1;
// Byte-only path: the two pointers can never be long-aligned together, or the
// request is shorter than one long.
if (__builtin_expect(((unsigned long)a & msk) != ((unsigned long)b & msk) || n < sizeof(long), 0))
{
// Also the target of the `goto small` below, used to copy the trailing bytes.
small:
if (__builtin_expect(a < end, 1))
while (a < end)
BODY(a, b, char);
return aa;
}
// Copy leading bytes until a is long-aligned; b has the same offset, so it
// becomes aligned at the same time.
if (__builtin_expect(((unsigned long)a & msk) != 0, 0))
while ((unsigned long)a & msk)
BODY(a, b, char);
long* la = (long*)a;
const long* lb = (const long*)b;
// End of the destination rounded down to a long boundary.
long* lend = (long*)((unsigned long)end & ~msk);
// Unrolled loop: nine longs per iteration, entered only while more than eight
// longs remain. All loads are issued before the stores.
if (__builtin_expect(la < lend-8, 0))
{
while (la < lend-8)
{
long b0 = *lb++;
long b1 = *lb++;
long b2 = *lb++;
long b3 = *lb++;
long b4 = *lb++;
long b5 = *lb++;
long b6 = *lb++;
long b7 = *lb++;
long b8 = *lb++;
*la++ = b0;
*la++ = b1;
*la++ = b2;
*la++ = b3;
*la++ = b4;
*la++ = b5;
*la++ = b6;
*la++ = b7;
*la++ = b8;
}
}
// Remaining whole longs, one at a time.
while (la < lend)
BODY(la, lb, long);
a = (char*)la;
b = (const char*)lb;
// Fewer than sizeof(long) bytes may be left: finish them in the byte loop.
if (__builtin_expect(a < end, 0))
goto small;
return aa;
}
// Returns a non-zero value exactly when the word w contains a zero byte.
// In the result, bit 7 is set in each byte position where w holds 0x00.
static inline unsigned long __libc_detect_null(unsigned long w)
{
    // 0x7f in every byte of a long; widened to eight bytes for 64-bit longs.
    unsigned long low7 = 0x7f7f7f7f;

    if (sizeof(long) == 8)
        low7 |= (low7 << 16) << 16;

    // Per byte: (b & 0x7f) + 0x7f sets bit 7 unless the low seven bits are all
    // zero; OR-ing in w and low7 leaves 0xff for non-zero bytes, 0x7f for zero.
    unsigned long filled = ((w & low7) + low7) | w | low7;

    return ~filled;
}
// Copy the NUL-terminated string at src, terminator included, to dst.
// Returns the original dst pointer.
char* strcpy(char* dst, const char* src)
{
char* dst0 = dst;
// Word-at-a-time fast path, compiled out in size-optimized builds.
#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
// Non-zero if either pointer is not aligned to sizeof(long).
int misaligned = ((unsigned long)dst | (unsigned long)src) & (sizeof(long)-1);
if (__builtin_expect(!misaligned, 1))
{
long* ldst = (long*)dst;
const long* lsrc = (const long*)src;
// Copy whole longs for as long as __libc_detect_null() returns 0 for them.
while (!__libc_detect_null(*lsrc))
*ldst++ = *lsrc++;
dst = (char*)ldst;
src = (const char*)lsrc;
// The long at src was rejected by the loop above; copy it byte by byte.
// Each source byte is loaded a little ahead of the store that uses it.
char c0 = src[0];
char c1 = src[1];
char c2 = src[2];
if (!(*dst++ = c0)) return dst0;
if (!(*dst++ = c1)) return dst0;
char c3 = src[3];
if (!(*dst++ = c2)) return dst0;
// 4-byte long: skip the byte 3..6 stores and let `out` write the fourth byte.
if (sizeof(long) == 4) goto out;
char c4 = src[4];
if (!(*dst++ = c3)) return dst0;
char c5 = src[5];
if (!(*dst++ = c4)) return dst0;
char c6 = src[6];
if (!(*dst++ = c5)) return dst0;
if (!(*dst++ = c6)) return dst0;
out:
// Every earlier byte of this long was stored as non-zero; write 0 as the last.
*dst++ = 0;
return dst0;
}
#endif /* not PREFER_SIZE_OVER_SPEED */
// Fallback: plain byte loop that stops after storing the terminator.
char ch;
do
{
ch = *src;
src++;
dst++;
*(dst-1) = ch;
} while(ch);
return dst0;
}
/* copy&paste from disassembled libc */
// strcmp.S: Artisanally coded in California by A. Shell Waterman
// strcmp as top-level RISC-V assembly.
// a0 and a1 are used as the two string pointers; the result is left in a0.
// Only a0-a5 and t0-t3 are written, and no memory is stored to.
asm (
" .global strcmp; "
// a4 = low two bits of (a0 | a1): non-zero when either pointer is not
// word aligned, in which case the byte loop at .K1 does all the work.
" strcmp: or a4,a0,a1; "
" li t2,-1; "
" andi a4,a4,3; "
" bnez a4,.K1; "
// t3 = 0x7f7f8000 - 129 = 0x7f7f7f7f.
" lui t3,0x7f7f8; "
" addi t3,t3,-129; "
// .K6: word loop, three words per iteration. For the word w loaded from a0,
// t0 = ((w & t3) + t3) | w | t3, which equals t2 (-1) only when w has no
// zero byte.
" .K6: lw a2,0(a0); "
" lw a3,0(a1); "
" and t0,a2,t3; "
" or t1,a2,t3; "
" add t0,t0,t3; "
" or t0,t0,t1; "
// Zero byte in the first word -> .K2; words differ -> .K3.
" bne t0,t2,.K2; "
" bne a2,a3,.K3; "
// Second word (offset 4); a zero byte here goes to .K4.
" lw a2,4(a0); "
" lw a3,4(a1); "
" and t0,a2,t3; "
" or t1,a2,t3; "
" add t0,t0,t3; "
" or t0,t0,t1; "
" bne t0,t2,.K4; "
" bne a2,a3,.K3; "
// Third word (offset 8); a zero byte here goes to .K5.
" lw a2,8(a0); "
" lw a3,8(a1); "
" and t0,a2,t3; "
" or t1,a2,t3; "
" add t0,t0,t3; "
" or t0,t0,t1; "
" bne t0,t2,.K5; "
// Advance both pointers by 12 and loop while the third words are equal.
" addi a0,a0,12; "
" addi a1,a1,12; "
" beq a2,a3,.K6; "
// .K3: a2 != a3 and a2 has no zero byte. Compare the low halfwords first
// (shifted into the upper 16 bits); if they differ continue at .K7.
" .K3: slli a4,a2,0x10; "
" slli a5,a3,0x10; "
" bne a4,a5,.K7; "
// Low halfwords equal: the difference is in the high halfwords.
" srli a4,a2,0x10; "
" srli a5,a3,0x10; "
" sub a0,a4,a5; "
// If the low byte of the difference is non-zero, the low bytes of the two
// halfwords differ and .K8 returns the difference of just those bytes.
" andi a1,a0,255; "
" bnez a1,.K8; "
" ret; "
// .K7: same as above, applied to the low halfwords.
" .K7: srli a4,a4,0x10; "
" srli a5,a5,0x10; "
" sub a0,a4,a5; "
" andi a1,a0,255; "
" bnez a1,.K8; "
" ret; "
// .K8: return the difference of the low bytes of a4 and a5.
" .K8: andi a4,a4,255; "
" andi a5,a5,255; "
" sub a0,a4,a5; "
" ret; "
// .K1: byte loop. Stops at the first differing byte or after a zero byte,
// and returns the difference of the last byte pair at .K9.
" .K1: lbu a2,0(a0); "
" lbu a3,0(a1); "
" addi a0,a0,1; "
" addi a1,a1,1; "
" bne a2,a3,.K9; "
" bnez a2,.K1; "
" .K9: sub a0,a2,a3; "
" ret; "
// .K4 / .K2: the current word of a0's string contains a zero byte. .K4 first
// advances the pointers by 4 so they address that word. If the two words
// differ, the byte loop re-reads them; otherwise the strings are equal.
" .K4: addi a0,a0,4; "
" addi a1,a1,4; "
" .K2: bne a2,a3,.K1; "
" li a0,0; "
" ret; "
// .K5: same as .K4 for the third word of an iteration (offset 8).
" .K5: addi a0,a0,8; "
" addi a1,a1,8; "
" bne a2,a3,.K1; "
" li a0,0; "
" ret; "
);

95
dhrystone/syscalls.c Normal file
View File

@ -0,0 +1,95 @@
// An extremely minimalist syscalls.c for newlib
// Based on riscv newlib libgloss/riscv/machine/syscall.h
// Written by Clifford Wolf.
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
// UNIMPL_FUNC(name) expands to assembler text that declares `name` as a global
// symbol of type function and places its label at the current location. It
// emits no instructions of its own.
#define UNIMPL_FUNC(_f) ".globl " #_f "\n.type " #_f ", @function\n" #_f ":\n"
// All labels below are emitted back to back with nothing in between, so every
// listed function resolves to the same address: the single jump at the end,
// which transfers control to unimplemented_syscall().
asm (
".text\n"
".align 2\n"
UNIMPL_FUNC(open)
UNIMPL_FUNC(openat)
UNIMPL_FUNC(lseek)
UNIMPL_FUNC(stat)
UNIMPL_FUNC(lstat)
UNIMPL_FUNC(fstatat)
UNIMPL_FUNC(isatty)
UNIMPL_FUNC(access)
UNIMPL_FUNC(faccessat)
UNIMPL_FUNC(link)
UNIMPL_FUNC(unlink)
UNIMPL_FUNC(execve)
UNIMPL_FUNC(getpid)
UNIMPL_FUNC(fork)
UNIMPL_FUNC(kill)
UNIMPL_FUNC(wait)
UNIMPL_FUNC(times)
UNIMPL_FUNC(gettimeofday)
UNIMPL_FUNC(ftime)
UNIMPL_FUNC(utime)
UNIMPL_FUNC(chown)
UNIMPL_FUNC(chmod)
UNIMPL_FUNC(chdir)
UNIMPL_FUNC(getcwd)
UNIMPL_FUNC(sysconf)
// Shared body of every stub above.
"j unimplemented_syscall\n"
);
// Common landing point for every stubbed-out system call: emits a diagnostic
// one character at a time, then stops the core. Never returns.
void unimplemented_syscall()
{
    const char *msg;

    // Each character is stored as an int to address 0x10000000
    // (presumably the testbench's character output port; confirm against testbench.v).
    for (msg = "Unimplemented system call called!\n"; *msg; msg++)
        *(volatile int*)0x10000000 = *msg;

    // Halt execution with a breakpoint instruction.
    asm volatile ("ebreak");
    __builtin_unreachable();
}
// read() stub: there is no input source, so every call reports end-of-file.
// All arguments are ignored and nothing is written to ptr.
ssize_t read(int file, void *ptr, size_t len)
{
    (void)file;
    (void)ptr;
    (void)len;

    return 0;   // 0 bytes read == EOF
}
// write() backend: sends len bytes starting at ptr, one at a time, to the
// output port at 0x10000000. The file descriptor is ignored, so every stream
// ends up on the same port. Always returns len.
ssize_t write(int file, const void *ptr, size_t len)
{
    volatile int *const port = (volatile int *)0x10000000;
    // Walk the buffer through a character pointer: arithmetic on `void *` is a
    // GNU extension, and the buffer is only read, so const is kept.
    const char *cur = ptr;
    const char *const stop = cur + len;

    (void)file;

    while (cur != stop)
        *port = *cur++;     // one int-wide store per character

    return len;
}
// close() stub: nothing to release, so it always reports success.
// It has to exist because close is called before _exit().
int close(int file)
{
    (void)file;

    return 0;
}
// fstat() stub: always fails with ENOENT and leaves *st untouched.
// It has to exist because fstat is called during libc startup.
int fstat(int file, struct stat *st)
{
    (void)file;
    (void)st;

    errno = ENOENT;
    return -1;
}
// Minimal heap break: hands out memory starting at the linker symbol _end and
// moves the break by incr on every call. Returns the break as it was before
// the call. There is no upper limit and no failure path.
void *sbrk(ptrdiff_t incr)
{
    extern unsigned char _end[]; // Defined by linker
    static unsigned long heap_end;
    unsigned long previous;

    // First call: start the heap at _end.
    if (!heap_end)
        heap_end = (long)_end;

    previous = heap_end;
    heap_end = previous + incr;

    return (void *)previous;
}
// Program termination: stops the core with a breakpoint instruction.
// The exit status is not reported anywhere. Never returns.
void _exit(int exit_status)
{
    (void)exit_status;

    asm volatile ("ebreak");
    __builtin_unreachable();
}

View File

@ -29,7 +29,9 @@ module testbench;
picorv32 #( picorv32 #(
.BARREL_SHIFTER(1), .BARREL_SHIFTER(1),
.ENABLE_MUL(1), .ENABLE_MUL(1),
.ENABLE_DIV(1) .ENABLE_DIV(1),
.PROGADDR_RESET('h10000),
.STACKADDR('h10000)
) uut ( ) uut (
.clk (clk ), .clk (clk ),
.resetn (resetn ), .resetn (resetn ),
@ -48,13 +50,16 @@ module testbench;
.mem_la_wstrb(mem_la_wstrb) .mem_la_wstrb(mem_la_wstrb)
); );
reg [31:0] memory [0:64*1024/4-1]; reg [7:0] memory [0:256*1024-1];
initial $readmemh("dhry.hex", memory); initial $readmemh("dhry.hex", memory);
assign mem_ready = 1; assign mem_ready = 1;
always @(posedge clk) begin always @(posedge clk) begin
mem_rdata <= mem_la_read ? memory[mem_la_addr >> 2] : 'bx; mem_rdata[ 7: 0] <= mem_la_read ? memory[mem_la_addr + 0] : 'bx;
mem_rdata[15: 8] <= mem_la_read ? memory[mem_la_addr + 1] : 'bx;
mem_rdata[23:16] <= mem_la_read ? memory[mem_la_addr + 2] : 'bx;
mem_rdata[31:24] <= mem_la_read ? memory[mem_la_addr + 3] : 'bx;
if (mem_la_write) begin if (mem_la_write) begin
case (mem_la_addr) case (mem_la_addr)
32'h1000_0000: begin 32'h1000_0000: begin
@ -64,10 +69,10 @@ module testbench;
`endif `endif
end end
default: begin default: begin
if (mem_la_wstrb[0]) memory[mem_la_addr >> 2][ 7: 0] <= mem_la_wdata[ 7: 0]; if (mem_la_wstrb[0]) memory[mem_la_addr + 0] <= mem_la_wdata[ 7: 0];
if (mem_la_wstrb[1]) memory[mem_la_addr >> 2][15: 8] <= mem_la_wdata[15: 8]; if (mem_la_wstrb[1]) memory[mem_la_addr + 1] <= mem_la_wdata[15: 8];
if (mem_la_wstrb[2]) memory[mem_la_addr >> 2][23:16] <= mem_la_wdata[23:16]; if (mem_la_wstrb[2]) memory[mem_la_addr + 2] <= mem_la_wdata[23:16];
if (mem_la_wstrb[3]) memory[mem_la_addr >> 2][31:24] <= mem_la_wdata[31:24]; if (mem_la_wstrb[3]) memory[mem_la_addr + 3] <= mem_la_wdata[31:24];
end end
endcase endcase
end end