Merge branch 'master' into compressed

Conflicts:
	picorv32.v
This commit is contained in:
Clifford Wolf 2016-02-03 16:21:53 +01:00
commit d7894ca41a
3 changed files with 97 additions and 13 deletions

View File

@ -27,14 +27,14 @@ PicoRV32 is free and open hardware licensed under the [ISC license](http://en.wi
Features and Typical Applications Features and Typical Applications
--------------------------------- ---------------------------------
- Small (~1000 LUTs in a 7-Series Xilinx FPGA) - Small (750-1700 LUTs in 7-Series Xilinx Architecture)
- High fMAX (~250 MHz on 7-Series Xilinx FPGAs) - High f<sub>max</sub> (250-450 MHz on 7-Series Xilinx FPGAs)
- Selectable native memory interface or AXI4-Lite master - Selectable native memory interface or AXI4-Lite master
- Optional IRQ support (using a simple custom ISA) - Optional IRQ support (using a simple custom ISA)
- Optional Co-Processor Interface - Optional Co-Processor Interface
This CPU is meant to be used as an auxiliary processor in FPGA designs and ASICs. Due This CPU is meant to be used as an auxiliary processor in FPGA designs and ASICs. Due
to its high fMAX it can be integrated in most existing designs without crossing to its high f<sub>max</sub> it can be integrated in most existing designs without crossing
clock domains. When operated at a lower frequency, it will have a lot of timing clock domains. When operated at a lower frequency, it will have a lot of timing
slack and thus can be added to a design without compromising timing closure. slack and thus can be added to a design without compromising timing closure.
@ -251,16 +251,16 @@ The start address of the interrupt handler.
Cycles per Instruction Performance Cycles per Instruction Performance
---------------------------------- ----------------------------------
*A short reminder: This core is optimized for size, not performance.* *A short reminder: This core is optimized for size and f<sub>max</sub>, not performance.*
Unless stated otherwise, the following numbers apply to a PicoRV32 with Unless stated otherwise, the following numbers apply to a PicoRV32 with
ENABLE_REGS_DUALPORT active and connected to a memory that can accommodate ENABLE_REGS_DUALPORT active and connected to a memory that can accommodate
requests within one clock cycle. requests within one clock cycle.
The average Cycles per Instruction (CPI) is 4 to 5, depending on the mix of The average Cycles per Instruction (CPI) is approximately 4, depending on the mix of
instructions in the code. The CPI numbers for the individual instructions instructions in the code. The CPI numbers for the individual instructions can
can be found in the table below. The column "CPI (SP)" contains the be found in the table below. The column "CPI (SP)" contains the CPI numbers for
CPI numbers for a core built without ENABLE_REGS_DUALPORT. a core built without ENABLE_REGS_DUALPORT.
| Instruction | CPI | CPI (SP) | | Instruction | CPI | CPI (SP) |
| ---------------------| ----:| --------:| | ---------------------| ----:| --------:|
@ -277,9 +277,9 @@ CPI numbers for a core built without ENABLE_REGS_DUALPORT.
When `ENABLE_MUL` is activated, then a `MUL` instruction will execute When `ENABLE_MUL` is activated, then a `MUL` instruction will execute
in 40 cycles and a `MULH[SU|U]` instruction will execute in 72 cycles. in 40 cycles and a `MULH[SU|U]` instruction will execute in 72 cycles.
Dhrystone benchmark results: 0.311 DMIPS/MHz (547 Dhrystones/Second/MHz) Dhrystone benchmark results: 0.327 DMIPS/MHz (575 Dhrystones/Second/MHz)
For the Dhrystone benchmark the average CPI is 4.144. For the Dhrystone benchmark the average CPI is 3.945.
PicoRV32 Native Memory Interface PicoRV32 Native Memory Interface
@ -531,7 +531,7 @@ pure RV32I target, and install it in `/opt/riscv32i`:
git clone https://github.com/riscv/riscv-gnu-toolchain riscv-gnu-toolchain-rv32i git clone https://github.com/riscv/riscv-gnu-toolchain riscv-gnu-toolchain-rv32i
cd riscv-gnu-toolchain-rv32i cd riscv-gnu-toolchain-rv32i
git checkout 4bcd4f5 git checkout 06c957a
mkdir build; cd build mkdir build; cd build
../configure --with-xlen=32 --with-arch=I --prefix=/opt/riscv32i ../configure --with-xlen=32 --with-arch=I --prefix=/opt/riscv32i
@ -541,7 +541,7 @@ The commands will all be named using the prefix `riscv32-unknown-elf-`, which
makes it easy to install them side-by-side with the regular riscv-tools, which makes it easy to install them side-by-side with the regular riscv-tools, which
are using the name prefix `riscv64-unknown-elf-` by default. are using the name prefix `riscv64-unknown-elf-` by default.
*Note: These instructions are for git rev 4bcd4f5 (2015-12-14) of riscv-gnu-toolchain.* *Note: These instructions are for git rev 06c957a (2016-01-20) of riscv-gnu-toolchain.*
Evaluation: Timing and Utilization on Xilinx 7-Series FPGAs Evaluation: Timing and Utilization on Xilinx 7-Series FPGAs

View File

@ -15,6 +15,11 @@
# undef ENABLE_RVTST # undef ENABLE_RVTST
#endif #endif
// Only save registers in IRQ wrapper that are to be saved by the caller in
// the RISC-V ABI, with the exception of the stack pointer. The IRQ handler
// will save the rest if necessary. I.e. skip x3, x4, x8, x9, and x18-x27.
#undef ENABLE_FASTIRQ
#include "custom_ops.S" #include "custom_ops.S"
.section .text .section .text
@ -58,6 +63,23 @@ irq_vec:
getq x2, q3 getq x2, q3
sw x2, 2*4(x1) sw x2, 2*4(x1)
#ifdef ENABLE_FASTIRQ
sw x5, 5*4(x1)
sw x6, 6*4(x1)
sw x7, 7*4(x1)
sw x10, 10*4(x1)
sw x11, 11*4(x1)
sw x12, 12*4(x1)
sw x13, 13*4(x1)
sw x14, 14*4(x1)
sw x15, 15*4(x1)
sw x16, 16*4(x1)
sw x17, 17*4(x1)
sw x28, 28*4(x1)
sw x29, 29*4(x1)
sw x30, 30*4(x1)
sw x31, 31*4(x1)
#else
sw x3, 3*4(x1) sw x3, 3*4(x1)
sw x4, 4*4(x1) sw x4, 4*4(x1)
sw x5, 5*4(x1) sw x5, 5*4(x1)
@ -87,9 +109,30 @@ irq_vec:
sw x29, 29*4(x1) sw x29, 29*4(x1)
sw x30, 30*4(x1) sw x30, 30*4(x1)
sw x31, 31*4(x1) sw x31, 31*4(x1)
#endif
#else // ENABLE_QREGS #else // ENABLE_QREGS
#ifdef ENABLE_FASTIRQ
sw gp, 0*4+0x200(zero)
sw x1, 1*4+0x200(zero)
sw x2, 2*4+0x200(zero)
sw x5, 5*4+0x200(zero)
sw x6, 6*4+0x200(zero)
sw x7, 7*4+0x200(zero)
sw x10, 10*4+0x200(zero)
sw x11, 11*4+0x200(zero)
sw x12, 12*4+0x200(zero)
sw x13, 13*4+0x200(zero)
sw x14, 14*4+0x200(zero)
sw x15, 15*4+0x200(zero)
sw x16, 16*4+0x200(zero)
sw x17, 17*4+0x200(zero)
sw x28, 28*4+0x200(zero)
sw x29, 29*4+0x200(zero)
sw x30, 30*4+0x200(zero)
sw x31, 31*4+0x200(zero)
#else
sw gp, 0*4+0x200(zero) sw gp, 0*4+0x200(zero)
sw x1, 1*4+0x200(zero) sw x1, 1*4+0x200(zero)
sw x2, 2*4+0x200(zero) sw x2, 2*4+0x200(zero)
@ -122,6 +165,7 @@ irq_vec:
sw x29, 29*4+0x200(zero) sw x29, 29*4+0x200(zero)
sw x30, 30*4+0x200(zero) sw x30, 30*4+0x200(zero)
sw x31, 31*4+0x200(zero) sw x31, 31*4+0x200(zero)
#endif
#endif // ENABLE_QREGS #endif // ENABLE_QREGS
@ -160,6 +204,23 @@ irq_vec:
lw x2, 2*4(x1) lw x2, 2*4(x1)
setq q2, x2 setq q2, x2
#ifdef ENABLE_FASTIRQ
lw x5, 5*4(x1)
lw x6, 6*4(x1)
lw x7, 7*4(x1)
lw x10, 10*4(x1)
lw x11, 11*4(x1)
lw x12, 12*4(x1)
lw x13, 13*4(x1)
lw x14, 14*4(x1)
lw x15, 15*4(x1)
lw x16, 16*4(x1)
lw x17, 17*4(x1)
lw x28, 28*4(x1)
lw x29, 29*4(x1)
lw x30, 30*4(x1)
lw x31, 31*4(x1)
#else
lw x3, 3*4(x1) lw x3, 3*4(x1)
lw x4, 4*4(x1) lw x4, 4*4(x1)
lw x5, 5*4(x1) lw x5, 5*4(x1)
@ -189,6 +250,7 @@ irq_vec:
lw x29, 29*4(x1) lw x29, 29*4(x1)
lw x30, 30*4(x1) lw x30, 30*4(x1)
lw x31, 31*4(x1) lw x31, 31*4(x1)
#endif
getq x1, q1 getq x1, q1
getq x2, q2 getq x2, q2
@ -201,6 +263,26 @@ irq_vec:
sbreak sbreak
1: 1:
#ifdef ENABLE_FASTIRQ
lw gp, 0*4+0x200(zero)
lw x1, 1*4+0x200(zero)
lw x2, 2*4+0x200(zero)
lw x5, 5*4+0x200(zero)
lw x6, 6*4+0x200(zero)
lw x7, 7*4+0x200(zero)
lw x10, 10*4+0x200(zero)
lw x11, 11*4+0x200(zero)
lw x12, 12*4+0x200(zero)
lw x13, 13*4+0x200(zero)
lw x14, 14*4+0x200(zero)
lw x15, 15*4+0x200(zero)
lw x16, 16*4+0x200(zero)
lw x17, 17*4+0x200(zero)
lw x28, 28*4+0x200(zero)
lw x29, 29*4+0x200(zero)
lw x30, 30*4+0x200(zero)
lw x31, 31*4+0x200(zero)
#else
lw gp, 0*4+0x200(zero) lw gp, 0*4+0x200(zero)
lw x1, 1*4+0x200(zero) lw x1, 1*4+0x200(zero)
lw x2, 2*4+0x200(zero) lw x2, 2*4+0x200(zero)
@ -233,6 +315,7 @@ irq_vec:
lw x29, 29*4+0x200(zero) lw x29, 29*4+0x200(zero)
lw x30, 30*4+0x200(zero) lw x30, 30*4+0x200(zero)
lw x31, 31*4+0x200(zero) lw x31, 31*4+0x200(zero)
#endif
#endif // ENABLE_QREGS #endif // ENABLE_QREGS

View File

@ -353,7 +353,7 @@ module picorv32 #(
0: begin 0: begin
mem_addr <= mem_la_addr; mem_addr <= mem_la_addr;
mem_wdata <= mem_la_wdata; mem_wdata <= mem_la_wdata;
mem_wstrb <= mem_la_wstrb; mem_wstrb <= mem_la_wstrb & {4{mem_la_write}};
if (mem_do_prefetch || mem_do_rinst) begin if (mem_do_prefetch || mem_do_rinst) begin
current_insn_addr <= next_pc; current_insn_addr <= next_pc;
end end
@ -945,6 +945,7 @@ module picorv32 #(
latched_is_lh <= 0; latched_is_lh <= 0;
latched_is_lb <= 0; latched_is_lb <= 0;
pcpi_valid <= 0; pcpi_valid <= 0;
pcpi_timeout <= 0;
irq_active <= 0; irq_active <= 0;
irq_mask <= ~0; irq_mask <= ~0;
next_irq_pending = 0; next_irq_pending = 0;