Merge branch 'master' into compressed
Conflicts: picorv32.v
This commit is contained in:
commit
d7894ca41a
24
README.md
24
README.md
|
@ -27,14 +27,14 @@ PicoRV32 is free and open hardware licensed under the [ISC license](http://en.wi
|
||||||
Features and Typical Applications
|
Features and Typical Applications
|
||||||
---------------------------------
|
---------------------------------
|
||||||
|
|
||||||
- Small (~1000 LUTs in a 7-Series Xilinx FPGA)
|
- Small (750-1700 LUTs in 7-Series Xilinx Architecture)
|
||||||
- High fMAX (~250 MHz on 7-Series Xilinx FPGAs)
|
- High f<sub>max</sub> (250-450 MHz on 7-Series Xilinx FPGAs)
|
||||||
- Selectable native memory interface or AXI4-Lite master
|
- Selectable native memory interface or AXI4-Lite master
|
||||||
- Optional IRQ support (using a simple custom ISA)
|
- Optional IRQ support (using a simple custom ISA)
|
||||||
- Optional Co-Processor Interface
|
- Optional Co-Processor Interface
|
||||||
|
|
||||||
This CPU is meant to be used as an auxiliary processor in FPGA designs and ASICs. Due
|
This CPU is meant to be used as an auxiliary processor in FPGA designs and ASICs. Due
|
||||||
to its high fMAX it can be integrated in most existing designs without crossing
|
to its high f<sub>max</sub> it can be integrated in most existing designs without crossing
|
||||||
clock domains. When operated at a lower frequency, it will have a lot of timing
|
clock domains. When operated at a lower frequency, it will have a lot of timing
|
||||||
slack and thus can be added to a design without compromising timing closure.
|
slack and thus can be added to a design without compromising timing closure.
|
||||||
|
|
||||||
|
@ -251,16 +251,16 @@ The start address of the interrupt handler.
|
||||||
Cycles per Instruction Performance
|
Cycles per Instruction Performance
|
||||||
----------------------------------
|
----------------------------------
|
||||||
|
|
||||||
*A short reminder: This core is optimized for size, not performance.*
|
*A short reminder: This core is optimized for size and f<sub>max</sub>, not performance.*
|
||||||
|
|
||||||
Unless stated otherwise, the following numbers apply to a PicoRV32 with
|
Unless stated otherwise, the following numbers apply to a PicoRV32 with
|
||||||
ENABLE_REGS_DUALPORT active and connected to a memory that can accommodate
|
ENABLE_REGS_DUALPORT active and connected to a memory that can accommodate
|
||||||
requests within one clock cycle.
|
requests within one clock cycle.
|
||||||
|
|
||||||
The average Cycles per Instruction (CPI) is 4 to 5, depending on the mix of
|
The average Cycles per Instruction (CPI) is approximately 4, depending on the mix of
|
||||||
instructions in the code. The CPI numbers for the individual instructions
|
instructions in the code. The CPI numbers for the individual instructions can
|
||||||
can be found in the table below. The column "CPI (SP)" contains the
|
be found in the table below. The column "CPI (SP)" contains the CPI numbers for
|
||||||
CPI numbers for a core built without ENABLE_REGS_DUALPORT.
|
a core built without ENABLE_REGS_DUALPORT.
|
||||||
|
|
||||||
| Instruction | CPI | CPI (SP) |
|
| Instruction | CPI | CPI (SP) |
|
||||||
| ---------------------| ----:| --------:|
|
| ---------------------| ----:| --------:|
|
||||||
|
@ -277,9 +277,9 @@ CPI numbers for a core built without ENABLE_REGS_DUALPORT.
|
||||||
When `ENABLE_MUL` is activated, then a `MUL` instruction will execute
|
When `ENABLE_MUL` is activated, then a `MUL` instruction will execute
|
||||||
in 40 cycles and a `MULH[SU|U]` instruction will execute in 72 cycles.
|
in 40 cycles and a `MULH[SU|U]` instruction will execute in 72 cycles.
|
||||||
|
|
||||||
Dhrystone benchmark results: 0.311 DMIPS/MHz (547 Dhrystones/Second/MHz)
|
Dhrystone benchmark results: 0.327 DMIPS/MHz (575 Dhrystones/Second/MHz)
|
||||||
|
|
||||||
For the Dhrystone benchmark the average CPI is 4.144.
|
For the Dhrystone benchmark the average CPI is 3.945.
|
||||||
|
|
||||||
|
|
||||||
PicoRV32 Native Memory Interface
|
PicoRV32 Native Memory Interface
|
||||||
|
@ -531,7 +531,7 @@ pure RV32I target, and install it in `/opt/riscv32i`:
|
||||||
|
|
||||||
git clone https://github.com/riscv/riscv-gnu-toolchain riscv-gnu-toolchain-rv32i
|
git clone https://github.com/riscv/riscv-gnu-toolchain riscv-gnu-toolchain-rv32i
|
||||||
cd riscv-gnu-toolchain-rv32i
|
cd riscv-gnu-toolchain-rv32i
|
||||||
git checkout 4bcd4f5
|
git checkout 06c957a
|
||||||
|
|
||||||
mkdir build; cd build
|
mkdir build; cd build
|
||||||
../configure --with-xlen=32 --with-arch=I --prefix=/opt/riscv32i
|
../configure --with-xlen=32 --with-arch=I --prefix=/opt/riscv32i
|
||||||
|
@ -541,7 +541,7 @@ The commands will all be named using the prefix `riscv32-unknown-elf-`, which
|
||||||
makes it easy to install them side-by-side with the regular riscv-tools, which
|
makes it easy to install them side-by-side with the regular riscv-tools, which
|
||||||
are using the name prefix `riscv64-unknown-elf-` by default.
|
are using the name prefix `riscv64-unknown-elf-` by default.
|
||||||
|
|
||||||
*Note: These instructions are for git rev 4bcd4f5 (2015-12-14) of riscv-gnu-toolchain.*
|
*Note: These instructions are for git rev 06c957a (2016-01-20) of riscv-gnu-toolchain.*
|
||||||
|
|
||||||
|
|
||||||
Evaluation: Timing and Utilization on Xilinx 7-Series FPGAs
|
Evaluation: Timing and Utilization on Xilinx 7-Series FPGAs
|
||||||
|
|
|
@ -15,6 +15,11 @@
|
||||||
# undef ENABLE_RVTST
|
# undef ENABLE_RVTST
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Only save registers in IRQ wrapper that are to be saved by the caller in
|
||||||
|
// the RISC-V ABI, with the exception of the stack pointer. The IRQ handler
|
||||||
|
// will save the rest if necessary. I.e. skip x3, x4, x8, x9, and x18-x27.
|
||||||
|
#undef ENABLE_FASTIRQ
|
||||||
|
|
||||||
#include "custom_ops.S"
|
#include "custom_ops.S"
|
||||||
|
|
||||||
.section .text
|
.section .text
|
||||||
|
@ -58,6 +63,23 @@ irq_vec:
|
||||||
getq x2, q3
|
getq x2, q3
|
||||||
sw x2, 2*4(x1)
|
sw x2, 2*4(x1)
|
||||||
|
|
||||||
|
#ifdef ENABLE_FASTIRQ
|
||||||
|
sw x5, 5*4(x1)
|
||||||
|
sw x6, 6*4(x1)
|
||||||
|
sw x7, 7*4(x1)
|
||||||
|
sw x10, 10*4(x1)
|
||||||
|
sw x11, 11*4(x1)
|
||||||
|
sw x12, 12*4(x1)
|
||||||
|
sw x13, 13*4(x1)
|
||||||
|
sw x14, 14*4(x1)
|
||||||
|
sw x15, 15*4(x1)
|
||||||
|
sw x16, 16*4(x1)
|
||||||
|
sw x17, 17*4(x1)
|
||||||
|
sw x28, 28*4(x1)
|
||||||
|
sw x29, 29*4(x1)
|
||||||
|
sw x30, 30*4(x1)
|
||||||
|
sw x31, 31*4(x1)
|
||||||
|
#else
|
||||||
sw x3, 3*4(x1)
|
sw x3, 3*4(x1)
|
||||||
sw x4, 4*4(x1)
|
sw x4, 4*4(x1)
|
||||||
sw x5, 5*4(x1)
|
sw x5, 5*4(x1)
|
||||||
|
@ -87,9 +109,30 @@ irq_vec:
|
||||||
sw x29, 29*4(x1)
|
sw x29, 29*4(x1)
|
||||||
sw x30, 30*4(x1)
|
sw x30, 30*4(x1)
|
||||||
sw x31, 31*4(x1)
|
sw x31, 31*4(x1)
|
||||||
|
#endif
|
||||||
|
|
||||||
#else // ENABLE_QREGS
|
#else // ENABLE_QREGS
|
||||||
|
|
||||||
|
#ifdef ENABLE_FASTIRQ
|
||||||
|
sw gp, 0*4+0x200(zero)
|
||||||
|
sw x1, 1*4+0x200(zero)
|
||||||
|
sw x2, 2*4+0x200(zero)
|
||||||
|
sw x5, 5*4+0x200(zero)
|
||||||
|
sw x6, 6*4+0x200(zero)
|
||||||
|
sw x7, 7*4+0x200(zero)
|
||||||
|
sw x10, 10*4+0x200(zero)
|
||||||
|
sw x11, 11*4+0x200(zero)
|
||||||
|
sw x12, 12*4+0x200(zero)
|
||||||
|
sw x13, 13*4+0x200(zero)
|
||||||
|
sw x14, 14*4+0x200(zero)
|
||||||
|
sw x15, 15*4+0x200(zero)
|
||||||
|
sw x16, 16*4+0x200(zero)
|
||||||
|
sw x17, 17*4+0x200(zero)
|
||||||
|
sw x28, 28*4+0x200(zero)
|
||||||
|
sw x29, 29*4+0x200(zero)
|
||||||
|
sw x30, 30*4+0x200(zero)
|
||||||
|
sw x31, 31*4+0x200(zero)
|
||||||
|
#else
|
||||||
sw gp, 0*4+0x200(zero)
|
sw gp, 0*4+0x200(zero)
|
||||||
sw x1, 1*4+0x200(zero)
|
sw x1, 1*4+0x200(zero)
|
||||||
sw x2, 2*4+0x200(zero)
|
sw x2, 2*4+0x200(zero)
|
||||||
|
@ -122,6 +165,7 @@ irq_vec:
|
||||||
sw x29, 29*4+0x200(zero)
|
sw x29, 29*4+0x200(zero)
|
||||||
sw x30, 30*4+0x200(zero)
|
sw x30, 30*4+0x200(zero)
|
||||||
sw x31, 31*4+0x200(zero)
|
sw x31, 31*4+0x200(zero)
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // ENABLE_QREGS
|
#endif // ENABLE_QREGS
|
||||||
|
|
||||||
|
@ -160,6 +204,23 @@ irq_vec:
|
||||||
lw x2, 2*4(x1)
|
lw x2, 2*4(x1)
|
||||||
setq q2, x2
|
setq q2, x2
|
||||||
|
|
||||||
|
#ifdef ENABLE_FASTIRQ
|
||||||
|
lw x5, 5*4(x1)
|
||||||
|
lw x6, 6*4(x1)
|
||||||
|
lw x7, 7*4(x1)
|
||||||
|
lw x10, 10*4(x1)
|
||||||
|
lw x11, 11*4(x1)
|
||||||
|
lw x12, 12*4(x1)
|
||||||
|
lw x13, 13*4(x1)
|
||||||
|
lw x14, 14*4(x1)
|
||||||
|
lw x15, 15*4(x1)
|
||||||
|
lw x16, 16*4(x1)
|
||||||
|
lw x17, 17*4(x1)
|
||||||
|
lw x28, 28*4(x1)
|
||||||
|
lw x29, 29*4(x1)
|
||||||
|
lw x30, 30*4(x1)
|
||||||
|
lw x31, 31*4(x1)
|
||||||
|
#else
|
||||||
lw x3, 3*4(x1)
|
lw x3, 3*4(x1)
|
||||||
lw x4, 4*4(x1)
|
lw x4, 4*4(x1)
|
||||||
lw x5, 5*4(x1)
|
lw x5, 5*4(x1)
|
||||||
|
@ -189,6 +250,7 @@ irq_vec:
|
||||||
lw x29, 29*4(x1)
|
lw x29, 29*4(x1)
|
||||||
lw x30, 30*4(x1)
|
lw x30, 30*4(x1)
|
||||||
lw x31, 31*4(x1)
|
lw x31, 31*4(x1)
|
||||||
|
#endif
|
||||||
|
|
||||||
getq x1, q1
|
getq x1, q1
|
||||||
getq x2, q2
|
getq x2, q2
|
||||||
|
@ -201,6 +263,26 @@ irq_vec:
|
||||||
sbreak
|
sbreak
|
||||||
1:
|
1:
|
||||||
|
|
||||||
|
#ifdef ENABLE_FASTIRQ
|
||||||
|
lw gp, 0*4+0x200(zero)
|
||||||
|
lw x1, 1*4+0x200(zero)
|
||||||
|
lw x2, 2*4+0x200(zero)
|
||||||
|
lw x5, 5*4+0x200(zero)
|
||||||
|
lw x6, 6*4+0x200(zero)
|
||||||
|
lw x7, 7*4+0x200(zero)
|
||||||
|
lw x10, 10*4+0x200(zero)
|
||||||
|
lw x11, 11*4+0x200(zero)
|
||||||
|
lw x12, 12*4+0x200(zero)
|
||||||
|
lw x13, 13*4+0x200(zero)
|
||||||
|
lw x14, 14*4+0x200(zero)
|
||||||
|
lw x15, 15*4+0x200(zero)
|
||||||
|
lw x16, 16*4+0x200(zero)
|
||||||
|
lw x17, 17*4+0x200(zero)
|
||||||
|
lw x28, 28*4+0x200(zero)
|
||||||
|
lw x29, 29*4+0x200(zero)
|
||||||
|
lw x30, 30*4+0x200(zero)
|
||||||
|
lw x31, 31*4+0x200(zero)
|
||||||
|
#else
|
||||||
lw gp, 0*4+0x200(zero)
|
lw gp, 0*4+0x200(zero)
|
||||||
lw x1, 1*4+0x200(zero)
|
lw x1, 1*4+0x200(zero)
|
||||||
lw x2, 2*4+0x200(zero)
|
lw x2, 2*4+0x200(zero)
|
||||||
|
@ -233,6 +315,7 @@ irq_vec:
|
||||||
lw x29, 29*4+0x200(zero)
|
lw x29, 29*4+0x200(zero)
|
||||||
lw x30, 30*4+0x200(zero)
|
lw x30, 30*4+0x200(zero)
|
||||||
lw x31, 31*4+0x200(zero)
|
lw x31, 31*4+0x200(zero)
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // ENABLE_QREGS
|
#endif // ENABLE_QREGS
|
||||||
|
|
||||||
|
|
|
@ -353,7 +353,7 @@ module picorv32 #(
|
||||||
0: begin
|
0: begin
|
||||||
mem_addr <= mem_la_addr;
|
mem_addr <= mem_la_addr;
|
||||||
mem_wdata <= mem_la_wdata;
|
mem_wdata <= mem_la_wdata;
|
||||||
mem_wstrb <= mem_la_wstrb;
|
mem_wstrb <= mem_la_wstrb & {4{mem_la_write}};
|
||||||
if (mem_do_prefetch || mem_do_rinst) begin
|
if (mem_do_prefetch || mem_do_rinst) begin
|
||||||
current_insn_addr <= next_pc;
|
current_insn_addr <= next_pc;
|
||||||
end
|
end
|
||||||
|
@ -945,6 +945,7 @@ module picorv32 #(
|
||||||
latched_is_lh <= 0;
|
latched_is_lh <= 0;
|
||||||
latched_is_lb <= 0;
|
latched_is_lb <= 0;
|
||||||
pcpi_valid <= 0;
|
pcpi_valid <= 0;
|
||||||
|
pcpi_timeout <= 0;
|
||||||
irq_active <= 0;
|
irq_active <= 0;
|
||||||
irq_mask <= ~0;
|
irq_mask <= ~0;
|
||||||
next_irq_pending = 0;
|
next_irq_pending = 0;
|
||||||
|
|
Loading…
Reference in New Issue