Added BARREL_SHIFTER config parameter

This commit is contained in:
Clifford Wolf 2016-04-12 17:30:31 +02:00
parent 0e9bdd0082
commit 2fdafb9c16
4 changed files with 39 additions and 13 deletions

View File

@ -3,7 +3,7 @@ PicoRV32 - A Size-Optimized RISC-V CPU
======================================
PicoRV32 is a CPU core that implements the [RISC-V RV32IMC Instruction Set](http://riscv.org/).
It can be configured to be a RV32E, RV32I, RV32IC, RV32IM, or RV32IMC core, and optionally
It can be configured as RV32E, RV32I, RV32IC, RV32IM, or RV32IMC core, and optionally
contains a built-in interrupt controller.
Tools (gcc, binutils, etc..) can be obtained via the [RISC-V Website](http://riscv.org/download.html#tab_tools).
@ -29,7 +29,7 @@ PicoRV32 is free and open hardware licensed under the [ISC license](http://en.wi
Features and Typical Applications
---------------------------------
- Small (750-1700 LUTs in 7-Series Xilinx Architecture)
- Small (750-2000 LUTs in 7-Series Xilinx Architecture)
- High f<sub>max</sub> (250-450 MHz on 7-Series Xilinx FPGAs)
- Selectable native memory interface or AXI4-Lite master
- Optional IRQ support (using a simple custom ISA)
@ -170,6 +170,12 @@ of 4 bits and then shift in units of 1 bit. This speeds up shift operations,
but adds additional hardware. Set this parameter to 0 to disable the two-stage
shift to further reduce the size of the core.
#### BARREL_SHIFTER (default = 0)
By default shift operations are performed by successively shifting by a
small amount (see `TWO_STAGE_SHIFT` above). With this option set, a barrel
shifter is used instead instead.
#### TWO_CYCLE_COMPARE (default = 0)
This relaxes the longest data path a bit by adding an additional FF stage
@ -294,9 +300,15 @@ in 40 cycles and a `MULH[SU|U]` instruction will execute in 72 cycles.
When `ENABLE_DIV` is activated, then a `DIV[U]/REM[U]` instruction will
execute in 40 cycles.
Dhrystone benchmark results: 0.391 DMIPS/MHz (688 Dhrystones/Second/MHz)
When `BARREL_SHIFTER` is activated, a shift operation takes as long as
any other ALU operation.
For the Dhrystone benchmark the average CPI is 4.110.
The following dhrystone benchmark results are for a core with enabled
`ENABLE_MUL`, `ENABLE_DIV`, and `BARREL_SHIFTER` options.
Dhrystone benchmark results: 0.399 DMIPS/MHz (702 Dhrystones/Second/MHz)
For the Dhrystone benchmark the average CPI is 4.030.
PicoRV32 Native Memory Interface
@ -586,7 +598,7 @@ once in advance.
Evaluation: Timing and Utilization on Xilinx 7-Series FPGAs
-----------------------------------------------------------
The following evaluations have been performed with Vivado 2015.1.
The following evaluations have been performed with Vivado 2015.4.
#### Timing on Xilinx 7-Series FPGAs
@ -622,7 +634,7 @@ for the following three cores:
- **PicoRV32 (regular):** The `picorv32` module in its default configuration.
- **PicoRV32 (large):** The `picorv32` module with enabled PCPI, IRQ, MUL,
DIV, and COMPRESSED_ISA features.
DIV, BARREL_SHIFTER, and COMPRESSED_ISA features.
See `make area` in [scripts/vivado/](scripts/vivado/).

View File

@ -27,6 +27,7 @@ module testbench;
wire [3:0] mem_la_wstrb;
picorv32 #(
.BARREL_SHIFTER(1),
.ENABLE_MUL(1),
.ENABLE_DIV(1)
) uut (

View File

@ -43,6 +43,7 @@ module picorv32 #(
parameter [ 0:0] ENABLE_REGS_DUALPORT = 1,
parameter [ 0:0] LATCHED_MEM_RDATA = 0,
parameter [ 0:0] TWO_STAGE_SHIFT = 1,
parameter [ 0:0] BARREL_SHIFTER = 0,
parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
parameter [ 0:0] TWO_CYCLE_ALU = 0,
parameter [ 0:0] COMPRESSED_ISA = 0,
@ -889,6 +890,7 @@ module picorv32 #(
reg alu_wait, alu_wait_2;
reg [31:0] alu_add_sub;
reg [31:0] alu_shl, alu_shr;
reg alu_eq, alu_ltu, alu_lts;
generate if (TWO_CYCLE_ALU) begin
@ -897,6 +899,8 @@ module picorv32 #(
alu_eq <= reg_op1 == reg_op2;
alu_lts <= $signed(reg_op1) < $signed(reg_op2);
alu_ltu <= reg_op1 < reg_op2;
alu_shl <= reg_op1 << reg_op2[4:0];
alu_shr <= $signed({instr_sra || instr_srai ? reg_op1[31] : 1'b0, reg_op1}) >>> reg_op2[4:0];
end
end else begin
always @* begin
@ -904,6 +908,8 @@ module picorv32 #(
alu_eq = reg_op1 == reg_op2;
alu_lts = $signed(reg_op1) < $signed(reg_op2);
alu_ltu = reg_op1 < reg_op2;
alu_shl = reg_op1 << reg_op2[4:0];
alu_shr = $signed({instr_sra || instr_srai ? reg_op1[31] : 1'b0, reg_op1}) >>> reg_op2[4:0];
end
end endgenerate
@ -938,6 +944,10 @@ module picorv32 #(
alu_out = reg_op1 | reg_op2;
instr_andi || instr_and:
alu_out = reg_op1 & reg_op2;
BARREL_SHIFTER && (instr_sll || instr_slli):
alu_out = alu_shl;
BARREL_SHIFTER && (instr_srl || instr_srli || instr_sra || instr_srai):
alu_out = alu_shr;
endcase
end
@ -1208,16 +1218,16 @@ module picorv32 #(
cpu_state <= cpu_state_ldmem;
mem_do_rinst <= 1;
end
is_slli_srli_srai: begin
is_slli_srli_srai && !BARREL_SHIFTER: begin
`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
reg_sh <= decoded_rs2;
cpu_state <= cpu_state_shift;
end
is_jalr_addi_slti_sltiu_xori_ori_andi: begin
is_jalr_addi_slti_sltiu_xori_ori_andi, is_slli_srli_srai && BARREL_SHIFTER: begin
`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
reg_op2 <= decoded_imm;
reg_op2 <= is_slli_srli_srai && BARREL_SHIFTER ? decoded_rs2 : decoded_imm;
if (TWO_CYCLE_ALU)
alu_wait <= 1;
else
@ -1237,7 +1247,7 @@ module picorv32 #(
cpu_state <= cpu_state_stmem;
mem_do_rinst <= 1;
end
is_sll_srl_sra: begin
is_sll_srl_sra && !BARREL_SHIFTER: begin
cpu_state <= cpu_state_shift;
end
default: begin
@ -1284,7 +1294,7 @@ module picorv32 #(
cpu_state <= cpu_state_stmem;
mem_do_rinst <= 1;
end
is_sll_srl_sra: begin
is_sll_srl_sra && !BARREL_SHIFTER: begin
cpu_state <= cpu_state_shift;
end
default: begin
@ -1299,8 +1309,6 @@ module picorv32 #(
end
cpu_state_exec: begin
latched_store <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
latched_branch <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
reg_out <= reg_pc + decoded_imm;
if ((TWO_CYCLE_ALU || TWO_CYCLE_COMPARE) && (alu_wait || alu_wait_2)) begin
mem_do_rinst <= mem_do_prefetch && !alu_wait_2;
@ -1308,6 +1316,8 @@ module picorv32 #(
end else
if (is_beq_bne_blt_bge_bltu_bgeu) begin
latched_rd <= 0;
latched_store <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
latched_branch <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
if (mem_done)
cpu_state <= cpu_state_fetch;
if (TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0) begin
@ -1704,6 +1714,7 @@ module picorv32_axi #(
parameter [ 0:0] ENABLE_REGS_16_31 = 1,
parameter [ 0:0] ENABLE_REGS_DUALPORT = 1,
parameter [ 0:0] TWO_STAGE_SHIFT = 1,
parameter [ 0:0] BARREL_SHIFTER = 0,
parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
parameter [ 0:0] TWO_CYCLE_ALU = 0,
parameter [ 0:0] COMPRESSED_ISA = 0,
@ -1803,6 +1814,7 @@ module picorv32_axi #(
.ENABLE_REGS_16_31 (ENABLE_REGS_16_31 ),
.ENABLE_REGS_DUALPORT(ENABLE_REGS_DUALPORT),
.TWO_STAGE_SHIFT (TWO_STAGE_SHIFT ),
.BARREL_SHIFTER (BARREL_SHIFTER ),
.TWO_CYCLE_COMPARE (TWO_CYCLE_COMPARE ),
.TWO_CYCLE_ALU (TWO_CYCLE_ALU ),
.COMPRESSED_ISA (COMPRESSED_ISA ),

View File

@ -105,6 +105,7 @@ module top_large (
);
picorv32 #(
.COMPRESSED_ISA(1),
.BARREL_SHIFTER(1),
.ENABLE_PCPI(1),
.ENABLE_MUL(1),
.ENABLE_IRQ(1)