Cleanups in PCPI interface

This commit is contained in:
Clifford Wolf 2015-06-28 15:41:55 +02:00
parent e34dcf77e3
commit 21157b8f1d
2 changed files with 133 additions and 73 deletions

101
README.md
View File

@ -79,8 +79,8 @@ Simply copy this file into your project.
#### Makefile and testbench.v #### Makefile and testbench.v
A basic test environment run `make test`, `make test_sp` and/or `make test_axi` to run A basic test environment. Run `make test`, `make test_sp` and/or `make test_axi` to run
the test firmware in different environments. the test firmware in different hardware configurations.
#### firmware/ #### firmware/
@ -205,13 +205,99 @@ For the Dhrystone benchmark the average CPI is 4.167.
PicoRV32 Native Memory Interface PicoRV32 Native Memory Interface
-------------------------------- --------------------------------
This section is under construction. The native memory interface of PicoRV32 is a simple valid-ready interface
that can run one memory transfer at a time:
output mem_valid
output mem_instr
input mem_ready
output [31:0] mem_addr
output [31:0] mem_wdata
output [ 3:0] mem_wstrb
input [31:0] mem_rdata
The core initiates a memory transfer by asserting `mem_valid`. The valid
signal stays high until the peer asserts `mem_ready`. All core outputs
are stable over the `mem_valid` period.
#### Read Transfer
In a read transfer, `mem_wstrb` has the value 0 and `mem_wdata` is unused.
The memory reads the address `mem_addr` and makes the read value available on
`mem_rdata` in the cycle in which `mem_ready` is high.
There is no need for an external wait cycle. The memory read can be implemented
asynchronously with `mem_ready` going high in the same cycle as `mem_valid`, or
`mem_ready` being tied to constant 1.
#### Write Transfer
In a write transfer, `mem_wstrb` is not 0 and `mem_rdata` is unused. The memory
writes the data at `mem_wdata` to the address `mem_addr` and acknowledges the
transfer by asserting `mem_ready`.
There is no need for an external wait cycle. The memory can acknowledge the
write immediately with `mem_ready` going high in the same cycle as
`mem_valid`, or `mem_ready` being tied to constant 1.
#### Look-Ahead Interface
The PicoRV32 core also provides a "Look-Ahead Memory Interface" that exposes
all information about the next memory transfer one clock cycle earlier than the
normal interface.
output mem_la_read
output mem_la_write
output [31:0] mem_la_addr
output [31:0] mem_la_wdata
output [ 3:0] mem_la_wstrb
In the clock cycle before `mem_valid` goes high, this interface will output a
pulse on `mem_la_read` or `mem_la_write` to indicate the start of a read or
write transaction in the next clock cycle.
*Note: The signals `mem_la_read`, `mem_la_write`, and `mem_la_addr` are driven
by combinational circuits within the PicoRV32 core. It might be harder to
achieve timing closure with the look-ahead interface than with the normal
memory interface described above.*
Pico Co-Processor Interface (PCPI) Pico Co-Processor Interface (PCPI)
---------------------------------- ----------------------------------
This section is under construction. The Pico Co-Processor Interface (PCPI) can be used to implement non-branching
instructions in external cores:
output pcpi_valid
output [31:0] pcpi_insn
output [31:0] pcpi_rs1
output [31:0] pcpi_rs2
input pcpi_wr
input [31:0] pcpi_rd
input pcpi_wait
input pcpi_ready
When an unsupported instruction is encountered and the PCPI feature is
activated (see ENABLE_PCPI above), then `pcpi_valid` is asserted, the
instruction word itself is output on `pcpi_insn`, the `rs1` and `rs2`
fields are decoded and the values in those registers are output
on `pcpi_rs1` and `pcpi_rs2`.
An external PCPI core can then decode the instruction, execute it, and assert
`pcpi_ready` when execution of the instruction is finished. Optionally a
result value can be written to `pcpi_rd` and `pcpi_wr` asserted. The
PicoRV32 core will then decode the `rd` field of the instruction and
write the value from `pcpi_rd` to the respective register.
When no external PCPI core acknowledges the instruction within 16 clock
cycles, then an illegal instruction exception is raised and the respective
interrupt handler is called. A PCPI core that needs more than a couple of
cycles to execute an instruction should assert `pcpi_wait` as soon as
the instruction has been decoded successfully and keep it asserted until
it asserts `pcpi_ready`. This will prevent the PicoRV32 core from raising
an illegal instruction exception.
Custom Instructions for IRQ Handling Custom Instructions for IRQ Handling
@ -408,10 +494,3 @@ enabled PCPI, IRQ and MUL features.
*Note: Most of the size reduction in the "small" core comes from eliminating *Note: Most of the size reduction in the "small" core comes from eliminating
the counter instructions, not from reducing the size of the register file.* the counter instructions, not from reducing the size of the register file.*
Todos:
------
- Optional support for compressed ISA
- Improved documentation and examples

View File

@ -57,13 +57,11 @@ module picorv32 #(
output reg [ 3:0] mem_la_wstrb, output reg [ 3:0] mem_la_wstrb,
// Pico Co-Processor Interface (PCPI) // Pico Co-Processor Interface (PCPI)
output reg pcpi_insn_valid, output reg pcpi_valid,
output reg [31:0] pcpi_insn, output reg [31:0] pcpi_insn,
output reg pcpi_rs1_valid,
output [31:0] pcpi_rs1, output [31:0] pcpi_rs1,
output reg pcpi_rs2_valid,
output [31:0] pcpi_rs2, output [31:0] pcpi_rs2,
input pcpi_rd_valid, input pcpi_wr,
input [31:0] pcpi_rd, input [31:0] pcpi_rd,
input pcpi_wait, input pcpi_wait,
input pcpi_ready, input pcpi_ready,
@ -100,12 +98,12 @@ module picorv32 #(
// Internal PCPI Cores // Internal PCPI Cores
wire pcpi_mul_rd_valid; wire pcpi_mul_wr;
wire [31:0] pcpi_mul_rd; wire [31:0] pcpi_mul_rd;
wire pcpi_mul_wait; wire pcpi_mul_wait;
wire pcpi_mul_ready; wire pcpi_mul_ready;
reg pcpi_int_rd_valid; reg pcpi_int_wr;
reg [31:0] pcpi_int_rd; reg [31:0] pcpi_int_rd;
reg pcpi_int_wait; reg pcpi_int_wait;
reg pcpi_int_ready; reg pcpi_int_ready;
@ -114,26 +112,24 @@ module picorv32 #(
picorv32_pcpi_mul pcpi_mul ( picorv32_pcpi_mul pcpi_mul (
.clk (clk ), .clk (clk ),
.resetn (resetn ), .resetn (resetn ),
.pcpi_insn_valid(pcpi_insn_valid ), .pcpi_valid(pcpi_valid ),
.pcpi_insn (pcpi_insn ), .pcpi_insn (pcpi_insn ),
.pcpi_rs1_valid (pcpi_rs1_valid ),
.pcpi_rs1 (pcpi_rs1 ), .pcpi_rs1 (pcpi_rs1 ),
.pcpi_rs2_valid (pcpi_rs2_valid ),
.pcpi_rs2 (pcpi_rs2 ), .pcpi_rs2 (pcpi_rs2 ),
.pcpi_rd_valid (pcpi_mul_rd_valid), .pcpi_wr (pcpi_mul_wr ),
.pcpi_rd (pcpi_mul_rd ), .pcpi_rd (pcpi_mul_rd ),
.pcpi_wait (pcpi_mul_wait ), .pcpi_wait (pcpi_mul_wait ),
.pcpi_ready (pcpi_mul_ready ) .pcpi_ready(pcpi_mul_ready )
); );
end else begin end else begin
assign pcpi_mul_rd_valid = 0; assign pcpi_mul_wr = 0;
assign pcpi_mul_rd = 1'bx; assign pcpi_mul_rd = 1'bx;
assign pcpi_mul_wait = 0; assign pcpi_mul_wait = 0;
assign pcpi_mul_ready = 0; assign pcpi_mul_ready = 0;
end endgenerate end endgenerate
always @* begin always @* begin
pcpi_int_rd_valid = 0; pcpi_int_wr = 0;
pcpi_int_rd = 1'bx; pcpi_int_rd = 1'bx;
pcpi_int_wait = |{ENABLE_PCPI && pcpi_wait, ENABLE_MUL && pcpi_mul_wait}; pcpi_int_wait = |{ENABLE_PCPI && pcpi_wait, ENABLE_MUL && pcpi_mul_wait};
pcpi_int_ready = |{ENABLE_PCPI && pcpi_ready, ENABLE_MUL && pcpi_mul_ready}; pcpi_int_ready = |{ENABLE_PCPI && pcpi_ready, ENABLE_MUL && pcpi_mul_ready};
@ -141,11 +137,11 @@ module picorv32 #(
(* parallel_case *) (* parallel_case *)
case (1'b1) case (1'b1)
ENABLE_PCPI && pcpi_ready: begin ENABLE_PCPI && pcpi_ready: begin
pcpi_int_rd_valid = pcpi_rd_valid; pcpi_int_wr = pcpi_wr;
pcpi_int_rd = pcpi_rd; pcpi_int_rd = pcpi_rd;
end end
ENABLE_MUL && pcpi_mul_ready: begin ENABLE_MUL && pcpi_mul_ready: begin
pcpi_int_rd_valid = pcpi_mul_rd_valid; pcpi_int_wr = pcpi_mul_wr;
pcpi_int_rd = pcpi_mul_rd; pcpi_int_rd = pcpi_mul_rd;
end end
endcase endcase
@ -570,7 +566,7 @@ module picorv32 #(
reg_alu_out <= alu_out; reg_alu_out <= alu_out;
if (WITH_PCPI) begin if (WITH_PCPI) begin
if (pcpi_insn_valid && !pcpi_int_wait) begin if (pcpi_valid && !pcpi_int_wait) begin
if (pcpi_timeout_counter) if (pcpi_timeout_counter)
pcpi_timeout_counter <= pcpi_timeout_counter - 1; pcpi_timeout_counter <= pcpi_timeout_counter - 1;
end else end else
@ -609,9 +605,7 @@ module picorv32 #(
latched_is_lu <= 0; latched_is_lu <= 0;
latched_is_lh <= 0; latched_is_lh <= 0;
latched_is_lb <= 0; latched_is_lb <= 0;
pcpi_insn_valid <= 0; pcpi_valid <= 0;
pcpi_rs1_valid <= 0;
pcpi_rs2_valid <= 0;
irq_active <= 0; irq_active <= 0;
irq_mask <= ~0; irq_mask <= ~0;
next_irq_pending = 0; next_irq_pending = 0;
@ -711,20 +705,16 @@ module picorv32 #(
`endif `endif
if (instr_trap) begin if (instr_trap) begin
if (WITH_PCPI) begin if (WITH_PCPI) begin
pcpi_rs1_valid <= 1;
pcpi_insn_valid <= 1;
reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0; reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
if (ENABLE_REGS_DUALPORT) begin if (ENABLE_REGS_DUALPORT) begin
pcpi_rs2_valid <= 1; pcpi_valid <= 1;
reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
if (pcpi_int_ready) begin if (pcpi_int_ready) begin
mem_do_rinst <= 1; mem_do_rinst <= 1;
pcpi_insn_valid <= 0; pcpi_valid <= 0;
pcpi_rs1_valid <= 0;
pcpi_rs2_valid <= 0;
reg_out <= pcpi_int_rd; reg_out <= pcpi_int_rd;
latched_store <= pcpi_int_rd_valid; latched_store <= pcpi_int_wr;
cpu_state <= cpu_state_fetch; cpu_state <= cpu_state_fetch;
end else end else
if (pcpi_timeout) begin if (pcpi_timeout) begin
@ -842,15 +832,13 @@ module picorv32 #(
`endif `endif
reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
if (WITH_PCPI && pcpi_insn_valid) begin if (WITH_PCPI && instr_trap) begin
pcpi_rs2_valid <= 1; pcpi_valid <= 1;
if (pcpi_int_ready) begin if (pcpi_int_ready) begin
mem_do_rinst <= 1; mem_do_rinst <= 1;
pcpi_insn_valid <= 0; pcpi_valid <= 0;
pcpi_rs1_valid <= 0;
pcpi_rs2_valid <= 0;
reg_out <= pcpi_int_rd; reg_out <= pcpi_int_rd;
latched_store <= pcpi_int_rd_valid; latched_store <= pcpi_int_wr;
cpu_state <= cpu_state_fetch; cpu_state <= cpu_state_fetch;
end else end else
if (pcpi_timeout) begin if (pcpi_timeout) begin
@ -1030,13 +1018,11 @@ module picorv32_pcpi_mul #(
) ( ) (
input clk, resetn, input clk, resetn,
input pcpi_insn_valid, input pcpi_valid,
input [31:0] pcpi_insn, input [31:0] pcpi_insn,
input pcpi_rs1_valid,
input [31:0] pcpi_rs1, input [31:0] pcpi_rs1,
input pcpi_rs2_valid,
input [31:0] pcpi_rs2, input [31:0] pcpi_rs2,
output reg pcpi_rd_valid, output reg pcpi_wr,
output reg [31:0] pcpi_rd, output reg [31:0] pcpi_rd,
output reg pcpi_wait, output reg pcpi_wait,
output reg pcpi_ready output reg pcpi_ready
@ -1056,8 +1042,7 @@ module picorv32_pcpi_mul #(
instr_mulhsu <= 0; instr_mulhsu <= 0;
instr_mulhu <= 0; instr_mulhu <= 0;
if (resetn && pcpi_insn_valid && pcpi_rs1_valid && pcpi_rs2_valid && if (resetn && pcpi_valid && pcpi_insn[6:0] == 7'b0110011 && pcpi_insn[31:25] == 7'b0000001) begin
pcpi_insn[6:0] == 7'b0110011 && pcpi_insn[31:25] == 7'b0000001) begin
case (pcpi_insn[14:12]) case (pcpi_insn[14:12])
3'b000: instr_mul <= 1; 3'b000: instr_mul <= 1;
3'b001: instr_mulh <= 1; 3'b001: instr_mulh <= 1;
@ -1133,12 +1118,12 @@ module picorv32_pcpi_mul #(
end end
always @(posedge clk) begin always @(posedge clk) begin
pcpi_rd_valid <= 0; pcpi_wr <= 0;
pcpi_ready <= 0; pcpi_ready <= 0;
if (mul_finish) begin if (mul_finish) begin
pcpi_rd <= instr_any_mulh ? rd >> 32 : rd; pcpi_wr <= 1;
pcpi_rd_valid <= 1;
pcpi_ready <= 1; pcpi_ready <= 1;
pcpi_rd <= instr_any_mulh ? rd >> 32 : rd;
end end
end end
endmodule endmodule
@ -1187,13 +1172,11 @@ module picorv32_axi #(
input [31:0] mem_axi_rdata, input [31:0] mem_axi_rdata,
// Pico Co-Processor Interface (PCPI) // Pico Co-Processor Interface (PCPI)
output pcpi_insn_valid, output pcpi_valid,
output [31:0] pcpi_insn, output [31:0] pcpi_insn,
output pcpi_rs1_valid,
output [31:0] pcpi_rs1, output [31:0] pcpi_rs1,
output pcpi_rs2_valid,
output [31:0] pcpi_rs2, output [31:0] pcpi_rs2,
input pcpi_rd_valid, input pcpi_wr,
input [31:0] pcpi_rd, input [31:0] pcpi_rd,
input pcpi_wait, input pcpi_wait,
input pcpi_ready, input pcpi_ready,
@ -1262,16 +1245,14 @@ module picorv32_axi #(
.mem_ready(mem_ready), .mem_ready(mem_ready),
.mem_rdata(mem_rdata), .mem_rdata(mem_rdata),
.pcpi_insn_valid(pcpi_insn_valid), .pcpi_valid(pcpi_valid),
.pcpi_insn (pcpi_insn ), .pcpi_insn (pcpi_insn ),
.pcpi_rs1_valid (pcpi_rs1_valid ),
.pcpi_rs1 (pcpi_rs1 ), .pcpi_rs1 (pcpi_rs1 ),
.pcpi_rs2_valid (pcpi_rs2_valid ),
.pcpi_rs2 (pcpi_rs2 ), .pcpi_rs2 (pcpi_rs2 ),
.pcpi_rd_valid (pcpi_rd_valid ), .pcpi_wr (pcpi_wr ),
.pcpi_rd (pcpi_rd ), .pcpi_rd (pcpi_rd ),
.pcpi_wait (pcpi_wait ), .pcpi_wait (pcpi_wait ),
.pcpi_ready (pcpi_ready ), .pcpi_ready(pcpi_ready),
.irq(irq), .irq(irq),
.eoi(eoi) .eoi(eoi)