First plausibly working AMOs. Add AMOs to instruction timings list
This commit is contained in:
parent
5c098866f2
commit
df658d86ff
11599
doc/hazard3.pdf
11599
doc/hazard3.pdf
File diff suppressed because it is too large
Load Diff
|
@ -32,7 +32,7 @@ All timings are given assuming perfect bus behaviour (no downstream bus stalls).
|
|||
| `lui rd, imm` | 1 |
|
||||
| `auipc rd, imm` | 1 |
|
||||
3+| Control Transfer
|
||||
| `jal rd, label` | 2footnote:unaligned_branch[A branch to a 32-bit instruction which is not 32-bit-aligned requires one additional cycle, because two naturally-aligned bus cycles are required to fetch the target instruction.]|
|
||||
| `jal rd, label` | 2footnote:unaligned_branch[A branch to a 32-bit instruction which is not 32-bit-aligned requires one additional cycle, because two naturally aligned bus cycles are required to fetch the target instruction.]|
|
||||
| `jalr rd, rs1, imm` | 2footnote:unaligned_branch[] |
|
||||
| `beq rs1, rs2, label`| 1 or 2footnote:unaligned_branch[] | 1 if nontaken, 2 if taken.
|
||||
| `bne rs1, rs2, label`| 1 or 2footnote:unaligned_branch[] | 1 if nontaken, 2 if taken.
|
||||
|
@ -78,12 +78,20 @@ Timings assume the core is configured with `MULDIV_UNROLL = 2` and `MUL_FAST = 1
|
|||
|===
|
||||
| Instruction | Cycles | Note
|
||||
3+| Load-Reserved/Store-Conditional
|
||||
| `lr.w rd, (rs1)` | 1 or 2 | 2 if next instruction is dependentfootnote:data_dependency[], or is an `lr.w`, `sc.w` or `amo*`.footnote:exclusive_pipelining[A pipeline bubble is inserted between `lr.w`/`sc.w` and an immediately-following `lr.w`/`sc.w`/`amo*`, because the AHB5 bus standard does not permit pipelined exclusive accesses. A stall would be inserted between `lr.w` and `sc.w` anyhow, so the local monitor can be updated based on `lr.w` data phase in time to suppress `sc.w` data phase.]
|
||||
| `sc.w rd, rs2, (rs1)` | 1 or 2 | 2 if next instruction is an `lr.w`, `sc.w` or `amo*`.footnote:exclusive_pipelining[]
|
||||
| `lr.w rd, (rs1)` | 1 or 2 | 2 if next instruction is dependentfootnote:data_dependency[], or is an `lr.w`, `sc.w` or `amo*.w`.footnote:exclusive_pipelining[A pipeline bubble is inserted between `lr.w`/`sc.w` and an immediately following `lr.w`/`sc.w`/`amo*.w`, because the AHB5 bus standard does not permit pipelined exclusive accesses. A stall would be inserted between `lr.w` and `sc.w` anyhow, so the local monitor can be updated based on the `lr.w` data phase in time to suppress the `sc.w` address phase.]
|
||||
| `sc.w rd, rs2, (rs1)` | 1 or 2 | 2 if next instruction is an `lr.w`, `sc.w` or `amo*.w`.footnote:exclusive_pipelining[]
|
||||
3+| Atomic Memory Operations
|
||||
|`amoswap.w rd, rs2, (rs1)` | 4+ | 4 per attempt. Multiple attempts if reservation is lost.footnote:amo_timing[AMOs are issued as a paired exclusive read and exclusive write on the bus, at the maximum speed of 2 cycles per access, since the bus does not permit pipelining of exclusive reads/writes. If the write phase fails due to the global monitor reporting a lost reservation, the instruction retries at a rate of 4 cycles per attempt until it succeeds. If the read reservation is refused by the global monitor, the instruction generates a Store/AMO Fault exception, to avoid an infinite loop.]
|
||||
|`amoadd.w rd, rs2, (rs1)` | 4+ | 4 per attempt. Multiple attempts if reservation is lost.footnote:amo_timing[]
|
||||
|`amoxor.w rd, rs2, (rs1)` | 4+ | 4 per attempt. Multiple attempts if reservation is lost.footnote:amo_timing[]
|
||||
|`amoand.w rd, rs2, (rs1)` | 4+ | 4 per attempt. Multiple attempts if reservation is lost.footnote:amo_timing[]
|
||||
|`amoor.w rd, rs2, (rs1)` | 4+ | 4 per attempt. Multiple attempts if reservation is lost.footnote:amo_timing[]
|
||||
|`amomin.w rd, rs2, (rs1)` | 4+ | 4 per attempt. Multiple attempts if reservation is lost.footnote:amo_timing[]
|
||||
|`amomax.w rd, rs2, (rs1)` | 4+ | 4 per attempt. Multiple attempts if reservation is lost.footnote:amo_timing[]
|
||||
|`amominu.w rd, rs2, (rs1)` | 4+ | 4 per attempt. Multiple attempts if reservation is lost.footnote:amo_timing[]
|
||||
|`amomaxu.w rd, rs2, (rs1)` | 4+ | 4 per attempt. Multiple attempts if reservation is lost.footnote:amo_timing[]
|
||||
|===
|
||||
|
||||
AMOs are currently not supported.
|
||||
|
||||
=== C Extension
|
||||
|
||||
All C extension 16-bit instructions on Hazard3 are aliases of base RV32I instructions. They perform identically to their 32-bit counterparts.
|
||||
|
|
|
@ -403,7 +403,7 @@ hazard3_alu #(
|
|||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
x_amo_phase <= 3'h0;
|
||||
end else if (|EXTENSION_A && !x_stall) begin
|
||||
end else if (|EXTENSION_A && (bus_aph_ready_d || bus_dph_ready_d || m_trap_enter_vld)) begin
|
||||
if (!d_memop_is_amo) begin
|
||||
x_amo_phase <= 3'h0;
|
||||
end else if (x_stall_on_raw) begin
|
||||
|
@ -413,7 +413,7 @@ always @ (posedge clk or negedge rst_n) begin
|
|||
assert(x_amo_phase == 3'h0);
|
||||
`endif
|
||||
x_amo_phase <= 3'h0;
|
||||
end else if (m_trap_enter_soon) begin
|
||||
end else if (m_trap_enter_vld) begin
|
||||
x_amo_phase <= 3'h0;
|
||||
end else if (x_amo_phase == 3'h1 && !bus_dph_exokay_d) begin
|
||||
// Load reserve fail indicates the memory region does not support
|
||||
|
@ -449,7 +449,11 @@ wire x_unaligned_addr = d_memop != MEMOP_NONE && (
|
|||
);
|
||||
|
||||
// Always query the global monitor, except for store-conditional suppressed by local monitor.
|
||||
assign bus_aph_excl_d = |EXTENSION_A && (d_memop == MEMOP_LR_W || d_memop == MEMOP_SC_W);
|
||||
assign bus_aph_excl_d = |EXTENSION_A && (
|
||||
d_memop == MEMOP_LR_W ||
|
||||
d_memop == MEMOP_SC_W ||
|
||||
d_memop_is_amo
|
||||
);
|
||||
|
||||
always @ (*) begin
|
||||
// Need to be careful not to use anything hready-sourced to gate htrans!
|
||||
|
@ -681,7 +685,8 @@ always @ (posedge clk or negedge rst_n) begin
|
|||
if (!m_stall) begin
|
||||
{xm_rs1, xm_rs2, xm_rd} <= {d_rs1, d_rs2, d_rd};
|
||||
// If the transfer is unaligned, make sure it is completely NOP'd on the bus
|
||||
xm_memop <= d_memop | {x_unaligned_addr, 3'h0};
|
||||
// Likewise, AMOs are handled entirely in X (well it's ambiguous; anyway different logic & stalls)
|
||||
xm_memop <= x_unaligned_addr || d_memop_is_amo ? MEMOP_NONE : d_memop;
|
||||
xm_except <= x_except;
|
||||
xm_wfi <= d_wfi;
|
||||
if (x_stall || m_trap_enter_soon) begin
|
||||
|
@ -706,6 +711,8 @@ always @ (posedge clk or negedge rst_n) begin
|
|||
end
|
||||
end
|
||||
|
||||
reg [W_DATA-1:0] amo_load_data;
|
||||
|
||||
// Datapath flops
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
|
@ -713,9 +720,10 @@ always @ (posedge clk or negedge rst_n) begin
|
|||
xm_store_data <= {W_DATA{1'b0}};
|
||||
end else if (!m_stall) begin
|
||||
xm_result <=
|
||||
d_csr_ren ? x_csr_rdata :
|
||||
EXTENSION_M && d_aluop == ALUOP_MULDIV ? x_muldiv_result :
|
||||
x_alu_result;
|
||||
d_csr_ren ? x_csr_rdata :
|
||||
|EXTENSION_A && d_memop_is_amo ? amo_load_data :
|
||||
|EXTENSION_M && d_aluop == ALUOP_MULDIV ? x_muldiv_result :
|
||||
x_alu_result;
|
||||
xm_store_data <= x_rs2_bypass;
|
||||
|
||||
end else if (d_memop_is_amo && x_amo_phase == 3'h1 && bus_dph_ready_d) begin
|
||||
|
@ -773,7 +781,6 @@ generate
|
|||
if (EXTENSION_A) begin: has_amo_alu
|
||||
|
||||
reg [W_MEMOP-1:0] amo_memop;
|
||||
reg [W_DATA-1:0] amo_load_data;
|
||||
reg m_amo_wdata_valid_r;
|
||||
|
||||
assign m_amo_wdata_valid = m_amo_wdata_valid_r;
|
||||
|
@ -805,6 +812,7 @@ end else begin: no_amo_alu
|
|||
|
||||
assign m_amo_wdata = {W_DATA{1'b0}};
|
||||
assign m_amo_wdata_valid = 1'b0;
|
||||
always @ (*) amo_load_data = {W_DATA{1'b0}};
|
||||
|
||||
end
|
||||
endgenerate
|
||||
|
@ -822,7 +830,7 @@ always @ (*) begin
|
|||
MEMOP_SB: bus_wdata_d = {4{m_wdata[7:0]}};
|
||||
default: bus_wdata_d = m_wdata;
|
||||
endcase
|
||||
if (|EXTENSION_A && amo_wdata_valid)
|
||||
if (|EXTENSION_A && m_amo_wdata_valid)
|
||||
bus_wdata_d = m_amo_wdata;
|
||||
|
||||
casez ({xm_memop, xm_result[1:0]})
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
SRCS := ../common/init.S main.c
|
||||
APP := amo_smoke
|
||||
CCFLAGS = -march=rv32imac -Os
|
||||
|
||||
include ../common/src_only_app.mk
|
|
@ -0,0 +1,46 @@
|
|||
[*]
|
||||
[*] GTKWave Analyzer v3.3.103 (w)1999-2019 BSI
|
||||
[*] Sat Dec 4 23:25:19 2021
|
||||
[*]
|
||||
[dumpfile] "/home/luke/proj/hazard3/test/sim/amo_smoke/amo_smoke_run.vcd"
|
||||
[dumpfile_mtime] "Sat Dec 4 23:21:36 2021"
|
||||
[dumpfile_size] 6246687
|
||||
[savefile] "/home/luke/proj/hazard3/test/sim/amo_smoke/amo_smoke.gtkw"
|
||||
[timestart] 420
|
||||
[size] 1975 1095
|
||||
[pos] -1 -1
|
||||
*-3.000000 458 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
|
||||
[treeopen] core.
|
||||
[sst_width] 233
|
||||
[signals_width] 246
|
||||
[sst_expanded] 1
|
||||
[sst_vpaned_height] 317
|
||||
@22
|
||||
core.d_pc[31:0]
|
||||
@28
|
||||
core.mw_local_exclusive_reserved
|
||||
core.m_stall
|
||||
core.x_stall
|
||||
core.x_stall_on_amo
|
||||
@200
|
||||
-
|
||||
@22
|
||||
d_haddr[31:0]
|
||||
@28
|
||||
d_htrans[1:0]
|
||||
d_hsize[2:0]
|
||||
@29
|
||||
d_hexcl
|
||||
@28
|
||||
d_hwrite
|
||||
d_hready
|
||||
@22
|
||||
d_hwdata[31:0]
|
||||
d_hrdata[31:0]
|
||||
@200
|
||||
-
|
||||
@22
|
||||
core.x_rs2_bypass[31:0]
|
||||
core.xm_store_data[31:0]
|
||||
[pattern_trace] 1
|
||||
[pattern_trace] 0
|
|
@ -0,0 +1,25 @@
|
|||
#include "tb_cxxrtl_io.h"
|
||||
#include <stdint.h>
|
||||
|
||||
// Scratch memory targeted by the amoadd.w instructions in main(); main() only touches scratch[0].
volatile uint32_t scratch[2];
|
||||
|
||||
// Check `cond`; if it is false, print the printf-style message given by the
// remaining arguments via tb_printf() and return -1 from the enclosing
// function (which must therefore return int).
//
// Wrapped in do { ... } while (0) so that the expansion is a single statement:
// `if (x) test_assert(...); else ...` compiles and the macro's internal `if`
// can never capture a following `else`.
#define test_assert(cond, ...) \
	do { \
		if (!(cond)) { \
			tb_printf(__VA_ARGS__); \
			return -1; \
		} \
	} while (0)
|
||||
|
||||
int main() {
|
||||
|
||||
scratch[0] = 0;
|
||||
|
||||
tb_puts("Initial value: 0\n");
|
||||
for (int i = 1; i <= 10; ++i) {
|
||||
uint32_t fetched;
|
||||
asm volatile (
|
||||
"amoadd.w %0, %1, (%2)\n"
|
||||
: "=r" (fetched)
|
||||
: "r" (i), "r" (&scratch[0])
|
||||
);
|
||||
tb_printf("amoadd.w rd, %d, (&addr) -> fetched %d\n", i, fetched);
|
||||
}
|
||||
tb_printf("Final value: %d\n", scratch[0]);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue