Fix illegal issue of pipelined exclusives on the bus, and document correct timings
This commit is contained in:
parent
5e17bb805e
commit
a8933c332d
4574
doc/hazard3.pdf
4574
doc/hazard3.pdf
File diff suppressed because it is too large
Load Diff
|
@ -77,8 +77,9 @@ Timings assume the core is configured with `MULDIV_UNROLL = 2` and `MUL_FAST = 1
|
||||||
[%autowidth.stretch, options="header"]
|
[%autowidth.stretch, options="header"]
|
||||||
|===
|
|===
|
||||||
| Instruction | Cycles | Note
|
| Instruction | Cycles | Note
|
||||||
| `lr.w rd, (rs1)` | 1 or 2 | 1 if next instruction is independent, 2 if dependent.footnote:data_dependency[]
|
3+| Load-Reserved/Store-Conditional
|
||||||
| `sc.w rd, rs2, (rs1)` | 1 | `lr.w` followed by `sc.w` always inserts a dependency stall.footnote:lr_to_sc[A 1-cycle pipeline bubble is inserted in between an `lr.w` and an immediately-following `sc.w`, so that the store can be suppressed by a reservation failure on the load. It does not matter whether the `lr.w` and `sc.w` use the same registers. Load reservation may fail if the memory region does not support exclusive transfers.]
|
| `lr.w rd, (rs1)` | 1 or 2 | 2 if next instruction is dependentfootnote:data_dependency[], or is an `lr.w`, `sc.w` or `amo*`.footnote:exclusive_pipelining[A pipeline bubble is inserted between `lr.w`/`sc.w` and an immediately-following `lr.w`/`sc.w`/`amo*`, because the AHB5 bus standard does not permit pipelined exclusive accesses. A stall would be required between `lr.w` and `sc.w` in any case, so that the local monitor can be updated based on the `lr.w` data phase in time to suppress the `sc.w` data phase.]
|
||||||
|
| `sc.w rd, rs2, (rs1)` | 1 or 2 | 2 if next instruction is an `lr.w`, `sc.w` or `amo*`.footnote:exclusive_pipelining[]
|
||||||
|===
|
|===
|
||||||
|
|
||||||
AMOs are currently not supported.
|
AMOs are currently not supported.
|
||||||
|
|
|
@ -285,9 +285,24 @@ wire m_wfi_stall_clear;
|
||||||
wire x_stall_on_trap = m_trap_enter_vld && !m_trap_enter_rdy ||
|
wire x_stall_on_trap = m_trap_enter_vld && !m_trap_enter_rdy ||
|
||||||
m_trap_enter_soon && !m_trap_enter_vld;
|
m_trap_enter_soon && !m_trap_enter_vld;
|
||||||
|
|
||||||
|
// Stall inserted to avoid illegal pipelining of exclusive accesses on the bus
|
||||||
|
// (also gives time to update local monitor on direct lr.w -> sc.w instruction
|
||||||
|
// sequences). Note we don't check for AMOs in stage M, because AMOs fully
|
||||||
|
// fence off on their own completion before passing down the pipe.
|
||||||
|
|
||||||
|
wire d_memop_is_amo = |EXTENSION_A && (
|
||||||
|
d_memop >= MEMOP_AMOSWAP_W && d_memop <= MEMOP_AMOMAXU_W
|
||||||
|
);
|
||||||
|
|
||||||
|
wire x_stall_on_exclusive_overlap = |EXTENSION_A && (
|
||||||
|
(d_memop_is_amo || d_memop == MEMOP_SC_W || d_memop == MEMOP_LR_W) &&
|
||||||
|
(xm_memop == MEMOP_SC_W || xm_memop == MEMOP_LR_W)
|
||||||
|
);
|
||||||
|
|
||||||
assign x_stall =
|
assign x_stall =
|
||||||
m_stall ||
|
m_stall ||
|
||||||
x_stall_on_trap ||
|
x_stall_on_trap ||
|
||||||
|
x_stall_on_exclusive_overlap ||
|
||||||
x_stall_raw || x_stall_muldiv ||
|
x_stall_raw || x_stall_muldiv ||
|
||||||
bus_aph_req_d && !bus_aph_ready_d ||
|
bus_aph_req_d && !bus_aph_ready_d ||
|
||||||
x_jump_req && !f_jump_rdy;
|
x_jump_req && !f_jump_rdy;
|
||||||
|
@ -296,15 +311,13 @@ wire m_fast_mul_result_vld;
|
||||||
wire m_generating_result = xm_memop < MEMOP_SW || m_fast_mul_result_vld;
|
wire m_generating_result = xm_memop < MEMOP_SW || m_fast_mul_result_vld;
|
||||||
|
|
||||||
// Load-use hazard detection
|
// Load-use hazard detection
|
||||||
|
|
||||||
always @ (*) begin
|
always @ (*) begin
|
||||||
x_stall_raw = 1'b0;
|
x_stall_raw = 1'b0;
|
||||||
if (REDUCED_BYPASS) begin
|
if (REDUCED_BYPASS) begin
|
||||||
x_stall_raw =
|
x_stall_raw =
|
||||||
|xm_rd && (xm_rd == d_rs1 || xm_rd == d_rs2) ||
|
|xm_rd && (xm_rd == d_rs1 || xm_rd == d_rs2) ||
|
||||||
|mw_rd && (mw_rd == d_rs1 || mw_rd == d_rs2);
|
|mw_rd && (mw_rd == d_rs1 || mw_rd == d_rs2);
|
||||||
end else if (|EXTENSION_A && xm_memop == MEMOP_LR_W && d_memop == MEMOP_SC_W) begin
|
|
||||||
// Conditional-store address phase depends on data-phase update of local monitor bit
|
|
||||||
x_stall_raw = 1'b1;
|
|
||||||
end else if (m_generating_result) begin
|
end else if (m_generating_result) begin
|
||||||
// With the full bypass network, load-use (or fast multiply-use) is the only RAW stall
|
// With the full bypass network, load-use (or fast multiply-use) is the only RAW stall
|
||||||
if (|xm_rd && xm_rd == d_rs1) begin
|
if (|xm_rd && xm_rd == d_rs1) begin
|
||||||
|
@ -398,6 +411,7 @@ always @ (*) begin
|
||||||
endcase
|
endcase
|
||||||
bus_aph_req_d = x_memop_vld && !(
|
bus_aph_req_d = x_memop_vld && !(
|
||||||
x_stall_raw ||
|
x_stall_raw ||
|
||||||
|
x_stall_on_exclusive_overlap ||
|
||||||
x_unaligned_addr ||
|
x_unaligned_addr ||
|
||||||
m_trap_enter_soon ||
|
m_trap_enter_soon ||
|
||||||
(xm_wfi && !m_wfi_stall_clear) // FIXME will cause a timing issue, better to stall til *after* clear
|
(xm_wfi && !m_wfi_stall_clear) // FIXME will cause a timing issue, better to stall til *after* clear
|
||||||
|
|
Loading…
Reference in New Issue