From d03a82a8263ca1aeccffab201e1411618fc8ea36 Mon Sep 17 00:00:00 2001
From: Luke Wren <wren6991@gmail.com>
Date: Sat, 4 Sep 2021 02:57:39 +0100
Subject: [PATCH] Add instruction fetch faults

---
 hdl/hazard3_core.v      |  4 +++
 hdl/hazard3_cpu_1port.v |  2 +-
 hdl/hazard3_decode.v    | 14 ++++++++--
 hdl/hazard3_frontend.v  | 59 ++++++++++++++++++++++++++++++++++-------
 4 files changed, 67 insertions(+), 12 deletions(-)

diff --git a/hdl/hazard3_core.v b/hdl/hazard3_core.v
index 3e33c6e..4be29f9 100644
--- a/hdl/hazard3_core.v
+++ b/hdl/hazard3_core.v
@@ -104,6 +104,7 @@ wire [W_REGADDR-1:0] f_rs1;
 wire [W_REGADDR-1:0] f_rs2;
 
 wire [31:0]          fd_cir;
+wire [1:0]           fd_cir_err;
 wire [1:0]           fd_cir_vld;
 wire [1:0]           df_cir_use;
 wire                 df_cir_lock;
@@ -126,6 +127,7 @@ hazard3_frontend #(
 	.mem_addr_rdy       (bus_aph_ready_i),
 
 	.mem_data           (bus_rdata_i),
+	.mem_data_err       (bus_dph_err_i),
 	.mem_data_vld       (bus_dph_ready_i),
 
 	.jump_target        (f_jump_target),
@@ -133,6 +135,7 @@ hazard3_frontend #(
 	.jump_target_rdy    (f_jump_rdy),
 
 	.cir                (fd_cir),
+	.cir_err            (fd_cir_err),
 	.cir_vld            (fd_cir_vld),
 	.cir_use            (df_cir_use),
 	.cir_lock           (df_cir_lock),
@@ -196,6 +199,7 @@ hazard3_decode #(
 	.rst_n                (rst_n),
 
 	.fd_cir               (fd_cir),
+	.fd_cir_err           (fd_cir_err),
 	.fd_cir_vld           (fd_cir_vld),
 	.df_cir_use           (df_cir_use),
 	.df_cir_lock          (df_cir_lock),
diff --git a/hdl/hazard3_cpu_1port.v b/hdl/hazard3_cpu_1port.v
index 21e801d..34509bd 100644
--- a/hdl/hazard3_cpu_1port.v
+++ b/hdl/hazard3_cpu_1port.v
@@ -229,7 +229,7 @@ assign ahblm_hwdata = core_wdata_d;
 // Handhshake based on grant and bus stall
 assign core_aph_ready_i = ahblm_hready && bus_gnt_i;
 assign core_dph_ready_i = ahblm_hready && bus_active_dph_i;
-assign core_dph_err_i   = ahblm_hready && bus_active_dph_i && ahblm_hresp;
+assign core_dph_err_i   = bus_active_dph_i && ahblm_hresp;
 
 // D-side errors are reported even when not ready, so that the core can make
 // use of the two-phase error response to cleanly squash a second load/store
diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v
index cda5c92..2b51dd2 100644
--- a/hdl/hazard3_decode.v
+++ b/hdl/hazard3_decode.v
@@ -26,6 +26,7 @@ module hazard3_decode #(
 	input wire rst_n,
 
 	input wire  [31:0]          fd_cir,
+	input wire  [1:0]           fd_cir_err,
 	input wire  [1:0]           fd_cir_vld,
 	output wire [1:0]           df_cir_use,
 	output wire                 df_cir_lock,
@@ -92,6 +93,13 @@ wire [31:0] d_imm_j = {{12{d_instr[31]}}, d_instr[19:12], d_instr[20], d_instr[3
 // ----------------------------------------------------------------------------
 // PC/CIR control
 
+// Must not flag bus error for a valid 16-bit instruction *followed by* an
+// error, because instruction fetch errors are speculative, and can be
+// flushed by e.g. a branch instruction. Note the 16 LSBs must be valid for
+// us to know an instruction's size.
+wire d_except_instr_bus_fault = fd_cir_vld > 2'd0 && fd_cir_err[0] ||
+	fd_cir_vld > 2'd1 && d_instr_is_32bit && fd_cir_err[1];
+
 assign d_starved = ~|fd_cir_vld || fd_cir_vld[0] && d_instr_is_32bit;
 wire d_stall = x_stall || d_starved;
 
@@ -246,7 +254,7 @@ always @ (*) begin
 	default:    begin d_invalid_32bit = 1'b1; end
 	endcase
 
-	if (d_invalid || d_starved) begin
+	if (d_invalid || d_starved || d_except_instr_bus_fault) begin
 		d_rs1        = {W_REGADDR{1'b0}};
 		d_rs2        = {W_REGADDR{1'b0}};
 		d_rd         = {W_REGADDR{1'b0}};
@@ -258,7 +266,9 @@ always @ (*) begin
 		if (EXTENSION_M)
 			d_aluop = ALUOP_ADD;
 
-		if (d_invalid && !d_starved)
+		if (d_except_instr_bus_fault)
+			d_except = EXCEPT_INSTR_FAULT;
+		else if (d_invalid && !d_starved)
 			d_except = EXCEPT_INSTR_ILLEGAL;
 	end
 	if (cir_lock_prev) begin
diff --git a/hdl/hazard3_frontend.v b/hdl/hazard3_frontend.v
index 7757f22..f69d2ca 100644
--- a/hdl/hazard3_frontend.v
+++ b/hdl/hazard3_frontend.v
@@ -34,16 +34,17 @@ module hazard3_frontend #(
 	output wire              mem_size, // 1'b1 -> 32 bit access
 	output wire [W_ADDR-1:0] mem_addr,
 	output wire              mem_addr_vld,
-	input wire               mem_addr_rdy,
-	input wire  [W_DATA-1:0] mem_data,
-	input wire               mem_data_vld,
+	input  wire              mem_addr_rdy,
+	input  wire [W_DATA-1:0] mem_data,
+	input  wire              mem_data_err,
+	input  wire              mem_data_vld,
 
 	// Jump/flush interface
 	// Processor may assert vld at any time. The request will not go through
 	// unless rdy is high. Processor *may* alter request during this time.
 	// Inputs must not be a function of hready.
-	input wire  [W_ADDR-1:0] jump_target,
-	input wire               jump_target_vld,
+	input  wire [W_ADDR-1:0] jump_target,
+	input  wire              jump_target_vld,
 	output wire              jump_target_rdy,
 
 	// Interface to Decode
@@ -53,9 +54,10 @@ module hazard3_frontend #(
 	// This works OK because size is decoded from 2 LSBs of instruction, so cheap.
 	output reg  [31:0]       cir,
 	output reg  [1:0]        cir_vld, // number of valid halfwords in CIR
-	input wire  [1:0]        cir_use, // number of halfwords D intends to consume
+	input  wire [1:0]        cir_use, // number of halfwords D intends to consume
 	                                  // *may* be a function of hready
-	input wire               cir_lock,// Lock-in current contents and level of CIR.
+	output wire [1:0]        cir_err, // Bus error on upper/lower halfword of CIR.
+	input  wire              cir_lock,// Lock-in current contents and level of CIR.
 	                                  // Assert simultaneously with a jump request,
 	                                  // if decode is going to stall. This stops the CIR
 	                                  // from being trashed by incoming fetch data;
@@ -91,7 +93,15 @@ wire jump_now = jump_target_vld && jump_target_rdy;
 // an extra entry which is constant-0. These are just there to handle loop
 // boundary conditions.
 
+// fifo_err holds an error (HRESP) bit for each FIFO entry, so that we can
+// correctly speculate and flush fetch errors. The error bit moves through
+// the prefetch queue alongside the corresponding bus data. We sample bus
+// errors like an extra data bit -- fetch continues to speculate forward
+// past an error, and we eventually flush and redirect the frontend if an
+// errored fetch makes it to the execute stage.
+
 reg [W_DATA-1:0]   fifo_mem [0:FIFO_DEPTH];
+reg [FIFO_DEPTH:0] fifo_err;
 reg [FIFO_DEPTH:0] fifo_valid;
 
 wire [W_DATA-1:0] fifo_wdata = mem_data;
@@ -121,13 +131,16 @@ always @ (posedge clk) begin: fifo_data_shift
 	for (i = 0; i < FIFO_DEPTH; i = i + 1) begin
 		if (fifo_pop || (fifo_push && !fifo_valid[i])) begin
 			fifo_mem[i] <= fifo_valid[i + 1] ? fifo_mem[i + 1] : fifo_wdata;
+			fifo_err[i] <= fifo_valid[i + 1] ? fifo_err[i + 1] : mem_data_err; // select on valid, same as fifo_mem
 		end
 	end
 	// Allow DM to inject instructions directly into the lowest-numbered queue
 	// entry. This mux should not extend critical path since it is balanced
 	// with the instruction-assembly muxes on the queue bypass path.
-	if (fifo_dbg_inject)
+	if (fifo_dbg_inject) begin
 		fifo_mem[0] <= dbg_instr_data;
+		fifo_err[0] <= 1'b0;
+	end
 end
 
 // ----------------------------------------------------------------------------
@@ -290,8 +303,10 @@ wire [1:0] level_next_no_fetch = buf_level - cir_use_clipped;
 // Overlay fresh fetch data onto the shifted/recycled instruction data
 // Again, if something won't be looked at, generate cheapest possible garbage.
 // Don't care if fetch data is valid or not, as will just retry next cycle (as long as flags set correctly)
+wire instr_fetch_overlay_blocked = cir_lock || (level_next_no_fetch[1] && !unaligned_jump_dph);
+
 wire [3*W_BUNDLE-1:0] instr_data_plus_fetch =
-	cir_lock || (level_next_no_fetch[1] && !unaligned_jump_dph) ? instr_data_shifted :
+	instr_fetch_overlay_blocked           ? instr_data_shifted :
 	unaligned_jump_dph     && EXTENSION_C ? {instr_data_shifted[W_BUNDLE +: 2*W_BUNDLE], fetch_data[W_BUNDLE +: W_BUNDLE]} :
 	level_next_no_fetch[0] && EXTENSION_C ? {fetch_data, instr_data_shifted[0 +: W_BUNDLE]} :
 	                         {instr_data_shifted[2*W_BUNDLE +: W_BUNDLE], fetch_data};
@@ -326,6 +341,32 @@ end
 always @ (posedge clk)
 	{hwbuf, cir} <= instr_data_plus_fetch;
 
+// Also keep track of bus errors associated with CIR contents, shifted in the
+// same way as instruction data. Errors may come straight from the bus, or
+// may be buffered in the prefetch queue.
+
+wire fetch_bus_err = fifo_empty ? mem_data_err : fifo_err[0];
+
+reg  [2:0] cir_bus_err;
+wire [2:0] cir_bus_err_shifted =
+	cir_use[1]                ? cir_bus_err >> 2 :
+	cir_use[0] && EXTENSION_C ? cir_bus_err >> 1 : cir_bus_err;
+
+wire [2:0] cir_bus_err_plus_fetch =
+	instr_fetch_overlay_blocked           ? cir_bus_err_shifted :
+	unaligned_jump_dph     && EXTENSION_C ? {cir_bus_err_shifted[2:1], fetch_bus_err} :
+	level_next_no_fetch[0] && EXTENSION_C ? {{2{fetch_bus_err}}, cir_bus_err_shifted[0]} :
+	                         {cir_bus_err_shifted[2], {2{fetch_bus_err}}};
+
+always @ (posedge clk or negedge rst_n) begin
+	if (!rst_n) begin
+		cir_bus_err <= 3'h0;
+	end else if (CSR_M_TRAP) begin
+		cir_bus_err <= cir_bus_err_plus_fetch;
+	end
+end
+
+assign cir_err = cir_bus_err[1:0];
 
 // ----------------------------------------------------------------------------
 // Register number predecode