From e9fccffca00431f301a6f4e7e94a8b3fd60a80bf Mon Sep 17 00:00:00 2001
From: Luke Wren <wren6991@gmail.com>
Date: Sun, 5 Sep 2021 04:45:38 +0100
Subject: [PATCH] Fix break and single-step on a load/store access fault
 dropping the exception. Better fix for halt request on definitely-excepting
 stage M instruction giving unreachable next-instruction DPC.

---
 hdl/hazard3_core.v | 22 ++++++++++++-------
 hdl/hazard3_csr.v  | 54 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/hdl/hazard3_core.v b/hdl/hazard3_core.v
index 4be29f9..97e90c8 100644
--- a/hdl/hazard3_core.v
+++ b/hdl/hazard3_core.v
@@ -257,6 +257,7 @@ wire                 x_alu_cmp;
 wire [W_DATA-1:0]    m_trap_addr;
 wire                 m_trap_is_irq;
 wire                 m_trap_enter_vld;
+wire                 m_trap_enter_soon;
 wire                 m_trap_enter_rdy = f_jump_rdy;
 
 reg  [W_REGADDR-1:0] xm_rs1;
@@ -275,7 +276,8 @@ wire x_jump_req;
 
 // IRQs squeeze in between the instructions in X and M, so in this case X
 // stalls but M can continue. -> X always stalls on M trap, M *may* stall.
-wire x_stall_on_trap = m_trap_enter_vld && !m_trap_enter_rdy;
+wire x_stall_on_trap = m_trap_enter_vld && !m_trap_enter_rdy ||
+	m_trap_enter_soon && !m_trap_enter_vld;
 
 assign x_stall =
 	m_stall ||
@@ -368,7 +370,7 @@ always @ (*) begin
 		MEMOP_SH:  bus_hsize_d = HSIZE_HWORD;
 		default:   bus_hsize_d = HSIZE_BYTE;
 	endcase
-	bus_aph_req_d = x_memop_vld && !(x_stall_raw || x_unaligned_addr || m_trap_enter_vld);
+	bus_aph_req_d = x_memop_vld && !(x_stall_raw || x_unaligned_addr || m_trap_enter_soon);
 end
 
 // Multiply/divide
@@ -391,7 +393,7 @@ if (EXTENSION_M) begin: has_muldiv
 		else
 			x_muldiv_posted <= (x_muldiv_posted || (x_muldiv_op_vld && x_muldiv_op_rdy)) && x_stall;
 
-	wire x_muldiv_kill = m_trap_enter_vld;
+	wire x_muldiv_kill = m_trap_enter_soon;
 
 	wire x_use_fast_mul = MUL_FAST && d_aluop == ALUOP_MULDIV && d_mulop == M_OP_MUL;
 
@@ -473,6 +475,8 @@ wire [W_DATA-1:0] x_csr_wdata = d_csr_w_imm ?
 wire [W_DATA-1:0] x_csr_rdata;
 wire              x_csr_illegal_access;
 
+// "Previous" refers to next-most-recent instruction to be in D/X, i.e. the
+// most recent instruction to reach stage M (which may or may not still be in M).
 reg prev_instr_was_32_bit;
 
 always @ (posedge clk or negedge rst_n) begin
@@ -522,19 +526,21 @@ hazard3_csr #(
 	// Can generate access faults (hence traps), but do not actually perform access.
 	.addr                       (d_imm[11:0]), // todo could just connect this to the instruction bits
 	.wdata                      (x_csr_wdata),
-	.wen_soon                   (d_csr_wen && !m_trap_enter_vld),
-	.wen                        (d_csr_wen && !m_trap_enter_vld && !x_stall),
+	.wen_soon                   (d_csr_wen && !m_trap_enter_soon),
+	.wen                        (d_csr_wen && !m_trap_enter_soon && !x_stall),
 	.wtype                      (d_csr_wtype),
 	.rdata                      (x_csr_rdata),
-	.ren_soon                   (d_csr_ren && !m_trap_enter_vld),
-	.ren                        (d_csr_ren && !m_trap_enter_vld && !x_stall),
+	.ren_soon                   (d_csr_ren && !m_trap_enter_soon),
+	.ren                        (d_csr_ren && !m_trap_enter_soon && !x_stall),
 	.illegal                    (x_csr_illegal_access),
 
 	// Trap signalling
 	.trap_addr                  (m_trap_addr),
 	.trap_is_irq                (m_trap_is_irq),
+	.trap_enter_soon            (m_trap_enter_soon),
 	.trap_enter_vld             (m_trap_enter_vld),
 	.trap_enter_rdy             (m_trap_enter_rdy),
+	.loadstore_dphase_pending   (!xm_memop[3]),
 	.mepc_in                    (m_exception_return_addr),
 
 	// IRQ and exception requests
@@ -566,7 +572,7 @@ always @ (posedge clk or negedge rst_n) begin
 			// If the transfer is unaligned, make sure it is completely NOP'd on the bus
 			xm_memop <= d_memop | {x_unaligned_addr, 3'h0};
 			xm_except <= x_except;
-			if (x_stall || m_trap_enter_vld) begin
+			if (x_stall || m_trap_enter_soon) begin
 				// Insert bubble
 				xm_rd <= {W_REGADDR{1'b0}};
 				xm_memop <= MEMOP_NONE;
diff --git a/hdl/hazard3_csr.v b/hdl/hazard3_csr.v
index 82cfbce..326ee92 100644
--- a/hdl/hazard3_csr.v
+++ b/hdl/hazard3_csr.v
@@ -81,6 +81,15 @@ module hazard3_csr #(
 	output wire                trap_is_irq,
 	output wire                trap_enter_vld,
 	input  wire                trap_enter_rdy,
+	// True when we are about to trap into debug mode, but are waiting for an
+	// excepting or potentially-excepting instruction to clear M first. The
+	// instruction in X is suppressed, X PC does not increment but still
+	// tracks exception addresses.
+	output wire                trap_enter_soon,
+	// We need to know about load/stores in data phase because their exception
+	// status is still unknown, so we fence off on them before entering debug
+	// mode.
+	input  wire                loadstore_dphase_pending,
 	input  wire [XLEN-1:0]     mepc_in,
 
 	// Exceptions must *not* be a function of bus stall.
@@ -917,30 +926,45 @@ end
 // instructions in stage 2 and stage 3) or in an exception-like manner
 // (replace the instruction in stage 3).
 //
-// A tricky case is halt request: this normally performs an IRQ-like entry,
-// because the instruction in stage 3 can not in general be discarded, as it
-// may already have had system side effects: for example a load/store on an
-// IO region.
+// Halt request and step-break are IRQ-like. We need to be careful when a halt
+// request lines up with an instruction in M which either has generated an
+// exception (e.g. an ecall) or may yet generate an exception (a load). In
+// this case the correct thing to do is to:
 //
-// However an asynchronous halt request when the instruction in stage 3 is
-// itself generating an exception is an exception-like halt entry. Otherwise,
-// we set DPC to the instruction *after* (in X) the excepting one, which is
-// never actually reached, due to the exception.
+// - Squash whatever instruction may be in X, and inhibit X PC increment
+// - Wait until after the exception entry is taken (or the load/store
+//   completes successfully)
+// - Immediately trigger an IRQ-like debug entry.
+//
+// This ensures the debugger sees mcause/mepc set correctly, with dpc pointing
+// to the handler entry point, if the instruction excepts.
 
 wire exception_req_any;
+wire halt_delayed_by_exception = exception_req_any || loadstore_dphase_pending;
 
 // This would also include triggers, if/when those are implemented:
 wire want_halt_except = DEBUG_SUPPORT && !debug_mode && (
-	dcsr_ebreakm && except == EXCEPT_EBREAK ||
-	// This clause takes priority over the IRQ-like dbg_req_halt clause below:
-	dbg_req_halt && exception_req_any
+	dcsr_ebreakm && except == EXCEPT_EBREAK
 );
 
-// Note all exception-like causes (trigger, ebreak) are higher priority than IRQ-like
-wire want_halt_irq = DEBUG_SUPPORT && !debug_mode && !want_halt_except && (
-	dbg_req_halt || (dbg_req_halt_on_reset && have_just_reset) || step_halt_req
+// Note exception-like causes (trigger, ebreak) are higher priority than
+// IRQ-like.
+//
+// We must mask halt_req with delay_irq_entry (true on cycle 2 and beyond of
+// load/store address phase) because at that point we can't suppress the bus
+// access..
+wire want_halt_irq_if_no_exception = DEBUG_SUPPORT && !debug_mode && !want_halt_except && (
+	(dbg_req_halt && !delay_irq_entry) ||
+	(dbg_req_halt_on_reset && have_just_reset) ||
+	step_halt_req
 );
 
+// Exception (or potential exception due to load/store) in M delays halt
+// entry. The halt intention still blocks X, so we can't get blocked forever
+// by a string of load/stores. This is just here to get sequencing between an
+// exception (or potential exception) in M, and debug mode entry.
+wire want_halt_irq = want_halt_irq_if_no_exception && !halt_delayed_by_exception;
+
 assign dcause_next =
 	// Trigger would be highest priority if implemented
 	except == EXCEPT_EBREAK                                    ? 3'h1 : // ebreak (priority 3)
@@ -1045,6 +1069,8 @@ assign trap_enter_vld =
 	DEBUG_SUPPORT && (
 		(!delay_irq_entry && want_halt_irq) || want_halt_except || pending_dbg_resume);
 
+assign trap_enter_soon = trap_enter_vld || (DEBUG_SUPPORT && want_halt_irq_if_no_exception);
+
 assign mcause_irq_next = !exception_req_any;
 assign mcause_code_next = exception_req_any ? {2'h0, except} : mcause_irq_num;