diff --git a/hdl/hazard3_core.v b/hdl/hazard3_core.v
index e9bb2f1..01c78a7 100644
--- a/hdl/hazard3_core.v
+++ b/hdl/hazard3_core.v
@@ -108,6 +108,8 @@ wire [W_ADDR-1:0]    x_btb_set_src_addr;
 wire [W_ADDR-1:0]    x_btb_set_target_addr;
 wire                 x_btb_clear;
 
+wire [W_ADDR-1:0]    d_btb_target_addr;
+
 assign bus_aph_panic_i = 1'b0;
 
 wire f_mem_size;
@@ -138,6 +140,7 @@ hazard3_frontend #(
 	.btb_set_src_addr     (x_btb_set_src_addr),
 	.btb_set_target_addr  (x_btb_set_target_addr),
 	.btb_clear            (x_btb_clear),
+	.btb_target_addr_out  (d_btb_target_addr),
 
 	.cir                  (fd_cir),
 	.cir_err              (fd_cir_err),
@@ -226,6 +229,7 @@ hazard3_decode #(
 	.x_stall              (x_stall),
 	.f_jump_now           (f_jump_now),
 	.f_jump_target        (f_jump_target),
+	.d_btb_target_addr    (d_btb_target_addr),
 
 	.d_imm                (d_imm),
 	.d_rs1                (d_rs1),
@@ -738,7 +742,8 @@ wire x_jump_req_unchecked = !x_stall_on_raw && (
 assign x_jump_req = x_jump_req_unchecked && !x_jump_misaligned && !x_exec_pmp_fail;
 
 assign x_btb_set = |BRANCH_PREDICTOR && (
-	x_jump_req_unchecked && d_addr_offs[W_ADDR - 1] && !x_branch_was_predicted
+	x_jump_req_unchecked && d_addr_offs[W_ADDR - 1] && !x_branch_was_predicted &&
+	(d_branchcond == BCOND_NZERO || d_branchcond == BCOND_ZERO)
 );
 
 assign x_btb_clear = d_fence_i || (m_trap_enter_vld && m_trap_enter_rdy) || (|BRANCH_PREDICTOR && (
diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v
index 1a1365e..9fe7a90 100644
--- a/hdl/hazard3_decode.v
+++ b/hdl/hazard3_decode.v
@@ -30,6 +30,7 @@ module hazard3_decode #(
 	input  wire                 f_jump_now,
 	input  wire [W_ADDR-1:0]    f_jump_target,
 	input  wire                 x_jump_not_except,
+	input  wire [W_ADDR-1:0]    d_btb_target_addr,
 
 	output reg  [W_DATA-1:0]    d_imm,
 	output reg  [W_REGADDR-1:0] d_rs1,
@@ -123,31 +124,39 @@ always @ (posedge clk or negedge rst_n) begin
 	end
 end
 
-reg  [W_ADDR-1:0]    pc;
-wire [W_ADDR-1:0]    pc_next = pc + (d_instr_is_32bit ? 32'h4 : 32'h2);
+reg  [W_ADDR-1:0] pc;
+wire [W_ADDR-1:0] pc_seq_next = pc + (d_instr_is_32bit ? 32'h4 : 32'h2);
 assign d_pc = pc;
 
+wire predicted_branch = fd_cir_predbranch[d_instr_is_32bit] && |BRANCH_PREDICTOR;
+
 always @ (posedge clk or negedge rst_n) begin
 	if (!rst_n) begin
 		pc <= RESET_VECTOR;
 	end else begin
 		if ((f_jump_now && !assert_cir_lock) || (cir_lock_prev && deassert_cir_lock)) begin
 			pc <= f_jump_target;
-`ifdef FORMAL
-			// Being cheeky above to save a 32 bit mux. Check that we never get an M target by mistake.
-
-			// FIXME disabled this for now -- we do sometimes see an exception taking
-			// place during the stall, which then leads to a different branch target
-			// appearing. (i.e. f_jump_now is asserted for two cycles, the first one
-			// from this instruction and the second from the exception; this is ok,
-			// because the exception will return to this branch when done.)
-
-			// if (cir_lock_prev && deassert_cir_lock)
-			// 	assert(f_jump_target == d_jump_target);
-`endif
 		end else if (!d_stall && !cir_lock) begin
-			pc <= pc_next;
+			// If this instruction is a predicted-taken branch (and has not
+			// generated a mispredict recovery jump) then set PC to the
+			// prediction target instead of the sequentially next PC
+			pc <= predicted_branch ? d_btb_target_addr : pc_seq_next;
 		end
+		// If the current instruction is marked as being a predicted-taken
+		// branch, it *must* be a valid branch instruction, because executing
+		// the branch is how we recover from misprediction
+`ifdef FORMAL
+		// This can be defeated if you branch backward halfway into a 32-bit instruction that immediately precedes the branch.
+		if (predicted_branch && !d_starved && !debug_mode) begin
+			assert(!d_invalid);
+			assert(d_branchcond == BCOND_ZERO || d_branchcond == BCOND_NZERO);
+		end
+		if (&fd_cir_predbranch && !d_instr_is_32bit) begin
+			// Only way to get two back-to-back predicted-taken instructions
+			// is if they are the same instruction
+			assert(d_btb_target_addr == pc);
+		end
+`endif
 	end
 end
 
diff --git a/hdl/hazard3_frontend.v b/hdl/hazard3_frontend.v
index 26652a0..51b1e20 100644
--- a/hdl/hazard3_frontend.v
+++ b/hdl/hazard3_frontend.v
@@ -43,6 +43,7 @@ module hazard3_frontend #(
 	input  wire [W_ADDR-1:0] btb_set_src_addr,
 	input  wire [W_ADDR-1:0] btb_set_target_addr,
 	input  wire              btb_clear,
+	output wire [W_ADDR-1:0] btb_target_addr_out,
 
 	// Interface to Decode
 	// Note reg/wire distinction
@@ -181,12 +182,14 @@ if (BRANCH_PREDICTOR) begin: have_btb
 			btb_src_addr <= {W_ADDR{1'b0}};
 			btb_target_addr <= {W_ADDR{1'b0}};
 			btb_valid <= 1'b0;
+		end else if (btb_clear) begin
+			// Clear takes precedences over set. E.g. if a taken branch is in
+			// stage 2 and an exception is in stage 3, we must clear the BTB.
+			btb_valid <= 1'b0;
 		end else if (btb_set) begin
 			btb_src_addr <= btb_set_src_addr;
 			btb_target_addr <= btb_set_target_addr;
 			btb_valid <= 1'b1;
-		end else if (btb_clear) begin
-			btb_valid <= 1'b0;
 		end
 	end
 end else begin: no_btb
@@ -198,6 +201,17 @@ end else begin: no_btb
 end
 endgenerate
 
+// Decode uses the target address to set the PC to the correct branch target
+// value following a predicted-taken branch (as normally it would update PC
+// by following an X jump request, and in this case there is none).
+//
+// Note this assumes the BTB target has not changed by the time the predicted
+// branch arrives at decode! This is always true because the only way for the
+// target address to change is when an older branch is taken, which would
+// flush the younger predicted-taken branch before it reaches decode. 
+
+assign btb_target_addr_out = btb_target_addr;
+
 // ----------------------------------------------------------------------------
 // Fetch request generation
 
@@ -330,25 +344,30 @@ always @ (posedge clk or negedge rst_n) begin
 		mem_data_hwvld <= 2'b11;
 		mem_aph_hwvld <= 2'b11;
 		mem_data_predbranch <= 1'b0;
-	end else if (EXTENSION_C) begin
+	end else begin
 		if (jump_now) begin
-			if (mem_addr_rdy) begin
-				mem_data_hwvld <= {1'b1, !jump_target[1]};
-			end else begin
-				mem_aph_hwvld <= {1'b1, !jump_target[1]};
+			if (|EXTENSION_C) begin
+				if (mem_addr_rdy) begin
+					mem_data_hwvld <= {1'b1, !jump_target[1]};
+				end else begin
+					mem_aph_hwvld <= {1'b1, !jump_target[1]};
+				end
 			end
+			mem_data_predbranch <= 1'b0;
 		end else if (mem_addr_vld && mem_addr_rdy) begin
-			// If a predicted-taken branch instruction only spans the first
-			// half of a word, need to flag the second half as invalid. 
-			mem_data_hwvld <= mem_aph_hwvld & {
-				!(|BRANCH_PREDICTOR && btb_match_now && !btb_src_addr[1]),
-				1'b1
-			};
-			// Also need to take the alignment of the destination into account.
-			mem_aph_hwvld <= {
-				1'b1,
-				!(|BRANCH_PREDICTOR && btb_match_now && btb_target_addr[1])
-			};
+			if (|EXTENSION_C) begin
+				// If a predicted-taken branch instruction only spans the first
+				// half of a word, need to flag the second half as invalid. 
+				mem_data_hwvld <= mem_aph_hwvld & {
+					!(|BRANCH_PREDICTOR && btb_match_now && !btb_src_addr[1]),
+					1'b1
+				};
+				// Also need to take the alignment of the destination into account.
+				mem_aph_hwvld <= {
+					1'b1,
+					!(|BRANCH_PREDICTOR && btb_match_now && btb_target_addr[1])
+				};
+			end
 			mem_data_predbranch <= |BRANCH_PREDICTOR && btb_match_now;
 		end
 	end
@@ -397,7 +416,6 @@ wire [3*W_BUNDLE-1:0] instr_data_plus_fetch =
 // may be buffered in the prefetch queue.
 
 wire fetch_bus_err = fifo_empty ? mem_data_err : fifo_err[0];
-wire fetch_predbranch = fifo_empty ? mem_data_predbranch : fifo_predbranch[0];
 
 reg  [2:0] cir_bus_err;
 wire [2:0] cir_bus_err_shifted =
@@ -413,6 +431,10 @@ wire [2:0] cir_bus_err_plus_fetch =
 // And the same thing again for whether CIR contains a predicted-taken branch.
 // One day I should clean up this copy/paste.
 
+wire fetch_predbranch = fifo_empty ? mem_data_predbranch : fifo_predbranch[0];
+// We mark only the last halfword of the branch instruction as being predicted.
+wire [1:0] fetch_predbranch_hw = &fetch_data_hwvld ? {fetch_predbranch, 1'b0} : {1'b0, fetch_predbranch};
+
 reg  [2:0] cir_predbranch_reg;
 wire [2:0] cir_predbranch_shifted =
 	cir_use[1]                ? cir_predbranch_reg >> 2 :
@@ -420,9 +442,9 @@ wire [2:0] cir_predbranch_shifted =
 
 wire [2:0] cir_predbranch_plus_fetch =
 	!cir_room_for_fetch                    ? cir_predbranch_shifted :
-	level_next_no_fetch[1] && |EXTENSION_C ? {fetch_predbranch, cir_predbranch_shifted[1:0]} :
-	level_next_no_fetch[0] && |EXTENSION_C ? {{2{fetch_predbranch}}, cir_predbranch_shifted[0]} :
-	                                         {cir_predbranch_shifted[2], {2{fetch_predbranch}}};
+	level_next_no_fetch[1] && |EXTENSION_C ? {fetch_predbranch_hw[0], cir_predbranch_shifted[1:0]} :
+	level_next_no_fetch[0] && |EXTENSION_C ? {fetch_predbranch_hw, cir_predbranch_shifted[0]} :
+	                                         {cir_predbranch_shifted[2], fetch_predbranch_hw};
 
 wire [1:0] fetch_fill_amount = cir_room_for_fetch && fetch_data_vld ? (
 	&fetch_data_hwvld ? 2'h2 : 2'h1
diff --git a/test/formal/instruction_fetch_match/hazard3_formal_regression.vh b/test/formal/instruction_fetch_match/hazard3_formal_regression.vh
index e239a81..999d7b4 100644
--- a/test/formal/instruction_fetch_match/hazard3_formal_regression.vh
+++ b/test/formal/instruction_fetch_match/hazard3_formal_regression.vh
@@ -59,7 +59,12 @@ assign expect_cir[31:16] = instr_mem[(d_pc + 2) / 4] >> (d_pc[1] ? 0  : 16);
 // Note we can get a mismatch in the upper halfword during a CIR lock of a
 // jump in the lower halfword -- the fetch will tumble into the top and this
 // is fine as long as we correctly update the PC when the lock clears.
-wire allow_upper_half_mismatch = fd_cir[15:0] == expect_cir[15:0] && fd_cir[1:0] != 2'b11;
+
+// Note also following a predicted branch the upper half of CIR may be
+// nonsequential, so we can't check it.
+wire allow_upper_half_mismatch = fd_cir[15:0] == expect_cir[15:0] && (
+	fd_cir[1:0] != 2'b11 || fd_cir_predbranch[0]
+);
 
 always @ (posedge clk) if (rst_n) begin
 	if (fd_cir_vld >= 2'd1)