diff --git a/hdl/hazard3_core.v b/hdl/hazard3_core.v
index 6e87b3a..2596c73 100644
--- a/hdl/hazard3_core.v
+++ b/hdl/hazard3_core.v
@@ -89,7 +89,6 @@ wire                 f_jump_rdy;
 wire                 f_jump_now = f_jump_req && f_jump_rdy;
 
 // Predecoded register numbers, for register file access
-wire                 f_regnum_vld;
 wire [W_REGADDR-1:0] f_rs1;
 wire [W_REGADDR-1:0] f_rs2;
 
@@ -132,7 +131,6 @@ hazard3_frontend #(
 
 	.next_regs_rs1      (f_rs1),
 	.next_regs_rs2      (f_rs2),
-	.next_regs_vld      (f_regnum_vld),
 
 	.debug_mode         (debug_mode),
 	.dbg_instr_data     (dbg_instr_data),
@@ -344,21 +342,41 @@ wire m_wfi_stall_clear;
 
 // ALU, operand muxes and bypass
 
+// Register numbers predecoded in stage 1 (for the regfile read) are captured
+// here for stage 2. They are approximate: on a port where the instruction has
+// no register operand, the value is meaningless. That rules them out for
+// hazard checking, but they are fine for selecting a bypass source, and keep
+// the bypass mux select independent of stage 2 decode.
+
+reg [W_REGADDR-1:0] d_rs1_predecoded; // Registered copy of f_rs1
+reg [W_REGADDR-1:0] d_rs2_predecoded; // Registered copy of f_rs2
+
+always @ (posedge clk or negedge rst_n) begin
+	if (!rst_n) begin
+		d_rs1_predecoded <= {W_REGADDR{1'b0}};
+		d_rs2_predecoded <= {W_REGADDR{1'b0}};
+	end else if (!x_stall) begin // Hold the captured regnums while x_stall is asserted
+		d_rs1_predecoded <= f_rs1;
+		d_rs2_predecoded <= f_rs2;
+	end
+end
+
 always @ (*) begin
 	if (~|d_rs1) begin
+		// Zero check must use the decoded d_rs1: the predecoded regnum is not precise enough here
 		x_rs1_bypass = {W_DATA{1'b0}};
-	end else if (xm_rd == d_rs1) begin
+	end else if (xm_rd == d_rs1_predecoded) begin
 		x_rs1_bypass = xm_result;
-	end else if (mw_rd == d_rs1 && !REDUCED_BYPASS) begin
+	end else if (mw_rd == d_rs1_predecoded && !REDUCED_BYPASS) begin
 		x_rs1_bypass = mw_result;
 	end else begin
 		x_rs1_bypass = x_rdata1;
 	end
 	if (~|d_rs2) begin
 		x_rs2_bypass = {W_DATA{1'b0}};
-	end else if (xm_rd == d_rs2) begin
+	end else if (xm_rd == d_rs2_predecoded) begin
 		x_rs2_bypass = xm_result;
-	end else if (mw_rd == d_rs2 && !REDUCED_BYPASS) begin
+	end else if (mw_rd == d_rs2_predecoded && !REDUCED_BYPASS) begin
 		x_rs2_bypass = mw_result;
 	end else begin
 		x_rs2_bypass = x_rdata2;
diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v
index 0f69b8f..feed576 100644
--- a/hdl/hazard3_decode.v
+++ b/hdl/hazard3_decode.v
@@ -195,7 +195,7 @@ always @ (*) begin
 	RV_BGEU:      begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_rd = X0; d_aluop = ALUOP_LTU; d_branchcond = BCOND_ZERO; end
 	RV_JALR:      begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = BCOND_ALWAYS; d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_aluop = ALUOP_ADD; d_alusrc_a = ALUSRCA_PC; d_alusrc_b = ALUSRCB_IMM; d_imm = d_instr_is_32bit ? 32'h4 : 32'h2; end
 	RV_JAL:       begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = BCOND_ALWAYS; d_rs1 = X0;               d_rs2 = X0; d_aluop = ALUOP_ADD; d_alusrc_a = ALUSRCA_PC; d_alusrc_b = ALUSRCB_IMM; d_imm = d_instr_is_32bit ? 32'h4 : 32'h2; end
-	RV_LUI:       begin d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_rs1 = X0; end
+	RV_LUI:       begin d_aluop = ALUOP_RS2; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_rs1 = X0; end
 	RV_AUIPC:     begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_alusrc_a = ALUSRCA_PC;  d_rs1 = X0; end
 	RV_ADDI:      begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
 	RV_SLLI:      begin d_aluop = ALUOP_SLL; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
diff --git a/hdl/hazard3_frontend.v b/hdl/hazard3_frontend.v
index 4bf9449..d368b03 100644
--- a/hdl/hazard3_frontend.v
+++ b/hdl/hazard3_frontend.v
@@ -55,7 +55,6 @@ module hazard3_frontend #(
 	// cycle. These go straight to the register file read ports.
 	output wire [4:0]        next_regs_rs1,
 	output wire [4:0]        next_regs_rs2,
-	output wire              next_regs_vld,
 
 	// Debugger instruction injection: instruction fetch is suppressed when in
 	// debug halt state, and the DM can then inject instructions into the last
@@ -389,8 +388,6 @@ assign cir_err = cir_bus_err[1:0];
 wire [31:0] next_instr = instr_data_plus_fetch[31:0];
 wire next_instr_is_32bit = next_instr[1:0] == 2'b11;
 
-assign next_regs_vld = next_instr_is_32bit ? buf_level_next[1] : |buf_level_next;
-
 assign next_regs_rs1 =
 	next_instr_is_32bit                                     ? next_instr[19:15]       : // 32-bit R, S, B formats
 	next_instr[1:0] == 2'b00 && next_instr[15:13] == 3'b000 ? 5'd2                    : // c.addi4spn