Remove op_b (rs2) register from muldiv_seq for modest LUT/FF savings

2022-10-08 18:22:08 +01:00 · 2022-10-08 18:22:08 +01:00 · aa438fc37c
parent d3667769d2
commit aa438fc37c
1 changed file with 20 additions and 15 deletions
--- a/hdl/arith/hazard3_muldiv_seq.v
+++ b/hdl/arith/hazard3_muldiv_seq.v
@@ -3,14 +3,14 @@
 |                     SPDX-License-Identifier: Apache-2.0                     |
 \*****************************************************************************/

-// Combined multiply/divide/modulo circuit.
-// All operations performed at 1 bit per clock; aiming for minimal resource usage.
-// There are lots of opportunities for off-by-one errors here. See muldiv_model.py
-// for a simple reference model of the mul/div/mod iterations.
+// Combined multiply/divide/modulo circuit. All operations performed at 1 bit
+// per clock; aiming for minimal resource usage on iCE40 FPGA. Optionally the
+// circuit can be unrolled for slightly higher performance.
 //
-// When op_kill is high, the current calculation halts immediately. op_vld can be
-// asserted on the same cycle, and the new calculation begins without delay, regardless
-// of op_rdy. This may be used by the processor on e.g. mispredict or trap.
+// When op_kill is high, the current calculation halts immediately. op_vld can
+// be asserted on the same cycle, and the new calculation begins without
+// delay, regardless of op_rdy. This may be used by the processor on e.g.
+// mispredict or trap.
 //
 // The actual multiply/divide hardware is unsigned. We handle signedness at
 // input/output.
@@ -57,10 +57,11 @@ parameter W_CTR = $clog2(XLEN + 1);

 reg [W_MULOP-1:0] op_r;
 reg [2*XLEN-1:0]  accum;
-reg [XLEN-1:0]    op_b_r;
 reg               op_a_neg_r;
 reg               op_b_neg_r;

+reg               prenegate_op_b;
+
 wire op_a_signed =
 	op_r == M_OP_MULH ||
 	op_r == M_OP_MULHSU ||
@@ -73,13 +74,16 @@ wire op_b_signed =
 	op_r == M_OP_REM;

 wire op_a_neg = op_a_signed && accum[XLEN-1];
-wire op_b_neg = op_b_signed && op_b_r[XLEN-1];
+wire op_b_neg = op_b_signed && op_b[XLEN-1];

 // Non-divide parts of the circuit should be constant-folded if all the MUL
 // operations are handled by the fast multiplier

 wire is_div = op_r[2] || (MUL_FAST && MULH_FAST);

+
+wire [XLEN-1:0] op_b_sign_adj = prenegate_op_b ? -op_b : op_b;
+
 // Controls for modifying sign of all/part of accumulator
 wire accum_neg_l;
 wire accum_inv_h;
@@ -103,7 +107,7 @@ always @ (*) begin: alu
 	addsub_tmp = {2*XLEN{1'b0}};
 	neg_l_borrow = 1'b0;
 	for (i = 0; i < MULDIV_UNROLL; i = i + 1) begin
-		addend = {is_div && |op_b_r, op_b_r, {XLEN-1{1'b0}}};
+		addend = {is_div && |op_b_sign_adj, op_b_sign_adj, {XLEN-1{1'b0}}};
 		shift_tmp = is_div ? accum_next : accum_next >> 1;
 		addsub_tmp = shift_tmp + addend;
 		accum_next = (is_div ? !addsub_tmp[2 * XLEN - 1] : accum_next[0]) ?
@@ -138,7 +142,7 @@ always @ (posedge clk or negedge rst_n) begin
 		op_r <= {W_MULOP{1'b0}};
 		op_a_neg_r <= 1'b0;
 		op_b_neg_r <= 1'b0;
-		op_b_r <= {XLEN{1'b0}};
+		prenegate_op_b <= 1'b0;
 		accum <= {XLEN*2{1'b0}};
 	end else if (op_kill || (op_vld && op_rdy)) begin
 		// Initialise circuit with operands + state
@@ -146,8 +150,8 @@ always @ (posedge clk or negedge rst_n) begin
 		sign_preadj_done <= !op_vld;
 		sign_postadj_done <= !op_vld;
 		sign_postadj_carry <= 1'b0;
+		prenegate_op_b <= 1'b0;
 		op_r <= op;
-		op_b_r <= op_b;
 		accum <= {{XLEN{1'b0}}, op_a};
 	end else if (!sign_preadj_done) begin
 		// Pre-adjust sign if necessary, else perform first iteration immediately
@@ -157,8 +161,9 @@ always @ (posedge clk or negedge rst_n) begin
 		if (accum_neg_l || (op_b_neg ^ is_div)) begin
 			if (accum_neg_l)
 				accum[0 +: XLEN] <= accum_next[0 +: XLEN];
-			if (op_b_neg ^ is_div)
-				op_b_r <= -op_b_r;
+			if (op_b_neg ^ is_div) begin
+				prenegate_op_b <= 1'b1;
+			end
 		end else begin
 			ctr <= ctr - MULDIV_UNROLL[W_CTR-1:0];
 			accum <= accum_next;
@@ -206,7 +211,7 @@ wire op_signs_differ = op_a_neg_r ^ op_b_neg_r;

 assign accum_neg_l =
 	!sign_preadj_done && op_a_neg ||
-	do_postadj && !sign_postadj_carry && op_signs_differ && !(is_div && ~|op_b_r);
+	do_postadj && !sign_postadj_carry && op_signs_differ && !(is_div && ~|op_b_sign_adj);

 assign {accum_incr_h, accum_inv_h} =
 	do_postadj &&  is_div && op_a_neg_r                             ? 2'b11 :