Perf option for dedicated branch comparator
This commit is contained in:
parent
3c61fae9ef
commit
7dc5046505
|
@ -0,0 +1,42 @@
|
|||
/*****************************************************************************\
|
||||
| Copyright (C) 2022 Luke Wren |
|
||||
| SPDX-License-Identifier: Apache-2.0 |
|
||||
\*****************************************************************************/
|
||||
|
||||
`default_nettype none
|
||||
|
||||
// The branch decision path through the ALU is slow because:
|
||||
//
|
||||
// - Sees immediates and PC on its inputs, as well as regs
|
||||
// - Add/sub rather than just add (with complex decode of the sub condition)
|
||||
// - 2 extra mux layers in front of adder if Zba extension is enabled
|
||||
//
|
||||
// So there is sometimes timing benefit to a dedicated branch comparator.
|
||||
|
||||
// Dedicated branch comparator. Produces one compare flag from two operands,
// steered by individual bits of the ALU opcode:
//
//   aluop[0] set   -> cmp = (op_a != op_b)
//   aluop[0] clear -> cmp = (op_a <  op_b), unsigned when aluop[2] is set,
//                     signed otherwise
//
// NOTE(review): the single-bit decode presumably relies on the ALUOP_*
// encoding in hazard3_ops.vh (aluop[0] picking out ALUOP_SUB and aluop[2]
// picking out ALUOP_LTU among the ops a branch can issue) -- confirm against
// that header before changing the encoding.
module hazard3_branchcmp #(
`include "hazard3_config.vh"
,
`include "hazard3_width_const.vh"
) (
    input  wire [W_ALUOP-1:0] aluop,
    input  wire [W_DATA-1:0]  op_a,
    input  wire [W_DATA-1:0]  op_b,
    output wire               cmp
);

`include "hazard3_ops.vh"

// Opcode decode: one bit each, to keep the select logic shallow.
wire sel_not_equal = aluop[0];
wire sel_unsigned  = aluop[2];

wire sign_a = op_a[W_DATA-1];
wire sign_b = op_b[W_DATA-1];

wire [W_DATA-1:0] difference = op_a - op_b;

// Less-than, evaluated by case on the operand MSBs:
//  - MSBs equal: the subtraction cannot overflow, so the MSB of the
//    difference is the answer for both signed and unsigned compares.
//  - MSBs differ, unsigned: the operand with its MSB set is the larger one,
//    so op_a < op_b exactly when op_b has its MSB set.
//  - MSBs differ, signed: the operand with its MSB set is negative, so
//    op_a < op_b exactly when op_a has its MSB set.
wire less_than = sign_a == sign_b ? difference[W_DATA-1] :
                 sel_unsigned     ? sign_b               :
                                    sign_a               ;

// The main ALU uses ALUOP_SUB for its equality check; here the same opcode
// bit selects a direct inequality compare instead.
assign cmp = sel_not_equal ? op_a != op_b : less_than;

endmodule
|
||||
|
||||
`default_nettype wire
|
|
@ -2,6 +2,7 @@ file hazard3_core.v
|
|||
file hazard3_cpu_1port.v
|
||||
file hazard3_cpu_2port.v
|
||||
file arith/hazard3_alu.v
|
||||
file arith/hazard3_branchcmp.v
|
||||
file arith/hazard3_muldiv_seq.v
|
||||
file arith/hazard3_mul_fast.v
|
||||
file arith/hazard3_priority_encode.v
|
||||
|
|
|
@ -114,6 +114,11 @@ parameter MUL_FAST = 0,
|
|||
// Requires: MUL_FAST
|
||||
parameter MULH_FAST = 0,
|
||||
|
||||
// FAST_BRANCHCMP: Instantiate a separate comparator (eq/lt/ltu) for branch
|
||||
// resolution, rather than using the ALU. May improve fetch address delay.
|
||||
// (Especially if Zba extension is enabled)
|
||||
parameter FAST_BRANCHCMP = 0,
|
||||
|
||||
// MTVEC_WMASK: Mask of which bits in MTVEC are modifiable. Save gates by
|
||||
// making trap vector base partly fixed (legal, as it's WARL).
|
||||
//
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
.MULDIV_UNROLL (MULDIV_UNROLL),
|
||||
.MUL_FAST (MUL_FAST),
|
||||
.MULH_FAST (MULH_FAST),
|
||||
.FAST_BRANCHCMP (FAST_BRANCHCMP),
|
||||
.MTVEC_WMASK (MTVEC_WMASK),
|
||||
.W_ADDR (W_ADDR),
|
||||
.W_DATA (W_DATA)
|
||||
|
|
|
@ -835,14 +835,33 @@ end
|
|||
|
||||
// JALR requires hardware to force the LSB of the computed target to zero.
wire [W_ADDR-1:0] x_jump_target = x_addr_sum & ~32'h1;

// Compare flag used for conditional branch resolution. Its source is chosen
// at elaboration time by FAST_BRANCHCMP:
//   nonzero -> a dedicated hazard3_branchcmp instance fed from the bypassed
//              rs1/rs2 values
//   zero    -> the ALU's own compare output (x_alu_cmp)
wire x_branch_cmp;

generate
if (|FAST_BRANCHCMP) begin: fast_branchcmp

    hazard3_branchcmp #(
`include "hazard3_config_inst.vh"
    ) branchcmp_u (
        .cmp   (x_branch_cmp),
        .aluop (d_aluop),
        .op_a  (x_rs1_bypass),
        .op_b  (x_rs2_bypass)
    );

end else begin: alu_branchcmp

    assign x_branch_cmp = x_alu_cmp;

end
endgenerate
|
||||
|
||||
// Jump/branch request from stage X.
//
// Be careful not to take branches whose comparisons depend on a load result:
// the whole request is suppressed while x_stall_on_raw is asserted.
//
// Conditional branches are resolved from x_branch_cmp only. The earlier
// x_alu_cmp terms are removed: leaving them alongside the x_branch_cmp terms
// made the expression malformed (no `||` joining the two groups) and would
// have bypassed the comparator selected for x_branch_cmp.
assign x_jump_req = !x_stall_on_raw && (
    d_branchcond == BCOND_ALWAYS ||
    d_branchcond == BCOND_ZERO  && !x_branch_cmp ||
    d_branchcond == BCOND_NZERO &&  x_branch_cmp
);
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Pipe Stage M
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ DEBUG_SUPPORT := 1
|
|||
MULDIV_UNROLL := 2
|
||||
MUL_FAST := 1
|
||||
MULH_FAST := 1
|
||||
FAST_BRANCHCMP := 1
|
||||
REDUCED_BYPASS := 0
|
||||
|
||||
MVENDORID_VAL := 32'hdeadbeef
|
||||
|
@ -38,6 +39,7 @@ SYNTH_CMD += chparam -set REDUCED_BYPASS $(REDUCED_BYPASS) $(TOP);
|
|||
SYNTH_CMD += chparam -set MULDIV_UNROLL $(MULDIV_UNROLL) $(TOP);
|
||||
SYNTH_CMD += chparam -set MUL_FAST $(MUL_FAST) $(TOP);
|
||||
SYNTH_CMD += chparam -set MULH_FAST $(MULH_FAST) $(TOP);
|
||||
SYNTH_CMD += chparam -set FAST_BRANCHCMP $(FAST_BRANCHCMP) $(TOP);
|
||||
SYNTH_CMD += chparam -set MVENDORID_VAL $(MVENDORID_VAL) $(TOP);
|
||||
SYNTH_CMD += chparam -set MIMPID_VAL $(MIMPID_VAL) $(TOP);
|
||||
SYNTH_CMD += chparam -set MCONFIGPTR_VAL $(MCONFIGPTR_VAL) $(TOP);
|
||||
|
|
Loading…
Reference in New Issue