Perf option for dedicated branch comparator
This commit is contained in:
parent
3c61fae9ef
commit
7dc5046505
|
@ -0,0 +1,42 @@
|
||||||
|
/*****************************************************************************\
|
||||||
|
| Copyright (C) 2022 Luke Wren |
|
||||||
|
| SPDX-License-Identifier: Apache-2.0 |
|
||||||
|
\*****************************************************************************/
|
||||||
|
|
||||||
|
`default_nettype none
|
||||||
|
|
||||||
|
// The branch decision path through the ALU is slow because:
|
||||||
|
//
|
||||||
|
// - Sees immediates and PC on its inputs, as well as regs
|
||||||
|
// - Add/sub rather than just add (with complex decode of the sub condition)
|
||||||
|
// - 2 extra mux layers in front of adder if Zba extension is enabled
|
||||||
|
//
|
||||||
|
// So there is sometimes timing benefit to a dedicated branch comparator.
|
||||||
|
|
||||||
|
// Dedicated comparator for branch resolution.
//
// Ports:
//   aluop - ALU operation code. Only two bits are examined here:
//           aluop[0] selects the not-equal result (ALUOP_SUB is the
//           operation the main ALU uses for equality checks), and
//           aluop[2] selects an unsigned less-than (ALUOP_LTU).
//   op_a  - left-hand comparison operand
//   op_b  - right-hand comparison operand
//   cmp   - (op_a != op_b) when aluop[0] is set, otherwise (op_a < op_b),
//           signed or unsigned according to aluop[2].
module hazard3_branchcmp #(
`include "hazard3_config.vh"
,
`include "hazard3_width_const.vh"
) (
	input  wire [W_ALUOP-1:0] aluop,
	input  wire [W_DATA-1:0]  op_a,
	input  wire [W_DATA-1:0]  op_b,
	output wire               cmp
);

`include "hazard3_ops.vh"

// Most-significant bits of the two operands
wire a_msb = op_a[W_DATA-1];
wire b_msb = op_b[W_DATA-1];

wire [W_DATA-1:0] difference = op_a - op_b;

wire unsigned_compare = aluop[2]; // aluop == ALUOP_LTU

// When the MSBs differ, the ordering is decided by the MSBs alone: for an
// unsigned compare the operand with its MSB set is the larger one, for a
// signed compare it is the smaller (negative) one. When the MSBs match, the
// MSB of the difference gives the ordering.
wire msb_decides = unsigned_compare ? b_msb : a_msb;
wire less_than   = a_msb != b_msb ? msb_decides : difference[W_DATA-1];

wire not_equal = op_a != op_b;

// ALUOP_SUB is used for equality check by main ALU
assign cmp = aluop[0] ? not_equal : less_than;

endmodule
|
||||||
|
|
||||||
|
`default_nettype wire
|
|
@ -2,6 +2,7 @@ file hazard3_core.v
|
||||||
file hazard3_cpu_1port.v
|
file hazard3_cpu_1port.v
|
||||||
file hazard3_cpu_2port.v
|
file hazard3_cpu_2port.v
|
||||||
file arith/hazard3_alu.v
|
file arith/hazard3_alu.v
|
||||||
|
file arith/hazard3_branchcmp.v
|
||||||
file arith/hazard3_muldiv_seq.v
|
file arith/hazard3_muldiv_seq.v
|
||||||
file arith/hazard3_mul_fast.v
|
file arith/hazard3_mul_fast.v
|
||||||
file arith/hazard3_priority_encode.v
|
file arith/hazard3_priority_encode.v
|
||||||
|
|
|
@ -114,6 +114,11 @@ parameter MUL_FAST = 0,
|
||||||
// Requires: MUL_FAST
|
// Requires: MUL_FAST
|
||||||
parameter MULH_FAST = 0,
|
parameter MULH_FAST = 0,
|
||||||
|
|
||||||
|
// FAST_BRANCHCMP: Instantiate a separate comparator (eq/lt/ltu) for branch
|
||||||
|
// resolution, rather than using the ALU. May improve fetch address delay.
|
||||||
|
// (Especially if Zba extension is enabled)
|
||||||
|
parameter FAST_BRANCHCMP = 0,
|
||||||
|
|
||||||
// MTVEC_WMASK: Mask of which bits in MTVEC are modifiable. Save gates by
|
// MTVEC_WMASK: Mask of which bits in MTVEC are modifiable. Save gates by
|
||||||
// making trap vector base partly fixed (legal, as it's WARL).
|
// making trap vector base partly fixed (legal, as it's WARL).
|
||||||
//
|
//
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
.MULDIV_UNROLL (MULDIV_UNROLL),
|
.MULDIV_UNROLL (MULDIV_UNROLL),
|
||||||
.MUL_FAST (MUL_FAST),
|
.MUL_FAST (MUL_FAST),
|
||||||
.MULH_FAST (MULH_FAST),
|
.MULH_FAST (MULH_FAST),
|
||||||
|
.FAST_BRANCHCMP (FAST_BRANCHCMP),
|
||||||
.MTVEC_WMASK (MTVEC_WMASK),
|
.MTVEC_WMASK (MTVEC_WMASK),
|
||||||
.W_ADDR (W_ADDR),
|
.W_ADDR (W_ADDR),
|
||||||
.W_DATA (W_DATA)
|
.W_DATA (W_DATA)
|
||||||
|
|
|
@ -835,14 +835,33 @@ end
|
||||||
|
|
||||||
// For JALR, the LSB of the result must be cleared by hardware
|
// For JALR, the LSB of the result must be cleared by hardware
|
||||||
wire [W_ADDR-1:0] x_jump_target = x_addr_sum & ~32'h1;
|
wire [W_ADDR-1:0] x_jump_target = x_addr_sum & ~32'h1;
|
||||||
|
// Comparison result used to resolve conditional branches. Its source is
// chosen at elaboration time by the FAST_BRANCHCMP parameter.
wire x_branch_cmp;
|
||||||
|
|
||||||
|
generate
|
||||||
|
// FAST_BRANCHCMP is all-zeroes: reuse the ALU's comparison output.
if (~|FAST_BRANCHCMP) begin: alu_branchcmp
|
||||||
|
|
||||||
|
assign x_branch_cmp = x_alu_cmp;
|
||||||
|
|
||||||
|
// FAST_BRANCHCMP is nonzero: instantiate a separate hazard3_branchcmp which
// compares x_rs1_bypass against x_rs2_bypass under control of d_aluop.
end else begin: fast_branchcmp
|
||||||
|
|
||||||
|
hazard3_branchcmp #(
|
||||||
|
`include "hazard3_config_inst.vh"
|
||||||
|
) branchcmp_u (
|
||||||
|
.aluop (d_aluop),
|
||||||
|
.op_a (x_rs1_bypass),
|
||||||
|
.op_b (x_rs2_bypass),
|
||||||
|
.cmp (x_branch_cmp)
|
||||||
|
);
|
||||||
|
|
||||||
|
end
|
||||||
|
endgenerate
|
||||||
|
|
||||||
// Be careful not to take branches whose comparisons depend on a load result
|
// Be careful not to take branches whose comparisons depend on a load result
|
||||||
assign x_jump_req = !x_stall_on_raw && (
|
assign x_jump_req = !x_stall_on_raw && (
|
||||||
d_branchcond == BCOND_ALWAYS ||
|
d_branchcond == BCOND_ALWAYS ||
|
||||||
d_branchcond == BCOND_ZERO && !x_alu_cmp ||
|
d_branchcond == BCOND_ZERO && !x_branch_cmp ||
|
||||||
d_branchcond == BCOND_NZERO && x_alu_cmp
|
d_branchcond == BCOND_NZERO && x_branch_cmp
|
||||||
);
|
);
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
// Pipe Stage M
|
// Pipe Stage M
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@ DEBUG_SUPPORT := 1
|
||||||
MULDIV_UNROLL := 2
|
MULDIV_UNROLL := 2
|
||||||
MUL_FAST := 1
|
MUL_FAST := 1
|
||||||
MULH_FAST := 1
|
MULH_FAST := 1
|
||||||
|
FAST_BRANCHCMP := 1
|
||||||
REDUCED_BYPASS := 0
|
REDUCED_BYPASS := 0
|
||||||
|
|
||||||
MVENDORID_VAL := 32'hdeadbeef
|
MVENDORID_VAL := 32'hdeadbeef
|
||||||
|
@ -38,6 +39,7 @@ SYNTH_CMD += chparam -set REDUCED_BYPASS $(REDUCED_BYPASS) $(TOP);
|
||||||
SYNTH_CMD += chparam -set MULDIV_UNROLL $(MULDIV_UNROLL) $(TOP);
|
SYNTH_CMD += chparam -set MULDIV_UNROLL $(MULDIV_UNROLL) $(TOP);
|
||||||
SYNTH_CMD += chparam -set MUL_FAST $(MUL_FAST) $(TOP);
|
SYNTH_CMD += chparam -set MUL_FAST $(MUL_FAST) $(TOP);
|
||||||
SYNTH_CMD += chparam -set MULH_FAST $(MULH_FAST) $(TOP);
|
SYNTH_CMD += chparam -set MULH_FAST $(MULH_FAST) $(TOP);
|
||||||
|
SYNTH_CMD += chparam -set FAST_BRANCHCMP $(FAST_BRANCHCMP) $(TOP);
|
||||||
SYNTH_CMD += chparam -set MVENDORID_VAL $(MVENDORID_VAL) $(TOP);
|
SYNTH_CMD += chparam -set MVENDORID_VAL $(MVENDORID_VAL) $(TOP);
|
||||||
SYNTH_CMD += chparam -set MIMPID_VAL $(MIMPID_VAL) $(TOP);
|
SYNTH_CMD += chparam -set MIMPID_VAL $(MIMPID_VAL) $(TOP);
|
||||||
SYNTH_CMD += chparam -set MCONFIGPTR_VAL $(MCONFIGPTR_VAL) $(TOP);
|
SYNTH_CMD += chparam -set MCONFIGPTR_VAL $(MCONFIGPTR_VAL) $(TOP);
|
||||||
|
|
Loading…
Reference in New Issue