314 lines
16 KiB
Systemverilog
314 lines
16 KiB
Systemverilog
// SPDX-License-Identifier: Apache-2.0
|
|
// Copyright 2020 Western Digital Corporation or its affiliates.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
|
|
// el2_exu_div_ctl - multi-cycle 32-bit divide / remainder unit.
//
// A request is captured when dp.valid is high. The result is driven on `out`:
// the remainder path (a_ff) when dp.rem was captured as 1, otherwise the
// quotient path (q_ff, or the 4-bit small-number shortcut). `finish_dly` is the
// registered copy of the internal `finish` condition, masked by `cancel`.
module el2_exu_div_ctl
|
|
import el2_pkg::*;
|
|
#(
|
|
// Parameter list is supplied entirely by the included file.
`include "el2_param.vh"
|
|
)
|
|
(
|
|
input logic clk, // Top level clock
|
|
input logic rst_l, // Reset
|
|
input logic scan_mode, // Scan mode
|
|
|
|
input el2_div_pkt_t dp, // Divide packet; this module reads dp.valid, dp.unsign and dp.rem
|
|
input logic [31:0] dividend, // Numerator
|
|
input logic [31:0] divisor, // Denominator
|
|
|
|
input logic cancel, // Cancel divide: masks valid/finish and clears run_in/count_in
|
|
|
|
|
|
output logic finish_dly, // Finish to match data (finish_ff & ~cancel)
|
|
output logic [31:0] out // Result (quotient or remainder)
|
|
);
|
|
|
|
|
|
// Internal signals. Suffix _ff marks a flop output, _in the matching flop input.
logic div_clken; // Enable for the gated divider clock
|
|
logic exu_div_clk; // Gated clock (l1clk output of exu_div_cgc)
|
|
logic run_in, run_state; // Divide-in-progress state (next / current)
|
|
logic [5:0] count_in, count; // Iteration counter (next / current); compared against 32 and 33
|
|
logic [32:0] m_ff; // Captured divisor; bit 32 = divisor[31] for signed ops, else 0
|
|
logic qff_enable; // Load enable for q_ff
|
|
logic aff_enable; // Load enable for a_ff
|
|
logic [32:0] q_in, q_ff; // Dividend / quotient shift register (next / current)
|
|
logic [32:0] a_in, a_ff; // Partial remainder accumulator (next / current); bit 32 is the sign
|
|
logic [32:0] m_eff; // Divisor operand fed to the adder: m_ff or ~m_ff
|
|
logic [32:0] a_shift; // Adder operand derived from a_eff, zeroed when not running
|
|
logic dividend_neg_ff, divisor_neg_ff; // Captured bit 31 of dividend / divisor
|
|
logic [31:0] dividend_comp; // Two's complement of q_ff[31:0]
|
|
logic [31:0] dividend_eff; // Dividend magnitude: dividend_comp for signed negative dividend, else q_ff
|
|
logic [31:0] q_ff_comp; // Two's complement of q_ff[31:0]
|
|
logic [31:0] q_ff_eff; // Quotient after sign fix-up
|
|
logic [31:0] a_ff_comp; // Two's complement of a_ff[31:0]
|
|
logic [31:0] a_ff_eff; // Remainder after sign fix-up
|
|
logic sign_ff, sign_eff; // Signed-operation flag (captured / computed from dp.unsign and divisor != 0)
|
|
logic rem_ff; // Captured dp.rem: 1 selects the remainder result
|
|
logic add; // 1: adder adds m_ff, 0: adder subtracts m_ff
|
|
logic [32:0] a_eff; // Selected source for the next partial-remainder step
|
|
logic [55:0] a_eff_shift; // dividend_eff zero-extended and shifted left by shortq_shift_ff
|
|
logic rem_correct; // Final remainder correction step (count == 33, remainder op, a_ff negative)
|
|
logic valid_ff_x; // dp.valid & ~cancel, registered
|
|
logic valid_x; // valid_ff_x masked by the current cancel
|
|
logic finish; // Result is complete this cycle
|
|
logic finish_ff; // finish & ~cancel, registered
|
|
|
|
logic smallnum_case, smallnum_case_ff; // Small-operand shortcut taken (current / registered)
|
|
logic [3:0] smallnum, smallnum_ff; // Shortcut quotient (current / registered)
|
|
logic m_already_comp; // Divisor is negative in a signed op, so the add/sub sense is inverted
|
|
|
|
|
|
// NOTE(review): rvoclkhdr, rvdff, rvdffs, rvdffe and rvtwoscomp are defined outside
// this file. The comments below assume the behavior their names and ports suggest
// (clock gate, flop, flop with enable, two's complement) - confirm against the library.

// Gated clock for the divider state flops, enabled by div_clken.
rvoclkhdr exu_div_cgc (.*, .en(div_clken), .l1clk(exu_div_clk));
|
|
|
|
// Request valid, registered; a cancel in the request cycle drops it.
rvdff #(1) e1val_ff (.*, .clk(exu_div_clk), .din(dp.valid & ~cancel), .dout(valid_ff_x));
|
|
// Finish indication, registered; a cancel in the finish cycle drops it.
rvdff #(1) i_finish_ff (.*, .clk(exu_div_clk), .din(finish & ~cancel), .dout(finish_ff));
|
|
|
|
// Run state and iteration count.
rvdff #(1) runff (.*, .clk(exu_div_clk), .din(run_in), .dout(run_state));
|
|
rvdff #(6) countff (.*, .clk(exu_div_clk), .din(count_in[5:0]), .dout(count[5:0]));
|
|
// Per-request attributes, loaded only when dp.valid: operand sign bits, signed flag, remainder select.
rvdffs #(4) miscf (.*, .clk(exu_div_clk), .din({dividend[31],divisor[31],sign_eff,dp.rem}), .dout({dividend_neg_ff,divisor_neg_ff,sign_ff,rem_ff}), .en(dp.valid));
|
|
// Small-number shortcut flag and its 4-bit quotient, registered for the output mux.
rvdff #(5) smallnumff (.*, .clk(exu_div_clk), .din({smallnum_case,smallnum[3:0]}), .dout({smallnum_case_ff,smallnum_ff[3:0]}));
|
|
// The three 33-bit data registers have no explicit .clk override, so .* binds them to
// the module-level clk; each is loaded under its own enable.
// m_ff: divisor, with bit 32 a copy of divisor[31] for signed ops (0 for unsigned).
rvdffe #(33) mff (.*, .en(dp.valid), .din({ ~dp.unsign & divisor[31], divisor[31:0]}), .dout(m_ff[32:0]));
|
|
rvdffe #(33) qff (.*, .en(qff_enable), .din(q_in[32:0]), .dout(q_ff[32:0]));
|
|
rvdffe #(33) aff (.*, .en(aff_enable), .din(a_in[32:0]), .dout(a_ff[32:0]));
|
|
|
|
// Negation helpers. i_dividend_comp and i_q_ff_comp are fed the same input (q_ff[31:0]),
// so dividend_comp and q_ff_comp carry the same value.
rvtwoscomp #(32) i_dividend_comp (.din(q_ff[31:0]), .dout(dividend_comp[31:0]));
|
|
rvtwoscomp #(32) i_q_ff_comp (.din(q_ff[31:0]), .dout(q_ff_comp[31:0]));
|
|
rvtwoscomp #(32) i_a_ff_comp (.din(a_ff[31:0]), .dout(a_ff_comp[31:0]));
|
|
|
|
|
|
// Registered valid, additionally masked by a cancel arriving in the following cycle.
assign valid_x = valid_ff_x & ~cancel;
|
|
|
|
|
|
// START - short circuit logic for small numbers {{
|
|
|
|
// small number divides - any 4b / 4b is done in 1 cycle (divisor != 0)
|
|
// to generate espresso equations:
|
|
// 1) smalldiv > smalldiv.e
|
|
// 2) espresso -Dso -oeqntott smalldiv.e | addassign > smalldiv
|
|
|
|
// smallnum case does not cover divide by 0
|
|
// The shortcut is taken for quotient requests only (~rem_ff) with a non-zero divisor, when either
// - both operands fit in 4 bits (upper 28 bits of q_ff and m_ff are zero), or
// - the dividend is zero (any non-zero divisor).
// It is qualified by valid_x, i.e. evaluated in the cycle after the request was accepted.
assign smallnum_case = ((q_ff[31:4] == 28'b0) & (m_ff[31:4] == 28'b0) & (m_ff[31:0] != 32'b0) & ~rem_ff & valid_x) |
|
|
((q_ff[31:0] == 32'b0) & (m_ff[31:0] != 32'b0) & ~rem_ff & valid_x);
|
|
|
|
|
|
// smallnum[3:0]: sum-of-products over q_ff[3:0] (dividend) and m_ff[3:0] (divisor).
// Per the note above these are espresso-minimized quotient equations; do not hand-edit.
// Bit 3 can only be set when the dividend is >= 8 and the divisor is 1 (m_ff[3:1] == 0).
assign smallnum[3] = ( q_ff[3] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] );
|
|
|
|
|
|
// Quotient bit 2.
assign smallnum[2] = ( q_ff[3] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) |
|
|
( q_ff[2] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ) |
|
|
( q_ff[3] & q_ff[2] & ~m_ff[3] & ~m_ff[2] );
|
|
|
|
|
|
// Quotient bit 1.
assign smallnum[1] = ( q_ff[2] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) |
|
|
( q_ff[1] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ) |
|
|
( q_ff[3] & ~m_ff[3] & ~m_ff[1] & ~m_ff[0]) |
|
|
( q_ff[3] & ~q_ff[2] & ~m_ff[3] & ~m_ff[2] & m_ff[1] & m_ff[0]) |
|
|
(~q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & ~m_ff[2] ) |
|
|
( q_ff[3] & q_ff[2] & ~m_ff[3] & ~m_ff[0]) |
|
|
( q_ff[3] & q_ff[2] & ~m_ff[3] & m_ff[2] & ~m_ff[1] ) |
|
|
( q_ff[3] & q_ff[1] & ~m_ff[3] & ~m_ff[1] ) |
|
|
( q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & m_ff[2] );
|
|
|
|
|
|
// Quotient bit 0.
assign smallnum[0] = ( q_ff[2] & q_ff[1] & q_ff[0] & ~m_ff[3] & ~m_ff[1] ) |
|
|
( q_ff[3] & ~q_ff[2] & q_ff[0] & ~m_ff[3] & m_ff[1] & m_ff[0]) |
|
|
( q_ff[2] & ~m_ff[3] & ~m_ff[1] & ~m_ff[0]) |
|
|
( q_ff[1] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) |
|
|
( q_ff[0] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ) |
|
|
(~q_ff[3] & q_ff[2] & ~q_ff[1] & ~m_ff[3] & ~m_ff[2] & m_ff[1] & m_ff[0]) |
|
|
(~q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & ~m_ff[0]) |
|
|
( q_ff[3] & ~m_ff[2] & ~m_ff[1] & ~m_ff[0]) |
|
|
( q_ff[3] & ~q_ff[2] & ~m_ff[3] & m_ff[2] & m_ff[1] ) |
|
|
(~q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & m_ff[2] & ~m_ff[1] ) |
|
|
(~q_ff[3] & q_ff[2] & q_ff[0] & ~m_ff[3] & ~m_ff[1] ) |
|
|
( q_ff[3] & ~q_ff[2] & ~q_ff[1] & ~m_ff[3] & m_ff[2] & m_ff[0]) |
|
|
( ~q_ff[2] & q_ff[1] & q_ff[0] & ~m_ff[3] & ~m_ff[2] ) |
|
|
( q_ff[3] & q_ff[2] & ~m_ff[1] & ~m_ff[0]) |
|
|
( q_ff[3] & q_ff[1] & ~m_ff[2] & ~m_ff[0]) |
|
|
(~q_ff[3] & q_ff[2] & q_ff[1] & q_ff[0] & ~m_ff[3] & m_ff[2] ) |
|
|
( q_ff[3] & q_ff[2] & m_ff[3] & ~m_ff[2] ) |
|
|
( q_ff[3] & q_ff[1] & m_ff[3] & ~m_ff[2] & ~m_ff[1] ) |
|
|
( q_ff[3] & q_ff[0] & ~m_ff[2] & ~m_ff[1] ) |
|
|
( q_ff[3] & ~q_ff[1] & ~m_ff[3] & m_ff[2] & m_ff[1] & m_ff[0]) |
|
|
( q_ff[3] & q_ff[2] & q_ff[1] & m_ff[3] & ~m_ff[0]) |
|
|
( q_ff[3] & q_ff[2] & q_ff[1] & m_ff[3] & ~m_ff[1] ) |
|
|
( q_ff[3] & q_ff[2] & q_ff[0] & m_ff[3] & ~m_ff[1] ) |
|
|
( q_ff[3] & ~q_ff[2] & q_ff[1] & ~m_ff[3] & m_ff[1] ) |
|
|
( q_ff[3] & q_ff[1] & q_ff[0] & ~m_ff[2] ) |
|
|
( q_ff[3] & q_ff[2] & q_ff[1] & q_ff[0] & m_ff[3] );
|
|
|
|
|
|
// END - short circuit logic for small numbers }}
|
|
|
|
|
|
// *** Start Short Q *** {{
// Short-quotient optimization: classify the magnitude of dividend and divisor by byte,
// and when the classification allows it, skip ahead by pre-shifting the dividend and
// advancing the iteration counter by the same amount.
|
|
|
|
logic [2:0] a_cls; // Dividend class: which of the upper three byte fields hold significant bits
|
|
logic [2:0] b_cls; // Divisor class, same encoding
|
|
logic [3:0] shortq_shift; // One-hot shift select, gated by shortq_enable
|
|
logic [4:0] shortq_shift_ff; // Decoded shift amount, derived from the registered select
|
|
logic shortq_enable; // A skip-ahead applies to this request
|
|
logic shortq_enable_ff; // shortq_enable, registered
|
|
logic [32:0] short_dividend; // Dividend with an explicit sign bit in [32]
|
|
|
|
// Bit 32 is the dividend's sign for signed operations and 0 for unsigned ones.
assign short_dividend[31:0] = q_ff[31:0];
|
|
assign short_dividend[32] = sign_ff & q_ff[31];
|
|
|
|
|
|
// Shift amount (SH) as a function of dividend class A and divisor class B.
// NOTE(review): rows marked 32 select shortq_raw[3], which is decoded below as
// 5'b1_1111 (31), not 32.
// A B
|
|
// 210 210 SH
|
|
// --- --- --
|
|
// 1xx 000 0
|
|
// 1xx 001 8
|
|
// 1xx 01x 16
|
|
// 1xx 1xx 24
|
|
// 01x 000 8
|
|
// 01x 001 16
|
|
// 01x 01x 24
|
|
// 01x 1xx 32
|
|
// 001 000 16
|
|
// 001 001 24
|
|
// 001 01x 32
|
|
// 001 1xx 32
|
|
// 000 000 24
|
|
// 000 001 32
|
|
// 000 01x 32
|
|
// 000 1xx 32
|
|
|
|
logic [3:0] shortq_raw; // One-hot shift select before gating: [3]=31, [2]=24, [1]=16, [0]=8
|
|
logic [3:0] shortq_shift_xx; // shortq_shift, registered
|
|
|
|
// Dividend class bits. For a non-negative dividend a bit is set when the byte is non-zero.
// For a negative dividend the compared field is offset down by one bit and the bit is set
// when that field is not all ones (a_cls[2] compares the 9-bit field [31:23]).
assign a_cls[2] = (~short_dividend[32] & (short_dividend[31:24] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[31:23] != {9{1'b1}}));
|
|
assign a_cls[1] = (~short_dividend[32] & (short_dividend[23:16] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[22:15] != {8{1'b1}}));
|
|
assign a_cls[0] = (~short_dividend[32] & (short_dividend[15:08] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[14:07] != {8{1'b1}}));
|
|
|
|
// Divisor class bits: byte is non-zero (m_ff[32] == 0) or not all ones (m_ff[32] == 1).
// Unlike a_cls, the fields are byte-aligned for both polarities.
assign b_cls[2] = (~m_ff[32] & ( m_ff[31:24] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[31:24] != {8{1'b1}}));
|
|
assign b_cls[1] = (~m_ff[32] & ( m_ff[23:16] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[23:16] != {8{1'b1}}));
|
|
assign b_cls[0] = (~m_ff[32] & ( m_ff[15:08] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[15:08] != {8{1'b1}}));
|
|
|
|
// Each shortq_raw bit ORs together the table rows that share a shift amount.
assign shortq_raw[3] = ( (a_cls[2:1] == 2'b01 ) & (b_cls[2] == 1'b1 ) ) | // Shift by 32 (table value; decoded as 31 below)
|
|
( (a_cls[2:0] == 3'b001) & (b_cls[2] == 1'b1 ) ) |
|
|
( (a_cls[2:0] == 3'b000) & (b_cls[2] == 1'b1 ) ) |
|
|
( (a_cls[2:0] == 3'b001) & (b_cls[2:1] == 2'b01 ) ) |
|
|
( (a_cls[2:0] == 3'b000) & (b_cls[2:1] == 2'b01 ) ) |
|
|
( (a_cls[2:0] == 3'b000) & (b_cls[2:0] == 3'b001) );
|
|
|
|
assign shortq_raw[2] = ( (a_cls[2] == 1'b1 ) & (b_cls[2] == 1'b1 ) ) | // Shift by 24
|
|
( (a_cls[2:1] == 2'b01 ) & (b_cls[2:1] == 2'b01 ) ) |
|
|
( (a_cls[2:0] == 3'b001) & (b_cls[2:0] == 3'b001) ) |
|
|
( (a_cls[2:0] == 3'b000) & (b_cls[2:0] == 3'b000) );
|
|
|
|
assign shortq_raw[1] = ( (a_cls[2] == 1'b1 ) & (b_cls[2:1] == 2'b01 ) ) | // Shift by 16
|
|
( (a_cls[2:1] == 2'b01 ) & (b_cls[2:0] == 3'b001) ) |
|
|
( (a_cls[2:0] == 3'b001) & (b_cls[2:0] == 3'b000) );
|
|
|
|
assign shortq_raw[0] = ( (a_cls[2] == 1'b1 ) & (b_cls[2:0] == 3'b001) ) | // Shift by 8
|
|
( (a_cls[2:1] == 2'b01 ) & (b_cls[2:0] == 3'b000) );
|
|
|
|
|
|
// Skip-ahead is only considered in the cycle after the request (valid_ff_x), for a
// non-zero divisor, and when some shift was selected. Note it uses valid_ff_x, not the
// cancel-masked valid_x.
assign shortq_enable = valid_ff_x & (m_ff[31:0] != 32'b0) & (shortq_raw[3:0] != 4'b0);
|
|
|
|
assign shortq_shift[3:0] = ({4{shortq_enable}} & shortq_raw[3:0]);
|
|
|
|
// Register the enable and the one-hot select; the shift itself is applied one cycle later.
rvdff #(5) i_shortq_ff (.*, .clk(exu_div_clk), .din({shortq_enable,shortq_shift[3:0]}), .dout({shortq_enable_ff,shortq_shift_xx[3:0]}));
|
|
|
|
// Decode the registered select into a 5-bit shift amount (0 when no bit is set).
// The AND/OR form relies on shortq_shift_xx having at most one bit set.
assign shortq_shift_ff[4:0] = ({5{shortq_shift_xx[3]}} & 5'b1_1111) | // 31
|
|
({5{shortq_shift_xx[2]}} & 5'b1_1000) | // 24
|
|
({5{shortq_shift_xx[1]}} & 5'b1_0000) | // 16
|
|
({5{shortq_shift_xx[0]}} & 5'b0_1000); // 8
|
|
|
|
// Simulation-only check, compiled in when ASSERT_ON is defined: the registered
// short-quotient shift select must never have more than one bit set.
`ifdef ASSERT_ON
|
|
|
|
logic div_assert_fail; // High when any two bits of shortq_shift_xx are set together
|
|
|
|
// All six pairwise combinations of the four select bits.
assign div_assert_fail = (shortq_shift_xx[3] & shortq_shift_xx[2]) |
|
|
(shortq_shift_xx[3] & shortq_shift_xx[1]) |
|
|
(shortq_shift_xx[3] & shortq_shift_xx[0]) |
|
|
(shortq_shift_xx[2] & shortq_shift_xx[1]) |
|
|
(shortq_shift_xx[2] & shortq_shift_xx[0]) |
|
|
(shortq_shift_xx[1] & shortq_shift_xx[0]);
|
|
|
|
// Deferred immediate assertion (#0); on failure it only prints a message.
assert_exu_div_shortq_shift_error: assert #0 (~div_assert_fail) else $display("ERROR: SHORTQ_SHIFT_XX with multiple shifts ON!");
|
|
|
|
`endif
|
|
|
|
|
|
// *** End Short *** }}
|
|
|
|
|
|
|
|
|
|
|
|
// Gated clock runs from the request cycle through the cycle in which finish_ff is set.
assign div_clken = dp.valid | run_state | finish | finish_ff;
|
|
|
|
// Enter the run state on a request and stay there until finish or cancel.
assign run_in = (dp.valid | run_state) & ~finish & ~cancel;
|
|
|
|
// Next iteration count. Forced to 0 when idle, on finish, on cancel, and in the cycle
// shortq_enable is asserted; otherwise advances by 1 plus the registered skip-ahead
// amount (shortq_shift_ff is 0 when no skip was selected).
assign count_in[5:0] = {6{run_state & ~finish & ~cancel & ~shortq_enable}} & (count[5:0] + {1'b0,shortq_shift_ff[4:0]} + 6'd1);
|
|
|
|
|
|
// Done when the small-number shortcut applies, or when the count reaches 32 for a
// quotient request / 33 for a remainder request.
assign finish = (smallnum_case | ((~rem_ff) ? (count[5:0] == 6'd32) : (count[5:0] == 6'd33)));
|
|
|
|
// Externally visible finish: registered finish, suppressed by a cancel in the same cycle.
assign finish_dly = finish_ff & ~cancel;
|
|
|
|
// Signed handling is applied only for signed requests with a non-zero divisor;
// a divide by zero is therefore processed with sign_ff == 0.
assign sign_eff = ~dp.unsign & (divisor[31:0] != 32'b0);
|
|
|
|
|
|
// Next value of the dividend / quotient register (AND/OR mux, selects are mutually exclusive):
// - idle: load the raw dividend, zero-extended to 33 bits
// - first run cycle (valid_ff_x) or cycle after a skip (shortq_enable_ff):
// take the dividend magnitude with the new quotient bit appended, pre-shifted
// left by the skip amount
// - otherwise: shift left by one, inserting the new quotient bit
// The new quotient bit is ~a_in[32], i.e. 1 when the adder result is non-negative.
assign q_in[32:0] = ({33{~run_state }} & {1'b0,dividend[31:0]}) |
|
|
({33{ run_state & (valid_ff_x | shortq_enable_ff)}} & ({dividend_eff[31:0], ~a_in[32]} << shortq_shift_ff[4:0])) |
|
|
({33{ run_state & ~(valid_ff_x | shortq_enable_ff)}} & {q_ff[31:0], ~a_in[32]});
|
|
|
|
// q_ff loads on a request and on every run cycle except the one where shortq_enable is set.
assign qff_enable = dp.valid | (run_state & ~shortq_enable);
|
|
|
|
|
|
|
|
|
|
// Dividend magnitude: negate q_ff for a signed operation with a negative dividend.
assign dividend_eff[31:0] = (sign_ff & dividend_neg_ff) ? dividend_comp[31:0] : q_ff[31:0];
|
|
|
|
|
|
// Adder's divisor operand: m_ff to add, ~m_ff to subtract (the +1 is supplied in a_in).
assign m_eff[32:0] = ( add ) ? m_ff[32:0] : ~m_ff[32:0];
|
|
|
|
// Dividend magnitude shifted left by the skip amount in a 56-bit field; bits [55:32]
// are the dividend bits that moved past bit 31.
assign a_eff_shift[55:0] = {24'b0, dividend_eff[31:0]} << shortq_shift_ff[4:0];
|
|
|
|
// Source for this step of the partial remainder:
// - remainder correction: a_ff unchanged
// - normal step: a_ff shifted left by one, bringing in q_ff[32]
// - cycle after a skip: the shifted-out dividend bits, zero-extended
assign a_eff[32:0] = ({33{ rem_correct }} & a_ff[32:0] ) |
|
|
({33{~rem_correct & ~shortq_enable_ff}} & {a_ff[31:0], q_ff[32]} ) |
|
|
({33{~rem_correct & shortq_enable_ff}} & {9'b0,a_eff_shift[55:32]});
|
|
|
|
assign a_shift[32:0] = {33{run_state}} & a_eff[32:0];
|
|
|
|
// Add or subtract the divisor; ~add provides the carry-in that completes the
// two's-complement subtraction. Held at 0 when not running.
assign a_in[32:0] = {33{run_state}} & (a_shift[32:0] + m_eff[32:0] + {32'b0,~add});
|
|
|
|
// a_ff loads on a request, on run cycles other than the shortq_enable cycle and
// count == 33, and on the remainder-correction step.
assign aff_enable = dp.valid | (run_state & ~shortq_enable & (count[5:0]!=6'd33)) | rem_correct;
|
|
|
|
|
|
// m_ff holds a negative value when the divisor is negative in a signed operation.
assign m_already_comp = (divisor_neg_ff & sign_ff);
|
|
|
|
// if m already complemented, then invert operation add->sub, sub->add
// Base rule: add when the partial remainder is negative (a_ff[32]) or during remainder
// correction, subtract otherwise.
// NOTE(review): this looks like a non-restoring division step - confirm.
|
|
assign add = (a_ff[32] | rem_correct) ^ m_already_comp;
|
|
|
|
// Extra step for remainder requests: at count 33, a negative partial remainder is
// corrected by one more add of the divisor.
assign rem_correct = (count[5:0] == 6'd33) & rem_ff & a_ff[32];
|
|
|
|
|
|
|
|
// Quotient sign fix-up: negate when the operation is signed and the operand signs differ.
assign q_ff_eff[31:0] = (sign_ff & (dividend_neg_ff ^ divisor_neg_ff)) ? q_ff_comp[31:0] : q_ff[31:0];
|
|
|
|
// Remainder sign fix-up: negate when the operation is signed and the dividend is negative.
assign a_ff_eff[31:0] = (sign_ff & dividend_neg_ff) ? a_ff_comp[31:0] : a_ff[31:0];
|
|
|
|
// Result mux (AND/OR form): registered small-number quotient zero-extended to 32 bits,
// remainder for remainder requests, otherwise the full quotient.
assign out[31:0] = ({32{ smallnum_case_ff }} & {28'b0, smallnum_ff[3:0]}) |
|
|
({32{ rem_ff}} & a_ff_eff[31:0] ) |
|
|
({32{~smallnum_case_ff & ~rem_ff}} & q_ff_eff[31:0] );
|
|
|
|
|
|
endmodule // el2_exu_div_ctl
|