// SPDX-License-Identifier: Apache-2.0 // Copyright 2019 Western Digital Corporation or its affiliates. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. module exu_div_ctl import swerv_types::*; ( input logic clk, // Top level clock input logic active_clk, // Level 1 active clock input logic rst_l, // Reset input logic scan_mode, // Scan mode input logic dec_tlu_fast_div_disable, // Disable small number optimization input logic [31:0] dividend, // Numerator input logic [31:0] divisor, // Denominator input div_pkt_t dp, // valid, sign, rem input logic flush_lower, // Flush pipeline output logic valid_ff_e1, // Valid E1 stage output logic finish_early, // Finish smallnum output logic finish, // Finish smallnum or normal divide output logic div_stall, // Divide is running output logic [31:0] out // Result ); logic run_in, run_state; logic [5:0] count_in, count; logic [32:0] m_ff; logic qff_enable; logic aff_enable; logic [32:0] q_in, q_ff; logic [32:0] a_in, a_ff; logic [32:0] m_eff; logic [32:0] a_shift; logic dividend_neg_ff, divisor_neg_ff; logic [31:0] dividend_comp; logic [31:0] dividend_eff; logic [31:0] q_ff_comp; logic [31:0] q_ff_eff; logic [31:0] a_ff_comp; logic [31:0] a_ff_eff; logic sign_ff, sign_eff; logic rem_ff; logic add; logic [32:0] a_eff; logic [64:0] a_eff_shift; logic rem_correct; logic flush_lower_ff; logic valid_e1; logic smallnum_case, smallnum_case_ff; logic [3:0] smallnum, smallnum_ff; logic m_already_comp; rvdff #(1) flush_any_ff (.*, .clk(active_clk), .din(flush_lower), .dout(flush_lower_ff)); rvdff #(1) e1val_ff (.*, .clk(active_clk), .din(dp.valid & ~flush_lower_ff), .dout(valid_ff_e1)); rvdff #(1) runff (.*, .clk(active_clk), .din(run_in), .dout(run_state)); rvdff #(6) countff (.*, .clk(active_clk), .din(count_in[5:0]), .dout(count[5:0])); rvdffs #(4) miscf (.*, .clk(active_clk), .din({dividend[31],divisor[31],sign_eff,dp.rem}), .dout({dividend_neg_ff,divisor_neg_ff,sign_ff,rem_ff}), .en(dp.valid)); rvdff #(5) smallnumff (.*, .clk(active_clk), .din({smallnum_case,smallnum[3:0]}), .dout({smallnum_case_ff,smallnum_ff[3:0]})); rvdffe #(33) mff (.*, .en(dp.valid), .din({ ~dp.unsign & divisor[31], divisor[31:0]}), .dout(m_ff[32:0])); rvdffe #(33) qff (.*, .en(qff_enable), .din(q_in[32:0]), .dout(q_ff[32:0])); rvdffe #(33) aff (.*, .en(aff_enable), .din(a_in[32:0]), .dout(a_ff[32:0])); rvtwoscomp #(32) dividend_c (.din(q_ff[31:0]), .dout(dividend_comp[31:0])); rvtwoscomp #(32) q_ff_c (.din(q_ff[31:0]), .dout(q_ff_comp[31:0])); rvtwoscomp #(32) a_ff_c (.din(a_ff[31:0]), .dout(a_ff_comp[31:0])); assign valid_e1 = valid_ff_e1 & ~flush_lower_ff; // START - short circuit logic for small numbers {{ // small number divides - any 4b / 4b is done in 1 cycle (divisor != 0) // to generate espresso equations: // 1) smalldiv > smalldiv.e // 2) espresso -Dso -oeqntott smalldiv.e | addassign > smalldiv // smallnum case does not cover divide by 0 assign smallnum_case = ((q_ff[31:4] == 28'b0) & (m_ff[31:4] == 28'b0) & (m_ff[31:0] != 32'b0) & ~rem_ff & valid_e1 & ~dec_tlu_fast_div_disable) | ((q_ff[31:0] == 32'b0) & (m_ff[31:0] != 32'b0) & ~rem_ff & valid_e1 & ~dec_tlu_fast_div_disable); assign smallnum[3] = ( q_ff[3] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ); assign smallnum[2] = ( q_ff[3] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) | ( q_ff[2] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ) | ( q_ff[3] & q_ff[2] & ~m_ff[3] & ~m_ff[2] ); assign smallnum[1] = ( q_ff[2] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) | ( q_ff[1] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ) | ( q_ff[3] & ~m_ff[3] & ~m_ff[1] & ~m_ff[0]) | ( q_ff[3] & ~q_ff[2] & ~m_ff[3] & ~m_ff[2] & m_ff[1] & m_ff[0]) | (~q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & ~m_ff[2] ) | ( q_ff[3] & q_ff[2] & ~m_ff[3] & ~m_ff[0]) | ( q_ff[3] & q_ff[2] & ~m_ff[3] & m_ff[2] & ~m_ff[1] ) | ( q_ff[3] & q_ff[1] & ~m_ff[3] & ~m_ff[1] ) | ( q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & m_ff[2] ); assign smallnum[0] = ( q_ff[2] & q_ff[1] & q_ff[0] & ~m_ff[3] & ~m_ff[1] ) | ( q_ff[3] & ~q_ff[2] & q_ff[0] & ~m_ff[3] & m_ff[1] & m_ff[0]) | ( q_ff[2] & ~m_ff[3] & ~m_ff[1] & ~m_ff[0]) | ( q_ff[1] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) | ( q_ff[0] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ) | (~q_ff[3] & q_ff[2] & ~q_ff[1] & ~m_ff[3] & ~m_ff[2] & m_ff[1] & m_ff[0]) | (~q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & ~m_ff[0]) | ( q_ff[3] & ~m_ff[2] & ~m_ff[1] & ~m_ff[0]) | ( q_ff[3] & ~q_ff[2] & ~m_ff[3] & m_ff[2] & m_ff[1] ) | (~q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & m_ff[2] & ~m_ff[1] ) | (~q_ff[3] & q_ff[2] & q_ff[0] & ~m_ff[3] & ~m_ff[1] ) | ( q_ff[3] & ~q_ff[2] & ~q_ff[1] & ~m_ff[3] & m_ff[2] & m_ff[0]) | ( ~q_ff[2] & q_ff[1] & q_ff[0] & ~m_ff[3] & ~m_ff[2] ) | ( q_ff[3] & q_ff[2] & ~m_ff[1] & ~m_ff[0]) | ( q_ff[3] & q_ff[1] & ~m_ff[2] & ~m_ff[0]) | (~q_ff[3] & q_ff[2] & q_ff[1] & q_ff[0] & ~m_ff[3] & m_ff[2] ) | ( q_ff[3] & q_ff[2] & m_ff[3] & ~m_ff[2] ) | ( q_ff[3] & q_ff[1] & m_ff[3] & ~m_ff[2] & ~m_ff[1] ) | ( q_ff[3] & q_ff[0] & ~m_ff[2] & ~m_ff[1] ) | ( q_ff[3] & ~q_ff[1] & ~m_ff[3] & m_ff[2] & m_ff[1] & m_ff[0]) | ( q_ff[3] & q_ff[2] & q_ff[1] & m_ff[3] & ~m_ff[0]) | ( q_ff[3] & q_ff[2] & q_ff[1] & m_ff[3] & ~m_ff[1] ) | ( q_ff[3] & q_ff[2] & q_ff[0] & m_ff[3] & ~m_ff[1] ) | ( q_ff[3] & ~q_ff[2] & q_ff[1] & ~m_ff[3] & m_ff[1] ) | ( q_ff[3] & q_ff[1] & q_ff[0] & ~m_ff[2] ) | ( q_ff[3] & q_ff[2] & q_ff[1] & q_ff[0] & m_ff[3] ); // END - short circuit logic for small numbers }} // *** Start Short Q *** {{ logic [4:0] a_cls; logic [4:0] b_cls; logic [5:0] shortq; logic [5:0] shortq_shift; logic [5:0] shortq_shift_ff; logic shortq_enable; logic shortq_enable_ff; logic [32:0] short_dividend; assign short_dividend[31:0] = q_ff[31:0]; assign short_dividend[32] = sign_ff & q_ff[31]; // A B // 210 210 SH // --- --- -- // 1xx 000 0 // 1xx 001 8 // 1xx 01x 16 // 1xx 1xx 24 // 01x 000 8 // 01x 001 16 // 01x 01x 24 // 01x 1xx 32 // 001 000 16 // 001 001 24 // 001 01x 32 // 001 1xx 32 // 000 000 24 // 000 001 32 // 000 01x 32 // 000 1xx 32 logic [3:0] shortq_raw; logic [3:0] shortq_shift_xx; assign a_cls[4:3] = 2'b0; assign a_cls[2] = (~short_dividend[32] & (short_dividend[31:24] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[31:23] != {9{1'b1}})); assign a_cls[1] = (~short_dividend[32] & (short_dividend[23:16] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[22:15] != {8{1'b1}})); assign a_cls[0] = (~short_dividend[32] & (short_dividend[15:08] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[14:07] != {8{1'b1}})); assign b_cls[4:3] = 2'b0; assign b_cls[2] = (~m_ff[32] & ( m_ff[31:24] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[31:24] != {8{1'b1}})); assign b_cls[1] = (~m_ff[32] & ( m_ff[23:16] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[23:16] != {8{1'b1}})); assign b_cls[0] = (~m_ff[32] & ( m_ff[15:08] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[15:08] != {8{1'b1}})); assign shortq_raw[3] = ( (a_cls[2:1] == 2'b01 ) & (b_cls[2] == 1'b1 ) ) | // Shift by 32 ( (a_cls[2:0] == 3'b001) & (b_cls[2] == 1'b1 ) ) | ( (a_cls[2:0] == 3'b000) & (b_cls[2] == 1'b1 ) ) | ( (a_cls[2:0] == 3'b001) & (b_cls[2:1] == 2'b01 ) ) | ( (a_cls[2:0] == 3'b000) & (b_cls[2:1] == 2'b01 ) ) | ( (a_cls[2:0] == 3'b000) & (b_cls[2:0] == 3'b001) ); assign shortq_raw[2] = ( (a_cls[2] == 1'b1 ) & (b_cls[2] == 1'b1 ) ) | // Shift by 24 ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:1] == 2'b01 ) ) | ( (a_cls[2:0] == 3'b001) & (b_cls[2:0] == 3'b001) ) | ( (a_cls[2:0] == 3'b000) & (b_cls[2:0] == 3'b000) ); assign shortq_raw[1] = ( (a_cls[2] == 1'b1 ) & (b_cls[2:1] == 2'b01 ) ) | // Shift by 16 ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:0] == 3'b001) ) | ( (a_cls[2:0] == 3'b001) & (b_cls[2:0] == 3'b000) ); assign shortq_raw[0] = ( (a_cls[2] == 1'b1 ) & (b_cls[2:0] == 3'b001) ) | // Shift by 8 ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:0] == 3'b000) ); assign shortq_enable = valid_ff_e1 & (m_ff[31:0] != 32'b0) & (shortq_raw[3:0] != 4'b0); assign shortq_shift[3:0] = ({4{shortq_enable}} & shortq_raw[3:0]); rvdff #(5) i_shortq_ff (.*, .clk(active_clk), .din({shortq_enable,shortq_shift[3:0]}), .dout({shortq_enable_ff,shortq_shift_xx[3:0]})); assign shortq_shift_ff[5:0] = ({6{shortq_shift_xx[3]}} & 6'b01_1111) | // 31 ({6{shortq_shift_xx[2]}} & 6'b01_1000) | // 24 ({6{shortq_shift_xx[1]}} & 6'b01_0000) | // 16 ({6{shortq_shift_xx[0]}} & 6'b00_1000); // 8 `ifdef ASSERT_ON logic div_assert_fail; assign div_assert_fail = (shortq_shift_xx[3] & shortq_shift_xx[2]) | (shortq_shift_xx[3] & shortq_shift_xx[1]) | (shortq_shift_xx[3] & shortq_shift_xx[0]) | (shortq_shift_xx[2] & shortq_shift_xx[1]) | (shortq_shift_xx[2] & shortq_shift_xx[0]) | (shortq_shift_xx[1] & shortq_shift_xx[0]); assert_exu_div_shortq_shift_error: assert #0 (~div_assert_fail) else $display("ERROR: SHORTQ_SHIFT_XX with multiple shifts ON!"); `endif // *** End Short Q *** }} assign div_stall = run_state; assign run_in = (dp.valid | run_state) & ~finish & ~flush_lower_ff; assign count_in[5:0] = {6{run_state & ~finish & ~flush_lower_ff & ~shortq_enable}} & (count[5:0] + shortq_shift_ff[5:0] + 6'd1); assign finish_early = smallnum_case; assign finish = (smallnum_case | ((~rem_ff) ? (count[5:0] == 6'd32) : (count[5:0] == 6'd33))) & ~flush_lower & ~flush_lower_ff; assign sign_eff = ~dp.unsign & (divisor[31:0] != 32'b0); assign q_in[32:0] = ({33{~run_state }} & {1'b0,dividend[31:0]}) | ({33{ run_state & (valid_ff_e1 | shortq_enable_ff)}} & ({dividend_eff[31:0], ~a_in[32]} << shortq_shift_ff[5:0])) | ({33{ run_state & ~(valid_ff_e1 | shortq_enable_ff)}} & {q_ff[31:0], ~a_in[32]}); assign qff_enable = dp.valid | (run_state & ~shortq_enable); assign dividend_eff[31:0] = (sign_ff & dividend_neg_ff) ? dividend_comp[31:0] : q_ff[31:0]; assign m_eff[32:0] = (add) ? m_ff[32:0] : ~m_ff[32:0]; assign a_eff_shift[64:0] = {33'b0, dividend_eff[31:0]} << shortq_shift_ff[5:0]; assign a_eff[32:0] = ({33{ rem_correct }} & a_ff[32:0] ) | ({33{~rem_correct & ~shortq_enable_ff}} & {a_ff[31:0], q_ff[32]}) | ({33{~rem_correct & shortq_enable_ff}} & a_eff_shift[64:32] ); assign a_shift[32:0] = {33{run_state}} & a_eff[32:0]; assign a_in[32:0] = {33{run_state}} & (a_shift[32:0] + m_eff[32:0] + {32'b0,~add}); assign aff_enable = dp.valid | (run_state & ~shortq_enable & (count[5:0]!=6'd33)) | rem_correct; assign m_already_comp = (divisor_neg_ff & sign_ff); // if m already complemented, then invert operation add->sub, sub->add assign add = (a_ff[32] | rem_correct) ^ m_already_comp; assign rem_correct = (count[5:0] == 6'd33) & rem_ff & a_ff[32]; assign q_ff_eff[31:0] = (sign_ff & (dividend_neg_ff ^ divisor_neg_ff)) ? q_ff_comp[31:0] : q_ff[31:0]; assign a_ff_eff[31:0] = (sign_ff & dividend_neg_ff) ? a_ff_comp[31:0] : a_ff[31:0]; assign out[31:0] = ({32{ smallnum_case_ff }} & {28'b0, smallnum_ff[3:0]}) | ({32{ rem_ff}} & a_ff_eff[31:0] ) | ({32{~smallnum_case_ff & ~rem_ff}} & q_ff_eff[31:0] ); endmodule // exu_div_ctl