cores-swerv-el2/design/exu/el2_exu_mul_ctl.sv

738 lines
30 KiB
Systemverilog

// SPDX-License-Identifier: Apache-2.0
// Copyright 2020 Western Digital Corporation or its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
module el2_exu_mul_ctl
import el2_pkg::*;
#(
`include "el2_param.vh"
)
(
input logic clk, // Top level clock
input logic rst_l, // Reset
input logic scan_mode, // Scan mode
input el2_mul_pkt_t mul_p, // {Valid, RS1 signed operand, RS2 signed operand, Select low 32-bits of result}
input logic [31:0] rs1_in, // A operand
input logic [31:0] rs2_in, // B operand
output logic [31:0] result_x // Result
);
logic mul_x_enable;
logic bit_x_enable;
logic signed [32:0] rs1_ext_in;
logic signed [32:0] rs2_ext_in;
logic [65:0] prod_x;
logic low_x;
// *** Start - BitManip ***
logic bitmanip_sel_d;
logic bitmanip_sel_x;
logic [31:0] bitmanip_d;
logic [31:0] bitmanip_x;
// ZBE
logic ap_bcompress;
logic ap_bdecompress;
// ZBC
logic ap_clmul;
logic ap_clmulh;
logic ap_clmulr;
// ZBP
logic ap_grev;
logic ap_gorc;
logic ap_shfl;
logic ap_unshfl;
logic ap_xperm_n;
logic ap_xperm_b;
logic ap_xperm_h;
// ZBR
logic ap_crc32_b;
logic ap_crc32_h;
logic ap_crc32_w;
logic ap_crc32c_b;
logic ap_crc32c_h;
logic ap_crc32c_w;
// ZBF
logic ap_bfp;
if (pt.BITMANIP_ZBE == 1)
begin
assign ap_bcompress = mul_p.bcompress;
assign ap_bdecompress = mul_p.bdecompress;
end
else
begin
assign ap_bcompress = 1'b0;
assign ap_bdecompress = 1'b0;
end
if (pt.BITMANIP_ZBC == 1)
begin
assign ap_clmul = mul_p.clmul;
assign ap_clmulh = mul_p.clmulh;
assign ap_clmulr = mul_p.clmulr;
end
else
begin
assign ap_clmul = 1'b0;
assign ap_clmulh = 1'b0;
assign ap_clmulr = 1'b0;
end
if (pt.BITMANIP_ZBP == 1)
begin
assign ap_grev = mul_p.grev;
assign ap_gorc = mul_p.gorc;
assign ap_shfl = mul_p.shfl;
assign ap_unshfl = mul_p.unshfl;
assign ap_xperm_n = mul_p.xperm_n;
assign ap_xperm_b = mul_p.xperm_b;
assign ap_xperm_h = mul_p.xperm_h;
end
else
begin
assign ap_grev = 1'b0;
assign ap_gorc = 1'b0;
assign ap_shfl = 1'b0;
assign ap_unshfl = 1'b0;
assign ap_xperm_n = 1'b0;
assign ap_xperm_b = 1'b0;
assign ap_xperm_h = 1'b0;
end
if (pt.BITMANIP_ZBR == 1)
begin
assign ap_crc32_b = mul_p.crc32_b;
assign ap_crc32_h = mul_p.crc32_h;
assign ap_crc32_w = mul_p.crc32_w;
assign ap_crc32c_b = mul_p.crc32c_b;
assign ap_crc32c_h = mul_p.crc32c_h;
assign ap_crc32c_w = mul_p.crc32c_w;
end
else
begin
assign ap_crc32_b = 1'b0;
assign ap_crc32_h = 1'b0;
assign ap_crc32_w = 1'b0;
assign ap_crc32c_b = 1'b0;
assign ap_crc32c_h = 1'b0;
assign ap_crc32c_w = 1'b0;
end
if (pt.BITMANIP_ZBF == 1)
begin
assign ap_bfp = mul_p.bfp;
end
else
begin
assign ap_bfp = 1'b0;
end
// *** End - BitManip ***
assign mul_x_enable = mul_p.valid;
assign bit_x_enable = mul_p.valid;
assign rs1_ext_in[32] = mul_p.rs1_sign & rs1_in[31];
assign rs2_ext_in[32] = mul_p.rs2_sign & rs2_in[31];
assign rs1_ext_in[31:0] = rs1_in[31:0];
assign rs2_ext_in[31:0] = rs2_in[31:0];
// --------------------------- Multiply ----------------------------------
logic signed [32:0] rs1_x;
logic signed [32:0] rs2_x;
rvdffe #(34) i_a_x_ff (.*, .clk(clk), .din({mul_p.low,rs1_ext_in[32:0]}), .dout({low_x,rs1_x[32:0]}), .en(mul_x_enable));
rvdffe #(33) i_b_x_ff (.*, .clk(clk), .din( rs2_ext_in[32:0] ), .dout( rs2_x[32:0] ), .en(mul_x_enable));
assign prod_x[65:0] = rs1_x * rs2_x;
// * * * * * * * * * * * * * * * * * * BitManip : BCOMPRESS, BDECOMPRESS * * * * * * * * * * * * *
// *** BCOMPRESS == "gather" ***
logic [31:0] bcompress_d;
logic bcompress_test_bit_d;
integer bcompress_i, bcompress_j;
always_comb
begin
bcompress_j = 0;
bcompress_test_bit_d = 1'b0;
bcompress_d[31:0] = 32'b0;
for (bcompress_i=0; bcompress_i<32; bcompress_i++)
begin
bcompress_test_bit_d = rs2_in[bcompress_i];
if (bcompress_test_bit_d)
begin
bcompress_d[bcompress_j] = rs1_in[bcompress_i];
bcompress_j = bcompress_j + 1;
end // IF bcompress_test_bit
end // FOR bcompress_i
end // ALWAYS_COMB
// *** BDECOMPRESS == "scatter" ***
logic [31:0] bdecompress_d;
logic bdecompress_test_bit_d;
integer bdecompress_i, bdecompress_j;
always_comb
begin
bdecompress_j = 0;
bdecompress_test_bit_d = 1'b0;
bdecompress_d[31:0] = 32'b0;
for (bdecompress_i=0; bdecompress_i<32; bdecompress_i++)
begin
bdecompress_test_bit_d = rs2_in[bdecompress_i];
if (bdecompress_test_bit_d)
begin
bdecompress_d[bdecompress_i] = rs1_in[bdecompress_j];
bdecompress_j = bdecompress_j + 1;
end // IF bdecompress_test_bit
end // FOR bdecompress_i
end // ALWAYS_COMB
// * * * * * * * * * * * * * * * * * * BitManip : CLMUL, CLMULH, CLMULR * * * * * * * * * * * * *
logic [62:0] clmul_raw_d;
assign clmul_raw_d[62:0] = ( {63{rs2_in[00]}} & {31'b0,rs1_in[31:0] } ) ^
( {63{rs2_in[01]}} & {30'b0,rs1_in[31:0], 1'b0} ) ^
( {63{rs2_in[02]}} & {29'b0,rs1_in[31:0], 2'b0} ) ^
( {63{rs2_in[03]}} & {28'b0,rs1_in[31:0], 3'b0} ) ^
( {63{rs2_in[04]}} & {27'b0,rs1_in[31:0], 4'b0} ) ^
( {63{rs2_in[05]}} & {26'b0,rs1_in[31:0], 5'b0} ) ^
( {63{rs2_in[06]}} & {25'b0,rs1_in[31:0], 6'b0} ) ^
( {63{rs2_in[07]}} & {24'b0,rs1_in[31:0], 7'b0} ) ^
( {63{rs2_in[08]}} & {23'b0,rs1_in[31:0], 8'b0} ) ^
( {63{rs2_in[09]}} & {22'b0,rs1_in[31:0], 9'b0} ) ^
( {63{rs2_in[10]}} & {21'b0,rs1_in[31:0],10'b0} ) ^
( {63{rs2_in[11]}} & {20'b0,rs1_in[31:0],11'b0} ) ^
( {63{rs2_in[12]}} & {19'b0,rs1_in[31:0],12'b0} ) ^
( {63{rs2_in[13]}} & {18'b0,rs1_in[31:0],13'b0} ) ^
( {63{rs2_in[14]}} & {17'b0,rs1_in[31:0],14'b0} ) ^
( {63{rs2_in[15]}} & {16'b0,rs1_in[31:0],15'b0} ) ^
( {63{rs2_in[16]}} & {15'b0,rs1_in[31:0],16'b0} ) ^
( {63{rs2_in[17]}} & {14'b0,rs1_in[31:0],17'b0} ) ^
( {63{rs2_in[18]}} & {13'b0,rs1_in[31:0],18'b0} ) ^
( {63{rs2_in[19]}} & {12'b0,rs1_in[31:0],19'b0} ) ^
( {63{rs2_in[20]}} & {11'b0,rs1_in[31:0],20'b0} ) ^
( {63{rs2_in[21]}} & {10'b0,rs1_in[31:0],21'b0} ) ^
( {63{rs2_in[22]}} & { 9'b0,rs1_in[31:0],22'b0} ) ^
( {63{rs2_in[23]}} & { 8'b0,rs1_in[31:0],23'b0} ) ^
( {63{rs2_in[24]}} & { 7'b0,rs1_in[31:0],24'b0} ) ^
( {63{rs2_in[25]}} & { 6'b0,rs1_in[31:0],25'b0} ) ^
( {63{rs2_in[26]}} & { 5'b0,rs1_in[31:0],26'b0} ) ^
( {63{rs2_in[27]}} & { 4'b0,rs1_in[31:0],27'b0} ) ^
( {63{rs2_in[28]}} & { 3'b0,rs1_in[31:0],28'b0} ) ^
( {63{rs2_in[29]}} & { 2'b0,rs1_in[31:0],29'b0} ) ^
( {63{rs2_in[30]}} & { 1'b0,rs1_in[31:0],30'b0} ) ^
( {63{rs2_in[31]}} & { rs1_in[31:0],31'b0} );
// * * * * * * * * * * * * * * * * * * BitManip : GREV * * * * * * * * * * * * * * * * * *
// uint32_t grev32(uint32_t rs1, uint32_t rs2)
// {
// uint32_t x = rs1;
// int shamt = rs2 & 31;
//
// if (shamt & 1) x = ( (x & 0x55555555) << 1) | ( (x & 0xAAAAAAAA) >> 1);
// if (shamt & 2) x = ( (x & 0x33333333) << 2) | ( (x & 0xCCCCCCCC) >> 2);
// if (shamt & 4) x = ( (x & 0x0F0F0F0F) << 4) | ( (x & 0xF0F0F0F0) >> 4);
// if (shamt & 8) x = ( (x & 0x00FF00FF) << 8) | ( (x & 0xFF00FF00) >> 8);
// if (shamt & 16) x = ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
//
// return x;
// }
logic [31:0] grev1_d;
logic [31:0] grev2_d;
logic [31:0] grev4_d;
logic [31:0] grev8_d;
logic [31:0] grev_d;
assign grev1_d[31:0] = (rs2_in[0]) ? {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25],
rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} : rs1_in[31:0];
assign grev2_d[31:0] = (rs2_in[1]) ? {grev1_d[29:28],grev1_d[31:30],grev1_d[25:24],grev1_d[27:26],
grev1_d[21:20],grev1_d[23:22],grev1_d[17:16],grev1_d[19:18],
grev1_d[13:12],grev1_d[15:14],grev1_d[09:08],grev1_d[11:10],
grev1_d[05:04],grev1_d[07:06],grev1_d[01:00],grev1_d[03:02]} : grev1_d[31:0];
assign grev4_d[31:0] = (rs2_in[2]) ? {grev2_d[27:24],grev2_d[31:28],grev2_d[19:16],grev2_d[23:20],
grev2_d[11:08],grev2_d[15:12],grev2_d[03:00],grev2_d[07:04]} : grev2_d[31:0];
assign grev8_d[31:0] = (rs2_in[3]) ? {grev4_d[23:16],grev4_d[31:24],grev4_d[07:00],grev4_d[15:08]} : grev4_d[31:0];
assign grev_d[31:0] = (rs2_in[4]) ? {grev8_d[15:00],grev8_d[31:16]} : grev8_d[31:0];
// * * * * * * * * * * * * * * * * * * BitManip : GORC * * * * * * * * * * * * * * * * * *
// uint32_t gorc32(uint32_t rs1, uint32_t rs2)
// {
// uint32_t x = rs1;
// int shamt = rs2 & 31;
//
// if (shamt & 1) x |= ( (x & 0x55555555) << 1) | ( (x & 0xAAAAAAAA) >> 1);
// if (shamt & 2) x |= ( (x & 0x33333333) << 2) | ( (x & 0xCCCCCCCC) >> 2);
// if (shamt & 4) x |= ( (x & 0x0F0F0F0F) << 4) | ( (x & 0xF0F0F0F0) >> 4);
// if (shamt & 8) x |= ( (x & 0x00FF00FF) << 8) | ( (x & 0xFF00FF00) >> 8);
// if (shamt & 16) x |= ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
//
// return x;
// }
logic [31:0] gorc1_d;
logic [31:0] gorc2_d;
logic [31:0] gorc4_d;
logic [31:0] gorc8_d;
logic [31:0] gorc_d;
assign gorc1_d[31:0] = ( {32{rs2_in[0]}} & {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25],
rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} ) | rs1_in[31:0];
assign gorc2_d[31:0] = ( {32{rs2_in[1]}} & {gorc1_d[29:28],gorc1_d[31:30],gorc1_d[25:24],gorc1_d[27:26],
gorc1_d[21:20],gorc1_d[23:22],gorc1_d[17:16],gorc1_d[19:18],
gorc1_d[13:12],gorc1_d[15:14],gorc1_d[09:08],gorc1_d[11:10],
gorc1_d[05:04],gorc1_d[07:06],gorc1_d[01:00],gorc1_d[03:02]} ) | gorc1_d[31:0];
assign gorc4_d[31:0] = ( {32{rs2_in[2]}} & {gorc2_d[27:24],gorc2_d[31:28],gorc2_d[19:16],gorc2_d[23:20],
gorc2_d[11:08],gorc2_d[15:12],gorc2_d[03:00],gorc2_d[07:04]} ) | gorc2_d[31:0];
assign gorc8_d[31:0] = ( {32{rs2_in[3]}} & {gorc4_d[23:16],gorc4_d[31:24],gorc4_d[07:00],gorc4_d[15:08]} ) | gorc4_d[31:0];
assign gorc_d[31:0] = ( {32{rs2_in[4]}} & {gorc8_d[15:00],gorc8_d[31:16]} ) | gorc8_d[31:0];
// * * * * * * * * * * * * * * * * * * BitManip : SHFL, UNSHLF * * * * * * * * * * * * * * * * * *
// uint32_t shuffle32_stage (uint32_t src, uint32_t maskL, uint32_t maskR, int N)
// {
// uint32_t x = src & ~(maskL | maskR);
// x |= ((src << N) & maskL) | ((src >> N) & maskR);
// return x;
// }
//
//
//
// uint32_t shfl32(uint32_t rs1, uint32_t rs2)
// {
// uint32_t x = rs1;
// int shamt = rs2 & 15
//
// if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
// if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
// if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
// if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
//
// return x;
// }
logic [31:0] shfl8_d;
logic [31:0] shfl4_d;
logic [31:0] shfl2_d;
logic [31:0] shfl_d;
assign shfl8_d[31:0] = (rs2_in[3]) ? {rs1_in[31:24],rs1_in[15:08],rs1_in[23:16],rs1_in[07:00]} : rs1_in[31:0];
assign shfl4_d[31:0] = (rs2_in[2]) ? {shfl8_d[31:28],shfl8_d[23:20],shfl8_d[27:24],shfl8_d[19:16],
shfl8_d[15:12],shfl8_d[07:04],shfl8_d[11:08],shfl8_d[03:00]} : shfl8_d[31:0];
assign shfl2_d[31:0] = (rs2_in[1]) ? {shfl4_d[31:30],shfl4_d[27:26],shfl4_d[29:28],shfl4_d[25:24],
shfl4_d[23:22],shfl4_d[19:18],shfl4_d[21:20],shfl4_d[17:16],
shfl4_d[15:14],shfl4_d[11:10],shfl4_d[13:12],shfl4_d[09:08],
shfl4_d[07:06],shfl4_d[03:02],shfl4_d[05:04],shfl4_d[01:00]} : shfl4_d[31:0];
assign shfl_d[31:0] = (rs2_in[0]) ? {shfl2_d[31],shfl2_d[29],shfl2_d[30],shfl2_d[28],shfl2_d[27],shfl2_d[25],shfl2_d[26],shfl2_d[24],
shfl2_d[23],shfl2_d[21],shfl2_d[22],shfl2_d[20],shfl2_d[19],shfl2_d[17],shfl2_d[18],shfl2_d[16],
shfl2_d[15],shfl2_d[13],shfl2_d[14],shfl2_d[12],shfl2_d[11],shfl2_d[09],shfl2_d[10],shfl2_d[08],
shfl2_d[07],shfl2_d[05],shfl2_d[06],shfl2_d[04],shfl2_d[03],shfl2_d[01],shfl2_d[02],shfl2_d[00]} : shfl2_d[31:0];
// uint32_t unshfl32(uint32_t rs1, uint32_t rs2)
// {
// uint32_t x = rs1;
// int shamt = rs2 & 15
//
// if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
// if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
// if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
// if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
//
// return x;
// }
logic [31:0] unshfl1_d;
logic [31:0] unshfl2_d;
logic [31:0] unshfl4_d;
logic [31:0] unshfl_d;
assign unshfl1_d[31:0] = (rs2_in[0]) ? {rs1_in[31],rs1_in[29],rs1_in[30],rs1_in[28],rs1_in[27],rs1_in[25],rs1_in[26],rs1_in[24],
rs1_in[23],rs1_in[21],rs1_in[22],rs1_in[20],rs1_in[19],rs1_in[17],rs1_in[18],rs1_in[16],
rs1_in[15],rs1_in[13],rs1_in[14],rs1_in[12],rs1_in[11],rs1_in[09],rs1_in[10],rs1_in[08],
rs1_in[07],rs1_in[05],rs1_in[06],rs1_in[04],rs1_in[03],rs1_in[01],rs1_in[02],rs1_in[00]} : rs1_in[31:0];
assign unshfl2_d[31:0] = (rs2_in[1]) ? {unshfl1_d[31:30],unshfl1_d[27:26],unshfl1_d[29:28],unshfl1_d[25:24],
unshfl1_d[23:22],unshfl1_d[19:18],unshfl1_d[21:20],unshfl1_d[17:16],
unshfl1_d[15:14],unshfl1_d[11:10],unshfl1_d[13:12],unshfl1_d[09:08],
unshfl1_d[07:06],unshfl1_d[03:02],unshfl1_d[05:04],unshfl1_d[01:00]} : unshfl1_d[31:0];
assign unshfl4_d[31:0] = (rs2_in[2]) ? {unshfl2_d[31:28],unshfl2_d[23:20],unshfl2_d[27:24],unshfl2_d[19:16],
unshfl2_d[15:12],unshfl2_d[07:04],unshfl2_d[11:08],unshfl2_d[03:00]} : unshfl2_d[31:0];
assign unshfl_d[31:0] = (rs2_in[3]) ? {unshfl4_d[31:24],unshfl4_d[15:08],unshfl4_d[23:16],unshfl4_d[07:00]} : unshfl4_d[31:0];
// * * * * * * * * * * * * * * * * * * BitManip : XPERM * * * * * * * * * * * * * * * * *
//
// These instructions operate on nibbles/bytes/half-words/words.
// rs1 is a vector of data words and rs2 is a vector of indices into rs1.
// The result of the instruction is the vector rs2 with each element replaced by the corresponding data word from rs1,
// or zero then the index in rs2 is out of bounds.
//
// uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2)
// {
// uint_xlen_t r = 0;
// uint_xlen_t sz = 1LL << sz_log2;
// uint_xlen_t mask = (1LL << sz) - 1;
// for (int i = 0; i < XLEN; i += sz)
// { uint_xlen_t pos = ((rs2 >> i) & mask) << sz_log2;
// if (pos < XLEN)
// r |= ((rs1 >> pos) & mask) << i;
// }
// return r;
// }
//
// uint_xlen_t xperm_n (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 2); }
// uint_xlen_t xperm_b (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 3); }
// uint_xlen_t xperm_h (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 4); }
// uint_xlen_t xperm_w (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 5); } Not part of RV32
//
// The xperm.[nbhw] instructions can be implemented with an XLEN/4-lane nibble-wide crossbarswitch.
// *** XPERM_B ***
// XLEN = 32
// SZ_LOG2 = 3
// SZ = 4'd8;
// MASK = ( 1 << 8 ) - 1
// = 8'hFF
// integer xperm_b_i;
// logic [31:0] xperm_b_r;
// logic [3:0] xperm_b_sz;
// logic [7:0] xperm_b_mask;
// logic [31:0] xperm_b_pos;
//
//
// assign xperm_b_sz[3:0] = 4'd8;
// assign xperm_b_mask[7:0] = 8'hff;
//
// always_comb
// begin
// xperm_b_r[31:0] = 32'b0;
//
// for (xperm_b_i=0; xperm_b_i<32; xperm_b_i = xperm_b_i + xperm_b_sz) // This code did not work...
// begin
// xperm_b_pos[31:0] = ( (rs2_in[31:0] >> xperm_b_i) & {24'h0,xperm_b_mask[7:0]} ) << 3;
// if (xperm_b_pos[31:0] < 32'd32)
// xperm_b_r[31:0] = xperm_b_r[31:0] | ( ((rs1_in[31:0] >> xperm_b_pos[4:0]) & {24'h0,xperm_b_mask[7:0]}) << xperm_b_i );
// end
// end
logic [31:0] xperm_n;
logic [31:0] xperm_b;
logic [31:0] xperm_h;
assign xperm_n[03:00] = { 4{ ~rs2_in[03] }} & ( (rs1_in[31:0] >> {rs2_in[02:00],2'b0}) & 4'hf ); // This is a 8:1 mux with qualified selects
assign xperm_n[07:04] = { 4{ ~rs2_in[07] }} & ( (rs1_in[31:0] >> {rs2_in[06:04],2'b0}) & 4'hf );
assign xperm_n[11:08] = { 4{ ~rs2_in[11] }} & ( (rs1_in[31:0] >> {rs2_in[10:08],2'b0}) & 4'hf );
assign xperm_n[15:12] = { 4{ ~rs2_in[15] }} & ( (rs1_in[31:0] >> {rs2_in[14:12],2'b0}) & 4'hf );
assign xperm_n[19:16] = { 4{ ~rs2_in[19] }} & ( (rs1_in[31:0] >> {rs2_in[18:16],2'b0}) & 4'hf );
assign xperm_n[23:20] = { 4{ ~rs2_in[23] }} & ( (rs1_in[31:0] >> {rs2_in[22:20],2'b0}) & 4'hf );
assign xperm_n[27:24] = { 4{ ~rs2_in[27] }} & ( (rs1_in[31:0] >> {rs2_in[26:24],2'b0}) & 4'hf );
assign xperm_n[31:28] = { 4{ ~rs2_in[31] }} & ( (rs1_in[31:0] >> {rs2_in[30:28],2'b0}) & 4'hf );
assign xperm_b[07:00] = { 8{ ~(| rs2_in[07:02]) }} & ( (rs1_in[31:0] >> {rs2_in[01:00],3'b0}) & 8'hff ); // This is a 4:1 mux with qualified selects
assign xperm_b[15:08] = { 8{ ~(| rs2_in[15:10]) }} & ( (rs1_in[31:0] >> {rs2_in[09:08],3'b0}) & 8'hff );
assign xperm_b[23:16] = { 8{ ~(| rs2_in[23:18]) }} & ( (rs1_in[31:0] >> {rs2_in[17:16],3'b0}) & 8'hff );
assign xperm_b[31:24] = { 8{ ~(| rs2_in[31:26]) }} & ( (rs1_in[31:0] >> {rs2_in[25:24],3'b0}) & 8'hff );
assign xperm_h[15:00] = {16{ ~(| rs2_in[15:01]) }} & ( (rs1_in[31:0] >> {rs2_in[00] ,4'b0}) & 16'hffff ); // This is a 2:1 mux with qualified selects
assign xperm_h[31:16] = {16{ ~(| rs2_in[31:17]) }} & ( (rs1_in[31:0] >> {rs2_in[16] ,4'b0}) & 16'hffff );
// * * * * * * * * * * * * * * * * * * BitManip : CRC32, CRC32c * * * * * * * * * * * * * * * * *
// *** computed from https: //crccalc.com ***
//
// "a" is 8'h61 = 8'b0110_0001 (8'h61 ^ 8'hff = 8'h9e)
//
// Input must first be XORed with 32'hffff_ffff
//
//
// CRC32
//
// Input Output Input Output
// ----- -------- -------- --------
// "a" e8b7be43 ffffff9e 174841bc
// "aa" 078a19d7 ffff9e9e f875e628
// "aaaa" ad98e545 9e9e9e9e 5267a1ba
//
//
//
// CRC32c
//
// Input Output Input Output
// ----- -------- -------- --------
// "a" c1d04330 ffffff9e 3e2fbccf
// "aa" f1f2dac2 ffff9e9e 0e0d253d
// "aaaa" 6a52eeb0 9e9e9e9e 95ad114f
logic crc32_all;
logic [31:0] crc32_poly_rev;
logic [31:0] crc32c_poly_rev;
integer crc32_bi, crc32_hi, crc32_wi, crc32c_bi, crc32c_hi, crc32c_wi;
logic [31:0] crc32_bd, crc32_hd, crc32_wd, crc32c_bd, crc32c_hd, crc32c_wd;
assign crc32_all = ap_crc32_b | ap_crc32_h | ap_crc32_w | ap_crc32c_b | ap_crc32c_h | ap_crc32c_w;
assign crc32_poly_rev[31:0] = 32'hEDB88320; // bit reverse of 32'h04C11DB7
assign crc32c_poly_rev[31:0] = 32'h82F63B78; // bit reverse of 32'h1EDC6F41
always_comb
begin
crc32_bd[31:0] = rs1_in[31:0];
for (crc32_bi=0; crc32_bi<8; crc32_bi++)
begin
crc32_bd[31:0] = (crc32_bd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_bd[0]}});
end // FOR crc32_bi
end // ALWAYS_COMB
always_comb
begin
crc32_hd[31:0] = rs1_in[31:0];
for (crc32_hi=0; crc32_hi<16; crc32_hi++)
begin
crc32_hd[31:0] = (crc32_hd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_hd[0]}});
end // FOR crc32_hi
end // ALWAYS_COMB
always_comb
begin
crc32_wd[31:0] = rs1_in[31:0];
for (crc32_wi=0; crc32_wi<32; crc32_wi++)
begin
crc32_wd[31:0] = (crc32_wd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_wd[0]}});
end // FOR crc32_wi
end // ALWAYS_COMB
always_comb
begin
crc32c_bd[31:0] = rs1_in[31:0];
for (crc32c_bi=0; crc32c_bi<8; crc32c_bi++)
begin
crc32c_bd[31:0] = (crc32c_bd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_bd[0]}});
end // FOR crc32c_bi
end // ALWAYS_COMB
always_comb
begin
crc32c_hd[31:0] = rs1_in[31:0];
for (crc32c_hi=0; crc32c_hi<16; crc32c_hi++)
begin
crc32c_hd[31:0] = (crc32c_hd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_hd[0]}});
end // FOR crc32c_hi
end // ALWAYS_COMB
always_comb
begin
crc32c_wd[31:0] = rs1_in[31:0];
for (crc32c_wi=0; crc32c_wi<32; crc32c_wi++)
begin
crc32c_wd[31:0] = (crc32c_wd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_wd[0]}});
end // FOR crc32c_wi
end // ALWAYS_COMB
// * * * * * * * * * * * * * * * * * * BitManip : BFP * * * * * * * * * * * * * * * * * *
// uint_xlen_t bfp(uint_xlen_t rs1, uint_xlen_t rs2)
// {
// uint_xlen_t cfg = rs2 >> (XLEN/2);
// if ((cfg >> 30) == 2) cfg = cfg >> 16;
// int len = (cfg >> 8) & (XLEN/2-1);
// int off = cfg & (XLEN-1);
// len = len ? len : XLEN/2;
// uint_xlen_t mask = slo(0, len) << off;
// uint_xlen_t data = rs2 << off;
// return (data & mask) | (rs1 & ~mask);
logic [4:0] bfp_len;
logic [4:0] bfp_off;
logic [31:0] bfp_len_mask_;
logic [31:0] bfp_off_mask_;
logic [15:0] bfp_preshift_data;
logic [31:0] bfp_shift_data;
logic [31:0] bfp_shift_mask;
logic [31:0] bfp_result_d;
assign bfp_len[3:0] = rs2_in[27:24];
assign bfp_len[4] = (bfp_len[3:0] == 4'b0); // If LEN field is zero, then LEN=16
assign bfp_off[4:0] = rs2_in[20:16];
assign bfp_len_mask_[31:0] = 32'hffff_ffff << bfp_len[4:0];
assign bfp_off_mask_[31:0] = 32'hffff_ffff << bfp_off[4:0];
assign bfp_preshift_data[15:0]= rs2_in[15:0] & ~bfp_len_mask_[15:0];
assign bfp_shift_data[31:0] = {16'b0,bfp_preshift_data[15:0]} << bfp_off[4:0];
assign bfp_shift_mask[31:0] = (bfp_len_mask_[31:0] << bfp_off[4:0]) | ~bfp_off_mask_[31:0];
assign bfp_result_d[31:0] = bfp_shift_data[31:0] | (rs1_in[31:0] & bfp_shift_mask[31:0]);
// * * * * * * * * * * * * * * * * * * BitManip : Common logic * * * * * * * * * * * * * * * * * *
assign bitmanip_sel_d = ap_bcompress | ap_bdecompress | ap_clmul | ap_clmulh | ap_clmulr | ap_grev | ap_gorc | ap_shfl | ap_unshfl | crc32_all | ap_bfp | ap_xperm_n | ap_xperm_b | ap_xperm_h;
assign bitmanip_d[31:0] = ( {32{ap_bcompress}} & bcompress_d[31:0] ) |
( {32{ap_bdecompress}} & bdecompress_d[31:0] ) |
( {32{ap_clmul}} & clmul_raw_d[31:0] ) |
( {32{ap_clmulh}} & {1'b0,clmul_raw_d[62:32]} ) |
( {32{ap_clmulr}} & clmul_raw_d[62:31] ) |
( {32{ap_grev}} & grev_d[31:0] ) |
( {32{ap_gorc}} & gorc_d[31:0] ) |
( {32{ap_shfl}} & shfl_d[31:0] ) |
( {32{ap_unshfl}} & unshfl_d[31:0] ) |
( {32{ap_crc32_b}} & crc32_bd[31:0] ) |
( {32{ap_crc32_h}} & crc32_hd[31:0] ) |
( {32{ap_crc32_w}} & crc32_wd[31:0] ) |
( {32{ap_crc32c_b}} & crc32c_bd[31:0] ) |
( {32{ap_crc32c_h}} & crc32c_hd[31:0] ) |
( {32{ap_crc32c_w}} & crc32c_wd[31:0] ) |
( {32{ap_bfp}} & bfp_result_d[31:0] ) |
( {32{ap_xperm_n}} & xperm_n[31:0] ) |
( {32{ap_xperm_b}} & xperm_b[31:0] ) |
( {32{ap_xperm_h}} & xperm_h[31:0] );
rvdffe #(33) i_bitmanip_ff (.*, .clk(clk), .din({bitmanip_sel_d,bitmanip_d[31:0]}), .dout({bitmanip_sel_x,bitmanip_x[31:0]}), .en(bit_x_enable));
assign result_x[31:0] = ( {32{~bitmanip_sel_x & ~low_x}} & prod_x[63:32] ) |
( {32{~bitmanip_sel_x & low_x}} & prod_x[31:0] ) |
bitmanip_x[31:0];
endmodule // el2_exu_mul_ctl