From 185194973fd263ba20ce0b3faf1b9c07e0022b85 Mon Sep 17 00:00:00 2001 From: Luke Wren Date: Sat, 6 Aug 2022 23:02:08 +0100 Subject: [PATCH] Add a custom instruction (bextm/bextmi: 1 to 8-bit version of bext/bexti from Zbs) for fooling around with toolchains --- hdl/arith/hazard3_alu.v | 121 +++++++++++++------------ hdl/hazard3_config.vh | 3 + hdl/hazard3_config_inst.vh | 1 + hdl/hazard3_core.v | 6 ++ hdl/hazard3_csr.v | 5 +- hdl/hazard3_decode.v | 19 +++- hdl/hazard3_ops.vh | 26 +++--- hdl/rv_opcodes.vh | 6 ++ test/sim/sw_testcases/extension_xh3b.c | 110 ++++++++++++++++++++++ 9 files changed, 219 insertions(+), 78 deletions(-) create mode 100644 test/sim/sw_testcases/extension_xh3b.c diff --git a/hdl/arith/hazard3_alu.v b/hdl/arith/hazard3_alu.v index 1143230..90d922c 100644 --- a/hdl/arith/hazard3_alu.v +++ b/hdl/arith/hazard3_alu.v @@ -11,6 +11,8 @@ module hazard3_alu #( `include "hazard3_width_const.vh" ) ( input wire [W_ALUOP-1:0] aluop, + input wire [6:0] funct7_32b, + input wire [2:0] funct3_32b, input wire [W_DATA-1:0] op_a, input wire [W_DATA-1:0] op_b, output reg [W_DATA-1:0] result, @@ -29,18 +31,17 @@ begin end endfunction -wire sub = !(aluop == ALUOP_ADD || (|EXTENSION_ZBA && ( - aluop == ALUOP_SH1ADD || aluop == ALUOP_SH2ADD || aluop == ALUOP_SH3ADD -))); +wire sub = !(aluop == ALUOP_ADD || (|EXTENSION_ZBA && aluop == ALUOP_SHXADD)); wire inv_op_b = sub && !( aluop == ALUOP_AND || aluop == ALUOP_OR || aluop == ALUOP_XOR || aluop == ALUOP_RS2 ); wire [W_DATA-1:0] op_a_shifted = - |EXTENSION_ZBA && aluop == ALUOP_SH1ADD ? op_a << 1 : - |EXTENSION_ZBA && aluop == ALUOP_SH2ADD ? op_a << 2 : - |EXTENSION_ZBA && aluop == ALUOP_SH3ADD ? op_a << 3 : op_a; + |EXTENSION_ZBA && aluop == ALUOP_SHXADD ? ( + !funct3_32b[2] ? op_a << 1 : + !funct3_32b[1] ? 
op_a << 2 : op_a << 3 + ) : op_a; wire [W_DATA-1:0] op_b_inv = op_b ^ {W_DATA{inv_op_b}}; @@ -52,19 +53,18 @@ wire cmp_is_unsigned = aluop == ALUOP_LTU || |EXTENSION_ZBB && aluop == ALUOP_MINU; wire lt = msb(op_a) == msb(op_b) ? msb(sum) : - cmp_is_unsigned ? msb(op_b) : - msb(op_a) ; + cmp_is_unsigned ? msb(op_b) : msb(op_a) ; assign cmp = aluop == ALUOP_SUB ? |op_xor : lt; - // ---------------------------------------------------------------------------- // Separate units for shift, ctz etc wire [W_DATA-1:0] shift_dout; wire shift_right_nleft = aluop == ALUOP_SRL || aluop == ALUOP_SRA || - |EXTENSION_ZBB && aluop == ALUOP_ROR || - |EXTENSION_ZBS && aluop == ALUOP_BEXT; + |EXTENSION_ZBB && aluop == ALUOP_ROR || + |EXTENSION_ZBS && aluop == ALUOP_BEXT || + |EXTENSION_XH3B && aluop == ALUOP_BEXTM; wire shift_arith = aluop == ALUOP_SRA; wire shift_rotate = |EXTENSION_ZBB & (aluop == ALUOP_ROR || aluop == ALUOP_ROL); @@ -111,15 +111,20 @@ always @ (*) begin: cpop_count end end -reg [2*W_DATA-1:0] clmul; +reg [2*W_DATA-1:0] clmul64; + always @ (*) begin: clmul_mul integer i; - clmul = {2*W_DATA{1'b0}}; + clmul64 = {2*W_DATA{1'b0}}; for (i = 0; i < W_DATA; i = i + 1) begin - clmul = clmul ^ (({{W_DATA{1'b0}}, op_a} << i) & {2*W_DATA{op_b[i]}}); + clmul64 = clmul64 ^ (({{W_DATA{1'b0}}, op_a} << i) & {2*W_DATA{op_b[i]}}); end end +// funct3: 1=clmul, 2=clmulr, 3=clmulh, never 0. +wire [W_DATA-1:0] clmul = + !funct3_32b[1] ? clmul64[31: 0] : + !funct3_32b[0] ? 
clmul64[62:31] : clmul64[63:32]; reg [W_DATA-1:0] zip; reg [W_DATA-1:0] unzip; @@ -153,57 +158,55 @@ end wire [W_DATA-1:0] zbs_mask = {{W_DATA-1{1'b0}}, 1'b1} << op_b[W_SHAMT-1:0]; always @ (*) begin - casez ({|EXTENSION_A, |EXTENSION_ZBA, |EXTENSION_ZBB, |EXTENSION_ZBC, |EXTENSION_ZBS, |EXTENSION_ZBKB, aluop}) + casez ({|EXTENSION_A, |EXTENSION_ZBA, |EXTENSION_ZBB, |EXTENSION_ZBC, |EXTENSION_ZBS, |EXTENSION_ZBKB, |EXTENSION_XH3B, aluop}) // Base ISA - {6'bzzzzzz, ALUOP_ADD }: result = sum; - {6'bzzzzzz, ALUOP_SUB }: result = sum; - {6'bzzzzzz, ALUOP_LT }: result = {{W_DATA-1{1'b0}}, lt}; - {6'bzzzzzz, ALUOP_LTU }: result = {{W_DATA-1{1'b0}}, lt}; - {6'bzzzzzz, ALUOP_SRL }: result = shift_dout; - {6'bzzzzzz, ALUOP_SRA }: result = shift_dout; - {6'bzzzzzz, ALUOP_SLL }: result = shift_dout; + {7'bzzzzzzz, ALUOP_ADD }: result = sum; + {7'bzzzzzzz, ALUOP_SUB }: result = sum; + {7'bzzzzzzz, ALUOP_LT }: result = {{W_DATA-1{1'b0}}, lt}; + {7'bzzzzzzz, ALUOP_LTU }: result = {{W_DATA-1{1'b0}}, lt}; + {7'bzzzzzzz, ALUOP_SRL }: result = shift_dout; + {7'bzzzzzzz, ALUOP_SRA }: result = shift_dout; + {7'bzzzzzzz, ALUOP_SLL }: result = shift_dout; // A (duplicates of Zbb) - {6'b1zzzzz, ALUOP_MAX }: result = lt ? op_b : op_a; - {6'b1zzzzz, ALUOP_MAXU }: result = lt ? op_b : op_a; - {6'b1zzzzz, ALUOP_MIN }: result = lt ? op_a : op_b; - {6'b1zzzzz, ALUOP_MINU }: result = lt ? op_a : op_b; + {7'b1zzzzzz, ALUOP_MAX }: result = lt ? op_b : op_a; + {7'b1zzzzzz, ALUOP_MAXU }: result = lt ? op_b : op_a; + {7'b1zzzzzz, ALUOP_MIN }: result = lt ? op_a : op_b; + {7'b1zzzzzz, ALUOP_MINU }: result = lt ? 
op_a : op_b; // Zba - {6'bz1zzzz, ALUOP_SH1ADD }: result = sum; - {6'bz1zzzz, ALUOP_SH2ADD }: result = sum; - {6'bz1zzzz, ALUOP_SH3ADD }: result = sum; + {7'bz1zzzzz, ALUOP_SHXADD }: result = sum; // Zbb - {6'bzz1zzz, ALUOP_ANDN }: result = bitwise; - {6'bzz1zzz, ALUOP_ORN }: result = bitwise; - {6'bzz1zzz, ALUOP_XNOR }: result = bitwise; - {6'bzz1zzz, ALUOP_CLZ }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz}; - {6'bzz1zzz, ALUOP_CTZ }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz}; - {6'bzz1zzz, ALUOP_CPOP }: result = {{W_DATA-W_SHAMT-1{1'b0}}, cpop}; - {6'bzz1zzz, ALUOP_MAX }: result = lt ? op_b : op_a; - {6'bzz1zzz, ALUOP_MAXU }: result = lt ? op_b : op_a; - {6'bzz1zzz, ALUOP_MIN }: result = lt ? op_a : op_b; - {6'bzz1zzz, ALUOP_MINU }: result = lt ? op_a : op_b; - {6'bzz1zzz, ALUOP_SEXT_B }: result = {{W_DATA-8{op_a[7]}}, op_a[7:0]}; - {6'bzz1zzz, ALUOP_SEXT_H }: result = {{W_DATA-16{op_a[15]}}, op_a[15:0]}; - {6'bzz1zzz, ALUOP_ZEXT_H }: result = {{W_DATA-16{1'b0}}, op_a[15:0]}; - {6'bzz1zzz, ALUOP_ORC_B }: result = {{8{|op_a[31:24]}}, {8{|op_a[23:16]}}, {8{|op_a[15:8]}}, {8{|op_a[7:0]}}}; - {6'bzz1zzz, ALUOP_REV8 }: result = {op_a[7:0], op_a[15:8], op_a[23:16], op_a[31:24]}; - {6'bzz1zzz, ALUOP_ROL }: result = shift_dout; - {6'bzz1zzz, ALUOP_ROR }: result = shift_dout; + {7'bzz1zzzz, ALUOP_ANDN }: result = bitwise; + {7'bzz1zzzz, ALUOP_ORN }: result = bitwise; + {7'bzz1zzzz, ALUOP_XNOR }: result = bitwise; + {7'bzz1zzzz, ALUOP_CLZ }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz}; + {7'bzz1zzzz, ALUOP_CTZ }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz}; + {7'bzz1zzzz, ALUOP_CPOP }: result = {{W_DATA-W_SHAMT-1{1'b0}}, cpop}; + {7'bzz1zzzz, ALUOP_MAX }: result = lt ? op_b : op_a; + {7'bzz1zzzz, ALUOP_MAXU }: result = lt ? op_b : op_a; + {7'bzz1zzzz, ALUOP_MIN }: result = lt ? op_a : op_b; + {7'bzz1zzzz, ALUOP_MINU }: result = lt ? 
op_a : op_b; + {7'bzz1zzzz, ALUOP_SEXT_B }: result = {{W_DATA-8{op_a[7]}}, op_a[7:0]}; + {7'bzz1zzzz, ALUOP_SEXT_H }: result = {{W_DATA-16{op_a[15]}}, op_a[15:0]}; + {7'bzz1zzzz, ALUOP_ZEXT_H }: result = {{W_DATA-16{1'b0}}, op_a[15:0]}; + {7'bzz1zzzz, ALUOP_ORC_B }: result = {{8{|op_a[31:24]}}, {8{|op_a[23:16]}}, {8{|op_a[15:8]}}, {8{|op_a[7:0]}}}; + {7'bzz1zzzz, ALUOP_REV8 }: result = {op_a[7:0], op_a[15:8], op_a[23:16], op_a[31:24]}; + {7'bzz1zzzz, ALUOP_ROL }: result = shift_dout; + {7'bzz1zzzz, ALUOP_ROR }: result = shift_dout; // Zbc - {6'bzzz1zz, ALUOP_CLMUL }: result = clmul[W_DATA-1:0]; - {6'bzzz1zz, ALUOP_CLMULH }: result = clmul[2*W_DATA-1:W_DATA]; - {6'bzzz1zz, ALUOP_CLMULR }: result = clmul[2*W_DATA-2:W_DATA-1]; + {7'bzzz1zzz, ALUOP_CLMUL }: result = clmul; // Zbs - {6'bzzzz1z, ALUOP_BCLR }: result = op_a & ~zbs_mask; - {6'bzzzz1z, ALUOP_BSET }: result = op_a | zbs_mask; - {6'bzzzz1z, ALUOP_BINV }: result = op_a ^ zbs_mask; - {6'bzzzz1z, ALUOP_BEXT }: result = {{W_DATA-1{1'b0}}, shift_dout[0]}; + {7'bzzzz1zz, ALUOP_BCLR }: result = op_a & ~zbs_mask; + {7'bzzzz1zz, ALUOP_BSET }: result = op_a | zbs_mask; + {7'bzzzz1zz, ALUOP_BINV }: result = op_a ^ zbs_mask; + {7'bzzzz1zz, ALUOP_BEXT }: result = {{W_DATA-1{1'b0}}, shift_dout[0]}; // Zbkb - {6'bzzzzz1, ALUOP_PACK }: result = {op_b[15:0], op_a[15:0]}; - {6'bzzzzz1, ALUOP_PACKH }: result = {{W_DATA-16{1'b0}}, op_b[7:0], op_a[7:0]}; - {6'bzzzzz1, ALUOP_BREV8 }: result = {op_a_rev[7:0], op_a_rev[15:8], op_a_rev[23:16], op_a_rev[31:24]}; - {6'bzzzzz1, ALUOP_UNZIP }: result = unzip; - {6'bzzzzz1, ALUOP_ZIP }: result = zip; + {7'bzzzzz1z, ALUOP_PACK }: result = {op_b[15:0], op_a[15:0]}; + {7'bzzzzz1z, ALUOP_PACKH }: result = {{W_DATA-16{1'b0}}, op_b[7:0], op_a[7:0]}; + {7'bzzzzz1z, ALUOP_BREV8 }: result = {op_a_rev[7:0], op_a_rev[15:8], op_a_rev[23:16], op_a_rev[31:24]}; + {7'bzzzzz1z, ALUOP_UNZIP }: result = unzip; + {7'bzzzzz1z, ALUOP_ZIP }: result = zip; + // Xh3b + {7'bzzzzzz1, ALUOP_BEXTM }: result = 
shift_dout & {24'h0, {~(8'hfe << funct7_32b[3:1])}}; default: result = bitwise; endcase @@ -238,4 +241,6 @@ end endmodule +`ifndef YOSYS `default_nettype wire +`endif diff --git a/hdl/hazard3_config.vh b/hdl/hazard3_config.vh index 4be454c..9756cb5 100644 --- a/hdl/hazard3_config.vh +++ b/hdl/hazard3_config.vh @@ -61,6 +61,9 @@ parameter EXTENSION_ZBS = 1, // Requires: Zbb. (This flag enables instructions in Zbkb which aren't in Zbb.) parameter EXTENSION_ZBKB = 1, +// EXTENSION_XH3B: Custom bit manipulation instructions for Hazard3 +parameter EXTENSION_XH3B = 1, + // EXTENSION_ZIFENCEI: Support for the fence.i instruction // Optional, since a plain branch/jump will also flush the prefetch queue. parameter EXTENSION_ZIFENCEI = 1, diff --git a/hdl/hazard3_config_inst.vh b/hdl/hazard3_config_inst.vh index cf7ef94..c650f85 100644 --- a/hdl/hazard3_config_inst.vh +++ b/hdl/hazard3_config_inst.vh @@ -22,6 +22,7 @@ .EXTENSION_ZBC (EXTENSION_ZBC), .EXTENSION_ZBS (EXTENSION_ZBS), .EXTENSION_ZBKB (EXTENSION_ZBKB), +.EXTENSION_XH3B (EXTENSION_XH3B), .EXTENSION_ZIFENCEI (EXTENSION_ZIFENCEI), .CSR_M_MANDATORY (CSR_M_MANDATORY), .CSR_M_TRAP (CSR_M_TRAP), diff --git a/hdl/hazard3_core.v b/hdl/hazard3_core.v index 564233a..0a84749 100644 --- a/hdl/hazard3_core.v +++ b/hdl/hazard3_core.v @@ -187,6 +187,8 @@ wire [W_DATA-1:0] d_imm; wire [W_REGADDR-1:0] d_rs1; wire [W_REGADDR-1:0] d_rs2; wire [W_REGADDR-1:0] d_rd; +wire [2:0] d_funct3_32b; +wire [6:0] d_funct7_32b; wire [W_ALUSRC-1:0] d_alusrc_a; wire [W_ALUSRC-1:0] d_alusrc_b; wire [W_ALUOP-1:0] d_aluop; @@ -237,6 +239,8 @@ hazard3_decode #( .d_rs1 (d_rs1), .d_rs2 (d_rs2), .d_rd (d_rd), + .d_funct3_32b (d_funct3_32b), + .d_funct7_32b (d_funct7_32b), .d_alusrc_a (d_alusrc_a), .d_alusrc_b (d_alusrc_b), .d_aluop (d_aluop), @@ -451,6 +455,8 @@ hazard3_alu #( `include "hazard3_config_inst.vh" ) alu ( .aluop (d_aluop), + .funct3_32b (d_funct3_32b), + .funct7_32b (d_funct7_32b), .op_a (x_op_a), .op_b (x_op_b), .result (x_alu_result), diff 
--git a/hdl/hazard3_csr.v b/hdl/hazard3_csr.v index df6204e..fb53d91 100644 --- a/hdl/hazard3_csr.v +++ b/hdl/hazard3_csr.v @@ -497,7 +497,10 @@ always @ (*) begin {XLEN-28{1'b0}}, // WLRL 2'd0, // Z, Y, no - |CSR_M_TRAP, // X is set for our non-standard interrupt enable CSRs + |{ // X is set for any custom extensions + |CSR_M_TRAP, + |EXTENSION_XH3B + }, 2'd0, // V, W, no |U_MODE, 7'd0, // T...N, no diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v index 567a5ed..a8393d3 100644 --- a/hdl/hazard3_decode.v +++ b/hdl/hazard3_decode.v @@ -36,6 +36,8 @@ module hazard3_decode #( output reg [W_REGADDR-1:0] d_rs1, output reg [W_REGADDR-1:0] d_rs2, output reg [W_REGADDR-1:0] d_rd, + output reg [2:0] d_funct3_32b, + output reg [6:0] d_funct7_32b, output reg [W_ALUSRC-1:0] d_alusrc_a, output reg [W_ALUSRC-1:0] d_alusrc_b, output reg [W_ALUOP-1:0] d_aluop, @@ -199,6 +201,10 @@ always @ (*) begin d_except = EXCEPT_NONE; d_wfi = 1'b0; d_fence_i = 1'b0; + // Note these funct3/funct7 fields are valid only for 32-bit instructions. They + // are useful for clusters of related ALU ops, such as sh*add, clmul. 
+ d_funct3_32b = fd_cir[14:12]; + d_funct7_32b = fd_cir[31:25]; casez (d_instr) RV_BEQ: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_rd = X0; d_aluop = ALUOP_SUB; d_branchcond = BCOND_ZERO; end @@ -260,9 +266,9 @@ always @ (*) begin RV_AMOMINU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_MINU; end else begin d_invalid_32bit = 1'b1; end RV_AMOMAXU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_MAXU; end else begin d_invalid_32bit = 1'b1; end - RV_SH1ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH1ADD; end else begin d_invalid_32bit = 1'b1; end - RV_SH2ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH2ADD; end else begin d_invalid_32bit = 1'b1; end - RV_SH3ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH3ADD; end else begin d_invalid_32bit = 1'b1; end + RV_SH1ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SHXADD; end else begin d_invalid_32bit = 1'b1; end + RV_SH2ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SHXADD; end else begin d_invalid_32bit = 1'b1; end + RV_SH3ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SHXADD; end else begin d_invalid_32bit = 1'b1; end RV_ANDN: if (EXTENSION_ZBB) begin d_aluop = ALUOP_ANDN; end else begin d_invalid_32bit = 1'b1; end RV_CLZ: if (EXTENSION_ZBB) begin d_aluop = ALUOP_CLZ; d_rs2 = X0; end else begin d_invalid_32bit = 1'b1; end @@ -284,8 +290,8 @@ always @ (*) begin RV_ZEXT_H: if (EXTENSION_ZBB) begin d_aluop = ALUOP_ZEXT_H; d_rs2 = X0; end else begin d_invalid_32bit = 1'b1; end RV_CLMUL: if (EXTENSION_ZBC) begin d_aluop = ALUOP_CLMUL; end else begin d_invalid_32bit = 1'b1; end - RV_CLMULH: if (EXTENSION_ZBC) begin d_aluop = ALUOP_CLMULH; end else begin d_invalid_32bit = 1'b1; end - RV_CLMULR: if (EXTENSION_ZBC) begin d_aluop = ALUOP_CLMULR; end else begin d_invalid_32bit = 1'b1; end + RV_CLMULH: if (EXTENSION_ZBC) begin d_aluop = ALUOP_CLMUL; end else begin d_invalid_32bit = 1'b1; end + RV_CLMULR: if (EXTENSION_ZBC) begin d_aluop = 
ALUOP_CLMUL; end else begin d_invalid_32bit = 1'b1; end RV_BCLR: if (EXTENSION_ZBS) begin d_aluop = ALUOP_BCLR; end else begin d_invalid_32bit = 1'b1; end RV_BCLRI: if (EXTENSION_ZBS) begin d_aluop = ALUOP_BCLR; d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end @@ -302,6 +308,9 @@ always @ (*) begin RV_UNZIP: if (EXTENSION_ZBKB) begin d_aluop = ALUOP_UNZIP; d_rs2 = X0; end else begin d_invalid_32bit = 1'b1; end RV_ZIP: if (EXTENSION_ZBKB) begin d_aluop = ALUOP_ZIP; d_rs2 = X0; end else begin d_invalid_32bit = 1'b1; end + RV_H3_BEXTM: if (EXTENSION_XH3B) begin d_aluop = ALUOP_BEXTM; end else begin d_invalid_32bit = 1'b1; end + RV_H3_BEXTMI: if (EXTENSION_XH3B) begin d_aluop = ALUOP_BEXTM; d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end + RV_FENCE: begin d_rs2 = X0; end // NOP, note rs1/rd are zero in instruction RV_FENCE_I: if (EXTENSION_ZIFENCEI) begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = BCOND_ALWAYS; d_fence_i = 1'b1; end else begin d_invalid_32bit = 1'b1; end // note rs1/rs2/rd are zero in instruction RV_CSRRW: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = 1'b1 ; d_csr_ren = |d_rd; d_csr_wtype = CSR_WTYPE_W; end else begin d_invalid_32bit = 1'b1; end diff --git a/hdl/hazard3_ops.vh b/hdl/hazard3_ops.vh index 40d35e0..d8c9675 100644 --- a/hdl/hazard3_ops.vh +++ b/hdl/hazard3_ops.vh @@ -18,9 +18,7 @@ localparam ALUOP_SLL = 6'h0b; localparam ALUOP_MULDIV = 6'h0c; localparam ALUOP_RS2 = 6'h0d; // differs from AND/OR/XOR in [1:0] // Bitmanip ALU operations (some also used by AMOs): -localparam ALUOP_SH1ADD = 6'h20; -localparam ALUOP_SH2ADD = 6'h21; -localparam ALUOP_SH3ADD = 6'h22; +localparam ALUOP_SHXADD = 6'h20; localparam ALUOP_CLZ = 6'h23; localparam ALUOP_CPOP = 6'h24; localparam ALUOP_CTZ = 6'h25; @@ -40,19 +38,19 @@ localparam ALUOP_SEXT_H = 6'h32; localparam ALUOP_ZEXT_H = 6'h33; localparam ALUOP_CLMUL = 6'h34; -localparam 
ALUOP_CLMULH = 6'h35; -localparam ALUOP_CLMULR = 6'h36; -localparam ALUOP_BCLR = 6'h37; -localparam ALUOP_BEXT = 6'h38; -localparam ALUOP_BINV = 6'h39; -localparam ALUOP_BSET = 6'h3a; +localparam ALUOP_BCLR = 6'h35; +localparam ALUOP_BEXT = 6'h36; +localparam ALUOP_BINV = 6'h37; +localparam ALUOP_BSET = 6'h38; -localparam ALUOP_PACK = 6'h3b; -localparam ALUOP_PACKH = 6'h3c; -localparam ALUOP_BREV8 = 6'h3d; -localparam ALUOP_ZIP = 6'h3e; -localparam ALUOP_UNZIP = 6'h3f; +localparam ALUOP_PACK = 6'h39; +localparam ALUOP_PACKH = 6'h3a; +localparam ALUOP_BREV8 = 6'h3b; +localparam ALUOP_ZIP = 6'h3c; +localparam ALUOP_UNZIP = 6'h3d; + +localparam ALUOP_BEXTM = 6'h3e; // Parameters to control ALU input muxes. Bypass mux paths are // controlled by X, so D has no parameters to choose these. diff --git a/hdl/rv_opcodes.vh b/hdl/rv_opcodes.vh index 117291a..d2fdb0c 100644 --- a/hdl/rv_opcodes.vh +++ b/hdl/rv_opcodes.vh @@ -138,6 +138,12 @@ localparam RV_ZIP = 32'b000010001111?????001?????0010011; localparam RV_XPERM_B = 32'b0010100??????????100?????0110011; localparam RV_XPERM_N = 32'b0010100??????????010?????0110011; +// Hazard3 custom instructions + +// Xh3b (Hazard3 custom bitmanip): currently just a multi-bit version of bext/bexti from Zbs +localparam RV_H3_BEXTM = 32'b000???0??????????000?????0001011; // custom-0 funct3=0 +localparam RV_H3_BEXTMI = 32'b000???0??????????100?????0001011; // custom-0 funct3=4 + // C Extension localparam RV_C_ADDI4SPN = 16'b000???????????00; // *** illegal if imm 0 localparam RV_C_LW = 16'b010???????????00; diff --git a/test/sim/sw_testcases/extension_xh3b.c b/test/sim/sw_testcases/extension_xh3b.c new file mode 100644 index 0000000..08e1cb1 --- /dev/null +++ b/test/sim/sw_testcases/extension_xh3b.c @@ -0,0 +1,110 @@ +#include "tb_cxxrtl_io.h" + +// Smoke test for instructions in the Xh3b extension (Hazard3 custom +// bitmanip). 
Currently these are: + +// - h3_bextm: multiple bit version of the bext instruction from Zbs (1 to 8 bits) +// - h3_bextmi: immediate version of the above (as bexti is to bext) + +// nbits must be a constant expression +#define __hazard3_bextm(nbits, rs1, rs2) ({\ + uint32_t __h3_bextm_rd; \ + asm (".insn r 0x0b, 0, %3, %0, %1, %2"\ + : "=r" (__h3_bextm_rd) \ + : "r" (rs1), "r" (rs2), "i" ((((nbits) - 1) & 0x7) << 1)\ + ); \ + __h3_bextm_rd; \ +}) + +// nbits and shamt must be constant expressions +#define __hazard3_bextmi(nbits, rs1, shamt) ({\ + uint32_t __h3_bextmi_rd; \ + asm (".insn i 0x0b, 0x4, %0, %1, %2"\ + : "=r" (__h3_bextmi_rd) \ + : "r" (rs1), "i" ((((nbits) - 1) & 0x7) << 6 | ((shamt) & 0x1f)) \ + ); \ + __h3_bextmi_rd; \ +}) + + +// The instruction is just supposed to take a single static size... +__attribute__((noinline)) uint32_t bextm_dynamic_width(uint nbits, uint32_t rs1, uint32_t rs2) { + switch (nbits) { + case 1: + return __hazard3_bextm(1, rs1, rs2); + case 2: + return __hazard3_bextm(2, rs1, rs2); + case 3: + return __hazard3_bextm(3, rs1, rs2); + case 4: + return __hazard3_bextm(4, rs1, rs2); + case 5: + return __hazard3_bextm(5, rs1, rs2); + case 6: + return __hazard3_bextm(6, rs1, rs2); + case 7: + return __hazard3_bextm(7, rs1, rs2); + case 8: + default: + return __hazard3_bextm(8, rs1, rs2); + } +} + +int main() { + uint32_t x = 0xabcdef5a; + for (int nbits = 1; nbits <= 8; ++nbits) { + for (int shamt = 0; shamt < 32; ++shamt) { + uint32_t expect = (x >> shamt) & ~(~0u << nbits); + uint32_t actual = bextm_dynamic_width(nbits, x, shamt); + tb_assert( + expect == actual, + "Bad result for rs1=%08x shamt=%d nbits=%d: expected %08x, got %08x\n", + x, shamt, nbits, expect, actual + ); + } + } + // Quick smoke test for bextmi vs bextm + tb_assert(__hazard3_bextm(8, x, 0 ) == __hazard3_bextmi(8, x, 0 ), "bextm vs bextmi mismatch shamt=0 \n"); + tb_assert(__hazard3_bextm(8, x, 1 ) == __hazard3_bextmi(8, x, 1 ), "bextm vs bextmi mismatch shamt=1 
\n"); + tb_assert(__hazard3_bextm(8, x, 2 ) == __hazard3_bextmi(8, x, 2 ), "bextm vs bextmi mismatch shamt=2 \n"); + tb_assert(__hazard3_bextm(8, x, 3 ) == __hazard3_bextmi(8, x, 3 ), "bextm vs bextmi mismatch shamt=3 \n"); + tb_assert(__hazard3_bextm(8, x, 4 ) == __hazard3_bextmi(8, x, 4 ), "bextm vs bextmi mismatch shamt=4 \n"); + tb_assert(__hazard3_bextm(8, x, 5 ) == __hazard3_bextmi(8, x, 5 ), "bextm vs bextmi mismatch shamt=5 \n"); + tb_assert(__hazard3_bextm(8, x, 6 ) == __hazard3_bextmi(8, x, 6 ), "bextm vs bextmi mismatch shamt=6 \n"); + tb_assert(__hazard3_bextm(8, x, 7 ) == __hazard3_bextmi(8, x, 7 ), "bextm vs bextmi mismatch shamt=7 \n"); + tb_assert(__hazard3_bextm(8, x, 8 ) == __hazard3_bextmi(8, x, 8 ), "bextm vs bextmi mismatch shamt=8 \n"); + tb_assert(__hazard3_bextm(8, x, 9 ) == __hazard3_bextmi(8, x, 9 ), "bextm vs bextmi mismatch shamt=9 \n"); + tb_assert(__hazard3_bextm(8, x, 10) == __hazard3_bextmi(8, x, 10), "bextm vs bextmi mismatch shamt=10\n"); + tb_assert(__hazard3_bextm(8, x, 11) == __hazard3_bextmi(8, x, 11), "bextm vs bextmi mismatch shamt=11\n"); + tb_assert(__hazard3_bextm(8, x, 12) == __hazard3_bextmi(8, x, 12), "bextm vs bextmi mismatch shamt=12\n"); + tb_assert(__hazard3_bextm(8, x, 13) == __hazard3_bextmi(8, x, 13), "bextm vs bextmi mismatch shamt=13\n"); + tb_assert(__hazard3_bextm(8, x, 14) == __hazard3_bextmi(8, x, 14), "bextm vs bextmi mismatch shamt=14\n"); + tb_assert(__hazard3_bextm(8, x, 15) == __hazard3_bextmi(8, x, 15), "bextm vs bextmi mismatch shamt=15\n"); + tb_assert(__hazard3_bextm(8, x, 16) == __hazard3_bextmi(8, x, 16), "bextm vs bextmi mismatch shamt=16\n"); + tb_assert(__hazard3_bextm(8, x, 17) == __hazard3_bextmi(8, x, 17), "bextm vs bextmi mismatch shamt=17\n"); + tb_assert(__hazard3_bextm(8, x, 18) == __hazard3_bextmi(8, x, 18), "bextm vs bextmi mismatch shamt=18\n"); + tb_assert(__hazard3_bextm(8, x, 19) == __hazard3_bextmi(8, x, 19), "bextm vs bextmi mismatch shamt=19\n"); + tb_assert(__hazard3_bextm(8, 
x, 20) == __hazard3_bextmi(8, x, 20), "bextm vs bextmi mismatch shamt=20\n"); + tb_assert(__hazard3_bextm(8, x, 21) == __hazard3_bextmi(8, x, 21), "bextm vs bextmi mismatch shamt=21\n"); + tb_assert(__hazard3_bextm(8, x, 22) == __hazard3_bextmi(8, x, 22), "bextm vs bextmi mismatch shamt=22\n"); + tb_assert(__hazard3_bextm(8, x, 23) == __hazard3_bextmi(8, x, 23), "bextm vs bextmi mismatch shamt=23\n"); + tb_assert(__hazard3_bextm(8, x, 24) == __hazard3_bextmi(8, x, 24), "bextm vs bextmi mismatch shamt=24\n"); + tb_assert(__hazard3_bextm(8, x, 25) == __hazard3_bextmi(8, x, 25), "bextm vs bextmi mismatch shamt=25\n"); + tb_assert(__hazard3_bextm(8, x, 26) == __hazard3_bextmi(8, x, 26), "bextm vs bextmi mismatch shamt=26\n"); + tb_assert(__hazard3_bextm(8, x, 27) == __hazard3_bextmi(8, x, 27), "bextm vs bextmi mismatch shamt=27\n"); + tb_assert(__hazard3_bextm(8, x, 28) == __hazard3_bextmi(8, x, 28), "bextm vs bextmi mismatch shamt=28\n"); + tb_assert(__hazard3_bextm(8, x, 29) == __hazard3_bextmi(8, x, 29), "bextm vs bextmi mismatch shamt=29\n"); + tb_assert(__hazard3_bextm(8, x, 30) == __hazard3_bextmi(8, x, 30), "bextm vs bextmi mismatch shamt=30\n"); + tb_assert(__hazard3_bextm(8, x, 31) == __hazard3_bextmi(8, x, 31), "bextm vs bextmi mismatch shamt=31\n"); + + tb_assert(__hazard3_bextm(1, 0xffu, 0) == __hazard3_bextmi(1, 0xffu, 0), "bextm vs bextmi mismatch nbits=1\n"); + tb_assert(__hazard3_bextm(2, 0xffu, 0) == __hazard3_bextmi(2, 0xffu, 0), "bextm vs bextmi mismatch nbits=2\n"); + tb_assert(__hazard3_bextm(3, 0xffu, 0) == __hazard3_bextmi(3, 0xffu, 0), "bextm vs bextmi mismatch nbits=3\n"); + tb_assert(__hazard3_bextm(4, 0xffu, 0) == __hazard3_bextmi(4, 0xffu, 0), "bextm vs bextmi mismatch nbits=4\n"); + tb_assert(__hazard3_bextm(5, 0xffu, 0) == __hazard3_bextmi(5, 0xffu, 0), "bextm vs bextmi mismatch nbits=5\n"); + tb_assert(__hazard3_bextm(6, 0xffu, 0) == __hazard3_bextmi(6, 0xffu, 0), "bextm vs bextmi mismatch nbits=6\n"); + tb_assert(__hazard3_bextm(7, 
0xffu, 0) == __hazard3_bextmi(7, 0xffu, 0), "bextm vs bextmi mismatch nbits=7\n"); + tb_assert(__hazard3_bextm(8, 0xffu, 0) == __hazard3_bextmi(8, 0xffu, 0), "bextm vs bextmi mismatch nbits=8\n"); + + return 0; +}