From 43e0b1d16a3c7add88f8be94fba20a9b76a99734 Mon Sep 17 00:00:00 2001
From: Luke Wren <wren6991@gmail.com>
Date: Fri, 6 May 2022 17:36:25 +0100
Subject: [PATCH] Implement Zbkb (untested)

---
 hdl/arith/hazard3_alu.v    | 95 ++++++++++++++++++++++----------------
 hdl/hazard3_config.vh      |  4 ++
 hdl/hazard3_config_inst.vh |  1 +
 hdl/hazard3_decode.v       | 70 +++++++++++++++-------------
 hdl/hazard3_ops.vh         |  6 +++
 hdl/rv_opcodes.vh          | 13 ++++++
 6 files changed, 118 insertions(+), 71 deletions(-)

diff --git a/hdl/arith/hazard3_alu.v b/hdl/arith/hazard3_alu.v
index 3dc2c3b..01f798d 100644
--- a/hdl/arith/hazard3_alu.v
+++ b/hdl/arith/hazard3_alu.v
@@ -120,6 +120,17 @@ always @ (*) begin: clmul_mul
 	end
 end
 
+// Zbkb zip/unzip: fixed bit permutations of op_a. The 5-bit index rotates below assume W_DATA == 32.
+reg [W_DATA-1:0] zip;
+reg [W_DATA-1:0] unzip;
+always @ (*) begin: do_zip_unzip
+	integer i;
+	for (i = 0; i < W_DATA; i = i + 1) begin
+		zip[i]   = op_a[{i[0], i[4:1]}]; // Index rotated right by 1: even i reads the low half, odd i reads the high half
+		unzip[i] = op_a[{i[3:0], i[4]}]; // Index rotated left by 1: low half collects even bits, high half collects odd bits
+	end
+end
+
 // ----------------------------------------------------------------------------
 // Output mux, with simple operations inline
 
@@ -142,51 +153,57 @@ end
 wire [W_DATA-1:0] zbs_mask = {{W_DATA-1{1'b0}}, 1'b1} << op_b[W_SHAMT-1:0];
 
 always @ (*) begin
-	casez ({|EXTENSION_A, |EXTENSION_ZBA, |EXTENSION_ZBB, |EXTENSION_ZBC, |EXTENSION_ZBS, aluop})
+	casez ({|EXTENSION_A, |EXTENSION_ZBA, |EXTENSION_ZBB, |EXTENSION_ZBC, |EXTENSION_ZBS, |EXTENSION_ZBKB, aluop})
 		// Base ISA
-		{5'bzzzzz, ALUOP_ADD    }: result = sum;
-		{5'bzzzzz, ALUOP_SUB    }: result = sum;
-		{5'bzzzzz, ALUOP_LT     }: result = {{W_DATA-1{1'b0}}, lt};
-		{5'bzzzzz, ALUOP_LTU    }: result = {{W_DATA-1{1'b0}}, lt};
-		{5'bzzzzz, ALUOP_SRL    }: result = shift_dout;
-		{5'bzzzzz, ALUOP_SRA    }: result = shift_dout;
-		{5'bzzzzz, ALUOP_SLL    }: result = shift_dout;
+		{6'bzzzzzz, ALUOP_ADD    }: result = sum;
+		{6'bzzzzzz, ALUOP_SUB    }: result = sum;
+		{6'bzzzzzz, ALUOP_LT     }: result = {{W_DATA-1{1'b0}}, lt};
+		{6'bzzzzzz, ALUOP_LTU    }: result = {{W_DATA-1{1'b0}}, lt};
+		{6'bzzzzzz, ALUOP_SRL    }: result = shift_dout;
+		{6'bzzzzzz, ALUOP_SRA    }: result = shift_dout;
+		{6'bzzzzzz, ALUOP_SLL    }: result = shift_dout;
 		// A (duplicates of Zbb)
-		{5'b1zzzz, ALUOP_MAX    }: result = lt ? op_b : op_a;
-		{5'b1zzzz, ALUOP_MAXU   }: result = lt ? op_b : op_a;
-		{5'b1zzzz, ALUOP_MIN    }: result = lt ? op_a : op_b;
-		{5'b1zzzz, ALUOP_MINU   }: result = lt ? op_a : op_b;
+		{6'b1zzzzz, ALUOP_MAX    }: result = lt ? op_b : op_a;
+		{6'b1zzzzz, ALUOP_MAXU   }: result = lt ? op_b : op_a;
+		{6'b1zzzzz, ALUOP_MIN    }: result = lt ? op_a : op_b;
+		{6'b1zzzzz, ALUOP_MINU   }: result = lt ? op_a : op_b;
 		// Zba
-		{5'bz1zzz, ALUOP_SH1ADD }: result = sum;
-		{5'bz1zzz, ALUOP_SH2ADD }: result = sum;
-		{5'bz1zzz, ALUOP_SH3ADD }: result = sum;
+		{6'bz1zzzz, ALUOP_SH1ADD }: result = sum;
+		{6'bz1zzzz, ALUOP_SH2ADD }: result = sum;
+		{6'bz1zzzz, ALUOP_SH3ADD }: result = sum;
 		// Zbb
-		{5'bzz1zz, ALUOP_ANDN   }: result = bitwise;
-		{5'bzz1zz, ALUOP_ORN    }: result = bitwise;
-		{5'bzz1zz, ALUOP_XNOR   }: result = bitwise;
-		{5'bzz1zz, ALUOP_CLZ    }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz};
-		{5'bzz1zz, ALUOP_CTZ    }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz};
-		{5'bzz1zz, ALUOP_CPOP   }: result = {{W_DATA-W_SHAMT-1{1'b0}}, cpop};
-		{5'bzz1zz, ALUOP_MAX    }: result = lt ? op_b : op_a;
-		{5'bzz1zz, ALUOP_MAXU   }: result = lt ? op_b : op_a;
-		{5'bzz1zz, ALUOP_MIN    }: result = lt ? op_a : op_b;
-		{5'bzz1zz, ALUOP_MINU   }: result = lt ? op_a : op_b;
-		{5'bzz1zz, ALUOP_SEXT_B }: result = {{W_DATA-8{op_a[7]}}, op_a[7:0]};
-		{5'bzz1zz, ALUOP_SEXT_H }: result = {{W_DATA-16{op_a[15]}}, op_a[15:0]};
-		{5'bzz1zz, ALUOP_ZEXT_H }: result = {{W_DATA-16{1'b0}}, op_a[15:0]};
-		{5'bzz1zz, ALUOP_ORC_B  }: result = {{8{|op_a[31:24]}}, {8{|op_a[23:16]}}, {8{|op_a[15:8]}}, {8{|op_a[7:0]}}};
-		{5'bzz1zz, ALUOP_REV8   }: result = {op_a[7:0], op_a[15:8], op_a[23:16], op_a[31:24]};
-		{5'bzz1zz, ALUOP_ROL    }: result = shift_dout;
-		{5'bzz1zz, ALUOP_ROR    }: result = shift_dout;
+		{6'bzz1zzz, ALUOP_ANDN   }: result = bitwise;
+		{6'bzz1zzz, ALUOP_ORN    }: result = bitwise;
+		{6'bzz1zzz, ALUOP_XNOR   }: result = bitwise;
+		{6'bzz1zzz, ALUOP_CLZ    }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz};
+		{6'bzz1zzz, ALUOP_CTZ    }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz};
+		{6'bzz1zzz, ALUOP_CPOP   }: result = {{W_DATA-W_SHAMT-1{1'b0}}, cpop};
+		{6'bzz1zzz, ALUOP_MAX    }: result = lt ? op_b : op_a;
+		{6'bzz1zzz, ALUOP_MAXU   }: result = lt ? op_b : op_a;
+		{6'bzz1zzz, ALUOP_MIN    }: result = lt ? op_a : op_b;
+		{6'bzz1zzz, ALUOP_MINU   }: result = lt ? op_a : op_b;
+		{6'bzz1zzz, ALUOP_SEXT_B }: result = {{W_DATA-8{op_a[7]}}, op_a[7:0]};
+		{6'bzz1zzz, ALUOP_SEXT_H }: result = {{W_DATA-16{op_a[15]}}, op_a[15:0]};
+		{6'bzz1zzz, ALUOP_ZEXT_H }: result = {{W_DATA-16{1'b0}}, op_a[15:0]};
+		{6'bzz1zzz, ALUOP_ORC_B  }: result = {{8{|op_a[31:24]}}, {8{|op_a[23:16]}}, {8{|op_a[15:8]}}, {8{|op_a[7:0]}}};
+		{6'bzz1zzz, ALUOP_REV8   }: result = {op_a[7:0], op_a[15:8], op_a[23:16], op_a[31:24]};
+		{6'bzz1zzz, ALUOP_ROL    }: result = shift_dout;
+		{6'bzz1zzz, ALUOP_ROR    }: result = shift_dout;
 		// Zbc
-		{5'bzzz1z, ALUOP_CLMUL  }: result = clmul[W_DATA-1:0];
-		{5'bzzz1z, ALUOP_CLMULH }: result = clmul[2*W_DATA-1:W_DATA];
-		{5'bzzz1z, ALUOP_CLMULR }: result = clmul[2*W_DATA-2:W_DATA-1];
+		{6'bzzz1zz, ALUOP_CLMUL  }: result = clmul[W_DATA-1:0];
+		{6'bzzz1zz, ALUOP_CLMULH }: result = clmul[2*W_DATA-1:W_DATA];
+		{6'bzzz1zz, ALUOP_CLMULR }: result = clmul[2*W_DATA-2:W_DATA-1];
 		// Zbs
-		{5'bzzzz1, ALUOP_BCLR   }: result = op_a & ~zbs_mask;
-		{5'bzzzz1, ALUOP_BSET   }: result = op_a |  zbs_mask;
-		{5'bzzzz1, ALUOP_BINV   }: result = op_a ^  zbs_mask;
-		{5'bzzzz1, ALUOP_BEXT   }: result = {{W_DATA-1{1'b0}}, shift_dout[0]};
+		{6'bzzzz1z, ALUOP_BCLR   }: result = op_a & ~zbs_mask;
+		{6'bzzzz1z, ALUOP_BSET   }: result = op_a |  zbs_mask;
+		{6'bzzzz1z, ALUOP_BINV   }: result = op_a ^  zbs_mask;
+		{6'bzzzz1z, ALUOP_BEXT   }: result = {{W_DATA-1{1'b0}}, shift_dout[0]};
+		// Zbkb (only the ops not shared with Zbb; enabled by the EXTENSION_ZBKB bit of the case selector)
+		{6'bzzzzz1, ALUOP_PACK   }: result = {op_b[15:0], op_a[15:0]}; // Low halfwords: op_a in bits 15:0, op_b in bits 31:16
+		{6'bzzzzz1, ALUOP_PACKH  }: result = {{W_DATA-16{1'b0}}, op_b[7:0], op_a[7:0]}; // Low bytes packed, zero-extended
+		{6'bzzzzz1, ALUOP_REV_B  }: result = {op_a_rev[7:0], op_a_rev[15:8], op_a_rev[23:16], op_a_rev[31:24]}; // Reverse bits within each byte only: byte-swap the reversed word (assumes op_a_rev is the full-word bit reversal of op_a -- confirm)
+		{6'bzzzzz1, ALUOP_UNZIP  }: result = unzip;
+		{6'bzzzzz1, ALUOP_ZIP    }: result = zip;
 
 		default:                  result = bitwise;
 	endcase
diff --git a/hdl/hazard3_config.vh b/hdl/hazard3_config.vh
index 7199924..e272fa1 100644
--- a/hdl/hazard3_config.vh
+++ b/hdl/hazard3_config.vh
@@ -57,6 +57,10 @@ parameter EXTENSION_ZBC       = 1,
 // EXTENSION_ZBS: Support for Zbs single-bit manipulation instructions
 parameter EXTENSION_ZBS       = 1,
 
+// EXTENSION_ZBKB: Support for Zbkb basic bit manipulation for cryptography.
+// Requires: Zbb. This flag only gates the Zbkb instructions which aren't in Zbb: pack, packh, rev.b (brev8), zip, unzip.
+parameter EXTENSION_ZBKB      = 1,
+
 // EXTENSION_ZIFENCEI: Support for the fence.i instruction
 // Optional, since a plain branch/jump will also flush the prefetch queue.
 parameter EXTENSION_ZIFENCEI  = 1,
diff --git a/hdl/hazard3_config_inst.vh b/hdl/hazard3_config_inst.vh
index 2c995bb..1975af1 100644
--- a/hdl/hazard3_config_inst.vh
+++ b/hdl/hazard3_config_inst.vh
@@ -16,6 +16,7 @@
 .EXTENSION_ZBB      (EXTENSION_ZBB),
 .EXTENSION_ZBC      (EXTENSION_ZBC),
 .EXTENSION_ZBS      (EXTENSION_ZBS),
+.EXTENSION_ZBKB     (EXTENSION_ZBKB),
 .EXTENSION_ZIFENCEI (EXTENSION_ZIFENCEI),
 .CSR_M_MANDATORY    (CSR_M_MANDATORY),
 .CSR_M_TRAP         (CSR_M_TRAP),
diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v
index 3aa2293..fa90daf 100644
--- a/hdl/hazard3_decode.v
+++ b/hdl/hazard3_decode.v
@@ -247,41 +247,47 @@ always @ (*) begin
 	RV_AMOMINU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO;  d_aluop = ALUOP_MINU; end else begin d_invalid_32bit = 1'b1; end
 	RV_AMOMAXU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO;  d_aluop = ALUOP_MAXU; end else begin d_invalid_32bit = 1'b1; end
 
-	RV_SH1ADD:    if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH1ADD;                                                        end else begin d_invalid_32bit = 1'b1; end
-	RV_SH2ADD:    if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH2ADD;                                                        end else begin d_invalid_32bit = 1'b1; end
-	RV_SH3ADD:    if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH3ADD;                                                        end else begin d_invalid_32bit = 1'b1; end
+	RV_SH1ADD:    if (EXTENSION_ZBA)  begin d_aluop = ALUOP_SH1ADD;                                                        end else begin d_invalid_32bit = 1'b1; end
+	RV_SH2ADD:    if (EXTENSION_ZBA)  begin d_aluop = ALUOP_SH2ADD;                                                        end else begin d_invalid_32bit = 1'b1; end
+	RV_SH3ADD:    if (EXTENSION_ZBA)  begin d_aluop = ALUOP_SH3ADD;                                                        end else begin d_invalid_32bit = 1'b1; end
 
-	RV_ANDN:      if (EXTENSION_ZBB) begin d_aluop = ALUOP_ANDN;                                                          end else begin d_invalid_32bit = 1'b1; end
-	RV_CLZ:       if (EXTENSION_ZBB) begin d_aluop = ALUOP_CLZ;    d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
-	RV_CPOP:      if (EXTENSION_ZBB) begin d_aluop = ALUOP_CPOP;   d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
-	RV_CTZ:       if (EXTENSION_ZBB) begin d_aluop = ALUOP_CTZ;    d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
-	RV_MAX:       if (EXTENSION_ZBB) begin d_aluop = ALUOP_MAX;                                                           end else begin d_invalid_32bit = 1'b1; end
-	RV_MAXU:      if (EXTENSION_ZBB) begin d_aluop = ALUOP_MAXU;                                                          end else begin d_invalid_32bit = 1'b1; end
-	RV_MIN:       if (EXTENSION_ZBB) begin d_aluop = ALUOP_MIN;                                                           end else begin d_invalid_32bit = 1'b1; end
-	RV_MINU:      if (EXTENSION_ZBB) begin d_aluop = ALUOP_MINU;                                                          end else begin d_invalid_32bit = 1'b1; end
-	RV_ORC_B:     if (EXTENSION_ZBB) begin d_aluop = ALUOP_ORC_B;  d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
-	RV_ORN:       if (EXTENSION_ZBB) begin d_aluop = ALUOP_ORN;                                                           end else begin d_invalid_32bit = 1'b1; end
-	RV_REV8:      if (EXTENSION_ZBB) begin d_aluop = ALUOP_REV8;   d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
-	RV_ROL:       if (EXTENSION_ZBB) begin d_aluop = ALUOP_ROL;                                                           end else begin d_invalid_32bit = 1'b1; end
-	RV_ROR:       if (EXTENSION_ZBB) begin d_aluop = ALUOP_ROR;                                                           end else begin d_invalid_32bit = 1'b1; end
-	RV_RORI:      if (EXTENSION_ZBB) begin d_aluop = ALUOP_ROR;    d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end
-	RV_SEXT_B:    if (EXTENSION_ZBB) begin d_aluop = ALUOP_SEXT_B; d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
-	RV_SEXT_H:    if (EXTENSION_ZBB) begin d_aluop = ALUOP_SEXT_H; d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
-	RV_XNOR:      if (EXTENSION_ZBB) begin d_aluop = ALUOP_XNOR;                                                          end else begin d_invalid_32bit = 1'b1; end
-	RV_ZEXT_H:    if (EXTENSION_ZBB) begin d_aluop = ALUOP_ZEXT_H; d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
+	RV_ANDN:      if (EXTENSION_ZBB)  begin d_aluop = ALUOP_ANDN;                                                          end else begin d_invalid_32bit = 1'b1; end
+	RV_CLZ:       if (EXTENSION_ZBB)  begin d_aluop = ALUOP_CLZ;    d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
+	RV_CPOP:      if (EXTENSION_ZBB)  begin d_aluop = ALUOP_CPOP;   d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
+	RV_CTZ:       if (EXTENSION_ZBB)  begin d_aluop = ALUOP_CTZ;    d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
+	RV_MAX:       if (EXTENSION_ZBB)  begin d_aluop = ALUOP_MAX;                                                           end else begin d_invalid_32bit = 1'b1; end
+	RV_MAXU:      if (EXTENSION_ZBB)  begin d_aluop = ALUOP_MAXU;                                                          end else begin d_invalid_32bit = 1'b1; end
+	RV_MIN:       if (EXTENSION_ZBB)  begin d_aluop = ALUOP_MIN;                                                           end else begin d_invalid_32bit = 1'b1; end
+	RV_MINU:      if (EXTENSION_ZBB)  begin d_aluop = ALUOP_MINU;                                                          end else begin d_invalid_32bit = 1'b1; end
+	RV_ORC_B:     if (EXTENSION_ZBB)  begin d_aluop = ALUOP_ORC_B;  d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
+	RV_ORN:       if (EXTENSION_ZBB)  begin d_aluop = ALUOP_ORN;                                                           end else begin d_invalid_32bit = 1'b1; end
+	RV_REV8:      if (EXTENSION_ZBB)  begin d_aluop = ALUOP_REV8;   d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
+	RV_ROL:       if (EXTENSION_ZBB)  begin d_aluop = ALUOP_ROL;                                                           end else begin d_invalid_32bit = 1'b1; end
+	RV_ROR:       if (EXTENSION_ZBB)  begin d_aluop = ALUOP_ROR;                                                           end else begin d_invalid_32bit = 1'b1; end
+	RV_RORI:      if (EXTENSION_ZBB)  begin d_aluop = ALUOP_ROR;    d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end
+	RV_SEXT_B:    if (EXTENSION_ZBB)  begin d_aluop = ALUOP_SEXT_B; d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
+	RV_SEXT_H:    if (EXTENSION_ZBB)  begin d_aluop = ALUOP_SEXT_H; d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
+	RV_XNOR:      if (EXTENSION_ZBB)  begin d_aluop = ALUOP_XNOR;                                                          end else begin d_invalid_32bit = 1'b1; end
+	RV_ZEXT_H:    if (EXTENSION_ZBB)  begin d_aluop = ALUOP_ZEXT_H; d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
 
-	RV_CLMUL:     if (EXTENSION_ZBC) begin d_aluop = ALUOP_CLMUL;                                                         end else begin d_invalid_32bit = 1'b1; end
-	RV_CLMULH:    if (EXTENSION_ZBC) begin d_aluop = ALUOP_CLMULH;                                                        end else begin d_invalid_32bit = 1'b1; end
-	RV_CLMULR:    if (EXTENSION_ZBC) begin d_aluop = ALUOP_CLMULR;                                                        end else begin d_invalid_32bit = 1'b1; end
+	RV_CLMUL:     if (EXTENSION_ZBC)  begin d_aluop = ALUOP_CLMUL;                                                         end else begin d_invalid_32bit = 1'b1; end
+	RV_CLMULH:    if (EXTENSION_ZBC)  begin d_aluop = ALUOP_CLMULH;                                                        end else begin d_invalid_32bit = 1'b1; end
+	RV_CLMULR:    if (EXTENSION_ZBC)  begin d_aluop = ALUOP_CLMULR;                                                        end else begin d_invalid_32bit = 1'b1; end
 
-	RV_BCLR:      if (EXTENSION_ZBS) begin d_aluop = ALUOP_BCLR;                                                          end else begin d_invalid_32bit = 1'b1; end
-	RV_BCLRI:     if (EXTENSION_ZBS) begin d_aluop = ALUOP_BCLR;   d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end
-	RV_BEXT:      if (EXTENSION_ZBS) begin d_aluop = ALUOP_BEXT;                                                          end else begin d_invalid_32bit = 1'b1; end
-	RV_BEXTI:     if (EXTENSION_ZBS) begin d_aluop = ALUOP_BEXT;   d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end
-	RV_BINV:      if (EXTENSION_ZBS) begin d_aluop = ALUOP_BINV;                                                          end else begin d_invalid_32bit = 1'b1; end
-	RV_BINVI:     if (EXTENSION_ZBS) begin d_aluop = ALUOP_BINV;   d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end
-	RV_BSET:      if (EXTENSION_ZBS) begin d_aluop = ALUOP_BSET;                                                          end else begin d_invalid_32bit = 1'b1; end
-	RV_BSETI:     if (EXTENSION_ZBS) begin d_aluop = ALUOP_BSET;   d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end
+	RV_BCLR:      if (EXTENSION_ZBS)  begin d_aluop = ALUOP_BCLR;                                                          end else begin d_invalid_32bit = 1'b1; end
+	RV_BCLRI:     if (EXTENSION_ZBS)  begin d_aluop = ALUOP_BCLR;   d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end
+	RV_BEXT:      if (EXTENSION_ZBS)  begin d_aluop = ALUOP_BEXT;                                                          end else begin d_invalid_32bit = 1'b1; end
+	RV_BEXTI:     if (EXTENSION_ZBS)  begin d_aluop = ALUOP_BEXT;   d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end
+	RV_BINV:      if (EXTENSION_ZBS)  begin d_aluop = ALUOP_BINV;                                                          end else begin d_invalid_32bit = 1'b1; end
+	RV_BINVI:     if (EXTENSION_ZBS)  begin d_aluop = ALUOP_BINV;   d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end
+	RV_BSET:      if (EXTENSION_ZBS)  begin d_aluop = ALUOP_BSET;                                                          end else begin d_invalid_32bit = 1'b1; end
+	RV_BSETI:     if (EXTENSION_ZBS)  begin d_aluop = ALUOP_BSET;   d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end
+	// Zbkb-only ops. NOTE(review): RV32 zext.h is pack with rs2 = x0; confirm the RV_ZEXT_H arm above is meant to claim that encoding (it is gated on EXTENSION_ZBB, not EXTENSION_ZBKB).
+	RV_PACK:      if (EXTENSION_ZBKB) begin d_aluop = ALUOP_PACK;                                                          end else begin d_invalid_32bit = 1'b1; end
+	RV_PACKH:     if (EXTENSION_ZBKB) begin d_aluop = ALUOP_PACKH;                                                         end else begin d_invalid_32bit = 1'b1; end
+	RV_REV_B:     if (EXTENSION_ZBKB) begin d_aluop = ALUOP_REV_B;  d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
+	RV_UNZIP:     if (EXTENSION_ZBKB) begin d_aluop = ALUOP_UNZIP;  d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
+	RV_ZIP:       if (EXTENSION_ZBKB) begin d_aluop = ALUOP_ZIP;    d_rs2 = X0;                                            end else begin d_invalid_32bit = 1'b1; end
 
 	RV_FENCE:     begin d_rs2 = X0; end  // NOP, note rs1/rd are zero in instruction
 	RV_FENCE_I:   if (EXTENSION_ZIFENCEI) begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = BCOND_ALWAYS; end else begin d_invalid_32bit = 1'b1; end // note rs1/rs2/rd are zero in instruction
diff --git a/hdl/hazard3_ops.vh b/hdl/hazard3_ops.vh
index 8b34816..6c4093e 100644
--- a/hdl/hazard3_ops.vh
+++ b/hdl/hazard3_ops.vh
@@ -48,6 +48,12 @@ localparam ALUOP_BEXT    = 6'h38;
 localparam ALUOP_BINV    = 6'h39;
 localparam ALUOP_BSET    = 6'h3a;
 
+localparam ALUOP_PACK    = 6'h3b; // Zbkb: concatenate low halfwords of both operands
+localparam ALUOP_PACKH   = 6'h3c; // Zbkb: concatenate low bytes of both operands, zero-extended
+localparam ALUOP_REV_B   = 6'h3d; // Zbkb: rev.b (brev8)
+localparam ALUOP_ZIP     = 6'h3e; // Zbkb: interleave low and high halves
+localparam ALUOP_UNZIP   = 6'h3f; // Zbkb: de-interleave into even bits then odd bits. 6'h3f is the highest 6-bit code.
+
 // Parameters to control ALU input muxes. Bypass mux paths are
 // controlled by X, so D has no parameters to choose these.
 
diff --git a/hdl/rv_opcodes.vh b/hdl/rv_opcodes.vh
index 7cd2523..88331d2 100644
--- a/hdl/rv_opcodes.vh
+++ b/hdl/rv_opcodes.vh
@@ -125,6 +125,19 @@ localparam RV_BINVI       = 32'b0110100??????????001?????0010011;
 localparam RV_BSET        = 32'b0010100??????????001?????0110011;
 localparam RV_BSETI       = 32'b0010100??????????001?????0010011;
 
+// Zbkb (basic bit manipulation for crypto) (minus those in Zbb). zip/unzip exist on RV32 only.
+localparam RV_PACK        = 32'b0000100??????????100?????0110011;
+localparam RV_PACKH       = 32'b0000100??????????111?????0110011;
+localparam RV_REV_B       = 32'b011010000111?????101?????0010011; // Named brev8 in the ratified spec
+localparam RV_UNZIP       = 32'b000010001111?????101?????0010011; // funct7 = 0000100, imm[4:0] = 01111, funct3 = 101
+localparam RV_ZIP         = 32'b000010001111?????001?????0010011; // Same funct7/imm as unzip, funct3 = 001
+
+// Zbkc is a subset of Zbc.
+
+// Zbkx (crossbar permutation). Patterns only: no decode arm for these is visible in this patch.
+localparam RV_XPERM_B     = 32'b0010100??????????100?????0110011; // xperm8 in the ratified spec
+localparam RV_XPERM_N     = 32'b0010100??????????010?????0110011; // xperm4 in the ratified spec
+
 // C Extension
 localparam RV_C_ADDI4SPN  = 16'b000???????????00; // *** illegal if imm 0
 localparam RV_C_LW        = 16'b010???????????00;