From e966e832d28a5793d72c3d8e2076ffab7cd698c1 Mon Sep 17 00:00:00 2001 From: Luke Wren Date: Mon, 20 Mar 2023 00:00:51 +0000 Subject: [PATCH] First attempt at Zcmp --- hdl/hazard3_config.vh | 8 +- hdl/hazard3_config_inst.vh | 1 + hdl/hazard3_core.v | 24 +- hdl/hazard3_decode.v | 41 ++- hdl/hazard3_frontend.v | 49 +++- hdl/hazard3_instr_decompress.v | 241 +++++++++++++++++- hdl/rv_opcodes.vh | 8 + test/sim/common/src_only_app.mk | 2 +- test/sim/coremark/Makefile | 2 +- .../coremark/dist/barebones/core_portme.mak | 22 +- test/sim/hellow/Makefile | 4 +- test/sim/hellow/main.c | 4 + test/sim/sw_testcases/Makefile | 2 +- test/sim/sw_testcases/extension_xh3b.c | 2 +- test/sim/tb_cxxrtl/config_default.vh | 1 + test/sim/tb_cxxrtl/config_min.vh | 1 + 16 files changed, 368 insertions(+), 44 deletions(-) diff --git a/hdl/hazard3_config.vh b/hdl/hazard3_config.vh index f8b1078..d921574 100644 --- a/hdl/hazard3_config.vh +++ b/hdl/hazard3_config.vh @@ -58,11 +58,15 @@ parameter EXTENSION_ZBS = 0, // Requires: Zbb. (This flag enables instructions in Zbkb which aren't in Zbb.) parameter EXTENSION_ZBKB = 0, -// EXTENSION_ZCB: Support for ZCB basic additional compressed instructions -// Requires: C. (Some Zcb instructions also require Zbb or M.) +// EXTENSION_ZCB: Support for Zcb basic additional compressed instructions +// Requires: EXTENSION_C. (Some Zcb instructions also require Zbb or M.) // Note Zca is equivalent to C, as we do not support the F extension. parameter EXTENSION_ZCB = 0, +// EXTENSION_ZCMP: Support for Zcmp push/pop instructions. +// Requires: EXTENSION_C. +parameter EXTENSION_ZCMP = 0, + // EXTENSION_ZIFENCEI: Support for the fence.i instruction // Optional, since a plain branch/jump will also flush the prefetch queue. 
parameter EXTENSION_ZIFENCEI = 0, diff --git a/hdl/hazard3_config_inst.vh b/hdl/hazard3_config_inst.vh index e41a378..1098e5a 100644 --- a/hdl/hazard3_config_inst.vh +++ b/hdl/hazard3_config_inst.vh @@ -23,6 +23,7 @@ .EXTENSION_ZBS (EXTENSION_ZBS), .EXTENSION_ZBKB (EXTENSION_ZBKB), .EXTENSION_ZCB (EXTENSION_ZCB), +.EXTENSION_ZCMP (EXTENSION_ZCMP), .EXTENSION_ZIFENCEI (EXTENSION_ZIFENCEI), .EXTENSION_XH3BEXTM (EXTENSION_XH3BEXTM), .EXTENSION_XH3IRQ (EXTENSION_XH3IRQ), diff --git a/hdl/hazard3_core.v b/hdl/hazard3_core.v index 11421bf..a8dee07 100644 --- a/hdl/hazard3_core.v +++ b/hdl/hazard3_core.v @@ -1,5 +1,5 @@ /*****************************************************************************\ -| Copyright (C) 2021-2022 Luke Wren | +| Copyright (C) 2021-2023 Luke Wren | | SPDX-License-Identifier: Apache-2.0 | \*****************************************************************************/ @@ -112,6 +112,7 @@ wire [1:0] fd_cir_predbranch; wire [1:0] fd_cir_vld; wire [1:0] df_cir_use; wire df_cir_flush_behind; +wire [3:0] df_uop_step_next; wire x_btb_set; wire [W_ADDR-1:0] x_btb_set_src_addr; @@ -160,6 +161,7 @@ hazard3_frontend #( .cir_vld (fd_cir_vld), .cir_use (df_cir_use), .cir_flush_behind (df_cir_flush_behind), + .df_uop_step_next (df_uop_step_next), .pwrdown_ok (f_frontend_pwrdown_ok), .delay_first_fetch (!pwrup_ack), @@ -215,6 +217,8 @@ wire [W_EXCEPT-1:0] d_except; wire d_sleep_wfi; wire d_sleep_block; wire d_sleep_unblock; +wire d_no_pc_increment; +wire d_uninterruptible; wire d_fence_i; wire d_csr_ren; wire d_csr_wen; @@ -241,6 +245,7 @@ hazard3_decode #( .fd_cir_vld (fd_cir_vld), .df_cir_use (df_cir_use), .df_cir_flush_behind (df_cir_flush_behind), + .df_uop_step_next (df_uop_step_next), .d_pc (d_pc), .x_jump_not_except (x_jump_not_except), @@ -280,6 +285,8 @@ hazard3_decode #( .d_sleep_wfi (d_sleep_wfi), .d_sleep_block (d_sleep_block), .d_sleep_unblock (d_sleep_unblock), + .d_no_pc_increment (d_no_pc_increment), + .d_uninterruptible (d_uninterruptible), 
.d_fence_i (d_fence_i) ); @@ -322,6 +329,7 @@ reg [1:0] xm_addr_align; reg [W_MEMOP-1:0] xm_memop; reg [W_EXCEPT-1:0] xm_except; reg xm_except_to_d_mode; +reg xm_no_pc_increment; reg xm_sleep_wfi; reg xm_sleep_block; reg xm_delay_irq_entry_on_ls_stagex; @@ -999,7 +1007,7 @@ hazard3_csr #( .trap_enter_vld (m_trap_enter_vld), .trap_enter_rdy (m_trap_enter_rdy), .loadstore_dphase_pending (m_dphase_in_flight), - .delay_irq_entry (m_delay_irq_entry), + .delay_irq_entry (m_delay_irq_entry || d_uninterruptible), .mepc_in (m_exception_return_addr), .pwr_allow_clkgate (m_pwr_allow_clkgate), @@ -1044,11 +1052,18 @@ always @ (posedge clk or negedge rst_n) begin xm_sleep_wfi <= 1'b0; xm_sleep_block <= 1'b0; unblock_out <= 1'b0; - {xm_rs1, xm_rs2, xm_rd} <= {3 * W_REGADDR{1'b0}}; + xm_rs1 <= {W_REGADDR{1'b0}}; + xm_rs2 <= {W_REGADDR{1'b0}}; + xm_rd <= {W_REGADDR{1'b0}}; + xm_no_pc_increment <= 1'b0; end else begin unblock_out <= 1'b0; if (!m_stall) begin - {xm_rs1, xm_rs2, xm_rd} <= {d_rs1, d_rs2, d_rd}; + xm_rs1 <= d_rs1; + xm_rs2 <= d_rs2; + xm_rd <= d_rd; + // PC increment is suppressed for non-final micro-ops; only needed for Zcmp (flag is tied off to 0 otherwise): + xm_no_pc_increment <= d_no_pc_increment && |EXTENSION_ZCMP; // If some X-sourced exception has squashed the address phase, need to squash the data phase too. xm_memop <= x_except != EXCEPT_NONE ? MEMOP_NONE : d_memop; xm_except <= x_except; @@ -1177,6 +1192,7 @@ assign m_stall = m_bus_stall || // was *not* a taken branch, which is why we can just walk back the PC. assign m_exception_return_addr = d_pc - ( m_trap_is_irq ? 32'h0 : + xm_no_pc_increment ? 32'h0 : prev_instr_was_32_bit ? 
32'h4 : 32'h2 ); diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v index 0669efb..06405d8 100644 --- a/hdl/hazard3_decode.v +++ b/hdl/hazard3_decode.v @@ -1,5 +1,5 @@ /*****************************************************************************\ -| Copyright (C) 2021-2022 Luke Wren | +| Copyright (C) 2021-2023 Luke Wren | | SPDX-License-Identifier: Apache-2.0 | \*****************************************************************************/ @@ -19,6 +19,7 @@ module hazard3_decode #( input wire [1:0] fd_cir_vld, output wire [1:0] df_cir_use, output wire df_cir_flush_behind, + output wire [3:0] df_uop_step_next, output wire [W_ADDR-1:0] d_pc, input wire debug_mode, @@ -58,6 +59,8 @@ module hazard3_decode #( output reg d_sleep_wfi, output reg d_sleep_block, output reg d_sleep_unblock, + output wire d_no_pc_increment, + output wire d_uninterruptible, output reg d_fence_i ); @@ -75,15 +78,35 @@ wire d_invalid_16bit; reg d_invalid_32bit; wire d_invalid = d_invalid_16bit || d_invalid_32bit; +wire uop_nonfinal; +wire uop_uninterruptible; +wire uop_stall; +wire uop_clear; + hazard3_instr_decompress #( `include "hazard3_config_inst.vh" ) decomp ( - .instr_in (fd_cir), - .instr_is_32bit (d_instr_is_32bit), - .instr_out (d_instr), - .invalid (d_invalid_16bit) + .clk (clk), + .rst_n (rst_n), + + .instr_in (fd_cir), + .instr_is_32bit (d_instr_is_32bit), + .instr_out (d_instr), + .instr_out_uop_nonfinal (uop_nonfinal), + .instr_out_uop_atomic (uop_uninterruptible), + .instr_out_uop_stall (uop_stall), + .instr_out_uop_clear (uop_clear), + + .df_uop_step_next (df_uop_step_next), + + .invalid (d_invalid_16bit) ); +assign d_uninterruptible = uop_uninterruptible && !d_invalid; +assign d_no_pc_increment = uop_nonfinal && !d_invalid; +assign uop_stall = x_stall || d_starved; +assign uop_clear = f_jump_now; + // Decode various immmediate formats wire [31:0] d_imm_i = {{21{d_instr[31]}}, d_instr[30:20]}; wire [31:0] d_imm_s = {{21{d_instr[31]}}, d_instr[30:25], 
d_instr[11:7]}; @@ -102,7 +125,7 @@ wire d_except_instr_bus_fault = fd_cir_vld > 2'd0 && fd_cir_err[0] || fd_cir_vld > 2'd1 && d_instr_is_32bit && fd_cir_err[1]; assign d_starved = ~|fd_cir_vld || fd_cir_vld[0] && d_instr_is_32bit; -wire d_stall = x_stall || d_starved; +wire d_stall = x_stall || d_starved || uop_nonfinal; assign df_cir_use = d_starved || d_stall ? 2'h0 : @@ -133,7 +156,11 @@ always @ (posedge clk or negedge rst_n) begin end reg [W_ADDR-1:0] pc; -wire [W_ADDR-1:0] pc_seq_next = pc + (d_instr_is_32bit ? 32'h4 : 32'h2); +wire [W_ADDR-1:0] pc_seq_next = pc + ( + |EXTENSION_ZCMP && uop_nonfinal ? 32'h0 : + d_instr_is_32bit ? 32'h4 : 32'h2 +); + assign d_pc = pc; assign debug_dpc_rdata = pc; diff --git a/hdl/hazard3_frontend.v b/hdl/hazard3_frontend.v index 27b4a75..527ada0 100644 --- a/hdl/hazard3_frontend.v +++ b/hdl/hazard3_frontend.v @@ -62,6 +62,8 @@ module hazard3_frontend #( // stalled instruction may assert a jump request, because e.g. the stall // is dependent on a bus stall signal so can't gate the request. input wire cir_flush_behind, + // Required for regnum predecode when Zcmp is enabled: + input wire [3:0] df_uop_step_next, // Signal to power controller that power down is safe. (When going to // sleep, first the pipeline is stalled, and then the power controller @@ -545,21 +547,46 @@ assign cir_predbranch = cir_predbranch_reg[1:0]; wire [31:0] next_instr = instr_data_plus_fetch[31:0]; wire next_instr_is_32bit = next_instr[1:0] == 2'b11 || ~|EXTENSION_C; + +wire [3:0] uop_ctr = df_uop_step_next & {4{|EXTENSION_ZCMP}}; + +wire [4:0] zcmp_pushpop_rs2 = + uop_ctr == 4'h0 ? 5'd01 : // ra + uop_ctr == 4'h1 ? 5'd08 : // s0 + uop_ctr == 4'h2 ? 5'd09 : // s1 + 5'd15 + {1'b0, uop_ctr} ; // s2-s11 + +wire [4:0] zcmp_pushpop_rs1 = + uop_ctr < 4'hd ? 5'd02 : // sp (addr base reg) + uop_ctr == 4'hd ? 5'd00 : // zero (clear a0) + uop_ctr == 4'he ? 
5'd02 : // sp (stack adj) + 5'd01 ; // ra (ret) + +wire [4:0] zcmp_sa01_r1s = {|next_instr[9:8], ~|next_instr[9:8], next_instr[9:7]}; +wire [4:0] zcmp_sa01_r2s = {|next_instr[4:3], ~|next_instr[4:3], next_instr[4:2]}; + +wire [4:0] zcmp_mvsa01_rs1 = {4'h5, uop_ctr[0]}; +wire [4:0] zcmp_mva01s_rs1 = uop_ctr[0] ? zcmp_sa01_r2s : zcmp_sa01_r1s; + always @ (*) begin - casez ({next_instr_is_32bit, next_instr[1:0], next_instr[15:13]}) - {1'b1, 2'bzz, 3'bzzz}: predecode_rs1_coarse = next_instr[19:15]; // 32-bit R, S, B formats - {1'b0, 2'b00, 3'b00z}: predecode_rs1_coarse = 5'd2; // c.addi4spn + don't care - {1'b0, 2'b01, 3'b0zz}: predecode_rs1_coarse = next_instr[11:7]; // c.addi, c.addi16sp + don't care (jal, li) - {1'b0, 2'b10, 3'bz1z}: predecode_rs1_coarse = 5'd2; // c.lwsp, c.lwsp + don't care - {1'b0, 2'b10, 3'bz0z}: predecode_rs1_coarse = next_instr[11:7]; - default: predecode_rs1_coarse = {2'b01, next_instr[9:7]}; + casez ({next_instr_is_32bit, |EXTENSION_ZCMP, next_instr[15:0]}) + {1'b1, 1'bz, 16'bzzzzzzzzzzzzzzzz}: predecode_rs1_coarse = next_instr[19:15]; // 32-bit R, S, B formats + {1'b0, 1'bz, 16'b00zzzzzzzzzzzz00}: predecode_rs1_coarse = 5'd2; // c.addi4spn + don't care + {1'b0, 1'bz, 16'b0zzzzzzzzzzzzz01}: predecode_rs1_coarse = next_instr[11:7]; // c.addi, c.addi16sp + don't care (jal, li) + {1'b0, 1'bz, 16'bz00zzzzzzzzzzz10}: predecode_rs1_coarse = next_instr[11:7]; // c.slli, c.add (rs1 == rd field) + {1'b0, 1'bz, 16'bz10zzzzzzzzzzz10}: predecode_rs1_coarse = 5'd2; // c.lwsp, c.swsp + {1'b0, 1'b1, 16'b1z11zzzzzzzzzz10}: predecode_rs1_coarse = zcmp_pushpop_rs1; // cm.push, cm.pop* + {1'b0, 1'b1, 16'b1z10zzzzz0zzzz10}: predecode_rs1_coarse = zcmp_mvsa01_rs1; // cm.mvsa01 (reads a0 then a1) + {1'b0, 1'b1, 16'b1z10zzzzz1zzzz10}: predecode_rs1_coarse = zcmp_mva01s_rs1; // cm.mva01s (reads r1s' then r2s') + default: predecode_rs1_coarse = {2'b01, next_instr[9:7]}; endcase - casez ({next_instr_is_32bit, next_instr[1:0]}) - {1'b1, 2'bzz}: predecode_rs2_coarse = next_instr[24:20]; - {1'b0, 2'b10}: predecode_rs2_coarse = 
next_instr[6:2]; - default: predecode_rs2_coarse = {2'b01, next_instr[4:2]}; + casez ({next_instr_is_32bit, next_instr[1:0], next_instr[13]}) + {1'b1, 2'bzz, 1'bz}: predecode_rs2_coarse = next_instr[24:20]; + {1'b0, 2'b10, 1'b0}: predecode_rs2_coarse = next_instr[6:2]; // c.add, c.swsp + {1'b0, 2'b10, 1'b1}: predecode_rs2_coarse = zcmp_pushpop_rs2; // cm.push + default: predecode_rs2_coarse = {2'b01, next_instr[4:2]}; endcase // The "fine" predecode targets those instructions which either: diff --git a/hdl/hazard3_instr_decompress.v b/hdl/hazard3_instr_decompress.v index ae3d27c..41649a0 100644 --- a/hdl/hazard3_instr_decompress.v +++ b/hdl/hazard3_instr_decompress.v @@ -1,17 +1,38 @@ /*****************************************************************************\ -| Copyright (C) 2021-2022 Luke Wren | +| Copyright (C) 2021-2023 Luke Wren | | SPDX-License-Identifier: Apache-2.0 | \*****************************************************************************/ +// Little instructions go in, big instructions come out + `default_nettype none module hazard3_instr_decompress #( `include "hazard3_config.vh" ) ( - input wire [31:0] instr_in, - output reg instr_is_32bit, - output reg [31:0] instr_out, - output reg invalid + input wire clk, + input wire rst_n, + + input wire [31:0] instr_in, + output reg instr_is_32bit, + + output reg [31:0] instr_out, + // Indicate instr_out is a uop, and more uops follow in this sequence. + // Should suppress PC update, and null the PC offset in the mepc address + // in stage 3. + output wire instr_out_uop_nonfinal, + // Indicate instr_out is a uop from the noninterruptible part of a uop + // sequence. If one uop is noninterruptible, all following uops until the + // end of the sequence are also noninterruptible. 
+ output wire instr_out_uop_atomic, + // Current ucode sequence is stalled on downstream execution + input wire instr_out_uop_stall, + input wire instr_out_uop_clear, + + // To regnum decoder in frontend + output wire [3:0] df_uop_step_next, + + output reg invalid ); `include "rv_opcodes.vh" @@ -79,6 +100,69 @@ function [31:0] rfmt_rd; input [4:0] rd; begin rfmt_rd = {20'h00000, rd, 7'h0 function [31:0] rfmt_rs1; input [4:0] rs1; begin rfmt_rs1 = {12'h000, rs1, 15'h0000}; end endfunction function [31:0] rfmt_rs2; input [4:0] rs2; begin rfmt_rs2 = {7'h00, rs2, 20'h00000}; end endfunction +// ---------------------------------------------------------------------------- +// Push/pop and friends + +// The longest uop sequence is a maximal cm.popretz: +// +// - 13x lw (counter = 0..12) +// - 1x addi to set a0 to zero (counter = 13 ) < atomic section +// - 1x addi to adjust sp (counter = 14 ) < atomic section +// - 1x jalr to jump through ra (counter = 15 ) < atomic section + +reg [3:0] uop_ctr; +reg [3:0] uop_ctr_nxt; +reg in_uop_seq; +reg uop_seq_end; +reg uop_atomic; + +assign instr_out_uop_nonfinal = in_uop_seq && !uop_seq_end; +assign instr_out_uop_atomic = uop_atomic; +assign df_uop_step_next = uop_ctr_nxt; + +// The offset from current sp value to the lowest-addressed saved register, +64. +wire [3:0] zcmp_rlist = instr_in[7:4]; +wire [3:0] zcmp_n_regs = zcmp_rlist == 4'hf ? 4'hd : zcmp_rlist - 4'h3; + +wire [6:0] zcmp_stack_adj_base = + zcmp_rlist[3] == 1'b0 ? 7'h10 : + zcmp_rlist[3:2] == 2'h2 ? 7'h20 : + zcmp_rlist[3:0] == 4'hf ? 7'h40 : 7'h30; + +wire [11:0] zcmp_stack_lw_offset = {6'h00, uop_ctr, 2'h0}; +wire [11:0] zcmp_stack_sw_offset = zcmp_stack_lw_offset - {5'h00, zcmp_stack_adj_base}; + +wire [4:0] zcmp_ls_reg = + uop_ctr == 4'h0 ? 5'd01 : // ra + uop_ctr == 4'h1 ? 5'd08 : // s0 + uop_ctr == 4'h2 ? 
5'd09 : // s1 + 5'd15 + {1'b0, uop_ctr}; // s2-s11 (s2 == x18) + +wire [31:0] zcmp_push_sw_instr = `RVOPC_NOZ_SW | rfmt_rs1(5'd2) | rfmt_rs2(zcmp_ls_reg) | { + zcmp_stack_sw_offset[11:5], 13'h0000, zcmp_stack_sw_offset[4:0], 7'h00 +}; + +wire [31:0] zcmp_pop_lw_instr = `RVOPC_NOZ_LW | rfmt_rd(zcmp_ls_reg) | rfmt_rs1(5'd2)| { + zcmp_stack_lw_offset[11:0], 20'h00000 +}; + +wire [11:0] zcmp_abs_stack_adj = {5'h00, zcmp_stack_adj_base} + {6'h00, instr_in[3:2], 4'h0}; + +wire [31:0] zcmp_push_stack_adj_instr = `RVOPC_NOZ_ADDI | rfmt_rd(5'd2) | rfmt_rs1(5'd2) | { + -zcmp_abs_stack_adj, + 20'h00000 +}; + +wire [31:0] zcmp_pop_stack_adj_instr = `RVOPC_NOZ_ADDI | rfmt_rd(5'd2) | rfmt_rs1(5'd2) | { + zcmp_abs_stack_adj, + 20'h00000 +}; + +wire [4:0] zcmp_sa01_r1s = {|instr_in[9:8], ~|instr_in[9:8], instr_in[9:7]}; +wire [4:0] zcmp_sa01_r2s = {|instr_in[4:3], ~|instr_in[4:3], instr_in[4:2]}; + +// ---------------------------------------------------------------------------- + generate if (PASSTHROUGH) begin: instr_passthrough always @ (*) begin @@ -92,10 +176,18 @@ end else begin: instr_decompress instr_is_32bit = 1'b1; instr_out = instr_in; invalid = 1'b0; + uop_seq_end = 1'b0; + in_uop_seq = 1'b0; + uop_atomic = 1'b0; + uop_ctr_nxt = uop_ctr; end else begin instr_is_32bit = 1'b0; instr_out = 32'h0; invalid = 1'b0; + uop_seq_end = 1'b0; + in_uop_seq = 1'b0; + uop_atomic = 1'b0; + uop_ctr_nxt = uop_ctr; casez (instr_in[15:0]) 16'h0: invalid = 1'b1; `RVOPC_C_ADDI4SPN: instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(rd_s) | rfmt_rs1(5'h2) @@ -199,13 +291,152 @@ end else begin: instr_decompress invalid = ~|EXTENSION_ZCB || ~|EXTENSION_M; end + // Optional Zcmp instructions: + `RVOPC_CM_PUSH: if (~|EXTENSION_ZCMP || zcmp_rlist < 4'h4) begin + invalid = 1'b1; + end else if (uop_ctr == 4'he) begin + in_uop_seq = 1'b1; + uop_seq_end = 1'b1; + uop_ctr_nxt = 4'h0; + instr_out = zcmp_push_stack_adj_instr; + end else begin + in_uop_seq = 1'b1; + uop_ctr_nxt = uop_ctr + 4'h1; + instr_out = zcmp_push_sw_instr; 
+ if (uop_ctr_nxt == zcmp_n_regs) begin + uop_ctr_nxt = 4'he; + end + end + + `RVOPC_CM_POP: if (~|EXTENSION_ZCMP || zcmp_rlist < 4'h4) begin + invalid = 1'b1; + end else if (uop_ctr == 4'he) begin + in_uop_seq = 1'b1; + uop_seq_end = 1'b1; + uop_ctr_nxt = 4'h0; + uop_atomic = 1'b1; + instr_out = zcmp_pop_stack_adj_instr; + end else begin + in_uop_seq = 1'b1; + uop_ctr_nxt = uop_ctr + 4'h1; + instr_out = zcmp_pop_lw_instr; + if (uop_ctr_nxt == zcmp_n_regs) begin + uop_ctr_nxt = 4'he; + end + end + + `RVOPC_CM_POPRET: if (~|EXTENSION_ZCMP || zcmp_rlist < 4'h4) begin + invalid = 1'b1; + end else if (uop_ctr == 4'he) begin + // Note we don't set the uop_atomic flag on the first uop in + // the uninterruptible sequence -- the rule is *if* one + // executes, they all execute. Having none execute is fine. + in_uop_seq = 1'b1; + uop_ctr_nxt = uop_ctr + 4'h1; + instr_out = zcmp_pop_stack_adj_instr; + end else if (uop_ctr == 4'hf) begin + in_uop_seq = 1'b1; + uop_seq_end = 1'b1; + uop_atomic = 1'b1; + uop_ctr_nxt = 4'h0; + instr_out = `RVOPC_NOZ_JALR | rfmt_rs1(5'h1); + end else begin + in_uop_seq = 1'b1; + uop_ctr_nxt = uop_ctr + 4'h1; + instr_out = zcmp_pop_lw_instr; + if (uop_ctr_nxt == zcmp_n_regs) begin + uop_ctr_nxt = 4'he; + end + end + + `RVOPC_CM_POPRETZ: if (~|EXTENSION_ZCMP || zcmp_rlist < 4'h4) begin + invalid = 1'b1; + end else if (uop_ctr == 4'hd) begin + in_uop_seq = 1'b1; + uop_ctr_nxt = uop_ctr + 4'h1; + instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(5'd10); // li a0, 0 (a0 == x10) + end else if (uop_ctr == 4'he) begin + in_uop_seq = 1'b1; + uop_atomic = 1'b1; + uop_ctr_nxt = uop_ctr + 4'h1; + instr_out = zcmp_pop_stack_adj_instr; + end else if (uop_ctr == 4'hf) begin + in_uop_seq = 1'b1; + uop_seq_end = 1'b1; + uop_atomic = 1'b1; + uop_ctr_nxt = 4'h0; + instr_out = `RVOPC_NOZ_JALR | rfmt_rs1(5'h1); + end else begin + in_uop_seq = 1'b1; + uop_ctr_nxt = uop_ctr + 4'h1; + instr_out = zcmp_pop_lw_instr; + if (uop_ctr_nxt == zcmp_n_regs) begin + uop_ctr_nxt = 4'hd; + end + end + + 
`RVOPC_CM_MVSA01: if (~|EXTENSION_ZCMP) begin + invalid = 1'b1; + end else if (uop_ctr == 4'h0) begin + in_uop_seq = 1'b1; + uop_ctr_nxt = uop_ctr + 4'h1; + instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(zcmp_sa01_r1s) | rfmt_rs1(5'd10); + end else begin + in_uop_seq = 1'b1; + uop_seq_end = 1'b1; + uop_atomic = 1'b1; + uop_ctr_nxt = 4'h0; + instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(zcmp_sa01_r2s) | rfmt_rs1(5'd11); + end + + `RVOPC_CM_MVA01S: if (~|EXTENSION_ZCMP) begin + invalid = 1'b1; + end else if (uop_ctr == 4'h0) begin + in_uop_seq = 1'b1; + uop_ctr_nxt = uop_ctr + 4'h1; + instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(5'd10) | rfmt_rs1(zcmp_sa01_r1s); + end else begin + in_uop_seq = 1'b1; + uop_seq_end = 1'b1; + uop_atomic = 1'b1; + uop_ctr_nxt = 4'h0; + instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(5'd11) | rfmt_rs1(zcmp_sa01_r2s); + end + default: invalid = 1'b1; endcase + + if (instr_out_uop_clear) begin + uop_ctr_nxt = 4'h0; + end else if (instr_out_uop_stall) begin + uop_ctr_nxt = uop_ctr; + end end end end endgenerate +generate +if (EXTENSION_ZCMP) begin: have_uop_ctr + always @ (posedge clk or negedge rst_n) begin + if (!rst_n) begin + uop_ctr <= 4'h0; + end else begin + uop_ctr <= uop_ctr_nxt; +`ifdef HAZARD3_ASSERTIONS + assert(uop_ctr == 4'h0 || in_uop_seq); + if (uop_seq_end) begin + assert(in_uop_seq); + assert(instr_out_uop_stall || uop_ctr_nxt == 4'h0); + end +`endif + end + end +end else begin: no_uop_ctr + always @ (*) uop_ctr = 4'h0; +end +endgenerate + endmodule `ifndef YOSYS diff --git a/hdl/rv_opcodes.vh b/hdl/rv_opcodes.vh index 0df9356..8b465fc 100644 --- a/hdl/rv_opcodes.vh +++ b/hdl/rv_opcodes.vh @@ -194,6 +194,14 @@ localparam RV_RD_BITS = 5; `define RVOPC_C_NOT 16'b100111???1110101 `define RVOPC_C_MUL 16'b100111???10???01 +// Zcmp push/pop instructions +`define RVOPC_CM_PUSH 16'b10111000??????10 +`define RVOPC_CM_POP 16'b10111010??????10 +`define RVOPC_CM_POPRETZ 16'b10111100??????10 +`define RVOPC_CM_POPRET 16'b10111110??????10 +`define RVOPC_CM_MVSA01 
16'b101011???01???10 +`define RVOPC_CM_MVA01S 16'b101011???11???10 + // Copies provided here with 0 instead of ? so that these can be used to build 32-bit instructions in the decompressor `define RVOPC_NOZ_BEQ 32'b00000000000000000000000001100011 diff --git a/test/sim/common/src_only_app.mk b/test/sim/common/src_only_app.mk index a40f201..f2e5127 100644 --- a/test/sim/common/src_only_app.mk +++ b/test/sim/common/src_only_app.mk @@ -8,7 +8,7 @@ endif CCFLAGS ?= LDSCRIPT ?= ../common/memmap.ld -CROSS_PREFIX ?= riscv32-unknown-elf- +CROSS_PREFIX ?= /opt/riscv/gcc-riscv32-corev/bin/riscv32-corev-elf- TBDIR ?= ../tb_cxxrtl INCDIR ?= ../common MAX_CYCLES ?= 100000 diff --git a/test/sim/coremark/Makefile b/test/sim/coremark/Makefile index 840fa8c..1c5b230 100644 --- a/test/sim/coremark/Makefile +++ b/test/sim/coremark/Makefile @@ -1,7 +1,7 @@ APP := coremark MAX_CYCLES := 100000000 -CROSS_PREFIX ?= riscv32-unknown-elf- +CROSS_PREFIX ?= /opt/riscv/gcc-riscv32-corev/bin/riscv32-corev-elf- TBDIR ?= ../tb_cxxrtl diff --git a/test/sim/coremark/dist/barebones/core_portme.mak b/test/sim/coremark/dist/barebones/core_portme.mak index 2ae0577..103c82b 100755 --- a/test/sim/coremark/dist/barebones/core_portme.mak +++ b/test/sim/coremark/dist/barebones/core_portme.mak @@ -19,23 +19,27 @@ # Use this flag to define how to to get an executable (e.g -o) OUTFLAG= -o -CC = /opt/riscv/unstable/bin/riscv32-unknown-elf-gcc -LD = /opt/riscv/unstable/bin/riscv32-unknown-elf-gcc -AS = /opt/riscv/unstable/bin/riscv32-unknown-elf-gcc -# Flag : CFLAGS -# Use this flag to define compiler options. 
Note, you can add compiler options from the command line using XCFLAGS="other flags" -PORT_CFLAGS = -O3 -g -march=rv32im_zicsr_zba_zbb_zbc_zbs -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4 +MARCH = rv32im_zicsr_zba_zbb_zbc_zbs_zca_zcb_zcmp +CROSS_PREFIX = /opt/riscv/gcc-riscv32-corev/bin/riscv32-corev-elf- + +CC = $(CROSS_PREFIX)gcc +LD = $(CROSS_PREFIX)gcc +AS = $(CROSS_PREFIX)gcc + +PORT_CFLAGS = -O3 -g -march=$(MARCH) -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4 FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\" + #Flag : LFLAGS_END # Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts). -# Note : On certain platforms, the default clock_gettime implementation is supported but requires linking of librt. + SEPARATE_COMPILE=1 # Flag : SEPARATE_COMPILE # You must also define below how to create an object file, and how to link. 
+ OBJOUT = -o -LFLAGS = -T ../../common/memmap.ld -ASFLAGS = -c -march=rv32im_zicsr +LFLAGS = -T ../../common/memmap.ld -Wl,--noinhibit-exec +ASFLAGS = -c -march=$(MARCH) OFLAG = -o COUT = -c diff --git a/test/sim/hellow/Makefile b/test/sim/hellow/Makefile index 77daff4..a16d1c5 100644 --- a/test/sim/hellow/Makefile +++ b/test/sim/hellow/Makefile @@ -1,6 +1,6 @@ SRCS := ../common/init.S main.c APP := hellow -CCFLAGS = -march=rv32i_zicsr_zba_zbb_zbc_zbs -Os -AFLAGS = -march=rv32i_zicsr_zba_zbb_zbc_zbs +CCFLAGS = -march=rv32ima_zicsr_zba_zbb_zbc_zbs_zca_zcb_zcmp -Os +AFLAGS = -march=rv32ima_zicsr_zba_zbb_zbc_zbs_zca_zcb_zcmp include ../common/src_only_app.mk diff --git a/test/sim/hellow/main.c b/test/sim/hellow/main.c index 0546ef5..f3471b4 100644 --- a/test/sim/hellow/main.c +++ b/test/sim/hellow/main.c @@ -2,5 +2,9 @@ int main() { tb_puts("Hello world from Hazard3 + CXXRTL!\n"); + asm volatile( + "cm.push {ra, s0-s2}, -16\n" + "cm.pop {ra, s0-s2}, +16\n" + ); return 123; } diff --git a/test/sim/sw_testcases/Makefile b/test/sim/sw_testcases/Makefile index e9ef8ee..7844787 100644 --- a/test/sim/sw_testcases/Makefile +++ b/test/sim/sw_testcases/Makefile @@ -1,6 +1,6 @@ APP := hellow SRCS = ../common/init.S $(APP).c $(EXTRA_SRCS_$(APP)) -CCFLAGS := -march=rv32imac_zicsr -Os +CCFLAGS := -march=rv32imac_zicsr_zba_zbb_zbs_zcb -Os MAX_CYCLES := 1000000 INCDIR := include ../common diff --git a/test/sim/sw_testcases/extension_xh3b.c b/test/sim/sw_testcases/extension_xh3b.c index 11d4628..9173282 100644 --- a/test/sim/sw_testcases/extension_xh3b.c +++ b/test/sim/sw_testcases/extension_xh3b.c @@ -8,7 +8,7 @@ // - h3_bextmi: immediate version of the above (as bexti is to bext) // The instruction is just supposed to take a single static size... 
-__attribute__((noinline)) uint32_t bextm_dynamic_width(uint nbits, uint32_t rs1, uint32_t rs2) { +__attribute__((noinline)) uint32_t bextm_dynamic_width(unsigned int nbits, uint32_t rs1, uint32_t rs2) { switch (nbits) { case 1: return __hazard3_bextm(1, rs1, rs2); diff --git a/test/sim/tb_cxxrtl/config_default.vh b/test/sim/tb_cxxrtl/config_default.vh index 324e60b..e17164f 100644 --- a/test/sim/tb_cxxrtl/config_default.vh +++ b/test/sim/tb_cxxrtl/config_default.vh @@ -11,6 +11,7 @@ localparam EXTENSION_ZBC = 1; localparam EXTENSION_ZBS = 1; localparam EXTENSION_ZBKB = 1; localparam EXTENSION_ZCB = 1; +localparam EXTENSION_ZCMP = 1; localparam EXTENSION_ZIFENCEI = 1; localparam EXTENSION_XH3BEXTM = 1; localparam EXTENSION_XH3IRQ = 1; diff --git a/test/sim/tb_cxxrtl/config_min.vh b/test/sim/tb_cxxrtl/config_min.vh index c79fbae..2ff4cf0 100644 --- a/test/sim/tb_cxxrtl/config_min.vh +++ b/test/sim/tb_cxxrtl/config_min.vh @@ -11,6 +11,7 @@ localparam EXTENSION_ZBC = 0; localparam EXTENSION_ZBS = 0; localparam EXTENSION_ZBKB = 0; localparam EXTENSION_ZCB = 0; +localparam EXTENSION_ZCMP = 0; localparam EXTENSION_ZIFENCEI = 0; localparam EXTENSION_XH3BEXTM = 0; localparam EXTENSION_XH3IRQ = 0;