First attempt at Zcmp

This commit is contained in:
Luke Wren 2023-03-20 00:00:51 +00:00
parent 99c0660c3e
commit e966e832d2
16 changed files with 368 additions and 44 deletions

View File

@ -58,11 +58,15 @@ parameter EXTENSION_ZBS = 0,
// Requires: Zbb. (This flag enables instructions in Zbkb which aren't in Zbb.)
parameter EXTENSION_ZBKB = 0,
// EXTENSION_ZCB: Support for ZCB basic additional compressed instructions
// Requires: C. (Some Zcb instructions also require Zbb or M.)
// EXTENSION_ZCB: Support for Zcb basic additional compressed instructions
// Requires: EXTENSION_C. (Some Zcb instructions also require Zbb or M.)
// Note Zca is equivalent to C, as we do not support the F extension.
parameter EXTENSION_ZCB = 0,
// EXTENSION_ZCMP: Support for Zcmp push/pop instructions.
// Requires: EXTENSION_C.
parameter EXTENSION_ZCMP = 0,
// EXTENSION_ZIFENCEI: Support for the fence.i instruction
// Optional, since a plain branch/jump will also flush the prefetch queue.
parameter EXTENSION_ZIFENCEI = 0,

View File

@ -23,6 +23,7 @@
.EXTENSION_ZBS (EXTENSION_ZBS),
.EXTENSION_ZBKB (EXTENSION_ZBKB),
.EXTENSION_ZCB (EXTENSION_ZCB),
.EXTENSION_ZCMP (EXTENSION_ZCMP),
.EXTENSION_ZIFENCEI (EXTENSION_ZIFENCEI),
.EXTENSION_XH3BEXTM (EXTENSION_XH3BEXTM),
.EXTENSION_XH3IRQ (EXTENSION_XH3IRQ),

View File

@ -1,5 +1,5 @@
/*****************************************************************************\
| Copyright (C) 2021-2022 Luke Wren |
| Copyright (C) 2021-2023 Luke Wren |
| SPDX-License-Identifier: Apache-2.0 |
\*****************************************************************************/
@ -112,6 +112,7 @@ wire [1:0] fd_cir_predbranch;
wire [1:0] fd_cir_vld;
wire [1:0] df_cir_use;
wire df_cir_flush_behind;
wire [3:0] df_uop_step_next;
wire x_btb_set;
wire [W_ADDR-1:0] x_btb_set_src_addr;
@ -160,6 +161,7 @@ hazard3_frontend #(
.cir_vld (fd_cir_vld),
.cir_use (df_cir_use),
.cir_flush_behind (df_cir_flush_behind),
.df_uop_step_next (df_uop_step_next),
.pwrdown_ok (f_frontend_pwrdown_ok),
.delay_first_fetch (!pwrup_ack),
@ -215,6 +217,8 @@ wire [W_EXCEPT-1:0] d_except;
wire d_sleep_wfi;
wire d_sleep_block;
wire d_sleep_unblock;
wire d_no_pc_increment;
wire d_uninterruptible;
wire d_fence_i;
wire d_csr_ren;
wire d_csr_wen;
@ -241,6 +245,7 @@ hazard3_decode #(
.fd_cir_vld (fd_cir_vld),
.df_cir_use (df_cir_use),
.df_cir_flush_behind (df_cir_flush_behind),
.df_uop_step_next (df_uop_step_next),
.d_pc (d_pc),
.x_jump_not_except (x_jump_not_except),
@ -280,6 +285,8 @@ hazard3_decode #(
.d_sleep_wfi (d_sleep_wfi),
.d_sleep_block (d_sleep_block),
.d_sleep_unblock (d_sleep_unblock),
.d_no_pc_increment (d_no_pc_increment),
.d_uninterruptible (d_uninterruptible),
.d_fence_i (d_fence_i)
);
@ -322,6 +329,7 @@ reg [1:0] xm_addr_align;
reg [W_MEMOP-1:0] xm_memop;
reg [W_EXCEPT-1:0] xm_except;
reg xm_except_to_d_mode;
reg xm_no_pc_increment;
reg xm_sleep_wfi;
reg xm_sleep_block;
reg xm_delay_irq_entry_on_ls_stagex;
@ -999,7 +1007,7 @@ hazard3_csr #(
.trap_enter_vld (m_trap_enter_vld),
.trap_enter_rdy (m_trap_enter_rdy),
.loadstore_dphase_pending (m_dphase_in_flight),
.delay_irq_entry (m_delay_irq_entry),
.delay_irq_entry (m_delay_irq_entry || d_uninterruptible),
.mepc_in (m_exception_return_addr),
.pwr_allow_clkgate (m_pwr_allow_clkgate),
@ -1044,11 +1052,18 @@ always @ (posedge clk or negedge rst_n) begin
xm_sleep_wfi <= 1'b0;
xm_sleep_block <= 1'b0;
unblock_out <= 1'b0;
{xm_rs1, xm_rs2, xm_rd} <= {3 * W_REGADDR{1'b0}};
xm_rs1 <= {W_REGADDR{1'b0}};
xm_rs2 <= {W_REGADDR{1'b0}};
xm_rd <= {W_REGADDR{1'b0}};
xm_no_pc_increment <= 1'b0;
end else begin
unblock_out <= 1'b0;
if (!m_stall) begin
{xm_rs1, xm_rs2, xm_rd} <= {d_rs1, d_rs2, d_rd};
xm_rs1 <= d_rs1;
xm_rs2 <= d_rs2;
xm_rd <= d_rd;
// PC increment is suppressed on non-final micro-ops, so the exception return
// address calculation must not walk the PC back for them. This is only needed
// for Zcmp, so the flag is tied to zero when Zcmp is not configured:
xm_no_pc_increment <= d_no_pc_increment && |EXTENSION_ZCMP;
// If some X-sourced exception has squashed the address phase, need to squash the data phase too.
xm_memop <= x_except != EXCEPT_NONE ? MEMOP_NONE : d_memop;
xm_except <= x_except;
@ -1177,6 +1192,7 @@ assign m_stall = m_bus_stall ||
// was *not* a taken branch, which is why we can just walk back the PC.
assign m_exception_return_addr = d_pc - (
m_trap_is_irq ? 32'h0 :
xm_no_pc_increment ? 32'h0 :
prev_instr_was_32_bit ? 32'h4 : 32'h2
);

View File

@ -1,5 +1,5 @@
/*****************************************************************************\
| Copyright (C) 2021-2022 Luke Wren |
| Copyright (C) 2021-2023 Luke Wren |
| SPDX-License-Identifier: Apache-2.0 |
\*****************************************************************************/
@ -19,6 +19,7 @@ module hazard3_decode #(
input wire [1:0] fd_cir_vld,
output wire [1:0] df_cir_use,
output wire df_cir_flush_behind,
output wire [3:0] df_uop_step_next,
output wire [W_ADDR-1:0] d_pc,
input wire debug_mode,
@ -58,6 +59,8 @@ module hazard3_decode #(
output reg d_sleep_wfi,
output reg d_sleep_block,
output reg d_sleep_unblock,
output wire d_no_pc_increment,
output wire d_uninterruptible,
output reg d_fence_i
);
@ -75,15 +78,35 @@ wire d_invalid_16bit;
reg d_invalid_32bit;
wire d_invalid = d_invalid_16bit || d_invalid_32bit;
wire uop_nonfinal;
wire uop_uninterruptible;
wire uop_stall;
wire uop_clear;
hazard3_instr_decompress #(
`include "hazard3_config_inst.vh"
) decomp (
.instr_in (fd_cir),
.instr_is_32bit (d_instr_is_32bit),
.instr_out (d_instr),
.invalid (d_invalid_16bit)
.clk (clk),
.rst_n (rst_n),
.instr_in (fd_cir),
.instr_is_32bit (d_instr_is_32bit),
.instr_out (d_instr),
.instr_out_uop_nonfinal (uop_nonfinal),
// Port is declared as instr_out_uop_atomic on hazard3_instr_decompress
.instr_out_uop_atomic (uop_uninterruptible),
.instr_out_uop_stall (uop_stall),
.instr_out_uop_clear (uop_clear),
.df_uop_step_next (df_uop_step_next),
.invalid (d_invalid_16bit)
);
assign d_uninterruptible = uop_uninterruptible && !d_invalid;
assign d_no_pc_increment = uop_nonfinal && !d_invalid;
assign uop_stall = x_stall || d_starved;
assign uop_clear = f_jump_now;
// Decode various immediate formats
wire [31:0] d_imm_i = {{21{d_instr[31]}}, d_instr[30:20]};
wire [31:0] d_imm_s = {{21{d_instr[31]}}, d_instr[30:25], d_instr[11:7]};
@ -102,7 +125,7 @@ wire d_except_instr_bus_fault = fd_cir_vld > 2'd0 && fd_cir_err[0] ||
fd_cir_vld > 2'd1 && d_instr_is_32bit && fd_cir_err[1];
assign d_starved = ~|fd_cir_vld || fd_cir_vld[0] && d_instr_is_32bit;
wire d_stall = x_stall || d_starved;
wire d_stall = x_stall || d_starved || uop_nonfinal;
assign df_cir_use =
d_starved || d_stall ? 2'h0 :
@ -133,7 +156,11 @@ always @ (posedge clk or negedge rst_n) begin
end
reg [W_ADDR-1:0] pc;
wire [W_ADDR-1:0] pc_seq_next = pc + (d_instr_is_32bit ? 32'h4 : 32'h2);
wire [W_ADDR-1:0] pc_seq_next = pc + (
|EXTENSION_ZCMP && uop_nonfinal ? 32'h0 :
d_instr_is_32bit ? 32'h4 : 32'h2
);
assign d_pc = pc;
assign debug_dpc_rdata = pc;

View File

@ -62,6 +62,8 @@ module hazard3_frontend #(
// stalled instruction may assert a jump request, because e.g. the stall
// is dependent on a bus stall signal so can't gate the request.
input wire cir_flush_behind,
// Required for regnum predecode when Zcmp is enabled:
input wire [3:0] df_uop_step_next,
// Signal to power controller that power down is safe. (When going to
// sleep, first the pipeline is stalled, and then the power controller
@ -545,21 +547,46 @@ assign cir_predbranch = cir_predbranch_reg[1:0];
wire [31:0] next_instr = instr_data_plus_fetch[31:0];
wire next_instr_is_32bit = next_instr[1:0] == 2'b11 || ~|EXTENSION_C;
wire [3:0] uop_ctr = df_uop_step_next & {4{|EXTENSION_ZCMP}};
wire [4:0] zcmp_pushpop_rs2 =
uop_ctr == 4'h0 ? 5'd01 : // ra
uop_ctr == 4'h1 ? 5'd08 : // s0
uop_ctr == 4'h2 ? 5'd09 : // s1
5'd15 + {1'b0, uop_ctr} ; // s2-s11
wire [4:0] zcmp_pushpop_rs1 =
uop_ctr < 4'hd ? 5'd02 : // sp (addr base reg)
uop_ctr == 4'hd ? 5'd00 : // zero (clear a0)
uop_ctr == 4'he ? 5'd02 : // sp (stack adj)
5'd01 ; // ra (ret)
// Expand the 3-bit r1s'/r2s' fields of cm.mvsa01/cm.mva01s (instruction bits
// 9:7 and 4:2) to 5-bit register numbers. s0-s1 are x8-x9 (5'b0100x) and
// s2-s7 are x18-x23 (5'b10xxx), so bit 4 is set when the upper two field bits
// are nonzero, and bit 3 is set when they are both zero.
wire [4:0] zcmp_sa01_r1s = {|next_instr[9:8], ~|next_instr[9:8], next_instr[9:7]};
wire [4:0] zcmp_sa01_r2s = {|next_instr[4:3], ~|next_instr[4:3], next_instr[4:2]};
// cm.mvsa01 reads a0 (x10) on the first uop and a1 (x11) on the second
wire [4:0] zcmp_mvsa01_rs1 = {4'h5, uop_ctr[0]};
// cm.mva01s reads r1s' on the first uop and r2s' on the second
wire [4:0] zcmp_mva01s_rs1 = uop_ctr[0] ? zcmp_sa01_r2s : zcmp_sa01_r1s;
always @ (*) begin
casez ({next_instr_is_32bit, next_instr[1:0], next_instr[15:13]})
{1'b1, 2'bzz, 3'bzzz}: predecode_rs1_coarse = next_instr[19:15]; // 32-bit R, S, B formats
{1'b0, 2'b00, 3'b00z}: predecode_rs1_coarse = 5'd2; // c.addi4spn + don't care
{1'b0, 2'b01, 3'b0zz}: predecode_rs1_coarse = next_instr[11:7]; // c.addi, c.addi16sp + don't care (jal, li)
{1'b0, 2'b10, 3'bz1z}: predecode_rs1_coarse = 5'd2; // c.lwsp, c.lwsp + don't care
{1'b0, 2'b10, 3'bz0z}: predecode_rs1_coarse = next_instr[11:7];
default: predecode_rs1_coarse = {2'b01, next_instr[9:7]};
casez ({next_instr_is_32bit, |EXTENSION_ZCMP, next_instr[15:0]})
{1'b1, 1'bz, 16'bzzzzzzzzzzzzzzzz}: predecode_rs1_coarse = next_instr[19:15]; // 32-bit R, S, B formats
{1'b0, 1'bz, 16'b00zzzzzzzzzzzz00}: predecode_rs1_coarse = 5'd2; // c.addi4spn + don't care
{1'b0, 1'bz, 16'b0zzzzzzzzzzzzz01}: predecode_rs1_coarse = next_instr[11:7]; // c.addi, c.addi16sp + don't care (jal, li)
{1'b0, 1'bz, 16'b100zzzzzzzzzzz10}: predecode_rs1_coarse = next_instr[11:7]; // c.add
{1'b0, 1'bz, 16'bz10zzzzzzzzzzz10}: predecode_rs1_coarse = 5'd2; // c.lwsp, c.swsp
{1'b0, 1'b1, 16'b1z11zzzzzzzzzz10}: predecode_rs1_coarse = zcmp_pushpop_rs1; // cm.push, cm.pop*
{1'b0, 1'b1, 16'b1z10zzzzz0zzzz10}: predecode_rs1_coarse = zcmp_mvsa01_rs1; // cm.mvsa01
{1'b0, 1'b1, 16'b1z10zzzzz1zzzz10}: predecode_rs1_coarse = zcmp_mva01s_rs1; // cm.mva01s
default: predecode_rs1_coarse = {2'b01, next_instr[9:7]};
endcase
casez ({next_instr_is_32bit, next_instr[1:0]})
{1'b1, 2'bzz}: predecode_rs2_coarse = next_instr[24:20];
{1'b0, 2'b10}: predecode_rs2_coarse = next_instr[6:2];
default: predecode_rs2_coarse = {2'b01, next_instr[4:2]};
casez ({next_instr_is_32bit, next_instr[1:0], next_instr[13]})
{1'b1, 2'bzz, 1'bz}: predecode_rs2_coarse = next_instr[24:20];
{1'b0, 2'b10, 1'b0}: predecode_rs2_coarse = next_instr[6:2]; // c.add, c.swsp
{1'b0, 2'b10, 1'b1}: predecode_rs2_coarse = zcmp_pushpop_rs2; // cm.push
default: predecode_rs2_coarse = {2'b01, next_instr[4:2]};
endcase
// The "fine" predecode targets those instructions which either:

View File

@ -1,17 +1,38 @@
/*****************************************************************************\
| Copyright (C) 2021-2022 Luke Wren |
| Copyright (C) 2021-2023 Luke Wren |
| SPDX-License-Identifier: Apache-2.0 |
\*****************************************************************************/
// Little instructions go in, big instructions come out
`default_nettype none
module hazard3_instr_decompress #(
`include "hazard3_config.vh"
) (
input wire [31:0] instr_in,
output reg instr_is_32bit,
output reg [31:0] instr_out,
output reg invalid
input wire clk,
input wire rst_n,
input wire [31:0] instr_in,
output reg instr_is_32bit,
output reg [31:0] instr_out,
// Indicate instr_out is a uop, and more uops follow in this sequence.
// Should suppress PC update, and null the PC offset in the mepc address
// in stage 3.
output wire instr_out_uop_nonfinal,
// Indicate instr_out is a uop from the noninterruptible part of a uop
// sequence. If one uop is noninterruptible, all following uops until the
// end of the sequence are also noninterruptible.
output wire instr_out_uop_atomic,
// Current ucode sequence is stalled on downstream execution
input wire instr_out_uop_stall,
input wire instr_out_uop_clear,
// To regnum decoder in frontend
output wire [3:0] df_uop_step_next,
output reg invalid
);
`include "rv_opcodes.vh"
@ -79,6 +100,69 @@ function [31:0] rfmt_rd; input [4:0] rd; begin rfmt_rd = {20'h00000, rd, 7'h0
function [31:0] rfmt_rs1; input [4:0] rs1; begin rfmt_rs1 = {12'h000, rs1, 15'h0000}; end endfunction
function [31:0] rfmt_rs2; input [4:0] rs2; begin rfmt_rs2 = {7'h00, rs2, 20'h00000}; end endfunction
// ----------------------------------------------------------------------------
// Push/pop and friends
// The longest uop sequence is a maximal cm.popretz:
//
// - 13x lw (counter = 0..12)
// - 1x addi to set a0 to zero (counter = 13 ) < atomic section
// - 1x addi to adjust sp (counter = 14 ) < atomic section
// - 1x jalr to jump through ra (counter = 15 ) < atomic section
reg [3:0] uop_ctr;
reg [3:0] uop_ctr_nxt;
reg in_uop_seq;
reg uop_seq_end;
reg uop_atomic;
assign instr_out_uop_nonfinal = in_uop_seq && !uop_seq_end;
assign instr_out_uop_atomic = uop_atomic;
assign df_uop_step_next = uop_ctr_nxt;
// Register list field of cm.push/cm.pop* (instruction bits 7:4).
wire [3:0] zcmp_rlist = instr_in[7:4];
// Value of uop_ctr at which the run of load/store uops ends: rlist - 3, except
// that rlist == 15 gives 13.
wire [3:0] zcmp_n_regs = zcmp_rlist == 4'hf ? 4'hd : zcmp_rlist - 4'h3;
// Base stack adjustment in bytes, selected by rlist: 16 for rlist < 8,
// 32 for rlist 8..11, 48 for rlist 12..14 and 64 for rlist == 15.
wire [6:0] zcmp_stack_adj_base =
zcmp_rlist[3] == 1'b0 ? 7'h10 :
zcmp_rlist[3:2] == 2'h2 ? 7'h20 :
zcmp_rlist[3:0] == 4'hf ? 7'h40 : 7'h30;
// Load offset from sp is 4 bytes per uop step. The store offset is the same
// value minus the base stack adjustment (12-bit wrapping arithmetic).
wire [11:0] zcmp_stack_lw_offset = {6'h00, uop_ctr, 2'h0};
wire [11:0] zcmp_stack_sw_offset = zcmp_stack_lw_offset - {5'h00, zcmp_stack_adj_base};
wire [4:0] zcmp_ls_reg =
uop_ctr == 4'h0 ? 5'd01 : // ra
uop_ctr == 4'h1 ? 5'd08 : // s0
uop_ctr == 4'h2 ? 5'd09 : // s1
5'd15 + {1'b0, uop_ctr}; // s2-s11 (s2 == x18)
wire [31:0] zcmp_push_sw_instr = `RVOPC_NOZ_SW | rfmt_rs1(5'd2) | rfmt_rs2(zcmp_ls_reg) | {
zcmp_stack_sw_offset[11:5], 13'h0000, zcmp_stack_sw_offset[4:0], 7'h00
};
wire [31:0] zcmp_pop_lw_instr = `RVOPC_NOZ_LW | rfmt_rd(zcmp_ls_reg) | rfmt_rs1(5'd2)| {
zcmp_stack_lw_offset[11:0], 20'h00000
};
wire [11:0] zcmp_abs_stack_adj = {5'h00, zcmp_stack_adj_base} + {6'h00, instr_in[3:2], 4'h0};
wire [31:0] zcmp_push_stack_adj_instr = `RVOPC_NOZ_ADDI | rfmt_rd(5'd2) | rfmt_rs1(5'd2) | {
-zcmp_abs_stack_adj,
20'h00000
};
wire [31:0] zcmp_pop_stack_adj_instr = `RVOPC_NOZ_ADDI | rfmt_rd(5'd2) | rfmt_rs1(5'd2) | {
zcmp_abs_stack_adj,
20'h00000
};
// Expand the 3-bit r1s'/r2s' fields (instruction bits 9:7 and 4:2) to 5-bit
// register numbers: s0-s1 are x8-x9 (5'b0100x), s2-s7 are x18-x23 (5'b10xxx).
// These must be 5 bits wide, as they are passed to rfmt_rd()/rfmt_rs1().
wire [4:0] zcmp_sa01_r1s = {|instr_in[9:8], ~|instr_in[9:8], instr_in[9:7]};
wire [4:0] zcmp_sa01_r2s = {|instr_in[4:3], ~|instr_in[4:3], instr_in[4:2]};
// ----------------------------------------------------------------------------
generate
if (PASSTHROUGH) begin: instr_passthrough
always @ (*) begin
@ -92,10 +176,18 @@ end else begin: instr_decompress
instr_is_32bit = 1'b1;
instr_out = instr_in;
invalid = 1'b0;
uop_seq_end = 1'b0;
in_uop_seq = 1'b0;
uop_atomic = 1'b0;
uop_ctr_nxt = uop_ctr;
end else begin
instr_is_32bit = 1'b0;
instr_out = 32'h0;
invalid = 1'b0;
uop_seq_end = 1'b0;
in_uop_seq = 1'b0;
uop_atomic = 1'b0;
uop_ctr_nxt = uop_ctr;
casez (instr_in[15:0])
16'h0: invalid = 1'b1;
`RVOPC_C_ADDI4SPN: instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(rd_s) | rfmt_rs1(5'h2)
@ -199,13 +291,152 @@ end else begin: instr_decompress
invalid = ~|EXTENSION_ZCB || ~|EXTENSION_M;
end
// Optional Zcmp instructions:
`RVOPC_CM_PUSH: if (~|EXTENSION_ZCMP || zcmp_rlist < 4'h4) begin
invalid = 1'b1;
end else if (uop_ctr == 4'he) begin
in_uop_seq = 1'b1;
uop_seq_end = 1'b1;
uop_ctr_nxt = 4'h0;
instr_out = zcmp_push_stack_adj_instr;
end else begin
in_uop_seq = 1'b1;
uop_ctr_nxt = uop_ctr + 4'h1;
instr_out = zcmp_push_sw_instr;
if (uop_ctr_nxt == zcmp_n_regs) begin
uop_ctr_nxt = 4'he;
end
end
`RVOPC_CM_POP: if (~|EXTENSION_ZCMP || zcmp_rlist < 4'h4) begin
invalid = 1'b1;
end else if (uop_ctr == 4'he) begin
in_uop_seq = 1'b1;
uop_seq_end = 1'b1;
uop_ctr_nxt = 4'h0;
uop_atomic = 1'b1;
instr_out = zcmp_pop_stack_adj_instr;
end else begin
in_uop_seq = 1'b1;
uop_ctr_nxt = uop_ctr + 4'h1;
instr_out = zcmp_pop_lw_instr;
if (uop_ctr_nxt == zcmp_n_regs) begin
uop_ctr_nxt = 4'he;
end
end
`RVOPC_CM_POPRET: if (~|EXTENSION_ZCMP || zcmp_rlist < 4'h4) begin
invalid = 1'b1;
end else if (uop_ctr == 4'he) begin
// Note we don't set the uop_atomic flag on the first uop in
// the uninterruptible sequence -- the rule is *if* one
// executes, they all execute. Having none execute is fine.
in_uop_seq = 1'b1;
uop_ctr_nxt = uop_ctr + 4'h1;
instr_out = zcmp_pop_stack_adj_instr;
end else if (uop_ctr == 4'hf) begin
in_uop_seq = 1'b1;
uop_seq_end = 1'b1;
uop_atomic = 1'b1;
uop_ctr_nxt = 4'h0;
instr_out = `RVOPC_NOZ_JALR | rfmt_rs1(5'h1);
end else begin
in_uop_seq = 1'b1;
uop_ctr_nxt = uop_ctr + 4'h1;
instr_out = zcmp_pop_lw_instr;
if (uop_ctr_nxt == zcmp_n_regs) begin
uop_ctr_nxt = 4'he;
end
end
`RVOPC_CM_POPRETZ: if (~|EXTENSION_ZCMP || zcmp_rlist < 4'h4) begin
	invalid = 1'b1;
end else if (uop_ctr == 4'hd) begin
	// First uop of the uninterruptible tail: clear a0 (x10). As with
	// cm.popret, the first uop of the tail is not itself flagged atomic.
	in_uop_seq = 1'b1;
	uop_ctr_nxt = uop_ctr + 4'h1;
	instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(5'd10); // li a0, 0
end else if (uop_ctr == 4'he) begin
	// Stack pointer adjustment
	in_uop_seq = 1'b1;
	uop_atomic = 1'b1;
	uop_ctr_nxt = uop_ctr + 4'h1;
	instr_out = zcmp_pop_stack_adj_instr;
end else if (uop_ctr == 4'hf) begin
	// Final uop: return through ra
	in_uop_seq = 1'b1;
	uop_seq_end = 1'b1;
	uop_atomic = 1'b1;
	uop_ctr_nxt = 4'h0;
	instr_out = `RVOPC_NOZ_JALR | rfmt_rs1(5'h1);
end else begin
	// Register loads; after the last one, skip ahead to the a0 clear
	in_uop_seq = 1'b1;
	uop_ctr_nxt = uop_ctr + 4'h1;
	instr_out = zcmp_pop_lw_instr;
	if (uop_ctr_nxt == zcmp_n_regs) begin
		uop_ctr_nxt = 4'hd;
	end
end
`RVOPC_CM_MVSA01: if (~|EXTENSION_ZCMP) begin
invalid = 1'b1;
end else if (uop_ctr == 4'h0) begin
in_uop_seq = 1'b1;
uop_ctr_nxt = uop_ctr + 4'h1;
instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(zcmp_sa01_r1s) | rfmt_rs1(5'd10);
end else begin
in_uop_seq = 1'b1;
uop_seq_end = 1'b1;
uop_atomic = 1'b1;
uop_ctr_nxt = 4'h0;
instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(zcmp_sa01_r2s) | rfmt_rs1(5'd11);
end
`RVOPC_CM_MVA01S: if (~|EXTENSION_ZCMP) begin
invalid = 1'b1;
end else if (uop_ctr == 4'h0) begin
in_uop_seq = 1'b1;
uop_ctr_nxt = uop_ctr + 4'h1;
instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(5'd10) | rfmt_rs1(zcmp_sa01_r1s);
end else begin
in_uop_seq = 1'b1;
uop_seq_end = 1'b1;
uop_atomic = 1'b1;
uop_ctr_nxt = 4'h0;
instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(5'd11) | rfmt_rs1(zcmp_sa01_r2s);
end
default: invalid = 1'b1;
endcase
if (instr_out_uop_clear) begin
uop_ctr_nxt = 4'h0;
end else if (instr_out_uop_stall) begin
uop_ctr_nxt = uop_ctr;
end
end
end
end
endgenerate
// uop_ctr state: a real register when Zcmp is configured, otherwise driven to
// a constant zero.
generate
if (EXTENSION_ZCMP) begin: have_uop_ctr
	always @ (posedge clk or negedge rst_n) begin
		if (!rst_n) begin
			uop_ctr <= 4'h0;
		end else begin
			uop_ctr <= uop_ctr_nxt;
`ifdef HAZARD3_ASSERTIONS
			// The counter may only be nonzero while a uop sequence is being decoded
			assert(uop_ctr == 4'h0 || in_uop_seq);
			if (uop_seq_end) begin
				assert(in_uop_seq);
				// The final uop either holds due to a stall, or returns the counter to zero
				assert(instr_out_uop_stall || uop_ctr_nxt == 4'h0);
			end
`endif
		end
	end
end else begin: no_uop_ctr
	// NOTE(review): an always @ (*) block with no right-hand-side signals may
	// never be triggered in some simulators -- confirm uop_ctr does not read
	// as X in non-Zcmp simulation builds.
	always @ (*) uop_ctr = 4'h0;
end
endgenerate
endmodule
`ifndef YOSYS

View File

@ -194,6 +194,14 @@ localparam RV_RD_BITS = 5;
`define RVOPC_C_NOT 16'b100111???1110101
`define RVOPC_C_MUL 16'b100111???10???01
// Zcmp push/pop instructions
`define RVOPC_CM_PUSH 16'b10111000??????10
`define RVOPC_CM_POP 16'b10111010??????10
`define RVOPC_CM_POPRETZ 16'b10111100??????10
`define RVOPC_CM_POPRET 16'b10111110??????10
`define RVOPC_CM_MVSA01 16'b101011???01???10
`define RVOPC_CM_MVA01S 16'b101011???11???10
// Copies provided here with 0 instead of ? so that these can be used to build 32-bit instructions in the decompressor
`define RVOPC_NOZ_BEQ 32'b00000000000000000000000001100011

View File

@ -8,7 +8,7 @@ endif
CCFLAGS ?=
LDSCRIPT ?= ../common/memmap.ld
CROSS_PREFIX ?= riscv32-unknown-elf-
CROSS_PREFIX ?= /opt/riscv/gcc-riscv32-corev/bin/riscv32-corev-elf-
TBDIR ?= ../tb_cxxrtl
INCDIR ?= ../common
MAX_CYCLES ?= 100000

View File

@ -1,7 +1,7 @@
APP := coremark
MAX_CYCLES := 100000000
CROSS_PREFIX ?= riscv32-unknown-elf-
CROSS_PREFIX ?= /opt/riscv/gcc-riscv32-corev/bin/riscv32-corev-elf-
TBDIR ?= ../tb_cxxrtl

View File

@ -19,23 +19,27 @@
# Use this flag to define how to get an executable (e.g -o)
OUTFLAG= -o
CC = /opt/riscv/unstable/bin/riscv32-unknown-elf-gcc
LD = /opt/riscv/unstable/bin/riscv32-unknown-elf-gcc
AS = /opt/riscv/unstable/bin/riscv32-unknown-elf-gcc
# Flag : CFLAGS
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
PORT_CFLAGS = -O3 -g -march=rv32im_zicsr_zba_zbb_zbc_zbs -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4
# Target ISA string and toolchain prefix. Assigned with ?= so that they can be
# overridden from the environment as well as the make command line, matching
# the CROSS_PREFIX ?= convention of the other test Makefiles.
MARCH ?= rv32im_zicsr_zba_zbb_zbc_zbs_zca_zcb_zcmp
CROSS_PREFIX ?= /opt/riscv/gcc-riscv32-corev/bin/riscv32-corev-elf-
CC = $(CROSS_PREFIX)gcc
LD = $(CROSS_PREFIX)gcc
AS = $(CROSS_PREFIX)gcc
PORT_CFLAGS = -O3 -g -march=$(MARCH) -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
#Flag : LFLAGS_END
# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
# Note : On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
SEPARATE_COMPILE=1
# Flag : SEPARATE_COMPILE
# You must also define below how to create an object file, and how to link.
OBJOUT = -o
LFLAGS = -T ../../common/memmap.ld
ASFLAGS = -c -march=rv32im_zicsr
LFLAGS = -T ../../common/memmap.ld -Wl,--noinhibit-exec
ASFLAGS = -c -march=$(MARCH)
OFLAG = -o
COUT = -c

View File

@ -1,6 +1,6 @@
SRCS := ../common/init.S main.c
APP := hellow
CCFLAGS = -march=rv32i_zicsr_zba_zbb_zbc_zbs -Os
AFLAGS = -march=rv32i_zicsr_zba_zbb_zbc_zbs
CCFLAGS = -march=rv32ima_zicsr_zba_zbb_zbc_zbs_zca_zcb_zcmp -Os
AFLAGS = -march=rv32ima_zicsr_zba_zbb_zbc_zbs_zca_zcb_zcmp
include ../common/src_only_app.mk

View File

@ -2,5 +2,9 @@
int main() {
tb_puts("Hello world from Hazard3 + CXXRTL!\n");
asm volatile(
"cm.push {ra, s0-s2}, -16\n"
"cm.pop {ra, s0-s2}, +16\n"
);
return 123;
}

View File

@ -1,6 +1,6 @@
APP := hellow
SRCS = ../common/init.S $(APP).c $(EXTRA_SRCS_$(APP))
CCFLAGS := -march=rv32imac_zicsr -Os
CCFLAGS := -march=rv32imac_zicsr_zba_zbb_zbs_zcb -Os
MAX_CYCLES := 1000000
INCDIR := include ../common

View File

@ -8,7 +8,7 @@
// - h3_bextmi: immediate version of the above (as bexti is to bext)
// The instruction is just supposed to take a single static size...
__attribute__((noinline)) uint32_t bextm_dynamic_width(uint nbits, uint32_t rs1, uint32_t rs2) {
__attribute__((noinline)) uint32_t bextm_dynamic_width(unsigned int nbits, uint32_t rs1, uint32_t rs2) {
switch (nbits) {
case 1:
return __hazard3_bextm(1, rs1, rs2);

View File

@ -11,6 +11,7 @@ localparam EXTENSION_ZBC = 1;
localparam EXTENSION_ZBS = 1;
localparam EXTENSION_ZBKB = 1;
localparam EXTENSION_ZCB = 1;
localparam EXTENSION_ZCMP = 1;
localparam EXTENSION_ZIFENCEI = 1;
localparam EXTENSION_XH3BEXTM = 1;
localparam EXTENSION_XH3IRQ = 1;

View File

@ -11,6 +11,7 @@ localparam EXTENSION_ZBC = 0;
localparam EXTENSION_ZBS = 0;
localparam EXTENSION_ZBKB = 0;
localparam EXTENSION_ZCB = 0;
localparam EXTENSION_ZCMP = 0;
localparam EXTENSION_ZIFENCEI = 0;
localparam EXTENSION_XH3BEXTM = 0;
localparam EXTENSION_XH3IRQ = 0;