First attempt at Zcmp
This commit is contained in:
		
							parent
							
								
									99c0660c3e
								
							
						
					
					
						commit
						e966e832d2
					
				|  | @ -58,11 +58,15 @@ parameter EXTENSION_ZBS       = 0, | |||
| // Requires: Zbb. (This flag enables instructions in Zbkb which aren't in Zbb.) | ||||
| parameter EXTENSION_ZBKB      = 0, | ||||
| 
 | ||||
| // EXTENSION_ZCB: Support for ZCB basic additional compressed instructions | ||||
| // Requires: C. (Some Zcb instructions also require Zbb or M.) | ||||
| // EXTENSION_ZCB: Support for Zcb basic additional compressed instructions | ||||
| // Requires: EXTENSION_C. (Some Zcb instructions also require Zbb or M.) | ||||
| // Note Zca is equivalent to C, as we do not support the F extension. | ||||
| parameter EXTENSION_ZCB       = 0, | ||||
| 
 | ||||
| // EXTENSION_ZCMP: Support for Zcmp push/pop instructions. | ||||
| // Requires: EXTENSION_C. | ||||
| parameter EXTENSION_ZCMP      = 0, | ||||
| 
 | ||||
| // EXTENSION_ZIFENCEI: Support for the fence.i instruction | ||||
| // Optional, since a plain branch/jump will also flush the prefetch queue. | ||||
| parameter EXTENSION_ZIFENCEI  = 0, | ||||
|  |  | |||
|  | @ -23,6 +23,7 @@ | |||
| .EXTENSION_ZBS       (EXTENSION_ZBS), | ||||
| .EXTENSION_ZBKB      (EXTENSION_ZBKB), | ||||
| .EXTENSION_ZCB       (EXTENSION_ZCB), | ||||
| .EXTENSION_ZCMP      (EXTENSION_ZCMP), | ||||
| .EXTENSION_ZIFENCEI  (EXTENSION_ZIFENCEI), | ||||
| .EXTENSION_XH3BEXTM  (EXTENSION_XH3BEXTM), | ||||
| .EXTENSION_XH3IRQ    (EXTENSION_XH3IRQ), | ||||
|  |  | |||
|  | @ -1,5 +1,5 @@ | |||
| /*****************************************************************************\ | ||||
| |                      Copyright (C) 2021-2022 Luke Wren                      | | ||||
| |                      Copyright (C) 2021-2023 Luke Wren                      | | ||||
| |                     SPDX-License-Identifier: Apache-2.0                     | | ||||
| \*****************************************************************************/ | ||||
| 
 | ||||
|  | @ -112,6 +112,7 @@ wire [1:0]           fd_cir_predbranch; | |||
| wire [1:0]           fd_cir_vld; | ||||
| wire [1:0]           df_cir_use; | ||||
| wire                 df_cir_flush_behind; | ||||
| wire [3:0]           df_uop_step_next; | ||||
| 
 | ||||
| wire                 x_btb_set; | ||||
| wire [W_ADDR-1:0]    x_btb_set_src_addr; | ||||
|  | @ -160,6 +161,7 @@ hazard3_frontend #( | |||
| 	.cir_vld              (fd_cir_vld), | ||||
| 	.cir_use              (df_cir_use), | ||||
| 	.cir_flush_behind     (df_cir_flush_behind), | ||||
| 	.df_uop_step_next     (df_uop_step_next), | ||||
| 
 | ||||
| 	.pwrdown_ok           (f_frontend_pwrdown_ok), | ||||
| 	.delay_first_fetch    (!pwrup_ack), | ||||
|  | @ -215,6 +217,8 @@ wire [W_EXCEPT-1:0]  d_except; | |||
| wire                 d_sleep_wfi; | ||||
| wire                 d_sleep_block; | ||||
| wire                 d_sleep_unblock; | ||||
| wire                 d_no_pc_increment; | ||||
| wire                 d_uninterruptible; | ||||
| wire                 d_fence_i; | ||||
| wire                 d_csr_ren; | ||||
| wire                 d_csr_wen; | ||||
|  | @ -241,6 +245,7 @@ hazard3_decode #( | |||
| 	.fd_cir_vld           (fd_cir_vld), | ||||
| 	.df_cir_use           (df_cir_use), | ||||
| 	.df_cir_flush_behind  (df_cir_flush_behind), | ||||
| 	.df_uop_step_next     (df_uop_step_next), | ||||
| 	.d_pc                 (d_pc), | ||||
| 	.x_jump_not_except    (x_jump_not_except), | ||||
| 
 | ||||
|  | @ -280,6 +285,8 @@ hazard3_decode #( | |||
| 	.d_sleep_wfi          (d_sleep_wfi), | ||||
| 	.d_sleep_block        (d_sleep_block), | ||||
| 	.d_sleep_unblock      (d_sleep_unblock), | ||||
| 	.d_no_pc_increment    (d_no_pc_increment), | ||||
| 	.d_uninterruptible    (d_uninterruptible), | ||||
| 	.d_fence_i            (d_fence_i) | ||||
| ); | ||||
| 
 | ||||
|  | @ -322,6 +329,7 @@ reg  [1:0]           xm_addr_align; | |||
| reg  [W_MEMOP-1:0]   xm_memop; | ||||
| reg  [W_EXCEPT-1:0]  xm_except; | ||||
| reg                  xm_except_to_d_mode; | ||||
| reg                  xm_no_pc_increment; | ||||
| reg                  xm_sleep_wfi; | ||||
| reg                  xm_sleep_block; | ||||
| reg                  xm_delay_irq_entry_on_ls_stagex; | ||||
|  | @ -999,7 +1007,7 @@ hazard3_csr #( | |||
| 	.trap_enter_vld             (m_trap_enter_vld), | ||||
| 	.trap_enter_rdy             (m_trap_enter_rdy), | ||||
| 	.loadstore_dphase_pending   (m_dphase_in_flight), | ||||
| 	.delay_irq_entry            (m_delay_irq_entry), | ||||
| 	.delay_irq_entry            (m_delay_irq_entry || d_uninterruptible), | ||||
| 	.mepc_in                    (m_exception_return_addr), | ||||
| 
 | ||||
| 	.pwr_allow_clkgate          (m_pwr_allow_clkgate), | ||||
|  | @ -1044,11 +1052,18 @@ always @ (posedge clk or negedge rst_n) begin | |||
| 		xm_sleep_wfi <= 1'b0; | ||||
| 		xm_sleep_block <= 1'b0; | ||||
| 		unblock_out <= 1'b0; | ||||
| 		{xm_rs1, xm_rs2, xm_rd} <= {3 * W_REGADDR{1'b0}}; | ||||
| 		xm_rs1 <= {W_REGADDR{1'b0}}; | ||||
| 		xm_rs2 <= {W_REGADDR{1'b0}}; | ||||
| 		xm_rd <= {W_REGADDR{1'b0}}; | ||||
| 		xm_no_pc_increment <= 1'b0; | ||||
| 	end else begin | ||||
| 		unblock_out <= 1'b0; | ||||
| 		if (!m_stall) begin | ||||
| 			{xm_rs1, xm_rs2, xm_rd} <= {d_rs1, d_rs2, d_rd}; | ||||
| 			xm_rs1 <= d_rs1; | ||||
| 			xm_rs2 <= d_rs2; | ||||
| 			xm_rd <= d_rd; | ||||
| 			// PC increment is suppressed for non-final micro-ops. Micro-ops only exist | ||||
| 			// with Zcmp, so the flag is tied low when EXTENSION_ZCMP is disabled: | ||||
| 			xm_no_pc_increment <= d_no_pc_increment && |EXTENSION_ZCMP; | ||||
| 			// If some X-sourced exception has squashed the address phase, need to squash the data phase too. | ||||
| 			xm_memop            <= x_except != EXCEPT_NONE ? MEMOP_NONE : d_memop; | ||||
| 			xm_except           <= x_except; | ||||
|  | @ -1177,6 +1192,7 @@ assign m_stall = m_bus_stall || | |||
| // was *not* a taken branch, which is why we can just walk back the PC. | ||||
| assign m_exception_return_addr = d_pc - ( | ||||
| 	m_trap_is_irq         ? 32'h0 : | ||||
| 	xm_no_pc_increment    ? 32'h0 : | ||||
| 	prev_instr_was_32_bit ? 32'h4 : 32'h2 | ||||
| ); | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,5 +1,5 @@ | |||
| /*****************************************************************************\ | ||||
| |                      Copyright (C) 2021-2022 Luke Wren                      | | ||||
| |                      Copyright (C) 2021-2023 Luke Wren                      | | ||||
| |                     SPDX-License-Identifier: Apache-2.0                     | | ||||
| \*****************************************************************************/ | ||||
| 
 | ||||
|  | @ -19,6 +19,7 @@ module hazard3_decode #( | |||
| 	input  wire [1:0]           fd_cir_vld, | ||||
| 	output wire [1:0]           df_cir_use, | ||||
| 	output wire                 df_cir_flush_behind, | ||||
| 	output wire [3:0]           df_uop_step_next, | ||||
| 	output wire [W_ADDR-1:0]    d_pc, | ||||
| 
 | ||||
| 	input  wire                 debug_mode, | ||||
|  | @ -58,6 +59,8 @@ module hazard3_decode #( | |||
| 	output reg                  d_sleep_wfi, | ||||
| 	output reg                  d_sleep_block, | ||||
| 	output reg                  d_sleep_unblock, | ||||
| 	output wire                 d_no_pc_increment, | ||||
| 	output wire                 d_uninterruptible, | ||||
| 	output reg                  d_fence_i | ||||
| ); | ||||
| 
 | ||||
|  | @ -75,15 +78,35 @@ wire        d_invalid_16bit; | |||
| reg         d_invalid_32bit; | ||||
| wire        d_invalid = d_invalid_16bit || d_invalid_32bit; | ||||
| 
 | ||||
| wire        uop_nonfinal; | ||||
| wire        uop_uninterruptible; | ||||
| wire        uop_stall; | ||||
| wire        uop_clear; | ||||
| 
 | ||||
| hazard3_instr_decompress #( | ||||
| `include "hazard3_config_inst.vh" | ||||
| ) decomp ( | ||||
| 	.instr_in       (fd_cir), | ||||
| 	.instr_is_32bit (d_instr_is_32bit), | ||||
| 	.instr_out      (d_instr), | ||||
| 	.invalid        (d_invalid_16bit) | ||||
| 	.clk                           (clk), | ||||
| 	.rst_n                         (rst_n), | ||||
| 
 | ||||
| 	.instr_in                      (fd_cir), | ||||
| 	.instr_is_32bit                (d_instr_is_32bit), | ||||
| 	.instr_out                     (d_instr), | ||||
| 	.instr_out_uop_nonfinal        (uop_nonfinal), | ||||
| 	.instr_out_uop_atomic          (uop_uninterruptible), // port is named "atomic" in hazard3_instr_decompress | ||||
| 	.instr_out_uop_stall           (uop_stall), | ||||
| 	.instr_out_uop_clear           (uop_clear), | ||||
| 
 | ||||
| 	.df_uop_step_next              (df_uop_step_next), | ||||
| 
 | ||||
| 	.invalid                       (d_invalid_16bit) | ||||
| ); | ||||
| 
 | ||||
| assign d_uninterruptible = uop_uninterruptible && !d_invalid; | ||||
| assign d_no_pc_increment = uop_nonfinal && !d_invalid; | ||||
| assign uop_stall         = x_stall || d_starved; | ||||
| assign uop_clear         = f_jump_now; | ||||
| 
 | ||||
| // Decode various immediate formats | ||||
| wire [31:0] d_imm_i = {{21{d_instr[31]}}, d_instr[30:20]}; | ||||
| wire [31:0] d_imm_s = {{21{d_instr[31]}}, d_instr[30:25], d_instr[11:7]}; | ||||
|  | @ -102,7 +125,7 @@ wire d_except_instr_bus_fault = fd_cir_vld > 2'd0 && fd_cir_err[0] || | |||
| 	fd_cir_vld > 2'd1 && d_instr_is_32bit && fd_cir_err[1]; | ||||
| 
 | ||||
| assign d_starved = ~|fd_cir_vld || fd_cir_vld[0] && d_instr_is_32bit; | ||||
| wire d_stall = x_stall || d_starved; | ||||
| wire d_stall = x_stall || d_starved || uop_nonfinal; | ||||
| 
 | ||||
| assign df_cir_use = | ||||
| 	d_starved || d_stall ? 2'h0 : | ||||
|  | @ -133,7 +156,11 @@ always @ (posedge clk or negedge rst_n) begin | |||
| end | ||||
| 
 | ||||
| reg  [W_ADDR-1:0] pc; | ||||
| wire [W_ADDR-1:0] pc_seq_next = pc + (d_instr_is_32bit ? 32'h4 : 32'h2); | ||||
| wire [W_ADDR-1:0] pc_seq_next = pc + ( | ||||
| 	|EXTENSION_ZCMP && uop_nonfinal ? 32'h0 : | ||||
| 	d_instr_is_32bit                ? 32'h4 : 32'h2 | ||||
| ); | ||||
| 
 | ||||
| assign d_pc = pc; | ||||
| assign debug_dpc_rdata = pc; | ||||
| 
 | ||||
|  |  | |||
|  | @ -62,6 +62,8 @@ module hazard3_frontend #( | |||
| 	//  stalled instruction may assert a jump request, because e.g. the stall | ||||
| 	//  is dependent on a bus stall signal so can't gate the request. | ||||
| 	input  wire              cir_flush_behind, | ||||
| 	// Required for regnum predecode when Zcmp is enabled: | ||||
| 	input  wire [3:0]        df_uop_step_next, | ||||
| 
 | ||||
| 	// Signal to power controller that power down is safe. (When going to | ||||
| 	// sleep, first the pipeline is stalled, and then the power controller | ||||
|  | @ -545,21 +547,46 @@ assign cir_predbranch = cir_predbranch_reg[1:0]; | |||
| wire [31:0] next_instr = instr_data_plus_fetch[31:0]; | ||||
| wire next_instr_is_32bit = next_instr[1:0] == 2'b11 || ~|EXTENSION_C; | ||||
| 
 | ||||
| 
 | ||||
| wire [3:0] uop_ctr = df_uop_step_next & {4{|EXTENSION_ZCMP}}; | ||||
| 
 | ||||
| wire [4:0] zcmp_pushpop_rs2 = | ||||
| 	uop_ctr == 4'h0 ? 5'd01                   : // ra | ||||
| 	uop_ctr == 4'h1 ? 5'd08                   : // s0 | ||||
| 	uop_ctr == 4'h2 ? 5'd09                   : // s1 | ||||
| 	                  5'd15 + {1'b0, uop_ctr} ; // s2-s11 | ||||
| 
 | ||||
| wire [4:0] zcmp_pushpop_rs1 = | ||||
| 	uop_ctr <  4'hd ? 5'd02 :                   // sp   (addr base reg) | ||||
| 	uop_ctr == 4'hd ? 5'd00 :                   // zero (clear a0) | ||||
| 	uop_ctr == 4'he ? 5'd02 :                   // sp   (stack adj) | ||||
| 	                  5'd01 ;                   // ra   (ret) | ||||
| 
 | ||||
| // Zcmp 3-bit sreg fields select s0-s7: sreg 0-1 -> x8-x9, sreg 2-7 -> x18-x23, | ||||
| // i.e. xreg = {sreg[2:1] != 0, sreg[2:1] == 0, sreg[2:0]}. | ||||
| // r1s' is instruction bits 9:7 and r2s' is bits 4:2 (bits 1:0 are the opcode quadrant). | ||||
| wire [4:0] zcmp_sa01_r1s  = {|next_instr[9:8], ~|next_instr[9:8], next_instr[9:7]}; | ||||
| wire [4:0] zcmp_sa01_r2s  = {|next_instr[4:3], ~|next_instr[4:3], next_instr[4:2]}; | ||||
| 
 | ||||
| // cm.mvsa01 reads a0 (x10) on step 0 and a1 (x11) on step 1; | ||||
| // cm.mva01s reads r1s' on step 0 and r2s' on step 1. | ||||
| wire [4:0] zcmp_mvsa01_rs1 = {4'h5, uop_ctr[0]}; | ||||
| wire [4:0] zcmp_mva01s_rs1 = uop_ctr[0] ? zcmp_sa01_r2s : zcmp_sa01_r1s; | ||||
| 
 | ||||
| always @ (*) begin | ||||
| 
 | ||||
| 	casez ({next_instr_is_32bit, next_instr[1:0], next_instr[15:13]}) | ||||
| 	{1'b1, 2'bzz, 3'bzzz}: predecode_rs1_coarse = next_instr[19:15]; // 32-bit R, S, B formats | ||||
| 	{1'b0, 2'b00, 3'b00z}: predecode_rs1_coarse = 5'd2;              // c.addi4spn + don't care | ||||
| 	{1'b0, 2'b01, 3'b0zz}: predecode_rs1_coarse = next_instr[11:7];  // c.addi, c.addi16sp + don't care (jal, li) | ||||
| 	{1'b0, 2'b10, 3'bz1z}: predecode_rs1_coarse = 5'd2;              // c.lwsp, c.lwsp + don't care | ||||
| 	{1'b0, 2'b10, 3'bz0z}: predecode_rs1_coarse = next_instr[11:7]; | ||||
| 	default:               predecode_rs1_coarse = {2'b01, next_instr[9:7]}; | ||||
| 	casez ({next_instr_is_32bit, |EXTENSION_ZCMP, next_instr[15:0]}) | ||||
| 	{1'b1, 1'bz, 16'bzzzzzzzzzzzzzzzz}: predecode_rs1_coarse = next_instr[19:15]; // 32-bit R, S, B formats | ||||
| 	{1'b0, 1'bz, 16'b00zzzzzzzzzzzz00}: predecode_rs1_coarse = 5'd2;              // c.addi4spn + don't care | ||||
| 	{1'b0, 1'bz, 16'b0zzzzzzzzzzzzz01}: predecode_rs1_coarse = next_instr[11:7];  // c.addi, c.addi16sp + don't care (jal, li) | ||||
| 	{1'b0, 1'bz, 16'bz00zzzzzzzzzzz10}: predecode_rs1_coarse = next_instr[11:7];  // c.slli (000), c.jr/c.mv/c.add (100): full 5-bit reg in 11:7 | ||||
| 	{1'b0, 1'bz, 16'bz10zzzzzzzzzzz10}: predecode_rs1_coarse = 5'd2;              // c.lwsp, c.swsp | ||||
| 	{1'b0, 1'b1, 16'b1z11zzzzzzzzzz10}: predecode_rs1_coarse = zcmp_pushpop_rs1;  // cm.push, cm.pop* | ||||
| 	{1'b0, 1'b1, 16'b1z10zzzzz0zzzz10}: predecode_rs1_coarse = zcmp_mvsa01_rs1;   // cm.mvsa01 | ||||
| 	{1'b0, 1'b1, 16'b1z10zzzzz1zzzz10}: predecode_rs1_coarse = zcmp_mva01s_rs1;   // cm.mva01s | ||||
| 	default:                            predecode_rs1_coarse = {2'b01, next_instr[9:7]}; | ||||
| 	endcase | ||||
| 
 | ||||
| 	casez ({next_instr_is_32bit, next_instr[1:0]}) | ||||
| 	{1'b1, 2'bzz}: predecode_rs2_coarse = next_instr[24:20]; | ||||
| 	{1'b0, 2'b10}: predecode_rs2_coarse = next_instr[6:2]; | ||||
| 	default:       predecode_rs2_coarse = {2'b01, next_instr[4:2]}; | ||||
| 	casez ({next_instr_is_32bit, next_instr[1:0], next_instr[13]}) | ||||
| 	{1'b1, 2'bzz, 1'bz}: predecode_rs2_coarse = next_instr[24:20]; | ||||
| 	{1'b0, 2'b10, 1'b0}: predecode_rs2_coarse = next_instr[6:2];    // c.add, c.swsp | ||||
| 	{1'b0, 2'b10, 1'b1}: predecode_rs2_coarse = zcmp_pushpop_rs2;   // cm.push | ||||
| 	default:             predecode_rs2_coarse = {2'b01, next_instr[4:2]}; | ||||
| 	endcase | ||||
| 
 | ||||
| 	// The "fine" predecode targets those instructions which either: | ||||
|  |  | |||
|  | @ -1,17 +1,38 @@ | |||
| /*****************************************************************************\ | ||||
| |                      Copyright (C) 2021-2022 Luke Wren                      | | ||||
| |                      Copyright (C) 2021-2023 Luke Wren                      | | ||||
| |                     SPDX-License-Identifier: Apache-2.0                     | | ||||
| \*****************************************************************************/ | ||||
| 
 | ||||
| // Little instructions go in, big instructions come out | ||||
| 
 | ||||
| `default_nettype none | ||||
| 
 | ||||
| module hazard3_instr_decompress #( | ||||
| `include "hazard3_config.vh" | ||||
| ) ( | ||||
| 	input wire [31:0] instr_in, | ||||
| 	output reg instr_is_32bit, | ||||
| 	output reg [31:0] instr_out, | ||||
| 	output reg invalid | ||||
| 	input  wire        clk, | ||||
| 	input  wire        rst_n, | ||||
| 
 | ||||
| 	input  wire [31:0] instr_in, | ||||
| 	output reg         instr_is_32bit, | ||||
| 
 | ||||
| 	output reg  [31:0] instr_out, | ||||
| 	// Indicate instr_out is a uop, and more uops follow in this sequence. | ||||
| 	// Should suppress PC update, and null the PC offset in the mepc address | ||||
| 	// in stage 3. | ||||
| 	output wire        instr_out_uop_nonfinal, | ||||
| 	// Indicate instr_out is a uop from the noninterruptible part of a uop | ||||
| 	// sequence. If one uop is noninterruptible, all following uops until the | ||||
| 	// end of the sequence are also noninterruptible. | ||||
| 	output wire        instr_out_uop_atomic, | ||||
| 	// Current ucode sequence is stalled on downstream execution | ||||
| 	input  wire        instr_out_uop_stall, | ||||
| 	input  wire        instr_out_uop_clear, | ||||
| 
 | ||||
| 	// To regnum decoder in frontend | ||||
| 	output wire [3:0]  df_uop_step_next, | ||||
| 
 | ||||
| 	output reg         invalid | ||||
| ); | ||||
| 
 | ||||
| `include "rv_opcodes.vh" | ||||
|  | @ -79,6 +100,69 @@ function [31:0] rfmt_rd;  input [4:0] rd;  begin rfmt_rd  = {20'h00000, rd, 7'h0 | |||
| function [31:0] rfmt_rs1; input [4:0] rs1; begin rfmt_rs1 = {12'h000, rs1, 15'h0000}; end endfunction | ||||
| function [31:0] rfmt_rs2; input [4:0] rs2; begin rfmt_rs2 = {7'h00, rs2, 20'h00000};  end endfunction | ||||
| 
 | ||||
| // ---------------------------------------------------------------------------- | ||||
| // Push/pop and friends | ||||
| 
 | ||||
| // The longest uop sequence is a maximal cm.popretz: | ||||
| // | ||||
| // - 13x lw                     (counter = 0..12) | ||||
| // - 1x addi to set a0 to zero  (counter = 13   ) < atomic section | ||||
| // - 1x addi to adjust sp       (counter = 14   ) < atomic section | ||||
| // - 1x jalr to jump through ra (counter = 15   ) < atomic section | ||||
| 
 | ||||
| reg [3:0] uop_ctr; | ||||
| reg [3:0] uop_ctr_nxt; | ||||
| reg       in_uop_seq; | ||||
| reg       uop_seq_end; | ||||
| reg       uop_atomic; | ||||
| 
 | ||||
| assign instr_out_uop_nonfinal = in_uop_seq && !uop_seq_end; | ||||
| assign instr_out_uop_atomic = uop_atomic; | ||||
| assign df_uop_step_next = uop_ctr_nxt; | ||||
| 
 | ||||
| // The offset from current sp value to the lowest-addressed saved register, +64. | ||||
| wire [3:0] zcmp_rlist = instr_in[7:4]; | ||||
| wire [3:0] zcmp_n_regs = zcmp_rlist == 4'hf ? 4'hd : zcmp_rlist - 4'h3; | ||||
| 
 | ||||
| wire [6:0] zcmp_stack_adj_base = | ||||
| 	zcmp_rlist[3]   == 1'b0 ? 7'h10 : | ||||
| 	zcmp_rlist[3:2] == 2'h2 ? 7'h20 : | ||||
| 	zcmp_rlist[3:0] == 4'hf ? 7'h40 : 7'h30; | ||||
| 
 | ||||
| wire [11:0] zcmp_stack_lw_offset = {6'h00, uop_ctr, 2'h0}; | ||||
| wire [11:0] zcmp_stack_sw_offset = zcmp_stack_lw_offset - {5'h00, zcmp_stack_adj_base}; | ||||
| 
 | ||||
| wire [4:0] zcmp_ls_reg = | ||||
| 	uop_ctr == 4'h0 ? 5'd01 : // ra | ||||
| 	uop_ctr == 4'h1 ? 5'd08 : // s0 | ||||
| 	uop_ctr == 4'h2 ? 5'd09 : // s1 | ||||
| 	5'd15 + {1'b0, uop_ctr};  // s2-s11 (s2 == x18) | ||||
| 
 | ||||
| wire [31:0] zcmp_push_sw_instr = `RVOPC_NOZ_SW | rfmt_rs1(5'd2) | rfmt_rs2(zcmp_ls_reg) | { | ||||
| 	zcmp_stack_sw_offset[11:5], 13'h0000, zcmp_stack_sw_offset[4:0], 7'h00 | ||||
| }; | ||||
| 
 | ||||
| wire [31:0] zcmp_pop_lw_instr = `RVOPC_NOZ_LW | rfmt_rd(zcmp_ls_reg) | rfmt_rs1(5'd2)| { | ||||
| 	zcmp_stack_lw_offset[11:0], 20'h00000 | ||||
| }; | ||||
| 
 | ||||
| wire [11:0] zcmp_abs_stack_adj = {5'h00, zcmp_stack_adj_base} + {6'h00, instr_in[3:2], 4'h0}; | ||||
| 
 | ||||
| wire [31:0] zcmp_push_stack_adj_instr = `RVOPC_NOZ_ADDI | rfmt_rd(5'd2) | rfmt_rs1(5'd2) | { | ||||
| 	-zcmp_abs_stack_adj, | ||||
| 	20'h00000 | ||||
| }; | ||||
| 
 | ||||
| wire [31:0] zcmp_pop_stack_adj_instr = `RVOPC_NOZ_ADDI | rfmt_rd(5'd2) | rfmt_rs1(5'd2) | { | ||||
| 	zcmp_abs_stack_adj, | ||||
| 	20'h00000 | ||||
| }; | ||||
| 
 | ||||
| // Expand the 3-bit r1s' (bits 9:7) and r2s' (bits 4:2) fields of cm.mvsa01/cm.mva01s | ||||
| // to 5-bit register numbers: sreg 0-1 -> x8-x9 (s0-s1), sreg 2-7 -> x18-x23 (s2-s7). | ||||
| wire [4:0] zcmp_sa01_r1s = {|instr_in[9:8], ~|instr_in[9:8], instr_in[9:7]}; | ||||
| wire [4:0] zcmp_sa01_r2s = {|instr_in[4:3], ~|instr_in[4:3], instr_in[4:2]}; | ||||
| 
 | ||||
| // ---------------------------------------------------------------------------- | ||||
| 
 | ||||
| generate | ||||
| if (PASSTHROUGH) begin: instr_passthrough | ||||
| 	always @ (*) begin | ||||
|  | @ -92,10 +176,18 @@ end else begin: instr_decompress | |||
| 			instr_is_32bit = 1'b1; | ||||
| 			instr_out = instr_in; | ||||
| 			invalid = 1'b0; | ||||
| 			uop_seq_end = 1'b0; | ||||
| 			in_uop_seq = 1'b0; | ||||
| 			uop_atomic = 1'b0; | ||||
| 			uop_ctr_nxt = uop_ctr; | ||||
| 		end else begin | ||||
| 			instr_is_32bit = 1'b0; | ||||
| 			instr_out = 32'h0; | ||||
| 			invalid = 1'b0; | ||||
| 			uop_seq_end = 1'b0; | ||||
| 			in_uop_seq = 1'b0; | ||||
| 			uop_atomic = 1'b0; | ||||
| 			uop_ctr_nxt = uop_ctr; | ||||
| 			casez (instr_in[15:0]) | ||||
| 			16'h0:         invalid = 1'b1; | ||||
| 			`RVOPC_C_ADDI4SPN: instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(rd_s) | rfmt_rs1(5'h2) | ||||
|  | @ -199,13 +291,152 @@ end else begin: instr_decompress | |||
| 				invalid = ~|EXTENSION_ZCB || ~|EXTENSION_M; | ||||
| 			end | ||||
| 
 | ||||
| 			// Optional Zcmp instructions: | ||||
| 			`RVOPC_CM_PUSH: if (~|EXTENSION_ZCMP || zcmp_rlist < 4'h4) begin | ||||
| 				invalid = 1'b1; | ||||
| 			end else if (uop_ctr == 4'he) begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_seq_end = 1'b1; | ||||
| 				uop_ctr_nxt = 4'h0; | ||||
| 				instr_out = zcmp_push_stack_adj_instr; | ||||
| 			end else begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_ctr_nxt = uop_ctr + 4'h1; | ||||
| 				instr_out = zcmp_push_sw_instr; | ||||
| 				if (uop_ctr_nxt == zcmp_n_regs) begin | ||||
| 					uop_ctr_nxt = 4'he; | ||||
| 				end | ||||
| 			end | ||||
| 
 | ||||
| 			`RVOPC_CM_POP: if (~|EXTENSION_ZCMP || zcmp_rlist < 4'h4) begin | ||||
| 				invalid = 1'b1; | ||||
| 			end else if (uop_ctr == 4'he) begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_seq_end = 1'b1; | ||||
| 				uop_ctr_nxt = 4'h0; | ||||
| 				uop_atomic = 1'b1; | ||||
| 				instr_out = zcmp_pop_stack_adj_instr; | ||||
| 			end else begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_ctr_nxt = uop_ctr + 4'h1; | ||||
| 				instr_out = zcmp_pop_lw_instr; | ||||
| 				if (uop_ctr_nxt == zcmp_n_regs) begin | ||||
| 					uop_ctr_nxt = 4'he; | ||||
| 				end | ||||
| 			end | ||||
| 
 | ||||
| 			`RVOPC_CM_POPRET: if (~|EXTENSION_ZCMP || zcmp_rlist < 4'h4) begin | ||||
| 				invalid = 1'b1; | ||||
| 			end else if (uop_ctr == 4'he) begin | ||||
| 				// Note we don't set the uop_atomic flag on the first uop in | ||||
| 				// the uninterruptible sequence -- the rule is *if* one | ||||
| 				// executes, they all execute. Having none execute is fine. | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_ctr_nxt = uop_ctr + 4'h1; | ||||
| 				instr_out = zcmp_pop_stack_adj_instr; | ||||
| 			end else if (uop_ctr == 4'hf) begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_seq_end = 1'b1; | ||||
| 				uop_atomic = 1'b1; | ||||
| 				uop_ctr_nxt = 4'h0; | ||||
| 				instr_out = `RVOPC_NOZ_JALR | rfmt_rs1(5'h1); | ||||
| 			end else begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_ctr_nxt = uop_ctr + 4'h1; | ||||
| 				instr_out = zcmp_pop_lw_instr; | ||||
| 				if (uop_ctr_nxt == zcmp_n_regs) begin | ||||
| 					uop_ctr_nxt = 4'he; | ||||
| 				end | ||||
| 			end | ||||
| 
 | ||||
| 			// cm.popretz: lw sequence (ctr 0..n_regs-1), then li a0, 0 (ctr 13), | ||||
| 			// sp adjust (ctr 14) and jalr through ra (ctr 15). | ||||
| 			`RVOPC_CM_POPRETZ: if (~|EXTENSION_ZCMP || zcmp_rlist < 4'h4) begin | ||||
| 				invalid = 1'b1; | ||||
| 			end else if (uop_ctr == 4'hd) begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_ctr_nxt = uop_ctr + 4'h1; | ||||
| 				// a0 is x10 (x8 is s0, which has just been restored and must not be zeroed) | ||||
| 				instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(5'd10); // li a0, 0 | ||||
| 			end else if (uop_ctr == 4'he) begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_atomic = 1'b1; | ||||
| 				uop_ctr_nxt = uop_ctr + 4'h1; | ||||
| 				instr_out = zcmp_pop_stack_adj_instr; | ||||
| 			end else if (uop_ctr == 4'hf) begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_seq_end = 1'b1; | ||||
| 				uop_atomic = 1'b1; | ||||
| 				uop_ctr_nxt = 4'h0; | ||||
| 				instr_out = `RVOPC_NOZ_JALR | rfmt_rs1(5'h1); | ||||
| 			end else begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_ctr_nxt = uop_ctr + 4'h1; | ||||
| 				instr_out = zcmp_pop_lw_instr; | ||||
| 				// After the last load, skip ahead to the li a0, 0 step | ||||
| 				if (uop_ctr_nxt == zcmp_n_regs) begin | ||||
| 					uop_ctr_nxt = 4'hd; | ||||
| 				end | ||||
| 			end | ||||
| 
 | ||||
| 			`RVOPC_CM_MVSA01: if (~|EXTENSION_ZCMP) begin | ||||
| 				invalid = 1'b1; | ||||
| 			end else if (uop_ctr == 4'h0) begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_ctr_nxt = uop_ctr + 4'h1; | ||||
| 				instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(zcmp_sa01_r1s) | rfmt_rs1(5'd10); | ||||
| 			end else begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_seq_end = 1'b1; | ||||
| 				uop_atomic = 1'b1; | ||||
| 				uop_ctr_nxt = 4'h0; | ||||
| 				instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(zcmp_sa01_r2s) | rfmt_rs1(5'd11); | ||||
| 			end | ||||
| 
 | ||||
| 			`RVOPC_CM_MVA01S: if (~|EXTENSION_ZCMP) begin | ||||
| 				invalid = 1'b1; | ||||
| 			end else if (uop_ctr == 4'h0) begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_ctr_nxt = uop_ctr + 4'h1; | ||||
| 				instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(5'd10) | rfmt_rs1(zcmp_sa01_r1s); | ||||
| 			end else begin | ||||
| 				in_uop_seq = 1'b1; | ||||
| 				uop_seq_end = 1'b1; | ||||
| 				uop_atomic = 1'b1; | ||||
| 				uop_ctr_nxt = 4'h0; | ||||
| 				instr_out = `RVOPC_NOZ_ADDI | rfmt_rd(5'd11) | rfmt_rs1(zcmp_sa01_r2s); | ||||
| 			end | ||||
| 
 | ||||
| 			default: invalid = 1'b1; | ||||
| 			endcase | ||||
| 
 | ||||
| 			if (instr_out_uop_clear) begin | ||||
| 				uop_ctr_nxt = 4'h0; | ||||
| 			end else if (instr_out_uop_stall) begin | ||||
| 				uop_ctr_nxt = uop_ctr; | ||||
| 			end | ||||
| 		end | ||||
| 	end | ||||
| end | ||||
| endgenerate | ||||
| 
 | ||||
| // Micro-op step counter. Only a real register when Zcmp is configured; | ||||
| // otherwise it is tied to zero so no uop sequence can ever be in progress. | ||||
| generate | ||||
| if (EXTENSION_ZCMP) begin: have_uop_ctr | ||||
| 	always @ (posedge clk or negedge rst_n) begin | ||||
| 		if (!rst_n) begin | ||||
| 			uop_ctr <= 4'h0; | ||||
| 		end else begin | ||||
| 			uop_ctr <= uop_ctr_nxt; | ||||
| `ifdef HAZARD3_ASSERTIONS | ||||
| 			// A nonzero counter implies the current instruction is a uop sequence | ||||
| 			assert(uop_ctr == 4'h0 || in_uop_seq); | ||||
| 			if (uop_seq_end) begin | ||||
| 				assert(in_uop_seq); | ||||
| 				// The final uop returns the counter to 0 unless it is held by a stall | ||||
| 				assert(instr_out_uop_stall || uop_ctr_nxt == 4'h0); | ||||
| 			end | ||||
| `endif | ||||
| 		end | ||||
| 	end | ||||
| end else begin: no_uop_ctr | ||||
| 	always @ (*) uop_ctr = 4'h0; | ||||
| end | ||||
| endgenerate | ||||
| 
 | ||||
| endmodule | ||||
| 
 | ||||
| `ifndef YOSYS | ||||
|  |  | |||
|  | @ -194,6 +194,14 @@ localparam RV_RD_BITS = 5; | |||
| `define RVOPC_C_NOT       16'b100111???1110101 | ||||
| `define RVOPC_C_MUL       16'b100111???10???01 | ||||
| 
 | ||||
| // Zcmp push/pop instructions | ||||
| `define RVOPC_CM_PUSH     16'b10111000??????10 | ||||
| `define RVOPC_CM_POP      16'b10111010??????10 | ||||
| `define RVOPC_CM_POPRETZ  16'b10111100??????10 | ||||
| `define RVOPC_CM_POPRET   16'b10111110??????10 | ||||
| `define RVOPC_CM_MVSA01   16'b101011???01???10 | ||||
| `define RVOPC_CM_MVA01S   16'b101011???11???10 | ||||
| 
 | ||||
| // Copies provided here with 0 instead of ? so that these can be used to build 32-bit instructions in the decompressor | ||||
| 
 | ||||
| `define RVOPC_NOZ_BEQ     32'b00000000000000000000000001100011 | ||||
|  |  | |||
|  | @ -8,7 +8,7 @@ endif | |||
| 
 | ||||
| CCFLAGS      ?= | ||||
| LDSCRIPT     ?= ../common/memmap.ld | ||||
| CROSS_PREFIX ?= riscv32-unknown-elf- | ||||
| CROSS_PREFIX ?= /opt/riscv/gcc-riscv32-corev/bin/riscv32-corev-elf- | ||||
| TBDIR        ?= ../tb_cxxrtl | ||||
| INCDIR       ?= ../common | ||||
| MAX_CYCLES   ?= 100000 | ||||
|  |  | |||
|  | @ -1,7 +1,7 @@ | |||
| APP          := coremark | ||||
| MAX_CYCLES   := 100000000 | ||||
| 
 | ||||
| CROSS_PREFIX ?= riscv32-unknown-elf- | ||||
| CROSS_PREFIX ?= /opt/riscv/gcc-riscv32-corev/bin/riscv32-corev-elf- | ||||
| TBDIR        ?= ../tb_cxxrtl | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -19,23 +19,27 @@ | |||
| #	Use this flag to define how to to get an executable (e.g -o)
 | ||||
| OUTFLAG= -o | ||||
| 
 | ||||
| CC 		= /opt/riscv/unstable/bin/riscv32-unknown-elf-gcc | ||||
| LD		= /opt/riscv/unstable/bin/riscv32-unknown-elf-gcc | ||||
| AS		= /opt/riscv/unstable/bin/riscv32-unknown-elf-gcc | ||||
| # Flag : CFLAGS
 | ||||
| #	Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
 | ||||
| PORT_CFLAGS = -O3 -g -march=rv32im_zicsr_zba_zbb_zbc_zbs -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4 | ||||
| MARCH        = rv32im_zicsr_zba_zbb_zbc_zbs_zca_zcb_zcmp | ||||
| CROSS_PREFIX = /opt/riscv/gcc-riscv32-corev/bin/riscv32-corev-elf- | ||||
| 
 | ||||
| CC           =  $(CROSS_PREFIX)gcc | ||||
| LD           =  $(CROSS_PREFIX)gcc | ||||
| AS           =  $(CROSS_PREFIX)gcc | ||||
| 
 | ||||
| PORT_CFLAGS = -O3 -g -march=$(MARCH) -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4 | ||||
| FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" | ||||
| CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"  | ||||
| 
 | ||||
| #Flag : LFLAGS_END
 | ||||
| #	Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts). 
 | ||||
| #	Note : On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
 | ||||
| 
 | ||||
| SEPARATE_COMPILE=1 | ||||
| # Flag : SEPARATE_COMPILE
 | ||||
| # You must also define below how to create an object file, and how to link.
 | ||||
| 
 | ||||
| OBJOUT 	= -o | ||||
| LFLAGS 	= -T ../../common/memmap.ld | ||||
| ASFLAGS = -c -march=rv32im_zicsr | ||||
| LFLAGS 	= -T ../../common/memmap.ld -Wl,--noinhibit-exec | ||||
| ASFLAGS = -c -march=$(MARCH) | ||||
| OFLAG 	= -o | ||||
| COUT 	= -c | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,6 +1,6 @@ | |||
| SRCS := ../common/init.S main.c | ||||
| APP  := hellow | ||||
| CCFLAGS = -march=rv32i_zicsr_zba_zbb_zbc_zbs -Os | ||||
| AFLAGS = -march=rv32i_zicsr_zba_zbb_zbc_zbs | ||||
| CCFLAGS = -march=rv32ima_zicsr_zba_zbb_zbc_zbs_zca_zcb_zcmp -Os | ||||
| AFLAGS = -march=rv32ima_zicsr_zba_zbb_zbc_zbs_zca_zcb_zcmp | ||||
| 
 | ||||
| include ../common/src_only_app.mk | ||||
|  |  | |||
|  | @ -2,5 +2,9 @@ | |||
| 
 | ||||
| int main() { | ||||
| 	tb_puts("Hello world from Hazard3 + CXXRTL!\n"); | ||||
| 	asm volatile( | ||||
| 		"cm.push {ra, s0-s2}, -16\n" | ||||
| 		"cm.pop  {ra, s0-s2}, +16\n" | ||||
| 	); | ||||
| 	return 123; | ||||
| } | ||||
|  |  | |||
|  | @ -1,6 +1,6 @@ | |||
| APP        := hellow | ||||
| SRCS        = ../common/init.S $(APP).c $(EXTRA_SRCS_$(APP)) | ||||
| CCFLAGS    := -march=rv32imac_zicsr -Os | ||||
| CCFLAGS    := -march=rv32imac_zicsr_zba_zbb_zbs_zcb -Os | ||||
| MAX_CYCLES := 1000000 | ||||
| INCDIR     := include ../common | ||||
| 
 | ||||
|  |  | |||
|  | @ -8,7 +8,7 @@ | |||
| // - h3_bextmi: immediate version of the above (as bexti is to bext)
 | ||||
| 
 | ||||
| // The instruction is just supposed to take a single static size...
 | ||||
| __attribute__((noinline)) uint32_t bextm_dynamic_width(uint nbits, uint32_t rs1, uint32_t rs2) { | ||||
| __attribute__((noinline)) uint32_t bextm_dynamic_width(unsigned int nbits, uint32_t rs1, uint32_t rs2) { | ||||
| 	switch (nbits) { | ||||
| 	case 1: | ||||
| 		return __hazard3_bextm(1, rs1, rs2); | ||||
|  |  | |||
|  | @ -11,6 +11,7 @@ localparam EXTENSION_ZBC       = 1; | |||
| localparam EXTENSION_ZBS       = 1; | ||||
| localparam EXTENSION_ZBKB      = 1; | ||||
| localparam EXTENSION_ZCB       = 1; | ||||
| localparam EXTENSION_ZCMP      = 1; | ||||
| localparam EXTENSION_ZIFENCEI  = 1; | ||||
| localparam EXTENSION_XH3BEXTM  = 1; | ||||
| localparam EXTENSION_XH3IRQ    = 1; | ||||
|  |  | |||
|  | @ -11,6 +11,7 @@ localparam EXTENSION_ZBC       = 0; | |||
| localparam EXTENSION_ZBS       = 0; | ||||
| localparam EXTENSION_ZBKB      = 0; | ||||
| localparam EXTENSION_ZCB       = 0; | ||||
| localparam EXTENSION_ZCMP      = 0; | ||||
| localparam EXTENSION_ZIFENCEI  = 0; | ||||
| localparam EXTENSION_XH3BEXTM  = 0; | ||||
| localparam EXTENSION_XH3IRQ    = 0; | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue