From 28b53ef7b53b8d9dffa8c826c59dad453a218a25 Mon Sep 17 00:00:00 2001 From: Luke Wren Date: Sat, 18 Dec 2021 00:35:13 +0000 Subject: [PATCH] Delete the AMO ALU. Save around 80 LCs vs original implementation, maybe enables some more savings. --- example_soc/fpga/fpga_icebreaker.v | 2 +- hdl/arith/hazard3_alu.v | 82 +++++++++++---------- hdl/arith/hazard3_amo_alu.v | 49 ------------- hdl/hazard3.f | 1 - hdl/hazard3_core.v | 102 ++++++++++---------------- hdl/hazard3_decode.v | 28 +++---- hdl/hazard3_ops.vh | 35 ++++----- test/sim/riscv-compliance/run_32i.sh | 2 + test/sim/riscv-compliance/run_32ic.sh | 2 + test/sim/riscv-compliance/run_32im.sh | 2 + test/sim/riscv-compliance/run_all.sh | 2 + 11 files changed, 118 insertions(+), 189 deletions(-) delete mode 100644 hdl/arith/hazard3_amo_alu.v diff --git a/example_soc/fpga/fpga_icebreaker.v b/example_soc/fpga/fpga_icebreaker.v index a31d9bc..12b7da7 100644 --- a/example_soc/fpga/fpga_icebreaker.v +++ b/example_soc/fpga/fpga_icebreaker.v @@ -62,7 +62,7 @@ activity_led #( ); example_soc #( - .EXTENSION_A (0), + .EXTENSION_A (1), .EXTENSION_C (0), .EXTENSION_M (1), .MUL_FAST (0), diff --git a/hdl/arith/hazard3_alu.v b/hdl/arith/hazard3_alu.v index a59f586..3dc2c3b 100644 --- a/hdl/arith/hazard3_alu.v +++ b/hdl/arith/hazard3_alu.v @@ -34,7 +34,7 @@ wire sub = !(aluop == ALUOP_ADD || (|EXTENSION_ZBA && ( ))); wire inv_op_b = sub && !( - aluop == ALUOP_AND || aluop == ALUOP_OR || aluop == ALUOP_XOR + aluop == ALUOP_AND || aluop == ALUOP_OR || aluop == ALUOP_XOR || aluop == ALUOP_RS2 ); wire [W_DATA-1:0] op_a_shifted = @@ -133,54 +133,60 @@ reg [W_DATA-1:0] bitwise; always @ (*) begin: bitwise_ops case (aluop[1:0]) ALUOP_AND[1:0]: bitwise = op_a & op_b_inv; - ALUOP_OR[1:0]: bitwise = op_a | op_b_inv; - default: bitwise = op_a ^ op_b_inv; + ALUOP_OR [1:0]: bitwise = op_a | op_b_inv; + ALUOP_XOR[1:0]: bitwise = op_a ^ op_b_inv; + ALUOP_RS2[1:0]: bitwise = op_b_inv; endcase end wire [W_DATA-1:0] zbs_mask = {{W_DATA-1{1'b0}}, 1'b1} << op_b[W_SHAMT-1:0]; always @ (*) begin - casez ({|EXTENSION_ZBA, |EXTENSION_ZBB, |EXTENSION_ZBC, |EXTENSION_ZBS, aluop}) + casez ({|EXTENSION_A, |EXTENSION_ZBA, |EXTENSION_ZBB, |EXTENSION_ZBC, |EXTENSION_ZBS, aluop}) // Base ISA - {4'bzzzz, ALUOP_ADD }: result = sum; - {4'bzzzz, ALUOP_SUB }: result = sum; - {4'bzzzz, ALUOP_LT }: result = {{W_DATA-1{1'b0}}, lt}; - {4'bzzzz, ALUOP_LTU }: result = {{W_DATA-1{1'b0}}, lt}; - {4'bzzzz, ALUOP_SRL }: result = shift_dout; - {4'bzzzz, ALUOP_SRA }: result = shift_dout; - {4'bzzzz, ALUOP_SLL }: result = shift_dout; + {5'bzzzzz, ALUOP_ADD }: result = sum; + {5'bzzzzz, ALUOP_SUB }: result = sum; + {5'bzzzzz, ALUOP_LT }: result = {{W_DATA-1{1'b0}}, lt}; + {5'bzzzzz, ALUOP_LTU }: result = {{W_DATA-1{1'b0}}, lt}; + {5'bzzzzz, ALUOP_SRL }: result = shift_dout; + {5'bzzzzz, ALUOP_SRA }: result = shift_dout; + {5'bzzzzz, ALUOP_SLL }: result = shift_dout; + // A (duplicates of Zbb) + {5'b1zzzz, ALUOP_MAX }: result = lt ? op_b : op_a; + {5'b1zzzz, ALUOP_MAXU }: result = lt ? op_b : op_a; + {5'b1zzzz, ALUOP_MIN }: result = lt ? op_a : op_b; + {5'b1zzzz, ALUOP_MINU }: result = lt ? op_a : op_b; // Zba - {4'b1zzz, ALUOP_SH1ADD }: result = sum; - {4'b1zzz, ALUOP_SH2ADD }: result = sum; - {4'b1zzz, ALUOP_SH3ADD }: result = sum; + {5'bz1zzz, ALUOP_SH1ADD }: result = sum; + {5'bz1zzz, ALUOP_SH2ADD }: result = sum; + {5'bz1zzz, ALUOP_SH3ADD }: result = sum; // Zbb - {4'bz1zz, ALUOP_ANDN }: result = bitwise; - {4'bz1zz, ALUOP_ORN }: result = bitwise; - {4'bz1zz, ALUOP_XNOR }: result = bitwise; - {4'bz1zz, ALUOP_CLZ }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz}; - {4'bz1zz, ALUOP_CTZ }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz}; - {4'bz1zz, ALUOP_CPOP }: result = {{W_DATA-W_SHAMT-1{1'b0}}, cpop}; - {4'bz1zz, ALUOP_MAX }: result = lt ? op_b : op_a; - {4'bz1zz, ALUOP_MAXU }: result = lt ? op_b : op_a; - {4'bz1zz, ALUOP_MIN }: result = lt ? op_a : op_b; - {4'bz1zz, ALUOP_MINU }: result = lt ? op_a : op_b; - {4'bz1zz, ALUOP_SEXT_B }: result = {{W_DATA-8{op_a[7]}}, op_a[7:0]}; - {4'bz1zz, ALUOP_SEXT_H }: result = {{W_DATA-16{op_a[15]}}, op_a[15:0]}; - {4'bz1zz, ALUOP_ZEXT_H }: result = {{W_DATA-16{1'b0}}, op_a[15:0]}; - {4'bz1zz, ALUOP_ORC_B }: result = {{8{|op_a[31:24]}}, {8{|op_a[23:16]}}, {8{|op_a[15:8]}}, {8{|op_a[7:0]}}}; - {4'bz1zz, ALUOP_REV8 }: result = {op_a[7:0], op_a[15:8], op_a[23:16], op_a[31:24]}; - {4'bz1zz, ALUOP_ROL }: result = shift_dout; - {4'bz1zz, ALUOP_ROR }: result = shift_dout; + {5'bzz1zz, ALUOP_ANDN }: result = bitwise; + {5'bzz1zz, ALUOP_ORN }: result = bitwise; + {5'bzz1zz, ALUOP_XNOR }: result = bitwise; + {5'bzz1zz, ALUOP_CLZ }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz}; + {5'bzz1zz, ALUOP_CTZ }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz}; + {5'bzz1zz, ALUOP_CPOP }: result = {{W_DATA-W_SHAMT-1{1'b0}}, cpop}; + {5'bzz1zz, ALUOP_MAX }: result = lt ? op_b : op_a; + {5'bzz1zz, ALUOP_MAXU }: result = lt ? op_b : op_a; + {5'bzz1zz, ALUOP_MIN }: result = lt ? op_a : op_b; + {5'bzz1zz, ALUOP_MINU }: result = lt ? op_a : op_b; + {5'bzz1zz, ALUOP_SEXT_B }: result = {{W_DATA-8{op_a[7]}}, op_a[7:0]}; + {5'bzz1zz, ALUOP_SEXT_H }: result = {{W_DATA-16{op_a[15]}}, op_a[15:0]}; + {5'bzz1zz, ALUOP_ZEXT_H }: result = {{W_DATA-16{1'b0}}, op_a[15:0]}; + {5'bzz1zz, ALUOP_ORC_B }: result = {{8{|op_a[31:24]}}, {8{|op_a[23:16]}}, {8{|op_a[15:8]}}, {8{|op_a[7:0]}}}; + {5'bzz1zz, ALUOP_REV8 }: result = {op_a[7:0], op_a[15:8], op_a[23:16], op_a[31:24]}; + {5'bzz1zz, ALUOP_ROL }: result = shift_dout; + {5'bzz1zz, ALUOP_ROR }: result = shift_dout; // Zbc - {4'bzz1z, ALUOP_CLMUL }: result = clmul[W_DATA-1:0]; - {4'bzz1z, ALUOP_CLMULH }: result = clmul[2*W_DATA-1:W_DATA]; - {4'bzz1z, ALUOP_CLMULR }: result = clmul[2*W_DATA-2:W_DATA-1]; + {5'bzzz1z, ALUOP_CLMUL }: result = clmul[W_DATA-1:0]; + {5'bzzz1z, ALUOP_CLMULH }: result = clmul[2*W_DATA-1:W_DATA]; + {5'bzzz1z, ALUOP_CLMULR }: result = clmul[2*W_DATA-2:W_DATA-1]; // Zbs - {4'bzzz1, ALUOP_BCLR }: result = op_a & ~zbs_mask; - {4'bzzz1, ALUOP_BSET }: result = op_a | zbs_mask; - {4'bzzz1, ALUOP_BINV }: result = op_a ^ zbs_mask; - {4'bzzz1, ALUOP_BEXT }: result = {{W_DATA-1{1'b0}}, shift_dout[0]}; + {5'bzzzz1, ALUOP_BCLR }: result = op_a & ~zbs_mask; + {5'bzzzz1, ALUOP_BSET }: result = op_a | zbs_mask; + {5'bzzzz1, ALUOP_BINV }: result = op_a ^ zbs_mask; + {5'bzzzz1, ALUOP_BEXT }: result = {{W_DATA-1{1'b0}}, shift_dout[0]}; default: result = bitwise; endcase diff --git a/hdl/arith/hazard3_amo_alu.v b/hdl/arith/hazard3_amo_alu.v deleted file mode 100644 index 74925d2..0000000 --- a/hdl/arith/hazard3_amo_alu.v +++ /dev/null @@ -1,49 +0,0 @@ -/*****************************************************************************\ -| Copyright (C) 2021 Luke Wren | -| SPDX-License-Identifier: Apache-2.0 | -\*****************************************************************************/ - -// Separate ALU for atomic memory operations - -`default_nettype none -module hazard3_amo_alu #( -`include "hazard3_config.vh" -, -`include "hazard3_width_const.vh" -) ( - input wire [W_MEMOP-1:0] op, - input wire [W_DATA-1:0] op_rs1, // From load - input wire [W_DATA-1:0] op_rs2, // From core - output reg [W_DATA-1:0] result -); - -`include "hazard3_ops.vh" - -wire sub = op != MEMOP_AMOADD_W; -wire cmp_unsigned = op == MEMOP_AMOMINU_W || op == MEMOP_AMOMAXU_W; - -wire [W_DATA-1:0] sum = op_rs1 + (op_rs2 ^ {W_DATA{sub}}) + sub; - -wire rs1_lessthan_rs2 = - op_rs1[W_DATA-1] == op_rs2[W_DATA-1] ? sum[W_DATA-1] : - cmp_unsigned ? op_rs2[W_DATA-1] : - op_rs1[W_DATA-1] ; - -always @ (*) begin - case(op) - MEMOP_AMOADD_W: result = sum; - MEMOP_AMOXOR_W: result = op_rs1 ^ op_rs2; - MEMOP_AMOAND_W: result = op_rs1 & op_rs2; - MEMOP_AMOOR_W: result = op_rs1 | op_rs2; - MEMOP_AMOMIN_W: result = rs1_lessthan_rs2 ? op_rs1 : op_rs2; - MEMOP_AMOMAX_W: result = rs1_lessthan_rs2 ? op_rs2 : op_rs1; - MEMOP_AMOMINU_W: result = rs1_lessthan_rs2 ? op_rs1 : op_rs2; - MEMOP_AMOMAXU_W: result = rs1_lessthan_rs2 ? op_rs2 : op_rs1; - // AMOSWAP - default: result = op_rs2; - endcase -end - -endmodule - -`default_nettype wire diff --git a/hdl/hazard3.f b/hdl/hazard3.f index d87bf2f..ca6051f 100644 --- a/hdl/hazard3.f +++ b/hdl/hazard3.f @@ -2,7 +2,6 @@ file hazard3_core.v file hazard3_cpu_1port.v file hazard3_cpu_2port.v file arith/hazard3_alu.v -file arith/hazard3_amo_alu.v file arith/hazard3_muldiv_seq.v file arith/hazard3_mul_fast.v file arith/hazard3_priority_encode.v diff --git a/hdl/hazard3_core.v b/hdl/hazard3_core.v index b65d798..1b072ad 100644 --- a/hdl/hazard3_core.v +++ b/hdl/hazard3_core.v @@ -255,13 +255,15 @@ reg [W_REGADDR-1:0] xm_rs1; reg [W_REGADDR-1:0] xm_rs2; reg [W_REGADDR-1:0] xm_rd; reg [W_DATA-1:0] xm_result; -reg [W_DATA-1:0] xm_store_data; reg [1:0] xm_addr_align; reg [W_MEMOP-1:0] xm_memop; reg [W_EXCEPT-1:0] xm_except; reg xm_wfi; reg xm_delay_irq_entry; +// Registered load data, routed back through ALU. AMOs were a mistake +reg [W_DATA-1:0] mx_amo_load_data; + // ---------------------------------------------------------------------------- // Stall logic @@ -275,9 +277,7 @@ wire x_stall_on_trap = m_trap_enter_vld && !m_trap_enter_rdy || // sequences). Note we don't check for AMOs in stage M, because AMOs fully // fence off on their own completion before passing down the pipe. -wire d_memop_is_amo = |EXTENSION_A && ( - d_memop >= MEMOP_AMOSWAP_W && d_memop <= MEMOP_AMOMAXU_W -); +wire d_memop_is_amo = |EXTENSION_A && d_memop == MEMOP_AMO; wire x_stall_on_exclusive_overlap = |EXTENSION_A && ( (d_memop_is_amo || d_memop == MEMOP_SC_W || d_memop == MEMOP_LR_W) && @@ -367,7 +367,9 @@ always @ (*) begin x_rs2_bypass = x_rdata2; end - if (|d_alusrc_a) + if (|EXTENSION_A && x_amo_phase == 3'h2) + x_op_a = mx_amo_load_data; + else if (|d_alusrc_a) x_op_a = d_pc; else x_op_a = x_rs1_bypass; @@ -459,12 +461,12 @@ always @ (posedge clk) if (rst_n) begin assert(x_amo_phase == 3'h0); // Error phase should never block, so it can always pass to stage 3 to raise // excepting trap entry. - if (amo_phase == 3'h4) + if (x_amo_phase == 3'h4) assert(!x_stall); // Error phase is either due to a bus response, or a misaligned address. // Neither of these are write-address-phase. - if (amo_phase == 3'h4) - assert($past(amo_phase) != 3'h2); + if (x_amo_phase == 3'h4) + assert($past(x_amo_phase) != 3'h2); end `endif @@ -738,7 +740,7 @@ always @ (posedge clk or negedge rst_n) begin if (!m_stall) begin {xm_rs1, xm_rs2, xm_rd} <= {d_rs1, d_rs2, d_rd}; // If the transfer is unaligned, make sure it is completely NOP'd on the bus - // Likewise, AMOs are handled entirely in X (well it's ambiguous; anyway different logic & stalls) + // Likewise, AMO memop logic is entirely in X, we squash the memop as it passes to M. xm_memop <= x_unaligned_addr || d_memop_is_amo ? MEMOP_NONE : d_memop; xm_except <= x_except; xm_wfi <= d_wfi; @@ -764,25 +766,18 @@ always @ (posedge clk or negedge rst_n) begin end end -reg [W_DATA-1:0] amo_load_data; - // Datapath flops always @ (posedge clk or negedge rst_n) begin if (!rst_n) begin xm_result <= {W_DATA{1'b0}}; - xm_store_data <= {W_DATA{1'b0}}; xm_addr_align <= 2'b00; - end else if (!m_stall) begin + end else if (!m_stall || (d_memop_is_amo && x_amo_phase == 3'h2 && bus_dph_ready_d)) begin xm_result <= d_csr_ren ? x_csr_rdata : - |EXTENSION_A && d_memop_is_amo ? amo_load_data : + |EXTENSION_A && x_amo_phase == 3'h3 ? mx_amo_load_data : |EXTENSION_M && d_aluop == ALUOP_MULDIV ? x_muldiv_result : x_alu_result; - xm_store_data <= x_rs2_bypass; xm_addr_align <= x_addr_sum[1:0]; - - end else if (d_memop_is_amo && x_amo_phase == 3'h1 && bus_dph_ready_d) begin - xm_store_data <= x_rs2_bypass; end end @@ -833,56 +828,12 @@ assign m_exception_return_addr = d_pc - ( // Load/store data handling -wire [W_DATA-1:0] m_amo_wdata; -wire m_amo_wdata_valid; - -generate -if (EXTENSION_A) begin: has_amo_alu - - reg [W_MEMOP-1:0] amo_memop; - reg m_amo_wdata_valid_r; - - assign m_amo_wdata_valid = m_amo_wdata_valid_r; - - always @ (posedge clk or negedge rst_n) begin - if (!rst_n) begin - amo_memop <= MEMOP_NONE; - amo_load_data <= {W_DATA{1'b0}}; - m_amo_wdata_valid_r <= 1'b0; - end else if (x_amo_phase == 3'h4 || (x_amo_phase == 3'h3 && bus_dph_ready_d) || m_trap_enter_soon) begin - // Higher precedence to make sure trap always clears the valid bit - m_amo_wdata_valid_r <= 1'b0; - end else if (d_memop_is_amo && x_amo_phase == 3'h1 && bus_dph_ready_d) begin - amo_memop <= d_memop; - amo_load_data <= bus_rdata_d; - m_amo_wdata_valid_r <= 1'b1; - end - end - - hazard3_amo_alu #( - `include "hazard3_config_inst.vh" - ) amo_alu ( - .op (amo_memop), - .op_rs1(amo_load_data), - .op_rs2(xm_store_data), - .result(m_amo_wdata) - ); - -end else begin: no_amo_alu - - assign m_amo_wdata = {W_DATA{1'b0}}; - assign m_amo_wdata_valid = 1'b0; - always @ (*) amo_load_data = {W_DATA{1'b0}}; - -end -endgenerate - always @ (*) begin // Local forwarding of store data if (|mw_rd && xm_rs2 == mw_rd && !REDUCED_BYPASS) begin m_wdata = mw_result; end else begin - m_wdata = xm_store_data; + m_wdata = xm_result; end // Replicate store data to ensure appropriate byte lane is driven case (xm_memop) @@ -890,8 +841,6 @@ always @ (*) begin MEMOP_SB: bus_wdata_d = {4{m_wdata[7:0]}}; default: bus_wdata_d = m_wdata; endcase - if (|EXTENSION_A && m_amo_wdata_valid) - bus_wdata_d = m_amo_wdata; casez ({xm_memop, xm_addr_align[1:0]}) {MEMOP_LH , 2'b0z}: m_rdata_pick_sext = {{16{bus_rdata_d[15]}}, bus_rdata_d[15: 0]}; @@ -923,6 +872,29 @@ always @ (*) begin end end +// Capture load data in read data phase of AMO. Passes back to stage X for AMO +// calculation during AMO write address phase, using the regular ALU. Then +// registered into xm_result like a regular store, to be driven out onto +// hwdata during AMO write data phase. + +generate +if (EXTENSION_A) begin: has_amo_load_reg + + always @ (posedge clk or negedge rst_n) begin + if (!rst_n) begin + mx_amo_load_data <= {W_DATA{1'b0}}; + end else if (d_memop_is_amo && x_amo_phase == 3'h1 && bus_dph_ready_d) begin + mx_amo_load_data <= bus_rdata_d; + end + end + +end else begin: no_amo_load_reg + + always @ (*) mx_amo_load_data = {W_DATA{1'b0}}; + +end +endgenerate + // Local monitor update. // - Set on a load-reserved with good response from global monitor // - Cleared by any store-conditional diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v index c64a83b..0f69b8f 100644 --- a/hdl/hazard3_decode.v +++ b/hdl/hazard3_decode.v @@ -221,9 +221,9 @@ always @ (*) begin RV_LW: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LW; end RV_LBU: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LBU; end RV_LHU: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LHU; end - RV_SB: begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SB; d_rd = X0; end - RV_SH: begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SH; d_rd = X0; end - RV_SW: begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SW; d_rd = X0; end + RV_SB: begin d_addr_is_regoffs = 1'b1; d_aluop = ALUOP_RS2; d_memop = MEMOP_SB; d_rd = X0; end + RV_SH: begin d_addr_is_regoffs = 1'b1; d_aluop = ALUOP_RS2; d_memop = MEMOP_SH; d_rd = X0; end + RV_SW: begin d_addr_is_regoffs = 1'b1; d_aluop = ALUOP_RS2; d_memop = MEMOP_SW; d_rd = X0; end RV_MUL: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MUL; end else begin d_invalid_32bit = 1'b1; end RV_MULH: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULH; end else begin d_invalid_32bit = 1'b1; end @@ -234,17 +234,17 @@ always @ (*) begin RV_REM: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REM; end else begin d_invalid_32bit = 1'b1; end RV_REMU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REMU; end else begin d_invalid_32bit = 1'b1; end - RV_LR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_LR_W; d_rs2 = X0; end else begin d_invalid_32bit = 1'b1; end - RV_SC_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SC_W; end else begin d_invalid_32bit = 1'b1; end - RV_AMOSWAP_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOSWAP_W; end else begin d_invalid_32bit = 1'b1; end - RV_AMOADD_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOADD_W; end else begin d_invalid_32bit = 1'b1; end - RV_AMOXOR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOXOR_W; end else begin d_invalid_32bit = 1'b1; end - RV_AMOAND_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOAND_W; end else begin d_invalid_32bit = 1'b1; end - RV_AMOOR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOOR_W; end else begin d_invalid_32bit = 1'b1; end - RV_AMOMIN_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOMIN_W; end else begin d_invalid_32bit = 1'b1; end - RV_AMOMAX_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOMAX_W; end else begin d_invalid_32bit = 1'b1; end - RV_AMOMINU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOMINU_W; end else begin d_invalid_32bit = 1'b1; end - RV_AMOMAXU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOMAXU_W; end else begin d_invalid_32bit = 1'b1; end + RV_LR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_LR_W; d_rs2 = X0; end else begin d_invalid_32bit = 1'b1; end + RV_SC_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SC_W; d_aluop = ALUOP_RS2; end else begin d_invalid_32bit = 1'b1; end + RV_AMOSWAP_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_RS2; end else begin d_invalid_32bit = 1'b1; end + RV_AMOADD_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_ADD; end else begin d_invalid_32bit = 1'b1; end + RV_AMOXOR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_XOR; end else begin d_invalid_32bit = 1'b1; end + RV_AMOAND_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_AND; end else begin d_invalid_32bit = 1'b1; end + RV_AMOOR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_OR; end else begin d_invalid_32bit = 1'b1; end + RV_AMOMIN_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_MIN; end else begin d_invalid_32bit = 1'b1; end + RV_AMOMAX_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_MAX; end else begin d_invalid_32bit = 1'b1; end + RV_AMOMINU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_MINU; end else begin d_invalid_32bit = 1'b1; end + RV_AMOMAXU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_MAXU; end else begin d_invalid_32bit = 1'b1; end RV_SH1ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH1ADD; end else begin d_invalid_32bit = 1'b1; end RV_SH2ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH2ADD; end else begin d_invalid_32bit = 1'b1; end diff --git a/hdl/hazard3_ops.vh b/hdl/hazard3_ops.vh index 5248cc3..8b34816 100644 --- a/hdl/hazard3_ops.vh +++ b/hdl/hazard3_ops.vh @@ -16,7 +16,8 @@ localparam ALUOP_SRL = 6'h09; localparam ALUOP_SRA = 6'h0a; localparam ALUOP_SLL = 6'h0b; localparam ALUOP_MULDIV = 6'h0c; -// Bitmanip ALU operations: +localparam ALUOP_RS2 = 6'h0d; // differs from AND/OR/XOR in [1:0] +// Bitmanip ALU operations (some also used by AMOs): localparam ALUOP_SH1ADD = 6'h20; localparam ALUOP_SH2ADD = 6'h21; localparam ALUOP_SH3ADD = 6'h22; @@ -56,27 +57,19 @@ localparam ALUSRCA_PC = 1'h1; localparam ALUSRCB_RS2 = 1'h0; localparam ALUSRCB_IMM = 1'h1; -localparam MEMOP_LW = 5'h00; -localparam MEMOP_LH = 5'h01; -localparam MEMOP_LB = 5'h02; -localparam MEMOP_LHU = 5'h03; -localparam MEMOP_LBU = 5'h04; -localparam MEMOP_SW = 5'h05; -localparam MEMOP_SH = 5'h06; -localparam MEMOP_SB = 5'h07; +localparam MEMOP_LW = 5'h00; +localparam MEMOP_LH = 5'h01; +localparam MEMOP_LB = 5'h02; +localparam MEMOP_LHU = 5'h03; +localparam MEMOP_LBU = 5'h04; +localparam MEMOP_SW = 5'h05; +localparam MEMOP_SH = 5'h06; +localparam MEMOP_SB = 5'h07; -localparam MEMOP_LR_W = 5'h08; -localparam MEMOP_SC_W = 5'h09; -localparam MEMOP_AMOSWAP_W = 5'h0a; -localparam MEMOP_AMOADD_W = 5'h0b; -localparam MEMOP_AMOXOR_W = 5'h0c; -localparam MEMOP_AMOAND_W = 5'h0d; -localparam MEMOP_AMOOR_W = 5'h0e; -localparam MEMOP_AMOMIN_W = 5'h0f; -localparam MEMOP_AMOMAX_W = 5'h10; -localparam MEMOP_AMOMINU_W = 5'h11; -localparam MEMOP_AMOMAXU_W = 5'h12; -localparam MEMOP_NONE = 5'h1f; +localparam MEMOP_LR_W = 5'h08; +localparam MEMOP_SC_W = 5'h09; +localparam MEMOP_AMO = 5'h0a; +localparam MEMOP_NONE = 5'h10; localparam BCOND_NEVER = 2'h0; localparam BCOND_ALWAYS = 2'h1; diff --git a/test/sim/riscv-compliance/run_32i.sh b/test/sim/riscv-compliance/run_32i.sh index c5089be..2358ad8 100755 --- a/test/sim/riscv-compliance/run_32i.sh +++ b/test/sim/riscv-compliance/run_32i.sh @@ -1,2 +1,4 @@ #!/bin/bash +set -e + make diff --git a/test/sim/riscv-compliance/run_32ic.sh b/test/sim/riscv-compliance/run_32ic.sh index d47a310..0a218c7 100755 --- a/test/sim/riscv-compliance/run_32ic.sh +++ b/test/sim/riscv-compliance/run_32ic.sh @@ -1,4 +1,6 @@ #!/bin/bash +set -e + make TEST_ARCH=C BIN_ARCH=rv32ic TESTLIST=" \ cadd-01 \ caddi16sp-01 \ diff --git a/test/sim/riscv-compliance/run_32im.sh b/test/sim/riscv-compliance/run_32im.sh index 0f7363b..8622d2b 100755 --- a/test/sim/riscv-compliance/run_32im.sh +++ b/test/sim/riscv-compliance/run_32im.sh @@ -1,4 +1,6 @@ #!/bin/bash +set -e + make TEST_ARCH=M BIN_ARCH=rv32imc TESTLIST=" \ div-01 \ divu-01 \ diff --git a/test/sim/riscv-compliance/run_all.sh b/test/sim/riscv-compliance/run_all.sh index c7ad1cf..e035eff 100755 --- a/test/sim/riscv-compliance/run_all.sh +++ b/test/sim/riscv-compliance/run_all.sh @@ -1,4 +1,6 @@ #!/bin/bash +set -e + ./run_32i.sh ./run_32im.sh ./run_32ic.sh