Delete the AMO ALU. Saves around 80 LCs vs. the original implementation, and may enable some further savings.
This commit is contained in:
parent
7485269ddf
commit
28b53ef7b5
|
@ -62,7 +62,7 @@ activity_led #(
|
||||||
);
|
);
|
||||||
|
|
||||||
example_soc #(
|
example_soc #(
|
||||||
.EXTENSION_A (0),
|
.EXTENSION_A (1),
|
||||||
.EXTENSION_C (0),
|
.EXTENSION_C (0),
|
||||||
.EXTENSION_M (1),
|
.EXTENSION_M (1),
|
||||||
.MUL_FAST (0),
|
.MUL_FAST (0),
|
||||||
|
|
|
@ -34,7 +34,7 @@ wire sub = !(aluop == ALUOP_ADD || (|EXTENSION_ZBA && (
|
||||||
)));
|
)));
|
||||||
|
|
||||||
wire inv_op_b = sub && !(
|
wire inv_op_b = sub && !(
|
||||||
aluop == ALUOP_AND || aluop == ALUOP_OR || aluop == ALUOP_XOR
|
aluop == ALUOP_AND || aluop == ALUOP_OR || aluop == ALUOP_XOR || aluop == ALUOP_RS2
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [W_DATA-1:0] op_a_shifted =
|
wire [W_DATA-1:0] op_a_shifted =
|
||||||
|
@ -133,54 +133,60 @@ reg [W_DATA-1:0] bitwise;
|
||||||
always @ (*) begin: bitwise_ops
|
always @ (*) begin: bitwise_ops
|
||||||
case (aluop[1:0])
|
case (aluop[1:0])
|
||||||
ALUOP_AND[1:0]: bitwise = op_a & op_b_inv;
|
ALUOP_AND[1:0]: bitwise = op_a & op_b_inv;
|
||||||
ALUOP_OR[1:0]: bitwise = op_a | op_b_inv;
|
ALUOP_OR [1:0]: bitwise = op_a | op_b_inv;
|
||||||
default: bitwise = op_a ^ op_b_inv;
|
ALUOP_XOR[1:0]: bitwise = op_a ^ op_b_inv;
|
||||||
|
ALUOP_RS2[1:0]: bitwise = op_b_inv;
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
wire [W_DATA-1:0] zbs_mask = {{W_DATA-1{1'b0}}, 1'b1} << op_b[W_SHAMT-1:0];
|
wire [W_DATA-1:0] zbs_mask = {{W_DATA-1{1'b0}}, 1'b1} << op_b[W_SHAMT-1:0];
|
||||||
|
|
||||||
always @ (*) begin
|
always @ (*) begin
|
||||||
casez ({|EXTENSION_ZBA, |EXTENSION_ZBB, |EXTENSION_ZBC, |EXTENSION_ZBS, aluop})
|
casez ({|EXTENSION_A, |EXTENSION_ZBA, |EXTENSION_ZBB, |EXTENSION_ZBC, |EXTENSION_ZBS, aluop})
|
||||||
// Base ISA
|
// Base ISA
|
||||||
{4'bzzzz, ALUOP_ADD }: result = sum;
|
{5'bzzzzz, ALUOP_ADD }: result = sum;
|
||||||
{4'bzzzz, ALUOP_SUB }: result = sum;
|
{5'bzzzzz, ALUOP_SUB }: result = sum;
|
||||||
{4'bzzzz, ALUOP_LT }: result = {{W_DATA-1{1'b0}}, lt};
|
{5'bzzzzz, ALUOP_LT }: result = {{W_DATA-1{1'b0}}, lt};
|
||||||
{4'bzzzz, ALUOP_LTU }: result = {{W_DATA-1{1'b0}}, lt};
|
{5'bzzzzz, ALUOP_LTU }: result = {{W_DATA-1{1'b0}}, lt};
|
||||||
{4'bzzzz, ALUOP_SRL }: result = shift_dout;
|
{5'bzzzzz, ALUOP_SRL }: result = shift_dout;
|
||||||
{4'bzzzz, ALUOP_SRA }: result = shift_dout;
|
{5'bzzzzz, ALUOP_SRA }: result = shift_dout;
|
||||||
{4'bzzzz, ALUOP_SLL }: result = shift_dout;
|
{5'bzzzzz, ALUOP_SLL }: result = shift_dout;
|
||||||
|
// A (duplicates of Zbb)
|
||||||
|
{5'b1zzzz, ALUOP_MAX }: result = lt ? op_b : op_a;
|
||||||
|
{5'b1zzzz, ALUOP_MAXU }: result = lt ? op_b : op_a;
|
||||||
|
{5'b1zzzz, ALUOP_MIN }: result = lt ? op_a : op_b;
|
||||||
|
{5'b1zzzz, ALUOP_MINU }: result = lt ? op_a : op_b;
|
||||||
// Zba
|
// Zba
|
||||||
{4'b1zzz, ALUOP_SH1ADD }: result = sum;
|
{5'bz1zzz, ALUOP_SH1ADD }: result = sum;
|
||||||
{4'b1zzz, ALUOP_SH2ADD }: result = sum;
|
{5'bz1zzz, ALUOP_SH2ADD }: result = sum;
|
||||||
{4'b1zzz, ALUOP_SH3ADD }: result = sum;
|
{5'bz1zzz, ALUOP_SH3ADD }: result = sum;
|
||||||
// Zbb
|
// Zbb
|
||||||
{4'bz1zz, ALUOP_ANDN }: result = bitwise;
|
{5'bzz1zz, ALUOP_ANDN }: result = bitwise;
|
||||||
{4'bz1zz, ALUOP_ORN }: result = bitwise;
|
{5'bzz1zz, ALUOP_ORN }: result = bitwise;
|
||||||
{4'bz1zz, ALUOP_XNOR }: result = bitwise;
|
{5'bzz1zz, ALUOP_XNOR }: result = bitwise;
|
||||||
{4'bz1zz, ALUOP_CLZ }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz};
|
{5'bzz1zz, ALUOP_CLZ }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz};
|
||||||
{4'bz1zz, ALUOP_CTZ }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz};
|
{5'bzz1zz, ALUOP_CTZ }: result = {{W_DATA-W_SHAMT-1{1'b0}}, ctz_clz};
|
||||||
{4'bz1zz, ALUOP_CPOP }: result = {{W_DATA-W_SHAMT-1{1'b0}}, cpop};
|
{5'bzz1zz, ALUOP_CPOP }: result = {{W_DATA-W_SHAMT-1{1'b0}}, cpop};
|
||||||
{4'bz1zz, ALUOP_MAX }: result = lt ? op_b : op_a;
|
{5'bzz1zz, ALUOP_MAX }: result = lt ? op_b : op_a;
|
||||||
{4'bz1zz, ALUOP_MAXU }: result = lt ? op_b : op_a;
|
{5'bzz1zz, ALUOP_MAXU }: result = lt ? op_b : op_a;
|
||||||
{4'bz1zz, ALUOP_MIN }: result = lt ? op_a : op_b;
|
{5'bzz1zz, ALUOP_MIN }: result = lt ? op_a : op_b;
|
||||||
{4'bz1zz, ALUOP_MINU }: result = lt ? op_a : op_b;
|
{5'bzz1zz, ALUOP_MINU }: result = lt ? op_a : op_b;
|
||||||
{4'bz1zz, ALUOP_SEXT_B }: result = {{W_DATA-8{op_a[7]}}, op_a[7:0]};
|
{5'bzz1zz, ALUOP_SEXT_B }: result = {{W_DATA-8{op_a[7]}}, op_a[7:0]};
|
||||||
{4'bz1zz, ALUOP_SEXT_H }: result = {{W_DATA-16{op_a[15]}}, op_a[15:0]};
|
{5'bzz1zz, ALUOP_SEXT_H }: result = {{W_DATA-16{op_a[15]}}, op_a[15:0]};
|
||||||
{4'bz1zz, ALUOP_ZEXT_H }: result = {{W_DATA-16{1'b0}}, op_a[15:0]};
|
{5'bzz1zz, ALUOP_ZEXT_H }: result = {{W_DATA-16{1'b0}}, op_a[15:0]};
|
||||||
{4'bz1zz, ALUOP_ORC_B }: result = {{8{|op_a[31:24]}}, {8{|op_a[23:16]}}, {8{|op_a[15:8]}}, {8{|op_a[7:0]}}};
|
{5'bzz1zz, ALUOP_ORC_B }: result = {{8{|op_a[31:24]}}, {8{|op_a[23:16]}}, {8{|op_a[15:8]}}, {8{|op_a[7:0]}}};
|
||||||
{4'bz1zz, ALUOP_REV8 }: result = {op_a[7:0], op_a[15:8], op_a[23:16], op_a[31:24]};
|
{5'bzz1zz, ALUOP_REV8 }: result = {op_a[7:0], op_a[15:8], op_a[23:16], op_a[31:24]};
|
||||||
{4'bz1zz, ALUOP_ROL }: result = shift_dout;
|
{5'bzz1zz, ALUOP_ROL }: result = shift_dout;
|
||||||
{4'bz1zz, ALUOP_ROR }: result = shift_dout;
|
{5'bzz1zz, ALUOP_ROR }: result = shift_dout;
|
||||||
// Zbc
|
// Zbc
|
||||||
{4'bzz1z, ALUOP_CLMUL }: result = clmul[W_DATA-1:0];
|
{5'bzzz1z, ALUOP_CLMUL }: result = clmul[W_DATA-1:0];
|
||||||
{4'bzz1z, ALUOP_CLMULH }: result = clmul[2*W_DATA-1:W_DATA];
|
{5'bzzz1z, ALUOP_CLMULH }: result = clmul[2*W_DATA-1:W_DATA];
|
||||||
{4'bzz1z, ALUOP_CLMULR }: result = clmul[2*W_DATA-2:W_DATA-1];
|
{5'bzzz1z, ALUOP_CLMULR }: result = clmul[2*W_DATA-2:W_DATA-1];
|
||||||
// Zbs
|
// Zbs
|
||||||
{4'bzzz1, ALUOP_BCLR }: result = op_a & ~zbs_mask;
|
{5'bzzzz1, ALUOP_BCLR }: result = op_a & ~zbs_mask;
|
||||||
{4'bzzz1, ALUOP_BSET }: result = op_a | zbs_mask;
|
{5'bzzzz1, ALUOP_BSET }: result = op_a | zbs_mask;
|
||||||
{4'bzzz1, ALUOP_BINV }: result = op_a ^ zbs_mask;
|
{5'bzzzz1, ALUOP_BINV }: result = op_a ^ zbs_mask;
|
||||||
{4'bzzz1, ALUOP_BEXT }: result = {{W_DATA-1{1'b0}}, shift_dout[0]};
|
{5'bzzzz1, ALUOP_BEXT }: result = {{W_DATA-1{1'b0}}, shift_dout[0]};
|
||||||
|
|
||||||
default: result = bitwise;
|
default: result = bitwise;
|
||||||
endcase
|
endcase
|
||||||
|
|
|
@ -1,49 +0,0 @@
|
||||||
/*****************************************************************************\
|
|
||||||
| Copyright (C) 2021 Luke Wren |
|
|
||||||
| SPDX-License-Identifier: Apache-2.0 |
|
|
||||||
\*****************************************************************************/
|
|
||||||
|
|
||||||
// Separate ALU for atomic memory operations
|
|
||||||
|
|
||||||
`default_nettype none
|
|
||||||
module hazard3_amo_alu #(
|
|
||||||
`include "hazard3_config.vh"
|
|
||||||
,
|
|
||||||
`include "hazard3_width_const.vh"
|
|
||||||
) (
|
|
||||||
input wire [W_MEMOP-1:0] op,
|
|
||||||
input wire [W_DATA-1:0] op_rs1, // From load
|
|
||||||
input wire [W_DATA-1:0] op_rs2, // From core
|
|
||||||
output reg [W_DATA-1:0] result
|
|
||||||
);
|
|
||||||
|
|
||||||
`include "hazard3_ops.vh"
|
|
||||||
|
|
||||||
wire sub = op != MEMOP_AMOADD_W;
|
|
||||||
wire cmp_unsigned = op == MEMOP_AMOMINU_W || op == MEMOP_AMOMAXU_W;
|
|
||||||
|
|
||||||
wire [W_DATA-1:0] sum = op_rs1 + (op_rs2 ^ {W_DATA{sub}}) + sub;
|
|
||||||
|
|
||||||
wire rs1_lessthan_rs2 =
|
|
||||||
op_rs1[W_DATA-1] == op_rs2[W_DATA-1] ? sum[W_DATA-1] :
|
|
||||||
cmp_unsigned ? op_rs2[W_DATA-1] :
|
|
||||||
op_rs1[W_DATA-1] ;
|
|
||||||
|
|
||||||
always @ (*) begin
|
|
||||||
case(op)
|
|
||||||
MEMOP_AMOADD_W: result = sum;
|
|
||||||
MEMOP_AMOXOR_W: result = op_rs1 ^ op_rs2;
|
|
||||||
MEMOP_AMOAND_W: result = op_rs1 & op_rs2;
|
|
||||||
MEMOP_AMOOR_W: result = op_rs1 | op_rs2;
|
|
||||||
MEMOP_AMOMIN_W: result = rs1_lessthan_rs2 ? op_rs1 : op_rs2;
|
|
||||||
MEMOP_AMOMAX_W: result = rs1_lessthan_rs2 ? op_rs2 : op_rs1;
|
|
||||||
MEMOP_AMOMINU_W: result = rs1_lessthan_rs2 ? op_rs1 : op_rs2;
|
|
||||||
MEMOP_AMOMAXU_W: result = rs1_lessthan_rs2 ? op_rs2 : op_rs1;
|
|
||||||
// AMOSWAP
|
|
||||||
default: result = op_rs2;
|
|
||||||
endcase
|
|
||||||
end
|
|
||||||
|
|
||||||
endmodule
|
|
||||||
|
|
||||||
`default_nettype wire
|
|
|
@ -2,7 +2,6 @@ file hazard3_core.v
|
||||||
file hazard3_cpu_1port.v
|
file hazard3_cpu_1port.v
|
||||||
file hazard3_cpu_2port.v
|
file hazard3_cpu_2port.v
|
||||||
file arith/hazard3_alu.v
|
file arith/hazard3_alu.v
|
||||||
file arith/hazard3_amo_alu.v
|
|
||||||
file arith/hazard3_muldiv_seq.v
|
file arith/hazard3_muldiv_seq.v
|
||||||
file arith/hazard3_mul_fast.v
|
file arith/hazard3_mul_fast.v
|
||||||
file arith/hazard3_priority_encode.v
|
file arith/hazard3_priority_encode.v
|
||||||
|
|
|
@ -255,13 +255,15 @@ reg [W_REGADDR-1:0] xm_rs1;
|
||||||
reg [W_REGADDR-1:0] xm_rs2;
|
reg [W_REGADDR-1:0] xm_rs2;
|
||||||
reg [W_REGADDR-1:0] xm_rd;
|
reg [W_REGADDR-1:0] xm_rd;
|
||||||
reg [W_DATA-1:0] xm_result;
|
reg [W_DATA-1:0] xm_result;
|
||||||
reg [W_DATA-1:0] xm_store_data;
|
|
||||||
reg [1:0] xm_addr_align;
|
reg [1:0] xm_addr_align;
|
||||||
reg [W_MEMOP-1:0] xm_memop;
|
reg [W_MEMOP-1:0] xm_memop;
|
||||||
reg [W_EXCEPT-1:0] xm_except;
|
reg [W_EXCEPT-1:0] xm_except;
|
||||||
reg xm_wfi;
|
reg xm_wfi;
|
||||||
reg xm_delay_irq_entry;
|
reg xm_delay_irq_entry;
|
||||||
|
|
||||||
|
// Registered load data, routed back through ALU. AMOs were a mistake
|
||||||
|
reg [W_DATA-1:0] mx_amo_load_data;
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
// Stall logic
|
// Stall logic
|
||||||
|
|
||||||
|
@ -275,9 +277,7 @@ wire x_stall_on_trap = m_trap_enter_vld && !m_trap_enter_rdy ||
|
||||||
// sequences). Note we don't check for AMOs in stage M, because AMOs fully
|
// sequences). Note we don't check for AMOs in stage M, because AMOs fully
|
||||||
// fence off on their own completion before passing down the pipe.
|
// fence off on their own completion before passing down the pipe.
|
||||||
|
|
||||||
wire d_memop_is_amo = |EXTENSION_A && (
|
wire d_memop_is_amo = |EXTENSION_A && d_memop == MEMOP_AMO;
|
||||||
d_memop >= MEMOP_AMOSWAP_W && d_memop <= MEMOP_AMOMAXU_W
|
|
||||||
);
|
|
||||||
|
|
||||||
wire x_stall_on_exclusive_overlap = |EXTENSION_A && (
|
wire x_stall_on_exclusive_overlap = |EXTENSION_A && (
|
||||||
(d_memop_is_amo || d_memop == MEMOP_SC_W || d_memop == MEMOP_LR_W) &&
|
(d_memop_is_amo || d_memop == MEMOP_SC_W || d_memop == MEMOP_LR_W) &&
|
||||||
|
@ -367,7 +367,9 @@ always @ (*) begin
|
||||||
x_rs2_bypass = x_rdata2;
|
x_rs2_bypass = x_rdata2;
|
||||||
end
|
end
|
||||||
|
|
||||||
if (|d_alusrc_a)
|
if (|EXTENSION_A && x_amo_phase == 3'h2)
|
||||||
|
x_op_a = mx_amo_load_data;
|
||||||
|
else if (|d_alusrc_a)
|
||||||
x_op_a = d_pc;
|
x_op_a = d_pc;
|
||||||
else
|
else
|
||||||
x_op_a = x_rs1_bypass;
|
x_op_a = x_rs1_bypass;
|
||||||
|
@ -459,12 +461,12 @@ always @ (posedge clk) if (rst_n) begin
|
||||||
assert(x_amo_phase == 3'h0);
|
assert(x_amo_phase == 3'h0);
|
||||||
// Error phase should never block, so it can always pass to stage 3 to raise
|
// Error phase should never block, so it can always pass to stage 3 to raise
|
||||||
// excepting trap entry.
|
// excepting trap entry.
|
||||||
if (amo_phase == 3'h4)
|
if (x_amo_phase == 3'h4)
|
||||||
assert(!x_stall);
|
assert(!x_stall);
|
||||||
// Error phase is either due to a bus response, or a misaligned address.
|
// Error phase is either due to a bus response, or a misaligned address.
|
||||||
// Neither of these are write-address-phase.
|
// Neither of these are write-address-phase.
|
||||||
if (amo_phase == 3'h4)
|
if (x_amo_phase == 3'h4)
|
||||||
assert($past(amo_phase) != 3'h2);
|
assert($past(x_amo_phase) != 3'h2);
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
@ -738,7 +740,7 @@ always @ (posedge clk or negedge rst_n) begin
|
||||||
if (!m_stall) begin
|
if (!m_stall) begin
|
||||||
{xm_rs1, xm_rs2, xm_rd} <= {d_rs1, d_rs2, d_rd};
|
{xm_rs1, xm_rs2, xm_rd} <= {d_rs1, d_rs2, d_rd};
|
||||||
// If the transfer is unaligned, make sure it is completely NOP'd on the bus
|
// If the transfer is unaligned, make sure it is completely NOP'd on the bus
|
||||||
// Likewise, AMOs are handled entirely in X (well it's ambiguous; anyway different logic & stalls)
|
// Likewise, AMO memop logic is entirely in X; we squash the memop as it passes to M.
|
||||||
xm_memop <= x_unaligned_addr || d_memop_is_amo ? MEMOP_NONE : d_memop;
|
xm_memop <= x_unaligned_addr || d_memop_is_amo ? MEMOP_NONE : d_memop;
|
||||||
xm_except <= x_except;
|
xm_except <= x_except;
|
||||||
xm_wfi <= d_wfi;
|
xm_wfi <= d_wfi;
|
||||||
|
@ -764,25 +766,18 @@ always @ (posedge clk or negedge rst_n) begin
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
reg [W_DATA-1:0] amo_load_data;
|
|
||||||
|
|
||||||
// Datapath flops
|
// Datapath flops
|
||||||
always @ (posedge clk or negedge rst_n) begin
|
always @ (posedge clk or negedge rst_n) begin
|
||||||
if (!rst_n) begin
|
if (!rst_n) begin
|
||||||
xm_result <= {W_DATA{1'b0}};
|
xm_result <= {W_DATA{1'b0}};
|
||||||
xm_store_data <= {W_DATA{1'b0}};
|
|
||||||
xm_addr_align <= 2'b00;
|
xm_addr_align <= 2'b00;
|
||||||
end else if (!m_stall) begin
|
end else if (!m_stall || (d_memop_is_amo && x_amo_phase == 3'h2 && bus_dph_ready_d)) begin
|
||||||
xm_result <=
|
xm_result <=
|
||||||
d_csr_ren ? x_csr_rdata :
|
d_csr_ren ? x_csr_rdata :
|
||||||
|EXTENSION_A && d_memop_is_amo ? amo_load_data :
|
|EXTENSION_A && x_amo_phase == 3'h3 ? mx_amo_load_data :
|
||||||
|EXTENSION_M && d_aluop == ALUOP_MULDIV ? x_muldiv_result :
|
|EXTENSION_M && d_aluop == ALUOP_MULDIV ? x_muldiv_result :
|
||||||
x_alu_result;
|
x_alu_result;
|
||||||
xm_store_data <= x_rs2_bypass;
|
|
||||||
xm_addr_align <= x_addr_sum[1:0];
|
xm_addr_align <= x_addr_sum[1:0];
|
||||||
|
|
||||||
end else if (d_memop_is_amo && x_amo_phase == 3'h1 && bus_dph_ready_d) begin
|
|
||||||
xm_store_data <= x_rs2_bypass;
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -833,56 +828,12 @@ assign m_exception_return_addr = d_pc - (
|
||||||
|
|
||||||
// Load/store data handling
|
// Load/store data handling
|
||||||
|
|
||||||
wire [W_DATA-1:0] m_amo_wdata;
|
|
||||||
wire m_amo_wdata_valid;
|
|
||||||
|
|
||||||
generate
|
|
||||||
if (EXTENSION_A) begin: has_amo_alu
|
|
||||||
|
|
||||||
reg [W_MEMOP-1:0] amo_memop;
|
|
||||||
reg m_amo_wdata_valid_r;
|
|
||||||
|
|
||||||
assign m_amo_wdata_valid = m_amo_wdata_valid_r;
|
|
||||||
|
|
||||||
always @ (posedge clk or negedge rst_n) begin
|
|
||||||
if (!rst_n) begin
|
|
||||||
amo_memop <= MEMOP_NONE;
|
|
||||||
amo_load_data <= {W_DATA{1'b0}};
|
|
||||||
m_amo_wdata_valid_r <= 1'b0;
|
|
||||||
end else if (x_amo_phase == 3'h4 || (x_amo_phase == 3'h3 && bus_dph_ready_d) || m_trap_enter_soon) begin
|
|
||||||
// Higher precedence to make sure trap always clears the valid bit
|
|
||||||
m_amo_wdata_valid_r <= 1'b0;
|
|
||||||
end else if (d_memop_is_amo && x_amo_phase == 3'h1 && bus_dph_ready_d) begin
|
|
||||||
amo_memop <= d_memop;
|
|
||||||
amo_load_data <= bus_rdata_d;
|
|
||||||
m_amo_wdata_valid_r <= 1'b1;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
hazard3_amo_alu #(
|
|
||||||
`include "hazard3_config_inst.vh"
|
|
||||||
) amo_alu (
|
|
||||||
.op (amo_memop),
|
|
||||||
.op_rs1(amo_load_data),
|
|
||||||
.op_rs2(xm_store_data),
|
|
||||||
.result(m_amo_wdata)
|
|
||||||
);
|
|
||||||
|
|
||||||
end else begin: no_amo_alu
|
|
||||||
|
|
||||||
assign m_amo_wdata = {W_DATA{1'b0}};
|
|
||||||
assign m_amo_wdata_valid = 1'b0;
|
|
||||||
always @ (*) amo_load_data = {W_DATA{1'b0}};
|
|
||||||
|
|
||||||
end
|
|
||||||
endgenerate
|
|
||||||
|
|
||||||
always @ (*) begin
|
always @ (*) begin
|
||||||
// Local forwarding of store data
|
// Local forwarding of store data
|
||||||
if (|mw_rd && xm_rs2 == mw_rd && !REDUCED_BYPASS) begin
|
if (|mw_rd && xm_rs2 == mw_rd && !REDUCED_BYPASS) begin
|
||||||
m_wdata = mw_result;
|
m_wdata = mw_result;
|
||||||
end else begin
|
end else begin
|
||||||
m_wdata = xm_store_data;
|
m_wdata = xm_result;
|
||||||
end
|
end
|
||||||
// Replicate store data to ensure appropriate byte lane is driven
|
// Replicate store data to ensure appropriate byte lane is driven
|
||||||
case (xm_memop)
|
case (xm_memop)
|
||||||
|
@ -890,8 +841,6 @@ always @ (*) begin
|
||||||
MEMOP_SB: bus_wdata_d = {4{m_wdata[7:0]}};
|
MEMOP_SB: bus_wdata_d = {4{m_wdata[7:0]}};
|
||||||
default: bus_wdata_d = m_wdata;
|
default: bus_wdata_d = m_wdata;
|
||||||
endcase
|
endcase
|
||||||
if (|EXTENSION_A && m_amo_wdata_valid)
|
|
||||||
bus_wdata_d = m_amo_wdata;
|
|
||||||
|
|
||||||
casez ({xm_memop, xm_addr_align[1:0]})
|
casez ({xm_memop, xm_addr_align[1:0]})
|
||||||
{MEMOP_LH , 2'b0z}: m_rdata_pick_sext = {{16{bus_rdata_d[15]}}, bus_rdata_d[15: 0]};
|
{MEMOP_LH , 2'b0z}: m_rdata_pick_sext = {{16{bus_rdata_d[15]}}, bus_rdata_d[15: 0]};
|
||||||
|
@ -923,6 +872,29 @@ always @ (*) begin
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
// Capture load data in read data phase of AMO. Passes back to stage X for AMO
|
||||||
|
// calculation during AMO write address phase, using the regular ALU. Then
|
||||||
|
// registered into xm_result like a regular store, to be driven out onto
|
||||||
|
// hwdata during AMO write data phase.
|
||||||
|
|
||||||
|
generate
|
||||||
|
if (EXTENSION_A) begin: has_amo_load_reg
|
||||||
|
|
||||||
|
always @ (posedge clk or negedge rst_n) begin
|
||||||
|
if (!rst_n) begin
|
||||||
|
mx_amo_load_data <= {W_DATA{1'b0}};
|
||||||
|
end else if (d_memop_is_amo && x_amo_phase == 3'h1 && bus_dph_ready_d) begin
|
||||||
|
mx_amo_load_data <= bus_rdata_d;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
end else begin: no_amo_load_reg
|
||||||
|
|
||||||
|
always @ (*) mx_amo_load_data = {W_DATA{1'b0}};
|
||||||
|
|
||||||
|
end
|
||||||
|
endgenerate
|
||||||
|
|
||||||
// Local monitor update.
|
// Local monitor update.
|
||||||
// - Set on a load-reserved with good response from global monitor
|
// - Set on a load-reserved with good response from global monitor
|
||||||
// - Cleared by any store-conditional
|
// - Cleared by any store-conditional
|
||||||
|
|
|
@ -221,9 +221,9 @@ always @ (*) begin
|
||||||
RV_LW: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LW; end
|
RV_LW: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LW; end
|
||||||
RV_LBU: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LBU; end
|
RV_LBU: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LBU; end
|
||||||
RV_LHU: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LHU; end
|
RV_LHU: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LHU; end
|
||||||
RV_SB: begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SB; d_rd = X0; end
|
RV_SB: begin d_addr_is_regoffs = 1'b1; d_aluop = ALUOP_RS2; d_memop = MEMOP_SB; d_rd = X0; end
|
||||||
RV_SH: begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SH; d_rd = X0; end
|
RV_SH: begin d_addr_is_regoffs = 1'b1; d_aluop = ALUOP_RS2; d_memop = MEMOP_SH; d_rd = X0; end
|
||||||
RV_SW: begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SW; d_rd = X0; end
|
RV_SW: begin d_addr_is_regoffs = 1'b1; d_aluop = ALUOP_RS2; d_memop = MEMOP_SW; d_rd = X0; end
|
||||||
|
|
||||||
RV_MUL: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MUL; end else begin d_invalid_32bit = 1'b1; end
|
RV_MUL: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MUL; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_MULH: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULH; end else begin d_invalid_32bit = 1'b1; end
|
RV_MULH: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULH; end else begin d_invalid_32bit = 1'b1; end
|
||||||
|
@ -234,17 +234,17 @@ always @ (*) begin
|
||||||
RV_REM: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REM; end else begin d_invalid_32bit = 1'b1; end
|
RV_REM: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REM; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_REMU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REMU; end else begin d_invalid_32bit = 1'b1; end
|
RV_REMU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REMU; end else begin d_invalid_32bit = 1'b1; end
|
||||||
|
|
||||||
RV_LR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_LR_W; d_rs2 = X0; end else begin d_invalid_32bit = 1'b1; end
|
RV_LR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_LR_W; d_rs2 = X0; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_SC_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SC_W; end else begin d_invalid_32bit = 1'b1; end
|
RV_SC_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SC_W; d_aluop = ALUOP_RS2; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_AMOSWAP_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOSWAP_W; end else begin d_invalid_32bit = 1'b1; end
|
RV_AMOSWAP_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_RS2; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_AMOADD_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOADD_W; end else begin d_invalid_32bit = 1'b1; end
|
RV_AMOADD_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_ADD; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_AMOXOR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOXOR_W; end else begin d_invalid_32bit = 1'b1; end
|
RV_AMOXOR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_XOR; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_AMOAND_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOAND_W; end else begin d_invalid_32bit = 1'b1; end
|
RV_AMOAND_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_AND; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_AMOOR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOOR_W; end else begin d_invalid_32bit = 1'b1; end
|
RV_AMOOR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_OR; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_AMOMIN_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOMIN_W; end else begin d_invalid_32bit = 1'b1; end
|
RV_AMOMIN_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_MIN; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_AMOMAX_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOMAX_W; end else begin d_invalid_32bit = 1'b1; end
|
RV_AMOMAX_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_MAX; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_AMOMINU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOMINU_W; end else begin d_invalid_32bit = 1'b1; end
|
RV_AMOMINU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_MINU; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_AMOMAXU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOMAXU_W; end else begin d_invalid_32bit = 1'b1; end
|
RV_AMOMAXU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMO; d_aluop = ALUOP_MAXU; end else begin d_invalid_32bit = 1'b1; end
|
||||||
|
|
||||||
RV_SH1ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH1ADD; end else begin d_invalid_32bit = 1'b1; end
|
RV_SH1ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH1ADD; end else begin d_invalid_32bit = 1'b1; end
|
||||||
RV_SH2ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH2ADD; end else begin d_invalid_32bit = 1'b1; end
|
RV_SH2ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH2ADD; end else begin d_invalid_32bit = 1'b1; end
|
||||||
|
|
|
@ -16,7 +16,8 @@ localparam ALUOP_SRL = 6'h09;
|
||||||
localparam ALUOP_SRA = 6'h0a;
|
localparam ALUOP_SRA = 6'h0a;
|
||||||
localparam ALUOP_SLL = 6'h0b;
|
localparam ALUOP_SLL = 6'h0b;
|
||||||
localparam ALUOP_MULDIV = 6'h0c;
|
localparam ALUOP_MULDIV = 6'h0c;
|
||||||
// Bitmanip ALU operations:
|
localparam ALUOP_RS2 = 6'h0d; // differs from AND/OR/XOR in [1:0]
|
||||||
|
// Bitmanip ALU operations (some also used by AMOs):
|
||||||
localparam ALUOP_SH1ADD = 6'h20;
|
localparam ALUOP_SH1ADD = 6'h20;
|
||||||
localparam ALUOP_SH2ADD = 6'h21;
|
localparam ALUOP_SH2ADD = 6'h21;
|
||||||
localparam ALUOP_SH3ADD = 6'h22;
|
localparam ALUOP_SH3ADD = 6'h22;
|
||||||
|
@ -56,27 +57,19 @@ localparam ALUSRCA_PC = 1'h1;
|
||||||
localparam ALUSRCB_RS2 = 1'h0;
|
localparam ALUSRCB_RS2 = 1'h0;
|
||||||
localparam ALUSRCB_IMM = 1'h1;
|
localparam ALUSRCB_IMM = 1'h1;
|
||||||
|
|
||||||
localparam MEMOP_LW = 5'h00;
|
localparam MEMOP_LW = 5'h00;
|
||||||
localparam MEMOP_LH = 5'h01;
|
localparam MEMOP_LH = 5'h01;
|
||||||
localparam MEMOP_LB = 5'h02;
|
localparam MEMOP_LB = 5'h02;
|
||||||
localparam MEMOP_LHU = 5'h03;
|
localparam MEMOP_LHU = 5'h03;
|
||||||
localparam MEMOP_LBU = 5'h04;
|
localparam MEMOP_LBU = 5'h04;
|
||||||
localparam MEMOP_SW = 5'h05;
|
localparam MEMOP_SW = 5'h05;
|
||||||
localparam MEMOP_SH = 5'h06;
|
localparam MEMOP_SH = 5'h06;
|
||||||
localparam MEMOP_SB = 5'h07;
|
localparam MEMOP_SB = 5'h07;
|
||||||
|
|
||||||
localparam MEMOP_LR_W = 5'h08;
|
localparam MEMOP_LR_W = 5'h08;
|
||||||
localparam MEMOP_SC_W = 5'h09;
|
localparam MEMOP_SC_W = 5'h09;
|
||||||
localparam MEMOP_AMOSWAP_W = 5'h0a;
|
localparam MEMOP_AMO = 5'h0a;
|
||||||
localparam MEMOP_AMOADD_W = 5'h0b;
|
localparam MEMOP_NONE = 5'h10;
|
||||||
localparam MEMOP_AMOXOR_W = 5'h0c;
|
|
||||||
localparam MEMOP_AMOAND_W = 5'h0d;
|
|
||||||
localparam MEMOP_AMOOR_W = 5'h0e;
|
|
||||||
localparam MEMOP_AMOMIN_W = 5'h0f;
|
|
||||||
localparam MEMOP_AMOMAX_W = 5'h10;
|
|
||||||
localparam MEMOP_AMOMINU_W = 5'h11;
|
|
||||||
localparam MEMOP_AMOMAXU_W = 5'h12;
|
|
||||||
localparam MEMOP_NONE = 5'h1f;
|
|
||||||
|
|
||||||
localparam BCOND_NEVER = 2'h0;
|
localparam BCOND_NEVER = 2'h0;
|
||||||
localparam BCOND_ALWAYS = 2'h1;
|
localparam BCOND_ALWAYS = 2'h1;
|
||||||
|
|
|
@ -1,2 +1,4 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
make
|
make
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
make TEST_ARCH=C BIN_ARCH=rv32ic TESTLIST=" \
|
make TEST_ARCH=C BIN_ARCH=rv32ic TESTLIST=" \
|
||||||
cadd-01 \
|
cadd-01 \
|
||||||
caddi16sp-01 \
|
caddi16sp-01 \
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
make TEST_ARCH=M BIN_ARCH=rv32imc TESTLIST=" \
|
make TEST_ARCH=M BIN_ARCH=rv32imc TESTLIST=" \
|
||||||
div-01 \
|
div-01 \
|
||||||
divu-01 \
|
divu-01 \
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
./run_32i.sh
|
./run_32i.sh
|
||||||
./run_32im.sh
|
./run_32im.sh
|
||||||
./run_32ic.sh
|
./run_32ic.sh
|
||||||
|
|
Loading…
Reference in New Issue