Import from hazard5 9743a1b
This commit is contained in:
commit
6dad4e20bb
|
@ -0,0 +1,3 @@
|
|||
[submodule "test/riscv-compliance/riscv-compliance"]
|
||||
path = test/riscv-compliance/riscv-compliance
|
||||
url = https://github.com/riscv/riscv-compliance.git
|
|
@ -0,0 +1,13 @@
|
|||
DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE
|
||||
Version 3, April 2008
|
||||
|
||||
Copyright (C) 2020 Luke Wren
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim or modified
|
||||
copies of this license document and accompanying software, and
|
||||
changing either is allowed.
|
||||
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. You just DO WHAT THE FUCK YOU WANT TO.
|
||||
1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK.
|
|
@ -0,0 +1,4 @@
|
|||
DOTF=hazard5.f
|
||||
TOP=hazard5_alu
|
||||
|
||||
include $(SCRIPTS)/formal.mk
|
|
@ -0,0 +1,117 @@
|
|||
/**********************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2018 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*********************************************************************/
|
||||
|
||||
// Combinational integer ALU.
//
// Ports:
//   aluop      - operation select, compared against the ALUOP_* constants
//                from hazard5_ops.vh
//   op_a, op_b - operands
//   result     - output of the selected operation
//   result_add - raw adder/subtractor output, regardless of aluop
//   cmp        - for ALUOP_SUB: 1 when op_a and op_b differ; for every other
//                aluop: the less-than flag
module hazard5_alu #(
	parameter W_DATA = 32
) (
	input  wire [3:0]        aluop,
	input  wire [W_DATA-1:0] op_a,
	input  wire [W_DATA-1:0] op_b,
	output reg  [W_DATA-1:0] result,
	output wire [W_DATA-1:0] result_add, // for load/stores
	output wire              cmp
);

`include "hazard5_ops.vh"

// ----------------------------------------------------------------------------
// Adder/subtractor

// Every operation other than a plain ADD puts the adder into subtract mode
// (invert op_b, carry-in of 1), so the comparisons can reuse its output.
wire              negate_b = aluop != ALUOP_ADD;
wire [W_DATA-1:0] op_b_adj = op_b ^ {W_DATA{negate_b}};
wire [W_DATA-1:0] sum      = op_a + op_b_adj + negate_b;

assign result_add = sum;

// ----------------------------------------------------------------------------
// Comparisons

wire sign_a   = op_a[W_DATA-1];
wire sign_b   = op_b[W_DATA-1];
wire sign_sum = sum[W_DATA-1];

// When the operand MSBs agree, the sign of the difference decides. When they
// disagree, the operand with the MSB set is the larger one for an unsigned
// compare, and the smaller one otherwise.
reg lt;

always @ (*) begin: less_than
	if (sign_a == sign_b)
		lt = sign_sum;
	else if (aluop == ALUOP_LTU)
		lt = sign_b;
	else
		lt = sign_a;
end

wire operands_differ = |(op_a ^ op_b);

assign cmp = aluop == ALUOP_SUB ? operands_differ : lt;

// ----------------------------------------------------------------------------
// Shifts

wire [W_DATA-1:0] shift_dout;
reg               shift_right_nleft;
reg               shift_arith;

hazard5_shift_barrel #(
	.W_DATA  (W_DATA),
	.W_SHAMT (5)
) shifter (
	.din         (op_a),
	.shamt       (op_b[4:0]),
	.right_nleft (shift_right_nleft),
	.arith       (shift_arith),
	.dout        (shift_dout)
);

// ----------------------------------------------------------------------------
// Bitwise operations

// We can implement all bitwise ops with 1 LUT4/bit total, since each result bit
// uses only two operand bits. Much better than feeding each into main mux tree.
// Only the two LSBs of aluop are decoded here.

reg [W_DATA-1:0] bitwise;

always @ (*) begin: bitwise_ops
	if (aluop[1:0] == ALUOP_AND[1:0])
		bitwise = op_a & op_b;
	else if (aluop[1:0] == ALUOP_OR[1:0])
		bitwise = op_a | op_b;
	else
		bitwise = op_a ^ op_b;
end

// ----------------------------------------------------------------------------
// Result mux, plus shifter controls

always @ (*) begin: result_mux
	// Shifter defaults: logical left shift
	shift_right_nleft = 1'b0;
	shift_arith = 1'b0;
	case (aluop)
		ALUOP_ADD, ALUOP_SUB: begin
			result = sum;
		end
		ALUOP_LT, ALUOP_LTU: begin
			// Zero-extend the 1-bit comparison flag
			result = {{W_DATA-1{1'b0}}, lt};
		end
		ALUOP_SRL: begin
			shift_right_nleft = 1'b1;
			result = shift_dout;
		end
		ALUOP_SRA: begin
			shift_right_nleft = 1'b1;
			shift_arith = 1'b1;
			result = shift_dout;
		end
		ALUOP_SLL: begin
			result = shift_dout;
		end
		default: begin
			// Anything not listed above is treated as a bitwise op
			result = bitwise;
		end
	endcase
end

`ifdef FORMAL
`ifndef RISCV_FORMAL
// Really we're just interested in the shifts and comparisons, as these are
// the nontrivial ones. However, easier to test everything!

wire clk;
always @ (posedge clk) begin
	case (aluop)
		ALUOP_ADD: assert(result == op_a + op_b);
		ALUOP_SUB: assert(result == op_a - op_b);
		ALUOP_LT:  assert(result == $signed(op_a) < $signed(op_b));
		ALUOP_LTU: assert(result == op_a < op_b);
		ALUOP_AND: assert(result == (op_a & op_b));
		ALUOP_OR:  assert(result == (op_a | op_b));
		ALUOP_XOR: assert(result == (op_a ^ op_b));
		ALUOP_SRL: assert(result == op_a >> op_b[4:0]);
		ALUOP_SRA: assert($signed(result) == $signed(op_a) >>> $signed(op_b[4:0]));
		ALUOP_SLL: assert(result == op_a << op_b[4:0]);
		default: begin end
	endcase
end
`endif
`endif

endmodule
|
|
@ -0,0 +1,75 @@
|
|||
/**********************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2021 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*********************************************************************/
|
||||
|
||||
// Registered-input multiplier.
//
// op_a/op_b are captured on a clock edge where op_vld is high; result is a
// combinational function of the captured operands, truncated to XLEN bits.
// result_vld is op_vld delayed by one clock, and is cleared by rst_n.
module hazard5_mul_fast #(
	parameter XLEN = 32
) (
	input  wire            clk,
	input  wire            rst_n,
	input  wire [XLEN-1:0] op_a,
	input  wire [XLEN-1:0] op_b,
	input  wire            op_vld,

	output wire [XLEN-1:0] result,
	output reg             result_vld
);

// ----------------------------------------------------------------------------
// Valid flag: follows op_vld with one cycle of latency

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n)
		result_vld <= 1'b0;
	else
		result_vld <= op_vld;
end

// ----------------------------------------------------------------------------
// Operand registers (no reset; held when op_vld is low)

// This pipestage is folded into the front of the DSP tiles on UP5k. Note the
// intention is to register the bypassed core regs at the end of X (since
// bypass is quite slow), then perform multiply combinatorially in stage M,
// and mux into MW result register.

reg [XLEN-1:0] op_a_r;
reg [XLEN-1:0] op_b_r;

always @ (posedge clk) begin
	if (op_vld) begin
		op_a_r <= op_a;
		op_b_r <= op_b;
	end
end

// ----------------------------------------------------------------------------
// Multiply

// This should be inferred as 3 DSP tiles on UP5k:
//
// 1. Register then multiply a[15: 0] and b[15: 0]
// 2. Register then multiply a[31:16] and b[15: 0], then directly add output of 1
// 3. Register then multiply a[15: 0] and b[31:16], then directly add output of 2
//
// So there is quite a long path (1x 16-bit multiply, then 2x 16-bit add). On
// other platforms you may just end up with a pile of gates.

`ifdef RISCV_FORMAL_ALTOPS

// riscv-formal can use a simpler function, since it's just confirming the
// result is correctly hooked up.
assign result = result_vld ? (op_a_r + op_b_r) ^ 32'h5876063e : 32'hdeadbeef;

`else

assign result = op_a_r * op_b_r;

`endif

endmodule
|
|
@ -0,0 +1,294 @@
|
|||
/**********************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2018 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*********************************************************************/
|
||||
|
||||
// Combined multiply/divide/modulo circuit.
|
||||
// All operations performed at 1 bit per clock; aiming for minimal resource usage.
|
||||
// There are lots of opportunities for off-by-one errors here. See muldiv_model.py
|
||||
// for a simple reference model of the mul/div/mod iterations.
|
||||
//
|
||||
// When op_kill is high, the current calculation halts immediately. op_vld can be
|
||||
// asserted on the same cycle, and the new calculation begins without delay, regardless
|
||||
// of op_rdy. This may be used by the processor on e.g. mispredict or trap.
|
||||
//
|
||||
// The actual multiply/divide hardware is unsigned. We handle signedness at
|
||||
// input/output.
|
||||
|
||||
// Sequential multiply/divide/remainder unit.
//
// Parameters:
//   XLEN   - operand width
//   UNROLL - number of multiply/divide iterations evaluated per clock; must
//            be a positive power of 2 (checked in simulation)
//   W_CTR  - derived width of the iteration counter; do not override
//
// Ports:
//   op                 - operation select, compared against M_OP_* constants
//                        from hazard5_ops.vh. Bit 2 selects the divide path.
//   op_vld / op_rdy    - a new operation is accepted when both are high, or
//                        when op_vld is high together with op_kill
//   op_kill            - abandon the operation in progress
//   op_a, op_b         - operands
//   result_h, result_l - upper/lower half of the accumulator
//   result_vld         - identical to op_rdy
module hazard5_muldiv_seq #(
	parameter XLEN = 32,
	parameter UNROLL = 1,
	parameter W_CTR = $clog2(XLEN + 1) // do not modify
) (
	input  wire              clk,
	input  wire              rst_n,
	input  wire [2:0]        op,
	input  wire              op_vld,
	output wire              op_rdy,
	input  wire              op_kill,
	input  wire [XLEN-1:0]   op_a,
	input  wire [XLEN-1:0]   op_b,

	output wire [XLEN-1:0]   result_h, // mulh* or rem*
	output wire [XLEN-1:0]   result_l, // mul or div*
	output wire              result_vld
);

`include "hazard5_ops.vh"

//synthesis translate_off
// $fatal takes a finish_number as its first argument; the message follows it.
generate if (UNROLL & (UNROLL - 1) || ~|UNROLL)
	initial $fatal(1, "%m: UNROLL must be a positive power of 2");
endgenerate
//synthesis translate_on

// ----------------------------------------------------------------------------
// Operation decode, operand sign adjustment

// On the first cycle, op_a and op_b go straight through to the accumulator
// and the divisor/multiplicand register. They are then adjusted in-place
// on the next cycle. This allows the same circuits to be reused for sign
// adjustment before output (and helps input timing).

reg [W_MULOP-1:0] op_r;
reg [2*XLEN-1:0]  accum;
reg [XLEN-1:0]    op_b_r;
reg               op_a_neg_r;
reg               op_b_neg_r;

wire op_a_signed =
	op_r == M_OP_MULH ||
	op_r == M_OP_MULHSU ||
	op_r == M_OP_DIV ||
	op_r == M_OP_REM;

wire op_b_signed =
	op_r == M_OP_MULH ||
	op_r == M_OP_DIV ||
	op_r == M_OP_REM;

// Operand is treated as negative only if the operation is signed for that
// operand and its MSB is set.
wire op_a_neg = op_a_signed && accum[XLEN-1];
wire op_b_neg = op_b_signed && op_b_r[XLEN-1];

wire is_div = op_r[2];

// Controls for modifying sign of all/part of accumulator
wire accum_neg_l;
wire accum_inv_h;
wire accum_incr_h;

// ----------------------------------------------------------------------------
// Arithmetic circuit

// Combinatorials:
reg [2*XLEN-1:0] accum_next;
reg [2*XLEN-1:0] addend;
reg [2*XLEN-1:0] shift_tmp;
reg [2*XLEN-1:0] addsub_tmp;
reg              neg_l_borrow;

always @ (*) begin: alu
	integer i;
	// Multiply/divide iteration layers
	accum_next = accum;
	addend = {2*XLEN{1'b0}};
	addsub_tmp = {2*XLEN{1'b0}};
	neg_l_borrow = 1'b0;
	for (i = 0; i < UNROLL; i = i + 1) begin
		// op_b_r aligned XLEN-1 bits up; for divide the top bit extends it
		// with a 1 whenever op_b_r is nonzero (op_b_r is negated beforehand
		// by the state machine in the divide case).
		addend = {is_div && |op_b_r, op_b_r, {XLEN-1{1'b0}}};
		shift_tmp = is_div ? accum_next : accum_next >> 1;
		addsub_tmp = shift_tmp + addend;
		accum_next = (is_div ? !addsub_tmp[2 * XLEN - 1] : accum_next[0]) ?
			addsub_tmp : shift_tmp;
		if (is_div)
			accum_next = {accum_next[2*XLEN-2:0], !addsub_tmp[2 * XLEN - 1]};
	end
	// Alternative path for negation of all/part of accumulator
	if (accum_neg_l)
		{neg_l_borrow, accum_next[XLEN-1:0]} = {~accum[XLEN-1:0]} + 1'b1;
	if (accum_incr_h || accum_inv_h)
		accum_next[XLEN +: XLEN] = (accum[XLEN +: XLEN] ^ {XLEN{accum_inv_h}})
			+ accum_incr_h;
end

// ----------------------------------------------------------------------------
// Main state machine

reg sign_preadj_done;
reg [W_CTR-1:0] ctr;
reg sign_postadj_done;
reg sign_postadj_carry;

localparam CTR_TOP = XLEN[W_CTR-1:0];

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		ctr <= {W_CTR{1'b0}};
		sign_preadj_done <= 1'b1;
		sign_postadj_done <= 1'b1;
		sign_postadj_carry <= 1'b0;
		op_r <= {W_MULOP{1'b0}};
		op_a_neg_r <= 1'b0;
		op_b_neg_r <= 1'b0;
		op_b_r <= {XLEN{1'b0}};
		accum <= {XLEN*2{1'b0}};
	end else if (op_kill || (op_vld && op_rdy)) begin
		// Initialise circuit with operands + state.
		// A kill without op_vld leaves the state machine idle (ctr = 0, both
		// adjustment phases marked done).
		ctr <= op_vld ? CTR_TOP : {W_CTR{1'b0}};
		sign_preadj_done <= !op_vld;
		sign_postadj_done <= !op_vld;
		sign_postadj_carry <= 1'b0;
		op_r <= op;
		op_b_r <= op_b;
		accum <= {{XLEN{1'b0}}, op_a};
	end else if (!sign_preadj_done) begin
		// Pre-adjust sign if necessary, else perform first iteration immediately
		op_a_neg_r <= op_a_neg;
		op_b_neg_r <= op_b_neg;
		sign_preadj_done <= 1'b1;
		if (accum_neg_l || (op_b_neg ^ is_div)) begin
			if (accum_neg_l)
				accum[0 +: XLEN] <= accum_next[0 +: XLEN];
			if (op_b_neg ^ is_div)
				op_b_r <= -op_b_r;
		end else begin
			ctr <= ctr - UNROLL[W_CTR-1:0];
			accum <= accum_next;
		end
	end else if (|ctr) begin
		ctr <= ctr - UNROLL[W_CTR-1:0];
		accum <= accum_next;
	end else if (!sign_postadj_done || sign_postadj_carry) begin
		sign_postadj_done <= 1'b1;
		if (accum_inv_h || accum_incr_h)
			accum[XLEN +: XLEN] <= accum_next[XLEN +: XLEN];
		if (accum_neg_l) begin
			accum[0 +: XLEN] <= accum_next[0 +: XLEN];
			if (!is_div) begin
				// Multiply: a carry out of the lower-half negation needs one
				// more cycle to increment the upper half.
				sign_postadj_carry <= neg_l_borrow;
				sign_postadj_done <= !neg_l_borrow;
			end
		end
	end
end

// ----------------------------------------------------------------------------
// Sign adjustment control

// Pre-adjustment: for any a, b we want |a|, |b|. Note that the magnitude of any
// 32-bit signed integer is representable by a 32-bit unsigned integer.

// Post-adjustment for division:
// We seek q, r to satisfy a = b * q + r, where a and b are given,
// and |r| < |b|. One way to do this is if
// sgn(r) = sgn(a)
// sgn(q) = sgn(a) ^ sgn(b)
// This has additional nice properties like
// -(a / b) = (-a) / b = a / (-b)

// Post-adjustment for multiplication:
// We have calculated the 2*XLEN result of |a| * |b|.
// Negate the entire accumulator if sgn(a) ^ sgn(b).
// This is done in two steps (to share div/mod circuit, and avoid 64-bit carry):
// - Negate lower half of accumulator, and invert upper half
// - Increment upper half if lower half carried

wire do_postadj = ~|{ctr, sign_postadj_done};
wire op_signs_differ = op_a_neg_r ^ op_b_neg_r;

assign accum_neg_l =
	!sign_preadj_done && op_a_neg ||
	do_postadj && !sign_postadj_carry && op_signs_differ && !(is_div && ~|op_b_r);

assign {accum_incr_h, accum_inv_h} =
	do_postadj &&  is_div && op_a_neg_r                             ? 2'b11 :
	do_postadj && !is_div && op_signs_differ && !sign_postadj_carry ? 2'b01 :
	do_postadj && !is_div && op_signs_differ &&  sign_postadj_carry ? 2'b10 :
	                                                                  2'b00 ;

// ----------------------------------------------------------------------------
// Outputs

// Ready once the counter has run out and no sign adjustment is pending
assign op_rdy = ~|{ctr, accum_neg_l, accum_incr_h, accum_inv_h};
assign result_vld = op_rdy;

`ifndef RISCV_FORMAL_ALTOPS

assign {result_h, result_l} = accum;

`else

// Provide arithmetically simpler alternative operations, to speed up formal checks
always assert(XLEN == 32); // TODO may care about this one day

reg [XLEN-1:0] fml_a_saved;
reg [XLEN-1:0] fml_b_saved;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		fml_a_saved <= {XLEN{1'b0}};
		fml_b_saved <= {XLEN{1'b0}};
	end else if (op_vld && op_rdy) begin
		fml_a_saved <= op_a;
		fml_b_saved <= op_b;
	end
end

assign result_h =
	op_r == M_OP_MULH   ? (fml_a_saved + fml_b_saved) ^ 32'hf6583fb7 :
	op_r == M_OP_MULHSU ? (fml_a_saved - fml_b_saved) ^ 32'hecfbe137 :
	op_r == M_OP_MULHU  ? (fml_a_saved + fml_b_saved) ^ 32'h949ce5e8 :
	op_r == M_OP_REM    ? (fml_a_saved - fml_b_saved) ^ 32'h8da68fa5 :
	op_r == M_OP_REMU   ? (fml_a_saved - fml_b_saved) ^ 32'h3138d0e1 : 32'hdeadbeef;

assign result_l =
	op_r == M_OP_MUL    ? (fml_a_saved + fml_b_saved) ^ 32'h5876063e :
	op_r == M_OP_DIV    ? (fml_a_saved - fml_b_saved) ^ 32'h7f8529ec :
	op_r == M_OP_DIVU   ? (fml_a_saved - fml_b_saved) ^ 32'h10e8fd70 : 32'hdeadbeef;

`endif

// ----------------------------------------------------------------------------
// Interface properties

`ifdef FORMAL

always @ (posedge clk) if (rst_n && $past(rst_n)) begin: properties
	integer i;
	reg alive;

	// Once ready with nothing issued, we stay ready
	if ($past(op_rdy && !op_vld))
		assert(op_rdy);

	// Results are held while valid, unless a kill intervened
	if (result_vld && $past(result_vld) && !$past(op_kill))
		assert($stable({result_h, result_l}));

	// Kill will halt an in-progress operation, but a new operation may be
	// asserted simultaneously with kill.
	if ($past(op_kill))
		assert(op_rdy == !$past(op_vld));

	// We should be periodically ready (liveness property), unless new operations
	// are forced in immediately, simultaneous with a kill, in which case there
	// is no intermediate ready state.
	alive = op_rdy || (op_kill && op_vld);
	for (i = 1; i <= XLEN / UNROLL + 3; i = i + 1)
		alive = alive || $past(op_rdy || (op_kill && op_vld), i);
	assert(alive);
end

`endif

endmodule
|
|
@ -0,0 +1,57 @@
|
|||
/******************************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2019 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*****************************************************************************/
|
||||
|
||||
// Really something like this should be in a utility library (or the language!),
|
||||
// but Hazard5 is supposed to be self-contained
|
||||
|
||||
// Priority encoder: gnt is the index of the lowest-numbered set bit of req.
// When no bit of req is set, gnt is 0.
//
// Parameters:
//   W_REQ - width of the request bitmap
//   W_GNT - derived width of the encoded index; do not override
module hazard5_priority_encode #(
	parameter W_REQ = 16,
	parameter W_GNT = $clog2(W_REQ) // do not modify
) (
	input wire  [W_REQ-1:0] req,
	output wire [W_GNT-1:0] gnt
);

// Step 1: reduce the request bitmap to (at most) one hot bit.
// blocked[k] is set when any request with an index below k is asserted.

reg [W_REQ-1:0] blocked;

always @ (*) begin: smear
	integer k;
	for (k = 0; k < W_REQ; k = k + 1) begin
		if (k == 0)
			blocked[k] = 1'b0;
		else
			blocked[k] = blocked[k - 1] || req[k - 1];
	end
end

wire [W_REQ-1:0] gnt_onehot = ~blocked & req;

// Step 2: binary-encode the one-hot vector. At most one position contributes,
// so the per-position index values can simply be ORed together.

reg [W_GNT-1:0] gnt_accum;

always @ (*) begin: encode
	integer k;
	gnt_accum = {W_GNT{1'b0}};
	for (k = 0; k < W_REQ; k = k + 1)
		gnt_accum = gnt_accum | (k[W_GNT-1:0] & {W_GNT{gnt_onehot[k]}});
end

assign gnt = gnt_accum;

endmodule
|
|
@ -0,0 +1,72 @@
|
|||
/**********************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2018 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*********************************************************************/
|
||||
|
||||
// Sequential shifter which moves the data by one bit position per clock.
//
// Asserting din_vld loads din into the accumulator and shamt into the
// remaining-shift counter. On each following clock with a nonzero count, the
// accumulator shifts by one bit (direction and fill taken from right_nleft and
// arith on that cycle) and the count decrements. dout_vld is high whenever the
// count is zero; dout always shows the accumulator.
module hazard5_shift_1bit_seq #(
	parameter W_DATA = 32,
	parameter W_SHAMT = 5
) (
	input wire               clk,
	input wire               rst_n,

	input wire [W_DATA-1:0]  din,
	input wire               din_vld, // can be asserted at any time, we always respond
	input wire [W_SHAMT-1:0] shamt,
	input wire               right_nleft,
	input wire               arith,
	output wire [W_DATA-1:0] dout,
	output wire              dout_vld  // last port: no trailing comma allowed here
);

reg [W_DATA-1:0]  accum;
reg [W_DATA-1:0]  accum_next;
reg [W_SHAMT-1:0] shamt_remaining;

// Handle actual shifting

// Bit shifted in at the top on a right shift: copy of the MSB when arithmetic
wire sext = arith && accum[W_DATA - 1];

always @ (*) begin: shift_unit
	accum_next = accum;
	if (din_vld) begin
		// A new operand always takes priority over an in-progress shift
		accum_next = din;
	end else if (shamt_remaining) begin
		if (right_nleft)
			accum_next = {sext, accum[W_DATA-1:1]};
		else
			accum_next = {accum << 1};
	end
end

// No reset on datapath
always @ (posedge clk)
	accum <= accum_next;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		shamt_remaining <= {W_SHAMT{1'b0}};
	end else if (din_vld) begin
		shamt_remaining <= shamt;
	end else begin
		// Decrement by one until zero, then hold at zero
		shamt_remaining <= shamt_remaining - |shamt_remaining;
	end
end

assign dout_vld = shamt_remaining == 0;
assign dout = accum;

endmodule
|
|
@ -0,0 +1,71 @@
|
|||
/**********************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2018 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*********************************************************************/
|
||||
|
||||
// Implement the three shifts (left logical, right logical, right arithmetic)
|
||||
// using a single log-type barrel shifter. Around 240 LUTs for 32 bits.
|
||||
// (7 layers of 32 2-input muxes, some extra LUTs and LUT inputs used for arith)
|
||||
|
||||
// Combinational barrel shifter.
//
// The core only ever shifts left. Right shifts are performed by bit-reversing
// the input, shifting left, and bit-reversing the result.
//
// Ports:
//   din         - value to shift
//   shamt       - shift distance
//   right_nleft - 1: shift right, 0: shift left
//   arith       - 1: vacated bit positions are filled with a copy of bit 0 of
//                 the (possibly reversed) input, which is the MSB of din for
//                 a right shift; 0: filled with zeroes
//   dout        - shifted value
module hazard5_shift_barrel #(
	parameter W_DATA = 32,
	parameter W_SHAMT = 5
) (
	input wire [W_DATA-1:0]  din,
	input wire [W_SHAMT-1:0] shamt,
	input wire               right_nleft,
	input wire               arith,
	output reg [W_DATA-1:0]  dout
);

reg [W_DATA-1:0] din_rev;
reg [W_DATA-1:0] shift_accum;

// Fill bit for vacated positions
wire sext = arith && din_rev[0]; // haha

// Input stage: optional bit reversal
always @ (*) begin: reverse_in
	integer b;
	for (b = 0; b < W_DATA; b = b + 1) begin
		if (right_nleft)
			din_rev[b] = din[W_DATA - 1 - b];
		else
			din_rev[b] = din[b];
	end
end

// Shift stage: one layer per shamt bit, layer n shifting left by 2**n places
// and ORing the fill bit into the low 2**n positions
always @ (*) begin: shift_layers
	integer layer;
	shift_accum = din_rev;
	for (layer = 0; layer < W_SHAMT; layer = layer + 1) begin
		if (shamt[layer]) begin
			shift_accum = (shift_accum << (1 << layer)) |
				({W_DATA{sext}} & ~({W_DATA{1'b1}} << (1 << layer)));
		end
	end
end

// Output stage: undo the input reversal
always @ (*) begin: reverse_out
	integer b;
	for (b = 0; b < W_DATA; b = b + 1) begin
		if (right_nleft)
			dout[b] = shift_accum[W_DATA - 1 - b];
		else
			dout[b] = shift_accum[b];
	end
end

`ifdef FORMAL
always @ (*) begin
	if (right_nleft && arith) begin: asr
		assert($signed(dout) == $signed(din) >>> $signed(shamt));
	end else if (right_nleft && !arith) begin
		assert(dout == din >> shamt);
	end else if (!right_nleft && !arith) begin
		assert(dout == din << shamt);
	end
end
`endif

endmodule
|
|
@ -0,0 +1,106 @@
|
|||
/**********************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2018 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*********************************************************************/
|
||||
|
||||
// Implement the three shifts using a single log-sequential shifter.
|
||||
// On each clock, the shifter can left-shift by a power-of-two amount (arith or
|
||||
// logical), OR it can reverse the accumulator.
|
||||
//
|
||||
// The accumulator is wired in reverse to the output. So the sequences are:
|
||||
// - Right shift: flip, then shift. Output wiring flips again. Internal left-shifts
|
||||
// are effectively right shifts.
|
||||
// - Left shift: perform shift ops, then flip, so that reversed output cancels.
|
||||
//
|
||||
// An additional cycle is consumed to load the input into the accumulator; this
|
||||
// simplifies muxing. In total, a shift consumes between 2 and 7 cycles on a
|
||||
// 32-bit machine, depending on the bit weight of shamt.
|
||||
|
||||
// Log-sequential shifter.
//
// Asserting din_vld loads din into the accumulator and shamt into the
// remaining-shift register. On each following clock the accumulator is either
// bit-reversed ("flipped", done exactly once per operation) or shifted left by
// the power of two corresponding to the lowest set bit of the remaining shift
// amount, which is then cleared. dout is the bit-reversed accumulator, and
// dout_vld goes high once the flip has happened and no shift amount remains.
module hazard5_shift_log_seq #(
	parameter W_DATA = 32,
	parameter W_SHAMT = 5
) (
	input wire               clk,
	input wire               rst_n,

	input wire [W_DATA-1:0]  din,
	input wire               din_vld, // can be asserted at any time, we always respond
	input wire [W_SHAMT-1:0] shamt,
	input wire               right_nleft,
	input wire               arith,
	output reg [W_DATA-1:0]  dout,
	output reg               dout_vld  // last port: no trailing comma allowed here
);

reg [W_DATA-1:0]  accum;
reg [W_DATA-1:0]  accum_next;
reg [W_SHAMT-1:0] shamt_remaining;
reg               flipped;

// Handle actual shifting

// Right shifts flip first; left shifts flip once all shifting is finished
wire flip = !flipped && (right_nleft || ~|shamt_remaining);
wire sext = arith && accum[0]; // "Left arithmetic" shifting

always @ (*) begin: shift_unit
	integer i;
	accum_next = accum;
	// The following is a priority mux tree (honest) which the synthesis tool should balance
	if (din_vld) begin
		accum_next = din;
	end else if (flip) begin
		for (i = 0; i < W_DATA; i = i + 1)
			accum_next[i] = accum[W_DATA - 1 - i];
	end else if (shamt_remaining) begin
		// Smallest shift first
		for (i = 0; i < W_SHAMT; i = i + 1) begin
			// Select bit i only if it is the lowest set bit of shamt_remaining
			if (shamt_remaining[i] && ~|(shamt_remaining & ~({W_SHAMT{1'b1}} << i))) begin
				accum_next = (accum << (1 << i)) |
					({W_DATA{sext}} & ~({W_DATA{1'b1}} << (1 << i)));
			end
		end
	end
end

// No reset on datapath
always @ (posedge clk)
	accum <= accum_next;

// State machine

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		shamt_remaining <= {W_SHAMT{1'b0}};
		flipped <= 1'b0;
	end else if (din_vld) begin
		shamt_remaining <= shamt;
		flipped <= 1'b0;
	end else begin
		if (flip)
			flipped <= 1'b1;
		else
			// x & (x - 1) clears the lowest set bit
			shamt_remaining <= shamt_remaining & {shamt_remaining - 1'b1};
	end
end

always @ (*) begin: connect_output
	// Block-local declarations must come before the first statement
	integer i;
	dout_vld = flipped && ~|shamt_remaining;
	for (i = 0; i < W_DATA; i = i + 1)
		dout[i] = accum[W_DATA - 1 - i];
end

endmodule
|
|
@ -0,0 +1,65 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Quick reference model for sequential unsigned multiply/divide/modulo
|
||||
|
||||
def div_step(w, accum, divisor):
    """Perform one iteration of the sequential divide on the accumulator.

    The divisor, shifted left by ``w - 1`` bits, is trial-subtracted from
    ``accum``. If the difference is non-negative it replaces ``accum``. The
    accumulator is then shifted left by one, and the new bit 0 is 1 if the
    subtraction was taken and 0 otherwise.
    """
    trial = accum - (divisor << (w - 1))
    fits = trial >= 0
    kept = trial if fits else accum
    return (kept << 1) | fits
|
||||
|
||||
def divmod(w, dividend, divisor, debug=True):
    """Run ``w`` divide iterations, starting with the dividend in the accumulator.

    Returns a tuple ``(accum >> w, accum & (2**w - 1))`` of the final
    accumulator, i.e. its upper part and its lower ``w`` bits. ``divtest``
    checks these against the remainder and the quotient respectively.

    With ``debug`` set, the accumulator before and after each step is printed.
    """
    hex_digits = int(w / 2)
    low_mask = (1 << w) - 1
    accum = dividend
    for step in range(w):
        before, accum = accum, div_step(w, accum, divisor)
        if debug:
            print("Step {:02d}: accum {:0{}x} -> {:0{}x}".format(
                step, before, hex_digits, accum, hex_digits))
    return (accum >> w, accum & low_mask)
|
||||
|
||||
def mul_step(w, accum, multiplicand):
    """Perform one iteration of the sequential multiply on the accumulator.

    The accumulator is shifted right by one. If the bit shifted out was set,
    the multiplicand, shifted left by ``w - 1`` bits, is added to the result.
    """
    shifted = accum >> 1
    if accum & 1:
        return shifted + (multiplicand << (w - 1))
    return shifted
|
||||
|
||||
def mul(w, multiplicand, multiplier, debug=True):
    """Run ``w`` multiply iterations, starting with the multiplier in the accumulator.

    Returns a tuple ``(accum >> w, accum & (2**w - 1))`` of the final
    accumulator, i.e. its upper part and its lower ``w`` bits. ``multest``
    checks these against the high and low halves of the product.

    With ``debug`` set, the accumulator before and after each step is printed.
    """
    hex_digits = int(w / 2)
    low_mask = (1 << w) - 1
    accum = multiplier
    for step in range(w):
        before, accum = accum, mul_step(w, accum, multiplicand)
        if debug:
            print("Step {:02d}: accum {:0{}x} -> {:0{}x}".format(
                step, before, hex_digits, accum, hex_digits))
    return (accum >> w, accum & low_mask)
|
||||
|
||||
def divtest(w=4):
    """Exhaustively compare the model's divide against Python's ``%`` and ``//``.

    Covers every ``w``-bit dividend and every nonzero ``w``-bit divisor,
    printing each comparison and asserting that both results match.
    """
    report = "{:02d} % {:02d} = {:02d} (gold {:02d}); ./. = {:02d} (gold {:02d})"
    for dividend in range(2 ** w):
        for divisor in range(1, 2 ** w):  # skip division by zero
            gatemod, gatediv = divmod(w, dividend, divisor, debug=False)
            goldmod = dividend % divisor
            golddiv = dividend // divisor
            print(report.format(dividend, divisor, gatemod, goldmod, gatediv, golddiv))
            assert(gatemod == goldmod)
            assert(gatediv == golddiv)
|
||||
|
||||
def multest(w=4):
    """Exhaustively compare the model's multiply against Python's ``*``.

    Covers every pair of ``w``-bit operands, printing each comparison and
    asserting that both halves of the ``2 * w``-bit product match.
    """
    report = "{:02d} * {:02d} = ({:02d} (gold {:02d}), {:02d} (gold {:02d})"
    low_mask = (1 << w) - 1
    for a in range(2 ** w):
        for b in range(2 ** w):
            gateh, gatel = mul(w, a, b, debug=False)
            product = a * b
            goldl = product & low_mask
            goldh = product >> w
            print(report.format(a, b, gateh, goldh, gatel, goldl))
            assert(gatel == goldl)
            assert(gateh == goldh)
|
||||
|
||||
# When run as a script: exercise the divide model, then the multiply model.
if __name__ == "__main__":
    for banner, run_test in (
        ("Test division:", divtest),
        ("Test multiplication:", multest),
    ):
        print(banner)
        run_test()
|
|
@ -0,0 +1,14 @@
|
|||
file arith/hazard5_alu.v
|
||||
file arith/hazard5_shift_barrel.v
|
||||
file arith/hazard5_priority_encode.v
|
||||
file arith/hazard5_muldiv_seq.v
|
||||
file arith/hazard5_mul_fast.v
|
||||
file hazard5_frontend.v
|
||||
file hazard5_instr_decompress.v
|
||||
file hazard5_decode.v
|
||||
file hazard5_csr.v
|
||||
file hazard5_regfile_1w2r.v
|
||||
file hazard5_core.v
|
||||
file hazard5_cpu_1port.v
|
||||
file hazard5_cpu_2port.v
|
||||
include .
|
|
@ -0,0 +1,64 @@
|
|||
// Hazard5 CPU configuration parameters
|
||||
|
||||
// To configure Hazard5 you can either edit this file, or set parameters on
|
||||
// your top-level instantiation, it's up to you. These parameters are all
|
||||
// plumbed through Hazard5's internal hierarchy to the appropriate places.
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Reset state configuration
|
||||
|
||||
// RESET_VECTOR: Address of first instruction executed.
|
||||
parameter RESET_VECTOR = 32'h0,
|
||||
|
||||
// MTVEC_INIT: Initial value of trap vector base. Bits clear in MTVEC_WMASK
|
||||
// will never change from this initial value. Bits set in MTVEC_WMASK can be
|
||||
// written/set/cleared as normal. Note that, if CSR_M_TRAP is set, MTVEC_INIT
|
||||
// should probably have a different value from RESET_VECTOR.
|
||||
parameter MTVEC_INIT = 32'h00000000,
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RISC-V ISA and CSR support
|
||||
|
||||
// EXTENSION_C: Support for compressed (variable-width) instructions
|
||||
parameter EXTENSION_C = 1,
|
||||
|
||||
// EXTENSION_M: Support for hardware multiply/divide/modulo instructions
|
||||
parameter EXTENSION_M = 1,
|
||||
|
||||
// CSR_M_MANDATORY: Bare minimum CSR support e.g. misa. Spec says must = 1 if
|
||||
// CSRs are present, but I won't tell anyone.
|
||||
parameter CSR_M_MANDATORY = 1,
|
||||
|
||||
// CSR_M_TRAP: Include M-mode trap-handling CSRs, and enable trap support.
|
||||
parameter CSR_M_TRAP = 1,
|
||||
|
||||
// CSR_COUNTER: Include performance counters and relevant M-mode CSRs
|
||||
parameter CSR_COUNTER = 0,
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Performance/size options
|
||||
|
||||
// REDUCED_BYPASS: Remove all forwarding paths except X->X (so back-to-back
|
||||
// ALU ops can still run at 1 CPI), to save area.
|
||||
parameter REDUCED_BYPASS = 0,
|
||||
|
||||
// MULDIV_UNROLL: Bits per clock for multiply/divide circuit, if present. Must
|
||||
// be a power of 2.
|
||||
parameter MULDIV_UNROLL = 1,
|
||||
|
||||
// MUL_FAST: Use single-cycle multiply circuit for MUL instructions, retiring
|
||||
// to stage M. The sequential multiply/divide circuit is still used for
|
||||
// MULH/MULHU/MULHSU.
|
||||
parameter MUL_FAST = 0,
|
||||
|
||||
// MTVEC_WMASK: Mask of which bits in MTVEC are modifiable. Save gates by
|
||||
// making trap vector base partly fixed (legal, as it's WARL). Note the entire
|
||||
// vector table must always be aligned to its size, rounded up to a power of
|
||||
// two, so careful with the low-order bits.
|
||||
parameter MTVEC_WMASK = 32'hfffff000,
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Port size parameters (do not modify)
|
||||
|
||||
parameter W_ADDR = 32, // Do not modify
|
||||
parameter W_DATA = 32 // Do not modify
|
|
@ -0,0 +1,728 @@
|
|||
/**********************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2018 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*********************************************************************/
|
||||
|
||||
module hazard5_core #(
|
||||
`include "hazard5_config.vh"
|
||||
) (
|
||||
// Global signals
|
||||
input wire clk,
|
||||
input wire rst_n,
|
||||
|
||||
`ifdef RISCV_FORMAL
|
||||
`RVFI_OUTPUTS ,
|
||||
`endif
|
||||
|
||||
// Instruction fetch port
|
||||
output wire bus_aph_req_i,
|
||||
output wire bus_aph_panic_i, // e.g. branch mispredict + flush
|
||||
input wire bus_aph_ready_i,
|
||||
input wire bus_dph_ready_i,
|
||||
input wire bus_dph_err_i,
|
||||
|
||||
output wire [2:0] bus_hsize_i,
|
||||
output wire [W_ADDR-1:0] bus_haddr_i,
|
||||
input wire [W_DATA-1:0] bus_rdata_i,
|
||||
|
||||
// Load/store port
|
||||
output reg bus_aph_req_d,
|
||||
input wire bus_aph_ready_d,
|
||||
input wire bus_dph_ready_d,
|
||||
input wire bus_dph_err_d,
|
||||
|
||||
output reg [W_ADDR-1:0] bus_haddr_d,
|
||||
output reg [2:0] bus_hsize_d,
|
||||
output reg bus_hwrite_d,
|
||||
output reg [W_DATA-1:0] bus_wdata_d,
|
||||
input wire [W_DATA-1:0] bus_rdata_d,
|
||||
|
||||
// External level-sensitive interrupt sources (tie 0 if unused)
|
||||
input wire [15:0] irq
|
||||
);
|
||||
|
||||
`include "hazard5_ops.vh"
|
||||
|
||||
// ASSERT(x): check that expression x holds, in whatever form the current
// flow supports. Expands to nothing where no check is available.
`ifdef FORMAL
// Only yosys-smtbmc seems to support immediate assertions
`ifdef RISCV_FORMAL
`define ASSERT(x)
`else
`define ASSERT(x) assert(x)
`endif
`else
// Start from an empty definition, then replace it inside the
// translate_off region with a $display/$finish check.
`define ASSERT(x)
//synthesis translate_off
`undef ASSERT
// The macro argument is parenthesised before negation, so that a compound
// argument such as (a && b) is negated as a whole rather than as (!a && b).
`define ASSERT(x) if (!(x)) begin $display("Assertion failed!"); $finish(1); end
//synthesis translate_on
`endif
|
||||
|
||||
localparam N_REGS = 32;
|
||||
// should be localparam but ISIM can't cope
|
||||
parameter W_REGADDR = $clog2(N_REGS);
|
||||
localparam NOP_INSTR = 32'h13; // addi x0, x0, 0
|
||||
|
||||
wire flush_d_x;
|
||||
|
||||
wire d_stall;
|
||||
wire x_stall;
|
||||
wire m_stall;
|
||||
|
||||
localparam HSIZE_WORD = 3'd2;
|
||||
localparam HSIZE_HWORD = 3'd1;
|
||||
localparam HSIZE_BYTE = 3'd0;
|
||||
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// Pipe Stage F
|
||||
// ============================================================================
|
||||
|
||||
wire m_jump_req;
|
||||
wire [W_ADDR-1:0] m_jump_target;
|
||||
wire d_jump_req;
|
||||
wire [W_ADDR-1:0] d_jump_target;
|
||||
|
||||
wire f_jump_req = d_jump_req || m_jump_req;
|
||||
wire [W_ADDR-1:0] f_jump_target = m_jump_req ? m_jump_target : d_jump_target;
|
||||
wire f_jump_rdy;
|
||||
wire f_jump_now = f_jump_req && f_jump_rdy;
|
||||
|
||||
wire [31:0] fd_cir;
|
||||
wire [1:0] fd_cir_vld;
|
||||
wire [1:0] df_cir_use;
|
||||
wire df_cir_lock;
|
||||
|
||||
assign bus_aph_panic_i = m_jump_req;
|
||||
|
||||
wire f_mem_size;
|
||||
assign bus_hsize_i = f_mem_size ? HSIZE_WORD : HSIZE_HWORD;
|
||||
|
||||
hazard5_frontend #(
|
||||
.EXTENSION_C(EXTENSION_C),
|
||||
.W_ADDR(W_ADDR),
|
||||
.W_DATA(32),
|
||||
.FIFO_DEPTH(2),
|
||||
.RESET_VECTOR(RESET_VECTOR)
|
||||
) frontend (
|
||||
.clk (clk),
|
||||
.rst_n (rst_n),
|
||||
|
||||
.mem_size (f_mem_size),
|
||||
.mem_addr (bus_haddr_i),
|
||||
.mem_addr_vld (bus_aph_req_i),
|
||||
.mem_addr_rdy (bus_aph_ready_i),
|
||||
|
||||
.mem_data (bus_rdata_i),
|
||||
.mem_data_vld (bus_dph_ready_i),
|
||||
|
||||
.jump_target (f_jump_target),
|
||||
.jump_target_vld (f_jump_req),
|
||||
.jump_target_rdy (f_jump_rdy),
|
||||
|
||||
.cir (fd_cir),
|
||||
.cir_vld (fd_cir_vld),
|
||||
.cir_use (df_cir_use),
|
||||
.cir_lock (df_cir_lock)
|
||||
);
|
||||
|
||||
assign flush_d_x = m_jump_req && f_jump_rdy;
|
||||
|
||||
// ============================================================================
|
||||
// Pipe Stage D
|
||||
// ============================================================================
|
||||
|
||||
// X-check on pieces of instruction which frontend claims are valid
|
||||
//synthesis translate_off
|
||||
always @ (posedge clk) begin
|
||||
if (rst_n) begin
|
||||
if (|fd_cir_vld && (^fd_cir[15:0] === 1'bx)) begin
|
||||
$display("CIR LSBs are X, should be valid!");
|
||||
$finish;
|
||||
end
|
||||
if (fd_cir_vld[1] && (^fd_cir === 1'bX)) begin
|
||||
$display("CIR contains X, should be fully valid!");
|
||||
$finish;
|
||||
end
|
||||
end
|
||||
end
|
||||
//synthesis translate_on
|
||||
|
||||
wire [W_ADDR-1:0] d_pc; // FIXME only used for riscv-formal
|
||||
|
||||
// To register file
|
||||
wire [W_REGADDR-1:0] d_rs1;
|
||||
wire [W_REGADDR-1:0] d_rs2;
|
||||
|
||||
// To X
|
||||
wire [W_DATA-1:0] dx_imm;
|
||||
wire [W_REGADDR-1:0] dx_rs1;
|
||||
wire [W_REGADDR-1:0] dx_rs2;
|
||||
wire [W_REGADDR-1:0] dx_rd;
|
||||
wire [W_ALUSRC-1:0] dx_alusrc_a;
|
||||
wire [W_ALUSRC-1:0] dx_alusrc_b;
|
||||
wire [W_ALUOP-1:0] dx_aluop;
|
||||
wire [W_MEMOP-1:0] dx_memop;
|
||||
wire [W_MULOP-1:0] dx_mulop;
|
||||
wire [W_BCOND-1:0] dx_branchcond;
|
||||
wire [W_ADDR-1:0] dx_jump_target;
|
||||
wire dx_jump_is_regoffs;
|
||||
wire dx_result_is_linkaddr;
|
||||
wire [W_ADDR-1:0] dx_pc;
|
||||
wire [W_ADDR-1:0] dx_mispredict_addr;
|
||||
wire [W_EXCEPT-1:0] dx_except;
|
||||
wire dx_csr_ren;
|
||||
wire dx_csr_wen;
|
||||
wire [1:0] dx_csr_wtype;
|
||||
wire dx_csr_w_imm;
|
||||
|
||||
hazard5_decode #(
|
||||
.EXTENSION_C (EXTENSION_C),
|
||||
.EXTENSION_M (EXTENSION_M),
|
||||
.HAVE_CSR (CSR_M_MANDATORY || CSR_M_TRAP || CSR_COUNTER),
|
||||
.W_ADDR (W_ADDR),
|
||||
.W_DATA (W_DATA),
|
||||
.RESET_VECTOR (RESET_VECTOR),
|
||||
.W_REGADDR (W_REGADDR)
|
||||
) inst_hazard5_decode (
|
||||
.clk (clk),
|
||||
.rst_n (rst_n),
|
||||
|
||||
.fd_cir (fd_cir),
|
||||
.fd_cir_vld (fd_cir_vld),
|
||||
.df_cir_use (df_cir_use),
|
||||
.df_cir_lock (df_cir_lock),
|
||||
.d_jump_req (d_jump_req),
|
||||
.d_jump_target (d_jump_target),
|
||||
.d_pc (d_pc),
|
||||
|
||||
.d_stall (d_stall),
|
||||
.x_stall (x_stall),
|
||||
.flush_d_x (flush_d_x),
|
||||
.f_jump_rdy (f_jump_rdy),
|
||||
.f_jump_now (f_jump_now),
|
||||
.f_jump_target (f_jump_target),
|
||||
|
||||
.d_rs1 (d_rs1),
|
||||
.d_rs2 (d_rs2),
|
||||
.dx_imm (dx_imm),
|
||||
.dx_rs1 (dx_rs1),
|
||||
.dx_rs2 (dx_rs2),
|
||||
.dx_rd (dx_rd),
|
||||
.dx_alusrc_a (dx_alusrc_a),
|
||||
.dx_alusrc_b (dx_alusrc_b),
|
||||
.dx_aluop (dx_aluop),
|
||||
.dx_memop (dx_memop),
|
||||
.dx_mulop (dx_mulop),
|
||||
.dx_csr_ren (dx_csr_ren),
|
||||
.dx_csr_wen (dx_csr_wen),
|
||||
.dx_csr_wtype (dx_csr_wtype),
|
||||
.dx_csr_w_imm (dx_csr_w_imm),
|
||||
.dx_branchcond (dx_branchcond),
|
||||
.dx_jump_target (dx_jump_target),
|
||||
.dx_jump_is_regoffs (dx_jump_is_regoffs),
|
||||
.dx_result_is_linkaddr (dx_result_is_linkaddr),
|
||||
.dx_pc (dx_pc),
|
||||
.dx_mispredict_addr (dx_mispredict_addr),
|
||||
.dx_except (dx_except)
|
||||
);
|
||||
|
||||
// ============================================================================
|
||||
// Pipe Stage X
|
||||
// ============================================================================
|
||||
|
||||
// Register the write which took place to the regfile on previous cycle, and bypass.
|
||||
// This is an alternative to a write -> read bypass in the regfile,
|
||||
// which we can't implement whilst maintaining BRAM inference compatibility (iCE40).
|
||||
reg [W_REGADDR-1:0] mw_rd;
|
||||
reg [W_DATA-1:0] mw_result;
|
||||
|
||||
// From register file:
|
||||
wire [W_DATA-1:0] dx_rdata1;
|
||||
wire [W_DATA-1:0] dx_rdata2;
|
||||
|
||||
// Combinational regs for muxing
|
||||
reg [W_DATA-1:0] x_rs1_bypass;
|
||||
reg [W_DATA-1:0] x_rs2_bypass;
|
||||
reg [W_DATA-1:0] x_op_a;
|
||||
reg [W_DATA-1:0] x_op_b;
|
||||
wire [W_DATA-1:0] x_alu_result;
|
||||
wire [W_DATA-1:0] x_alu_add;
|
||||
wire x_alu_cmp;
|
||||
|
||||
wire [W_DATA-1:0] x_trap_addr;
|
||||
wire [W_DATA-1:0] x_mepc;
|
||||
wire x_trap_enter;
|
||||
wire x_trap_exit;
|
||||
|
||||
reg [W_REGADDR-1:0] xm_rs1;
|
||||
reg [W_REGADDR-1:0] xm_rs2;
|
||||
reg [W_REGADDR-1:0] xm_rd;
|
||||
reg [W_DATA-1:0] xm_result;
|
||||
reg [W_ADDR-1:0] xm_jump_target;
|
||||
reg [W_DATA-1:0] xm_store_data;
|
||||
reg xm_jump;
|
||||
reg [W_MEMOP-1:0] xm_memop;
|
||||
|
||||
// For JALR, the LSB of the result must be cleared by hardware
|
||||
wire [W_ADDR-1:0] x_taken_jump_target = dx_jump_is_regoffs ? x_alu_add & ~32'h1 : dx_jump_target;
|
||||
wire [W_ADDR-1:0] x_jump_target =
|
||||
x_trap_exit ? x_mepc : // Note precedence -- it's possible to have enter && exit, but in this case enter_rdy is false.
|
||||
x_trap_enter ? x_trap_addr :
|
||||
dx_imm[31] && dx_branchcond != BCOND_ALWAYS ? dx_mispredict_addr :
|
||||
x_taken_jump_target;
|
||||
|
||||
reg x_stall_raw;
|
||||
wire x_stall_muldiv;
|
||||
|
||||
assign x_stall = m_stall ||
|
||||
x_stall_raw || x_stall_muldiv || bus_aph_req_d && !bus_aph_ready_d;
|
||||
|
||||
wire m_fast_mul_result_vld;
|
||||
wire m_generating_result = xm_memop < MEMOP_SW || m_fast_mul_result_vld;
|
||||
|
||||
// Load-use hazard detection
|
||||
always @ (*) begin
|
||||
x_stall_raw = 1'b0;
|
||||
if (REDUCED_BYPASS) begin
|
||||
x_stall_raw =
|
||||
|xm_rd && (xm_rd == dx_rs1 || xm_rd == dx_rs2) ||
|
||||
|mw_rd && (mw_rd == dx_rs1 || mw_rd == dx_rs2);
|
||||
end else if (m_generating_result) begin
|
||||
// With the full bypass network, load-use (or fast multiply-use) is the only RAW stall
|
||||
if (|xm_rd && xm_rd == dx_rs1) begin
|
||||
// Store addresses cannot be bypassed later, so there is no exception here.
|
||||
x_stall_raw = 1'b1;
|
||||
end else if (|xm_rd && xm_rd == dx_rs2) begin
|
||||
// Store data can be bypassed in M. Any other instructions must stall.
|
||||
x_stall_raw = !(dx_memop == MEMOP_SW || dx_memop == MEMOP_SH || dx_memop == MEMOP_SB);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// AHB transaction request
|
||||
|
||||
wire x_memop_vld = !dx_memop[3];
|
||||
wire x_memop_write = dx_memop == MEMOP_SW || dx_memop == MEMOP_SH || dx_memop == MEMOP_SB;
|
||||
wire x_unaligned_addr =
|
||||
bus_hsize_d == HSIZE_WORD && |bus_haddr_d[1:0] ||
|
||||
bus_hsize_d == HSIZE_HWORD && bus_haddr_d[0];
|
||||
|
||||
wire x_except_load_misaligned = x_memop_vld && x_unaligned_addr && !x_memop_write;
|
||||
wire x_except_store_misaligned = x_memop_vld && x_unaligned_addr && x_memop_write;
|
||||
|
||||
always @ (*) begin
|
||||
// Need to be careful not to use anything hready-sourced to gate htrans!
|
||||
bus_haddr_d = x_alu_add;
|
||||
bus_hwrite_d = x_memop_write;
|
||||
case (dx_memop)
|
||||
MEMOP_LW: bus_hsize_d = HSIZE_WORD;
|
||||
MEMOP_SW: bus_hsize_d = HSIZE_WORD;
|
||||
MEMOP_LH: bus_hsize_d = HSIZE_HWORD;
|
||||
MEMOP_LHU: bus_hsize_d = HSIZE_HWORD;
|
||||
MEMOP_SH: bus_hsize_d = HSIZE_HWORD;
|
||||
default: bus_hsize_d = HSIZE_BYTE;
|
||||
endcase
|
||||
// m_jump_req implies flush_d_x is coming. Can't use flush_d_x because it's
|
||||
// possible for a mispredicted load/store to go through whilst a late jump
|
||||
// request is stalled, if there are two bus masters.
|
||||
bus_aph_req_d = x_memop_vld && !(x_stall_raw || m_jump_req || x_trap_enter);
|
||||
end
|
||||
|
||||
// ALU operand muxes and bypass
|
||||
always @ (*) begin
|
||||
if (~|dx_rs1) begin
|
||||
x_rs1_bypass = {W_DATA{1'b0}};
|
||||
end else if (xm_rd == dx_rs1) begin
|
||||
x_rs1_bypass = xm_result;
|
||||
end else if (mw_rd == dx_rs1 && !REDUCED_BYPASS) begin
|
||||
x_rs1_bypass = mw_result;
|
||||
end else begin
|
||||
x_rs1_bypass = dx_rdata1;
|
||||
end
|
||||
if (~|dx_rs2) begin
|
||||
x_rs2_bypass = {W_DATA{1'b0}};
|
||||
end else if (xm_rd == dx_rs2) begin
|
||||
x_rs2_bypass = xm_result;
|
||||
end else if (mw_rd == dx_rs2 && !REDUCED_BYPASS) begin
|
||||
x_rs2_bypass = mw_result;
|
||||
end else begin
|
||||
x_rs2_bypass = dx_rdata2;
|
||||
end
|
||||
|
||||
if (|dx_alusrc_a)
|
||||
x_op_a = dx_pc;
|
||||
else
|
||||
x_op_a = x_rs1_bypass;
|
||||
|
||||
if (|dx_alusrc_b)
|
||||
x_op_b = dx_imm;
|
||||
else
|
||||
x_op_b = x_rs2_bypass;
|
||||
end
|
||||
|
||||
// CSRs and Trap Handling
|
||||
|
||||
wire x_except_ecall = dx_except == EXCEPT_ECALL;
|
||||
wire x_except_breakpoint = dx_except == EXCEPT_EBREAK;
|
||||
wire x_except_invalid_instr = dx_except == EXCEPT_INSTR_ILLEGAL;
|
||||
assign x_trap_exit = dx_except == EXCEPT_MRET && !(x_stall || m_jump_req);
|
||||
wire x_trap_enter_rdy = !(x_stall || m_jump_req || x_trap_exit);
|
||||
wire x_trap_is_exception; // diagnostic
|
||||
|
||||
`ifdef FORMAL
|
||||
always @ (posedge clk) begin
|
||||
if (flush_d_x)
|
||||
assert(!x_trap_enter_rdy);
|
||||
if (x_trap_exit)
|
||||
assert(!bus_aph_req_d);
|
||||
end
|
||||
`endif
|
||||
|
||||
// CSR write data. When dx_csr_w_imm is set, the rs1 field itself is used as
// the data, zero-extended to W_DATA bits; otherwise the bypassed rs1 register
// value is used. The padding width is derived from W_REGADDR (the declared
// width of dx_rs1) so the concatenation is always exactly W_DATA bits wide.
wire [W_DATA-1:0] x_csr_wdata = dx_csr_w_imm ?
	{{W_DATA-W_REGADDR{1'b0}}, dx_rs1} : x_rs1_bypass;
|
||||
|
||||
wire [W_DATA-1:0] x_csr_rdata;
|
||||
|
||||
hazard5_csr #(
|
||||
.XLEN (W_DATA),
|
||||
.CSR_M_MANDATORY (CSR_M_MANDATORY),
|
||||
.CSR_M_TRAP (CSR_M_TRAP),
|
||||
.CSR_COUNTER (CSR_COUNTER),
|
||||
.EXTENSION_C (EXTENSION_C),
|
||||
.EXTENSION_M (EXTENSION_M),
|
||||
.MTVEC_WMASK (MTVEC_WMASK),
|
||||
.MTVEC_INIT (MTVEC_INIT)
|
||||
) inst_hazard5_csr (
|
||||
.clk (clk),
|
||||
.rst_n (rst_n),
|
||||
// CSR access port
|
||||
// *en_soon are early access strobes which are not a function of bus stall.
|
||||
// Can generate access faults (hence traps), but do not actually perform access.
|
||||
.addr (dx_imm[11:0]),
|
||||
.wdata (x_csr_wdata),
|
||||
.wen_soon (dx_csr_wen),
|
||||
.wen (dx_csr_wen && !(x_stall || flush_d_x)),
|
||||
.wtype (dx_csr_wtype),
|
||||
.rdata (x_csr_rdata),
|
||||
.ren_soon (dx_csr_ren),
|
||||
.ren (dx_csr_ren && !(x_stall || flush_d_x)),
|
||||
// Trap signalling
|
||||
.trap_addr (x_trap_addr),
|
||||
.trap_enter_vld (x_trap_enter),
|
||||
.trap_enter_rdy (x_trap_enter_rdy),
|
||||
.trap_exit (x_trap_exit),
|
||||
.trap_is_exception (x_trap_is_exception),
|
||||
.mepc_in (dx_pc),
|
||||
.mepc_out (x_mepc),
|
||||
// IRQ and exception requests
|
||||
.irq (irq),
|
||||
.except_instr_misaligned (1'b0), // TODO
|
||||
.except_instr_fault (1'b0), // TODO
|
||||
.except_instr_invalid (x_except_invalid_instr),
|
||||
.except_breakpoint (x_except_breakpoint),
|
||||
.except_load_misaligned (x_except_load_misaligned),
|
||||
.except_load_fault (1'b0), // TODO
|
||||
.except_store_misaligned (x_except_store_misaligned),
|
||||
.except_store_fault (1'b0), // TODO
|
||||
.except_ecall (x_except_ecall),
|
||||
// Other CSR-specific signalling
|
||||
.instr_ret (1'b0) // TODO
|
||||
);
|
||||
|
||||
// Multiply/divide
|
||||
|
||||
wire [W_DATA-1:0] x_muldiv_result;
|
||||
wire [W_DATA-1:0] m_fast_mul_result;
|
||||
|
||||
generate
|
||||
if (EXTENSION_M) begin: has_muldiv
|
||||
wire x_muldiv_op_vld;
|
||||
wire x_muldiv_op_rdy;
|
||||
wire x_muldiv_result_vld;
|
||||
wire [W_DATA-1:0] x_muldiv_result_h;
|
||||
wire [W_DATA-1:0] x_muldiv_result_l;
|
||||
|
||||
reg x_muldiv_posted;
|
||||
always @ (posedge clk or negedge rst_n)
|
||||
if (!rst_n)
|
||||
x_muldiv_posted <= 1'b0;
|
||||
else
|
||||
x_muldiv_posted <= (x_muldiv_posted || (x_muldiv_op_vld && x_muldiv_op_rdy)) && x_stall;
|
||||
|
||||
wire x_muldiv_kill = flush_d_x || x_trap_enter; // TODO this takes an extra cycle to kill muldiv before trap entry
|
||||
|
||||
wire x_use_fast_mul = MUL_FAST && dx_aluop == ALUOP_MULDIV && dx_mulop == M_OP_MUL;
|
||||
|
||||
assign x_muldiv_op_vld = (dx_aluop == ALUOP_MULDIV && !x_use_fast_mul)
|
||||
&& !(x_muldiv_posted || x_stall_raw || x_muldiv_kill);
|
||||
|
||||
hazard5_muldiv_seq #(
|
||||
.XLEN (W_DATA),
|
||||
.UNROLL (MULDIV_UNROLL)
|
||||
) muldiv (
|
||||
.clk (clk),
|
||||
.rst_n (rst_n),
|
||||
.op (dx_mulop),
|
||||
.op_vld (x_muldiv_op_vld),
|
||||
.op_rdy (x_muldiv_op_rdy),
|
||||
.op_kill (x_muldiv_kill),
|
||||
.op_a (x_rs1_bypass),
|
||||
.op_b (x_rs2_bypass),
|
||||
|
||||
.result_h (x_muldiv_result_h),
|
||||
.result_l (x_muldiv_result_l),
|
||||
.result_vld (x_muldiv_result_vld)
|
||||
);
|
||||
|
||||
// TODO fusion of MULHx->MUL and DIVy->REMy sequences
|
||||
wire x_muldiv_result_is_high =
|
||||
dx_mulop == M_OP_MULH ||
|
||||
dx_mulop == M_OP_MULHSU ||
|
||||
dx_mulop == M_OP_MULHU ||
|
||||
dx_mulop == M_OP_REM ||
|
||||
dx_mulop == M_OP_REMU;
|
||||
assign x_muldiv_result = x_muldiv_result_is_high ? x_muldiv_result_h : x_muldiv_result_l;
|
||||
assign x_stall_muldiv = x_muldiv_op_vld || !x_muldiv_result_vld;
|
||||
|
||||
if (MUL_FAST) begin: has_fast_mul
|
||||
|
||||
wire x_issue_fast_mul = x_use_fast_mul && |dx_rd && !(x_stall || flush_d_x);
|
||||
|
||||
hazard5_mul_fast #(
|
||||
.XLEN(W_DATA)
|
||||
) inst_hazard5_mul_fast (
|
||||
.clk (clk),
|
||||
.rst_n (rst_n),
|
||||
|
||||
.op_a (x_rs1_bypass),
|
||||
.op_b (x_rs2_bypass),
|
||||
.op_vld (x_issue_fast_mul),
|
||||
|
||||
.result (m_fast_mul_result),
|
||||
.result_vld (m_fast_mul_result_vld)
|
||||
);
|
||||
|
||||
end else begin: no_fast_mul
|
||||
|
||||
assign m_fast_mul_result = {W_DATA{1'b0}};
|
||||
assign m_fast_mul_result_vld = 1'b0;
|
||||
|
||||
end
|
||||
|
||||
`ifdef FORMAL
|
||||
always @ (posedge clk) if (dx_aluop != ALUOP_MULDIV) assert(!x_stall_muldiv);
|
||||
`endif
|
||||
|
||||
end else begin: no_muldiv
|
||||
|
||||
assign x_muldiv_result = {W_DATA{1'b0}};
|
||||
assign m_fast_mul_result = {W_DATA{1'b0}};
|
||||
assign m_fast_mul_result_vld = 1'b0;
|
||||
assign x_stall_muldiv = 1'b0;
|
||||
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// State machine and branch detection
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
xm_jump <= 1'b0;
|
||||
xm_memop <= MEMOP_NONE;
|
||||
{xm_rs1, xm_rs2, xm_rd} <= {3 * W_REGADDR{1'b0}};
|
||||
end else begin
|
||||
// TODO: this assertion may become untrue depending on how we handle exceptions/IRQs when stalled?
|
||||
//`ASSERT(!(m_stall && flush_d_x));// bubble insertion logic below is broken otherwise
|
||||
if (!m_stall) begin
|
||||
{xm_rs1, xm_rs2, xm_rd} <= {dx_rs1, dx_rs2, dx_rd};
|
||||
// If the transfer is unaligned, make sure it is completely NOP'd on the bus
|
||||
xm_memop <= dx_memop | {x_unaligned_addr, 3'h0};
|
||||
if (x_stall || flush_d_x || x_trap_enter) begin
|
||||
// Insert bubble
|
||||
xm_rd <= {W_REGADDR{1'b0}};
|
||||
xm_jump <= 1'b0;
|
||||
xm_memop <= MEMOP_NONE;
|
||||
end
|
||||
if (!(x_stall || flush_d_x)) begin
|
||||
case (dx_branchcond)
|
||||
BCOND_ALWAYS: xm_jump <= 1'b1;
|
||||
// For branches, we are either taking a branch late, or recovering from
|
||||
// an incorrectly taken branch, depending on sign of branch offset.
|
||||
BCOND_ZERO: xm_jump <= !x_alu_cmp ^ dx_imm[31];
|
||||
BCOND_NZERO: xm_jump <= x_alu_cmp ^ dx_imm[31];
|
||||
default xm_jump <= 1'b0;
|
||||
endcase
|
||||
if (x_trap_enter || x_trap_exit)
|
||||
xm_jump <= 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// No reset on datapath flops
|
||||
always @ (posedge clk)
|
||||
if (!m_stall) begin
|
||||
xm_result <=
|
||||
dx_result_is_linkaddr ? dx_mispredict_addr :
|
||||
dx_csr_ren ? x_csr_rdata :
|
||||
EXTENSION_M && dx_aluop == ALUOP_MULDIV ? x_muldiv_result :
|
||||
x_alu_result;
|
||||
xm_store_data <= x_rs2_bypass;
|
||||
xm_jump_target <= x_jump_target;
|
||||
end
|
||||
|
||||
hazard5_alu alu (
|
||||
.aluop (dx_aluop),
|
||||
.op_a (x_op_a),
|
||||
.op_b (x_op_b),
|
||||
.result (x_alu_result),
|
||||
.result_add (x_alu_add),
|
||||
.cmp (x_alu_cmp)
|
||||
);
|
||||
|
||||
// ============================================================================
|
||||
// Pipe Stage M
|
||||
// ============================================================================
|
||||
|
||||
reg [W_DATA-1:0] m_rdata_shift;
|
||||
reg [W_DATA-1:0] m_wdata;
|
||||
reg [W_DATA-1:0] m_result;
|
||||
assign m_jump_req = xm_jump;
|
||||
assign m_jump_target = xm_jump_target;
|
||||
|
||||
assign m_stall = (!xm_memop[3] && !bus_dph_ready_d) || (m_jump_req && !f_jump_rdy);
|
||||
|
||||
wire m_except_bus_fault = bus_dph_err_d; // TODO: handle differently for LSU/ifetch?
|
||||
|
||||
always @ (*) begin
|
||||
// Local forwarding of store data
|
||||
if (|mw_rd && xm_rs2 == mw_rd && !REDUCED_BYPASS) begin
|
||||
m_wdata = mw_result;
|
||||
end else begin
|
||||
m_wdata = xm_store_data;
|
||||
end
|
||||
// Replicate store data to ensure appropriate byte lane is driven
|
||||
case (xm_memop)
|
||||
MEMOP_SW: bus_wdata_d = m_wdata;
|
||||
MEMOP_SH: bus_wdata_d = {2{m_wdata[15:0]}};
|
||||
MEMOP_SB: bus_wdata_d = {4{m_wdata[7:0]}};
|
||||
default: bus_wdata_d = 32'h0;
|
||||
endcase
|
||||
// Pick out correct data from load access, and sign/unsign extend it.
|
||||
// This is slightly cheaper than a normal shift:
|
||||
case (xm_result[1:0])
|
||||
2'b00: m_rdata_shift = bus_rdata_d;
|
||||
2'b01: m_rdata_shift = {bus_rdata_d[31:8], bus_rdata_d[15:8]};
|
||||
2'b10: m_rdata_shift = {bus_rdata_d[31:16], bus_rdata_d[31:16]};
|
||||
2'b11: m_rdata_shift = {bus_rdata_d[31:8], bus_rdata_d[31:24]};
|
||||
endcase
|
||||
|
||||
case (xm_memop)
|
||||
MEMOP_LW: m_result = m_rdata_shift;
|
||||
MEMOP_LH: m_result = {{16{m_rdata_shift[15]}}, m_rdata_shift[15:0]};
|
||||
MEMOP_LHU: m_result = {16'h0, m_rdata_shift[15:0]};
|
||||
MEMOP_LB: m_result = {{24{m_rdata_shift[7]}}, m_rdata_shift[7:0]};
|
||||
MEMOP_LBU: m_result = {24'h0, m_rdata_shift[7:0]};
|
||||
default: begin
|
||||
if (MUL_FAST && m_fast_mul_result_vld) begin
|
||||
m_result = m_fast_mul_result;
|
||||
end else begin
|
||||
m_result = xm_result;
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mw_rd <= {W_REGADDR{1'b0}};
|
||||
end else if (!m_stall) begin
|
||||
//synthesis translate_off
|
||||
// TODO: proper exception support
|
||||
if (m_except_bus_fault) begin
|
||||
$display("Bus fault!");
|
||||
$finish;
|
||||
end
|
||||
if (^bus_wdata_d === 1'bX) begin
|
||||
$display("Writing Xs to memory!");
|
||||
$finish;
|
||||
end
|
||||
//synthesis translate_on
|
||||
mw_rd <= xm_rd;
|
||||
end
|
||||
end
|
||||
|
||||
// No need to reset result register, as reset on mw_rd protects register file from it
|
||||
always @ (posedge clk)
|
||||
if (!m_stall)
|
||||
mw_result <= m_result;
|
||||
|
||||
// ============================================================================
|
||||
// Pipe Stage W
|
||||
// ============================================================================
|
||||
|
||||
// mw_result and mw_rd register the most recent write to the register file,
|
||||
// so that X can bypass them in.
|
||||
|
||||
wire w_reg_wen = |xm_rd && !m_stall;
|
||||
|
||||
//synthesis translate_off
|
||||
always @ (posedge clk) begin
|
||||
if (rst_n) begin
|
||||
if (w_reg_wen && (^m_result === 1'bX)) begin
|
||||
$display("Writing X to register file!");
|
||||
$finish;
|
||||
end
|
||||
end
|
||||
end
|
||||
//synthesis translate_on
|
||||
|
||||
hazard5_regfile_1w2r #(
|
||||
.FAKE_DUALPORT(0),
|
||||
`ifdef SIM
|
||||
.RESET_REGS(1),
|
||||
`elsif FORMAL
|
||||
.RESET_REGS(1),
|
||||
`else
|
||||
.RESET_REGS(0),
|
||||
`endif
|
||||
.N_REGS(N_REGS),
|
||||
.W_DATA(W_DATA)
|
||||
) inst_regfile_1w2r (
|
||||
.clk (clk),
|
||||
.rst_n (rst_n),
|
||||
// On stall, we feed X's addresses back into regfile
|
||||
// so that output does not change.
|
||||
.raddr1 (x_stall ? dx_rs1 : d_rs1),
|
||||
.rdata1 (dx_rdata1),
|
||||
.raddr2 (x_stall ? dx_rs2 : d_rs2),
|
||||
.rdata2 (dx_rdata2),
|
||||
|
||||
.waddr (xm_rd),
|
||||
.wdata (m_result),
|
||||
.wen (w_reg_wen)
|
||||
);
|
||||
|
||||
`ifdef RISCV_FORMAL
|
||||
`include "hazard5_rvfi_monitor.vh"
|
||||
`endif
|
||||
|
||||
`ifdef HAZARD5_FORMAL_REGRESSION
|
||||
// Each formal regression provides its own file with the below name:
|
||||
`include "hazard5_formal_regression.vh"
|
||||
`endif
|
||||
|
||||
endmodule
|
|
@ -0,0 +1,213 @@
|
|||
/**********************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2021 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*********************************************************************/
|
||||
|
||||
// Single-ported top level file for Hazard5 CPU. This file instantiates the
|
||||
// Hazard5 core, and arbitrates its instruction fetch and load/store signals
|
||||
// down to a single AHB-Lite master port.
|
||||
|
||||
module hazard5_cpu_1port #(
|
||||
`include "hazard5_config.vh"
|
||||
) (
|
||||
// Global signals
|
||||
input wire clk,
|
||||
input wire rst_n,
|
||||
|
||||
`ifdef RISCV_FORMAL
|
||||
`RVFI_OUTPUTS ,
|
||||
`endif
|
||||
|
||||
// AHB-lite Master port
|
||||
output reg [W_ADDR-1:0] ahblm_haddr,
|
||||
output reg ahblm_hwrite,
|
||||
output reg [1:0] ahblm_htrans,
|
||||
output reg [2:0] ahblm_hsize,
|
||||
output wire [2:0] ahblm_hburst,
|
||||
output reg [3:0] ahblm_hprot,
|
||||
output wire ahblm_hmastlock,
|
||||
input wire ahblm_hready,
|
||||
input wire ahblm_hresp,
|
||||
output wire [W_DATA-1:0] ahblm_hwdata,
|
||||
input wire [W_DATA-1:0] ahblm_hrdata,
|
||||
|
||||
// External level-sensitive interrupt sources (tie 0 if unused)
|
||||
input wire [15:0] irq
|
||||
);
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Processor core
|
||||
|
||||
// Instruction fetch signals
|
||||
wire core_aph_req_i;
|
||||
wire core_aph_panic_i;
|
||||
wire core_aph_ready_i;
|
||||
wire core_dph_ready_i;
|
||||
wire core_dph_err_i;
|
||||
|
||||
wire [2:0] core_hsize_i;
|
||||
wire [W_ADDR-1:0] core_haddr_i;
|
||||
wire [W_DATA-1:0] core_rdata_i;
|
||||
|
||||
|
||||
// Load/store signals
|
||||
wire core_aph_req_d;
|
||||
wire core_aph_ready_d;
|
||||
wire core_dph_ready_d;
|
||||
wire core_dph_err_d;
|
||||
|
||||
wire [W_ADDR-1:0] core_haddr_d;
|
||||
wire [2:0] core_hsize_d;
|
||||
wire core_hwrite_d;
|
||||
wire [W_DATA-1:0] core_wdata_d;
|
||||
wire [W_DATA-1:0] core_rdata_d;
|
||||
|
||||
|
||||
hazard5_core #(
|
||||
.RESET_VECTOR (RESET_VECTOR),
|
||||
.EXTENSION_C (EXTENSION_C),
|
||||
.EXTENSION_M (EXTENSION_M),
|
||||
.MULDIV_UNROLL (MULDIV_UNROLL),
|
||||
.MUL_FAST (MUL_FAST),
|
||||
.CSR_M_MANDATORY (CSR_M_MANDATORY),
|
||||
.CSR_M_TRAP (CSR_M_TRAP),
|
||||
.CSR_COUNTER (CSR_COUNTER),
|
||||
.MTVEC_WMASK (MTVEC_WMASK),
|
||||
.MTVEC_INIT (MTVEC_INIT),
|
||||
.REDUCED_BYPASS (REDUCED_BYPASS)
|
||||
) core (
|
||||
.clk (clk),
|
||||
.rst_n (rst_n),
|
||||
|
||||
`ifdef RISCV_FORMAL
|
||||
`RVFI_CONN ,
|
||||
`endif
|
||||
|
||||
.bus_aph_req_i (core_aph_req_i),
|
||||
.bus_aph_panic_i (core_aph_panic_i),
|
||||
.bus_aph_ready_i (core_aph_ready_i),
|
||||
.bus_dph_ready_i (core_dph_ready_i),
|
||||
.bus_dph_err_i (core_dph_err_i),
|
||||
.bus_hsize_i (core_hsize_i),
|
||||
.bus_haddr_i (core_haddr_i),
|
||||
.bus_rdata_i (core_rdata_i),
|
||||
|
||||
.bus_aph_req_d (core_aph_req_d),
|
||||
.bus_aph_ready_d (core_aph_ready_d),
|
||||
.bus_dph_ready_d (core_dph_ready_d),
|
||||
.bus_dph_err_d (core_dph_err_d),
|
||||
.bus_haddr_d (core_haddr_d),
|
||||
.bus_hsize_d (core_hsize_d),
|
||||
.bus_hwrite_d (core_hwrite_d),
|
||||
.bus_wdata_d (core_wdata_d),
|
||||
.bus_rdata_d (core_rdata_d),
|
||||
|
||||
.irq (irq)
|
||||
);
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Arbitration state machine
|
||||
|
||||
wire bus_gnt_i;
|
||||
wire bus_gnt_d;
|
||||
|
||||
reg bus_hold_aph;
|
||||
reg [1:0] bus_gnt_id_prev;
|
||||
|
||||
// Address-phase hold flag: high in the cycle after an active transfer
// (HTRANS[1] set) was presented while HREADY was low.
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n)
		bus_hold_aph <= 1'b0;
	else
		bus_hold_aph <= ahblm_htrans[1] && !ahblm_hready;
end

// Grant issued on the previous cycle, stored as {instruction, data}.
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n)
		bus_gnt_id_prev <= 2'b00;
	else
		bus_gnt_id_prev <= {bus_gnt_i, bus_gnt_d};
end
|
||||
|
||||
// Bus grant for this cycle, encoded as {instruction fetch, load/store}.
// At most one bit is set by the literal cases below. Priority, highest first:
//   1. bus_hold_aph set: repeat the grant registered on the previous cycle
//      (bus_hold_aph is the registered value of an active address phase
//      that saw HREADY low).
//   2. core_aph_panic_i: grant instruction fetch, ahead of load/store.
//   3. core_aph_req_d:   grant load/store.
//   4. core_aph_req_i:   grant instruction fetch.
//   5. otherwise:        no grant.
assign {bus_gnt_i, bus_gnt_d} =
|
||||
bus_hold_aph ? bus_gnt_id_prev :
|
||||
core_aph_panic_i ? 2'b10 :
|
||||
core_aph_req_d ? 2'b01 :
|
||||
core_aph_req_i ? 2'b10 :
|
||||
2'b00 ;
|
||||
|
||||
// Keep track of whether instr/data access is active in AHB dataphase.
|
||||
reg bus_active_dph_i;
|
||||
reg bus_active_dph_d;
|
||||
|
||||
// Data-phase ownership flags: whenever HREADY is high, the current grant
// pair is captured into the flags; while HREADY is low they hold their value.
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n)
		{bus_active_dph_i, bus_active_dph_d} <= 2'b00;
	else if (ahblm_hready)
		{bus_active_dph_i, bus_active_dph_d} <= {bus_gnt_i, bus_gnt_d};
end
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Address phase request muxing
|
||||
|
||||
localparam HTRANS_IDLE = 2'b00;
|
||||
localparam HTRANS_NSEQ = 2'b10;
|
||||
|
||||
// Noncacheable nonbufferable privileged data/instr:
|
||||
localparam HPROT_DATA = 4'b0011;
|
||||
localparam HPROT_INSTR = 4'b0010;
|
||||
|
||||
assign ahblm_hburst = 3'b000; // HBURST_SINGLE
|
||||
assign ahblm_hmastlock = 1'b0;
|
||||
|
||||
always @ (*) begin
|
||||
if (bus_gnt_d) begin
|
||||
ahblm_htrans = HTRANS_NSEQ;
|
||||
ahblm_haddr = core_haddr_d;
|
||||
ahblm_hsize = core_hsize_d;
|
||||
ahblm_hwrite = core_hwrite_d;
|
||||
ahblm_hprot = HPROT_DATA;
|
||||
end else if (bus_gnt_i) begin
|
||||
ahblm_htrans = HTRANS_NSEQ;
|
||||
ahblm_haddr = core_haddr_i;
|
||||
ahblm_hsize = core_hsize_i;
|
||||
ahblm_hwrite = 1'b0;
|
||||
ahblm_hprot = HPROT_INSTR;
|
||||
end else begin
|
||||
ahblm_htrans = HTRANS_IDLE;
|
||||
ahblm_haddr = {W_ADDR{1'b0}};
|
||||
ahblm_hsize = 3'h0;
|
||||
ahblm_hwrite = 1'b0;
|
||||
ahblm_hprot = 4'h0;
|
||||
end
|
||||
end
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Response routing
|
||||
|
||||
// Data buses directly connected
|
||||
assign core_rdata_d = ahblm_hrdata;
|
||||
assign core_rdata_i = ahblm_hrdata;
|
||||
assign ahblm_hwdata = core_wdata_d;
|
||||
|
||||
// Handshake based on grant and bus stall
|
||||
assign core_aph_ready_i = ahblm_hready && bus_gnt_i;
|
||||
assign core_dph_ready_i = ahblm_hready && bus_active_dph_i;
|
||||
assign core_dph_err_i = ahblm_hready && bus_active_dph_i && ahblm_hresp;
|
||||
|
||||
assign core_aph_ready_d = ahblm_hready && bus_gnt_d;
|
||||
assign core_dph_ready_d = ahblm_hready && bus_active_dph_d;
|
||||
assign core_dph_err_d = ahblm_hready && bus_active_dph_d && ahblm_hresp;
|
||||
|
||||
endmodule
|
|
@ -0,0 +1,188 @@
|
|||
/**********************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2021 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*********************************************************************/
|
||||
|
||||
// Dual-ported top level file for Hazard5 CPU. This file instantiates the
|
||||
// Hazard5 core, and interfaces its instruction fetch and load/store signals
|
||||
// to a pair of AHB-Lite master ports.
|
||||
|
||||
module hazard5_cpu_2port #(
`include "hazard5_config.vh"
) (
	// Global signals
	input  wire              clk,
	input  wire              rst_n,

	`ifdef RISCV_FORMAL
	`RVFI_OUTPUTS ,
	`endif

	// Instruction fetch port
	output wire [W_ADDR-1:0] i_haddr,
	output wire              i_hwrite,
	output wire [1:0]        i_htrans,
	output wire [2:0]        i_hsize,
	output wire [2:0]        i_hburst,
	output wire [3:0]        i_hprot,
	output wire              i_hmastlock,
	input  wire              i_hready,
	input  wire              i_hresp,
	output wire [W_DATA-1:0] i_hwdata,
	input  wire [W_DATA-1:0] i_hrdata,

	// Load/store port
	output wire [W_ADDR-1:0] d_haddr,
	output wire              d_hwrite,
	output wire [1:0]        d_htrans,
	output wire [2:0]        d_hsize,
	output wire [2:0]        d_hburst,
	output wire [3:0]        d_hprot,
	output wire              d_hmastlock,
	input  wire              d_hready,
	input  wire              d_hresp,
	output wire [W_DATA-1:0] d_hwdata,
	input  wire [W_DATA-1:0] d_hrdata,

	// External level-sensitive interrupt sources (tie 0 if unused)
	input  wire [15:0]       irq
);

// ----------------------------------------------------------------------------
// Processor core

// Instruction fetch signals
wire              core_aph_req_i;
wire              core_aph_panic_i; // unused as there's no arbitration
wire              core_aph_ready_i;
wire              core_dph_ready_i;
wire              core_dph_err_i;

wire [2:0]        core_hsize_i;
wire [W_ADDR-1:0] core_haddr_i;
wire [W_DATA-1:0] core_rdata_i;


// Load/store signals
wire              core_aph_req_d;
wire              core_aph_ready_d;
wire              core_dph_ready_d;
wire              core_dph_err_d;

wire [W_ADDR-1:0] core_haddr_d;
wire [2:0]        core_hsize_d;
wire              core_hwrite_d;
wire [W_DATA-1:0] core_wdata_d;
wire [W_DATA-1:0] core_rdata_d;


hazard5_core #(
	.RESET_VECTOR    (RESET_VECTOR),
	.EXTENSION_C     (EXTENSION_C),
	.EXTENSION_M     (EXTENSION_M),
	.MULDIV_UNROLL   (MULDIV_UNROLL),
	.MUL_FAST        (MUL_FAST),
	.CSR_M_MANDATORY (CSR_M_MANDATORY),
	.CSR_M_TRAP      (CSR_M_TRAP),
	.CSR_COUNTER     (CSR_COUNTER),
	.MTVEC_WMASK     (MTVEC_WMASK),
	.MTVEC_INIT      (MTVEC_INIT),
	.REDUCED_BYPASS  (REDUCED_BYPASS)
) core (
	.clk             (clk),
	.rst_n           (rst_n),

	`ifdef RISCV_FORMAL
	`RVFI_CONN ,
	`endif

	.bus_aph_req_i   (core_aph_req_i),
	.bus_aph_panic_i (core_aph_panic_i),
	.bus_aph_ready_i (core_aph_ready_i),
	.bus_dph_ready_i (core_dph_ready_i),
	.bus_dph_err_i   (core_dph_err_i),
	.bus_hsize_i     (core_hsize_i),
	.bus_haddr_i     (core_haddr_i),
	.bus_rdata_i     (core_rdata_i),

	.bus_aph_req_d   (core_aph_req_d),
	.bus_aph_ready_d (core_aph_ready_d),
	.bus_dph_ready_d (core_dph_ready_d),
	.bus_dph_err_d   (core_dph_err_d),
	.bus_haddr_d     (core_haddr_d),
	.bus_hsize_d     (core_hsize_d),
	.bus_hwrite_d    (core_hwrite_d),
	.bus_wdata_d     (core_wdata_d),
	.bus_rdata_d     (core_rdata_d),

	.irq             (irq)
);

// ----------------------------------------------------------------------------
// Bus constants shared by both ports

localparam HTRANS_IDLE = 2'b00;
localparam HTRANS_NSEQ = 2'b10;

// Noncacheable nonbufferable privileged data/instr. These match the values
// driven by the single-ported wrapper, so a slave sees the same HPROT for a
// given access type regardless of which wrapper is used. Bit 0 is the only
// difference between the two: set for data, clear for instruction fetch.
localparam HPROT_DATA  = 4'b0011;
localparam HPROT_INSTR = 4'b0010;

// ----------------------------------------------------------------------------
// Instruction port

// Address phase: the core's request maps directly onto a single NSEQ transfer.
assign i_haddr  = core_haddr_i;
assign i_htrans = core_aph_req_i ? HTRANS_NSEQ : HTRANS_IDLE;
assign i_hsize  = core_hsize_i;

// Keep track of whether an instruction access is active in the AHB data
// phase. Only updates when the bus is ready, so the flag is held across
// stalled cycles.
reg dphase_active_i;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		dphase_active_i <= 1'b0;
	end else if (i_hready) begin
		dphase_active_i <= core_aph_req_i;
	end
end

// Handshake back to the core, qualified by bus stall
assign core_aph_ready_i = i_hready && core_aph_req_i;
assign core_dph_ready_i = i_hready && dphase_active_i;
assign core_dph_err_i   = i_hready && dphase_active_i && i_hresp;

assign core_rdata_i = i_hrdata;

// The fetch port never writes, so the write-side signals are tied off.
assign i_hwrite    = 1'b0;
assign i_hburst    = 3'h0;
assign i_hprot     = HPROT_INSTR;
assign i_hmastlock = 1'b0;
assign i_hwdata    = {W_DATA{1'b0}};

// ----------------------------------------------------------------------------
// Load/store port

// Address phase: the core's request maps directly onto a single NSEQ transfer.
assign d_haddr  = core_haddr_d;
assign d_htrans = core_aph_req_d ? HTRANS_NSEQ : HTRANS_IDLE;
assign d_hwrite = core_hwrite_d;
assign d_hsize  = core_hsize_d;

// Keep track of whether a load/store access is active in the AHB data phase.
// Only updates when the bus is ready, so the flag is held across stalled
// cycles.
reg dphase_active_d;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		dphase_active_d <= 1'b0;
	end else if (d_hready) begin
		dphase_active_d <= core_aph_req_d;
	end
end

// Handshake back to the core, qualified by bus stall
assign core_aph_ready_d = d_hready && core_aph_req_d;
assign core_dph_ready_d = d_hready && dphase_active_d;
assign core_dph_err_d   = d_hready && dphase_active_d && d_hresp;

assign core_rdata_d = d_hrdata;
assign d_hwdata     = core_wdata_d;

assign d_hburst    = 3'h0;
// Load/store transfers are data accesses. This was previously driven with
// the instruction-fetch encoding (4'b0010), which disagreed with the
// single-ported wrapper's HPROT_DATA.
assign d_hprot     = HPROT_DATA;
assign d_hmastlock = 1'b0;

endmodule
|
|
@ -0,0 +1,778 @@
|
|||
/******************************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2019 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*****************************************************************************/
|
||||
|
||||
// Control and Status Registers (CSRs)
|
||||
// Also includes CSR-related logic like interrupt enable/masking,
|
||||
// trap vector calculation.
|
||||
|
||||
module hazard5_csr #(
|
||||
parameter XLEN = 32, // Must be 32
|
||||
parameter CSR_M_MANDATORY = 1, // Include mandatory M-mode CSRs e.g. misa, marchid
|
||||
parameter CSR_M_TRAP = 1, // Include M-mode trap setup/handling CSRs
|
||||
parameter CSR_COUNTER = 1, // Include counter/timer CSRs
|
||||
parameter EXTENSION_C = 0, // For misa
|
||||
parameter EXTENSION_M = 0, // For misa
|
||||
parameter MTVEC_WMASK = 32'hfffff000, // Save gates by making trap vector base partially fixed (legal, as it's WARL)
|
||||
parameter MTVEC_INIT = 32'h0,// Initial value of trap vector base
|
||||
parameter W_COUNTER = 64 // This *should* be 64, but can be reduced to save gates.
|
||||
// The full 64 bits is writeable, so high-word increment can
|
||||
// be implemented in software, and a narrower hw counter used
|
||||
) (
|
||||
input wire clk,
|
||||
input wire rst_n,
|
||||
|
||||
// Read port is combinatorial.
|
||||
// Write port is synchronous, and write effects will be observed on the next clock cycle.
|
||||
// The *_soon strobes are versions which the core does not gate with its stall signal.
|
||||
// These are needed because:
|
||||
// - Core stall is a function of bus stall
|
||||
// - Illegal CSR accesses produce trap entry
|
||||
// - Trap entry (not necessarily caused by CSR access) gates outgoing bus accesses
|
||||
// - Through-paths from e.g. hready to htrans are problematic for timing/implementation
|
||||
input wire [11:0] addr,
|
||||
input wire [XLEN-1:0] wdata,
|
||||
input wire wen,
|
||||
input wire wen_soon, // wen will be asserted once some stall condition clears
|
||||
input wire [1:0] wtype,
|
||||
output reg [XLEN-1:0] rdata,
|
||||
input wire ren,
|
||||
input wire ren_soon, // ren will be asserted once some stall condition clears
|
||||
|
||||
// Trap signalling
|
||||
// *We* tell the core that we are taking a trap, and where to, based on:
|
||||
// - Synchronous exception inputs from the core
|
||||
// - External IRQ signals
|
||||
// - Masking etc based on the state of CSRs like mie
|
||||
//
|
||||
// We do this by raising trap_enter_vld, and keeping it raised until trap_enter_rdy
|
||||
// goes high. trap_addr has the absolute value of trap target address.
|
||||
// Once trap_enter_vld && _rdy, mepc_in is copied to mepc, and other trap state is set.
|
||||
//
|
||||
// Note that an exception input can go away, e.g. if the pipe gets flushed. In this
|
||||
// case we lower trap_enter_vld.
|
||||
//
|
||||
// The core tells *us* that we are leaving the trap, by putting a 1-clock pulse on
|
||||
// trap_exit. The core will simultaneously produce a jump (specifically a mispredict)
|
||||
// to mepc_out.
|
||||
output wire [XLEN-1:0] trap_addr,
|
||||
output wire trap_enter_vld,
|
||||
input wire trap_enter_rdy,
|
||||
input wire trap_exit,
|
||||
output wire trap_is_exception, // diagnostic
|
||||
input wire [XLEN-1:0] mepc_in,
|
||||
output wire [XLEN-1:0] mepc_out,
|
||||
|
||||
// Exceptions must *not* be a function of bus stall.
|
||||
input wire [15:0] irq,
|
||||
input wire except_instr_misaligned,
|
||||
input wire except_instr_fault,
|
||||
input wire except_instr_invalid,
|
||||
input wire except_breakpoint,
|
||||
input wire except_load_misaligned,
|
||||
input wire except_load_fault,
|
||||
input wire except_store_misaligned,
|
||||
input wire except_store_fault,
|
||||
input wire except_ecall,
|
||||
|
||||
// Other CSR-specific signalling
|
||||
input wire instr_ret
|
||||
);
|
||||
|
||||
// TODO block CSR access when entering trap?
|
||||
|
||||
`include "hazard5_ops.vh"
|
||||
|
||||
localparam X0 = {XLEN{1'b0}};
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// List of M-mode CSRs (we implement a configurable subset of M-mode).
|
||||
// ----------------------------------------------------------------------------
|
||||
// The CSR block is the only piece of hardware which needs to know this mapping.
|
||||
|
||||
// Machine Information Registers (RO)
|
||||
localparam MVENDORID = 12'hf11; // Vendor ID.
|
||||
localparam MARCHID = 12'hf12; // Architecture ID.
|
||||
localparam MIMPID = 12'hf13; // Implementation ID.
|
||||
localparam MHARTID = 12'hf14; // Hardware thread ID.
|
||||
|
||||
// Machine Trap Setup (RW)
|
||||
localparam MSTATUS = 12'h300; // Machine status register.
|
||||
localparam MISA = 12'h301; // ISA and extensions
|
||||
localparam MEDELEG = 12'h302; // Machine exception delegation register.
|
||||
localparam MIDELEG = 12'h303; // Machine interrupt delegation register.
|
||||
localparam MIE = 12'h304; // Machine interrupt-enable register.
|
||||
localparam MTVEC = 12'h305; // Machine trap-handler base address.
|
||||
localparam MCOUNTEREN = 12'h306; // Machine counter enable.
|
||||
|
||||
// Machine Trap Handling (RW)
|
||||
localparam MSCRATCH = 12'h340; // Scratch register for machine trap handlers.
|
||||
localparam MEPC = 12'h341; // Machine exception program counter.
|
||||
localparam MCAUSE = 12'h342; // Machine trap cause.
|
||||
localparam MTVAL = 12'h343; // Machine bad address or instruction.
|
||||
localparam MIP = 12'h344; // Machine interrupt pending.
|
||||
|
||||
// Machine Memory Protection (RW)
|
||||
localparam PMPCFG0 = 12'h3a0; // Physical memory protection configuration.
|
||||
localparam PMPCFG1 = 12'h3a1; // Physical memory protection configuration, RV32 only.
|
||||
localparam PMPCFG2 = 12'h3a2; // Physical memory protection configuration.
|
||||
localparam PMPCFG3 = 12'h3a3; // Physical memory protection configuration, RV32 only.
|
||||
localparam PMPADDR0 = 12'h3b0; // Physical memory protection address register.
|
||||
localparam PMPADDR1 = 12'h3b1; // Physical memory protection address register.
|
||||
|
||||
// Performance counters (RW)
|
||||
localparam MCYCLE = 12'hb00; // Raw cycles since start of day
|
||||
localparam MTIME = 12'hb01; // "Wall clock", can be aliased to MCYCLE
|
||||
localparam MINSTRET = 12'hb02; // Instruction retire count since start of day
|
||||
localparam MHPMCOUNTER3 = 12'hb03; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER4 = 12'hb04; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER5 = 12'hb05; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER6 = 12'hb06; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER7 = 12'hb07; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER8 = 12'hb08; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER9 = 12'hb09; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER10 = 12'hb0a; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER11 = 12'hb0b; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER12 = 12'hb0c; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER13 = 12'hb0d; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER14 = 12'hb0e; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER15 = 12'hb0f; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER16 = 12'hb10; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER17 = 12'hb11; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER18 = 12'hb12; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER19 = 12'hb13; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER20 = 12'hb14; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER21 = 12'hb15; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER22 = 12'hb16; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER23 = 12'hb17; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER24 = 12'hb18; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER25 = 12'hb19; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER26 = 12'hb1a; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER27 = 12'hb1b; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER28 = 12'hb1c; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER29 = 12'hb1d; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER30 = 12'hb1e; // WARL (we tie to 0)
|
||||
localparam MHPMCOUNTER31 = 12'hb1f; // WARL (we tie to 0)
|
||||
|
||||
localparam MCYCLEH = 12'hb80; // High halves of each counter
|
||||
localparam MTIMEH = 12'hb81;
|
||||
localparam MINSTRETH = 12'hb82;
|
||||
localparam MHPMCOUNTER3H = 12'hb83;
|
||||
localparam MHPMCOUNTER4H = 12'hb84;
|
||||
localparam MHPMCOUNTER5H = 12'hb85;
|
||||
localparam MHPMCOUNTER6H = 12'hb86;
|
||||
localparam MHPMCOUNTER7H = 12'hb87;
|
||||
localparam MHPMCOUNTER8H = 12'hb88;
|
||||
localparam MHPMCOUNTER9H = 12'hb89;
|
||||
localparam MHPMCOUNTER10H = 12'hb8a;
|
||||
localparam MHPMCOUNTER11H = 12'hb8b;
|
||||
localparam MHPMCOUNTER12H = 12'hb8c;
|
||||
localparam MHPMCOUNTER13H = 12'hb8d;
|
||||
localparam MHPMCOUNTER14H = 12'hb8e;
|
||||
localparam MHPMCOUNTER15H = 12'hb8f;
|
||||
localparam MHPMCOUNTER16H = 12'hb90;
|
||||
localparam MHPMCOUNTER17H = 12'hb91;
|
||||
localparam MHPMCOUNTER18H = 12'hb92;
|
||||
localparam MHPMCOUNTER19H = 12'hb93;
|
||||
localparam MHPMCOUNTER20H = 12'hb94;
|
||||
localparam MHPMCOUNTER21H = 12'hb95;
|
||||
localparam MHPMCOUNTER22H = 12'hb96;
|
||||
localparam MHPMCOUNTER23H = 12'hb97;
|
||||
localparam MHPMCOUNTER24H = 12'hb98;
|
||||
localparam MHPMCOUNTER25H = 12'hb99;
|
||||
localparam MHPMCOUNTER26H = 12'hb9a;
|
||||
localparam MHPMCOUNTER27H = 12'hb9b;
|
||||
localparam MHPMCOUNTER28H = 12'hb9c;
|
||||
localparam MHPMCOUNTER29H = 12'hb9d;
|
||||
localparam MHPMCOUNTER30H = 12'hb9e;
|
||||
localparam MHPMCOUNTER31H = 12'hb9f;
|
||||
|
||||
localparam MCOUNTINHIBIT = 12'h302; // WARL (we must tie 0 as CYCLE and TIME are aliased)
|
||||
localparam MHPMEVENT3 = 12'h323; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT4 = 12'h324; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT5 = 12'h325; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT6 = 12'h326; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT7 = 12'h327; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT8 = 12'h328; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT9 = 12'h329; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT10 = 12'h32a; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT11 = 12'h32b; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT12 = 12'h32c; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT13 = 12'h32d; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT14 = 12'h32e; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT15 = 12'h32f; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT16 = 12'h330; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT17 = 12'h331; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT18 = 12'h332; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT19 = 12'h333; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT20 = 12'h334; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT21 = 12'h335; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT22 = 12'h336; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT23 = 12'h337; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT24 = 12'h338; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT25 = 12'h339; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT26 = 12'h33a; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT27 = 12'h33b; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT28 = 12'h33c; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT29 = 12'h33d; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT30 = 12'h33e; // WARL (we tie to 0)
|
||||
localparam MHPMEVENT31 = 12'h33f; // WARL (we tie to 0)
|
||||
|
||||
// TODO
|
||||
// Decoding all these damn HPMs bloats the logic. If we don't decode them, we
|
||||
// can still trap the illegal opcode and emulate them. This is ugly and
|
||||
// contravenes the standard, but why on earth would they mandate 100 useless
|
||||
// registers with no defined operation?
|
||||
// If you really want them, set this to 1:
|
||||
localparam DECODE_HPM = 0;
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// CSR state + update logic
|
||||
// ----------------------------------------------------------------------------
|
||||
// Names are (reg)_(field)
|
||||
|
||||
// Generic update logic for write/set/clear of an entire CSR:
|
||||
function [XLEN-1:0] update;
|
||||
input [XLEN-1:0] prev;
|
||||
begin
|
||||
update =
|
||||
wtype == CSR_WTYPE_C ? prev & ~wdata :
|
||||
wtype == CSR_WTYPE_S ? prev | wdata :
|
||||
wdata;
|
||||
end
|
||||
endfunction
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Trap-handling
|
||||
|
||||
// Two-level interrupt enable stack, shuffled on entry/exit:
|
||||
reg mstatus_mpie;
|
||||
reg mstatus_mie;
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mstatus_mpie <= 1'b0;
|
||||
mstatus_mie <= 1'b0;
|
||||
end else if (CSR_M_TRAP) begin
|
||||
if (trap_enter_vld && trap_enter_rdy) begin
|
||||
mstatus_mpie <= mstatus_mie;
|
||||
mstatus_mie <= 1'b0;
|
||||
end else if (trap_exit) begin
|
||||
mstatus_mpie <= 1'b1;
|
||||
mstatus_mie <= mstatus_mpie;
|
||||
end else if (wen && addr == MSTATUS) begin
|
||||
{mstatus_mpie, mstatus_mie} <=
|
||||
wtype == CSR_WTYPE_C ? {mstatus_mpie, mstatus_mie} & ~{wdata[7], wdata[3]} :
|
||||
wtype == CSR_WTYPE_S ? {mstatus_mpie, mstatus_mie} | {wdata[7], wdata[3]} :
|
||||
{wdata[7], wdata[3]} ;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg [XLEN-1:0] mscratch;
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mscratch <= X0;
|
||||
end else if (CSR_M_TRAP) begin
|
||||
if (wen && addr == MSCRATCH)
|
||||
mscratch <= update(mscratch);
|
||||
end
|
||||
end
|
||||
|
||||
// Trap vector base
|
||||
reg [XLEN-1:0] mtvec_reg;
|
||||
wire [XLEN-1:0] mtvec = (mtvec_reg & MTVEC_WMASK) | (MTVEC_INIT & ~MTVEC_WMASK);
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mtvec_reg <= MTVEC_INIT;
|
||||
end else if (CSR_M_TRAP) begin
|
||||
if (wen && addr == MTVEC)
|
||||
mtvec_reg <= update(mtvec_reg);
|
||||
end
|
||||
end
|
||||
|
||||
// Exception program counter
|
||||
reg [XLEN-1:0] mepc;
|
||||
assign mepc_out = mepc;
|
||||
// LSB is always 0
|
||||
localparam MEPC_MASK = {{XLEN-1{1'b1}}, 1'b0};
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mepc <= X0;
|
||||
end else if (CSR_M_TRAP) begin
|
||||
if (trap_enter_vld && trap_enter_rdy) begin
|
||||
mepc <= mepc_in & MEPC_MASK;
|
||||
end else if (wen && addr == MEPC) begin
|
||||
mepc <= update(mepc) & MEPC_MASK;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Interrupt enable (reserved bits are tied to 0)
|
||||
reg [XLEN-1:0] mie;
|
||||
localparam MIE_CONST_MASK = 32'h0000f777;
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mie <= X0;
|
||||
end else if (CSR_M_TRAP) begin
|
||||
if (wen && addr == MIE)
|
||||
mie <= update(mie) & ~MIE_CONST_MASK;
|
||||
end
|
||||
end
|
||||
|
||||
wire [15:0] mie_irq = mie[31:16]; // Per-IRQ mask. Nonstandard, but legal.
|
||||
wire mie_meie = mie[11]; // Global external IRQ enable. This is ANDed over our per-IRQ mask
|
||||
wire mie_mtie = mie[7]; // Timer interrupt enable
|
||||
wire mie_msie = mie[3]; // Software interrupt enable
|
||||
|
||||
// Interrupt status ("pending") register, handled later
|
||||
wire [XLEN-1:0] mip;
|
||||
// None of the bits we implement are directly writeable.
|
||||
// MSIP is only writeable by a "platform-defined" mechanism, and we don't implement
|
||||
// one!
|
||||
|
||||
// Trap cause registers. The non-constant bits can be written by software,
|
||||
// and update automatically on trap entry. (bits 30:0 are WLRL, so we tie most off)
|
||||
reg mcause_irq;
|
||||
reg [4:0] mcause_code;
|
||||
wire mcause_irq_next;
|
||||
wire [4:0] mcause_code_next;
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mcause_irq <= 1'b0;
|
||||
mcause_code <= 5'h0;
|
||||
end else if (CSR_M_TRAP) begin
|
||||
if (trap_enter_vld && trap_enter_rdy) begin
|
||||
mcause_irq <= mcause_irq_next;
|
||||
mcause_code <= mcause_code_next;
|
||||
end else if (wen && addr == MCAUSE) begin
|
||||
{mcause_irq, mcause_code} <=
|
||||
wtype == CSR_WTYPE_C ? {mcause_irq, mcause_code} & ~{wdata[31], wdata[4:0]} :
|
||||
wtype == CSR_WTYPE_S ? {mcause_irq, mcause_code} | {wdata[31], wdata[4:0]} :
|
||||
{wdata[31], wdata[4:0]} ;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Counters
|
||||
// MCYCLE and MTIME are aliased (fine as long as MCOUNTINHIBIT[0] is tied low)
|
||||
reg [XLEN-1:0] mcycleh;
|
||||
reg [XLEN-1:0] mcycle;
|
||||
reg [XLEN-1:0] minstreth;
|
||||
reg [XLEN-1:0] minstret;
|
||||
|
||||
wire [XLEN-1:0] ctr_update = update(
|
||||
{addr[7], addr[1]} == 2'b00 ? mcycle :
|
||||
{addr[7], addr[1]} == 2'b01 ? minstret :
|
||||
{addr[7], addr[1]} == 2'b10 ? mcycleh :
|
||||
minstreth
|
||||
);
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mcycleh <= X0;
|
||||
mcycle <= X0;
|
||||
minstreth <= X0;
|
||||
minstret <= X0;
|
||||
end else if (CSR_COUNTER) begin
|
||||
// Hold the top (2 * XLEN - W_COUNTER) bits constant to save gates:
|
||||
{mcycleh, mcycle} <= (({mcycleh, mcycle} + 1'b1) & ~({2*XLEN{1'b1}} << W_COUNTER))
|
||||
| ({mcycleh, mcycle} & ({2*XLEN{1'b1}} << W_COUNTER));
|
||||
if (instr_ret)
|
||||
{minstreth, minstret} <= (({minstreth, minstret} + 1'b1) & ~({2*XLEN{1'b1}} << W_COUNTER))
|
||||
| ({minstreth, minstret} & ({2*XLEN{1'b1}} << W_COUNTER));
|
||||
if (wen) begin
|
||||
if (addr == MCYCLEH)
|
||||
mcycleh <= ctr_update;
|
||||
if (addr == MCYCLE)
|
||||
mcycle <= ctr_update;
|
||||
if (addr == MINSTRETH)
|
||||
minstreth <= ctr_update;
|
||||
if (addr == MINSTRET)
|
||||
minstret <= ctr_update;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Read port + detect addressing of unmapped CSRs
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
reg decode_match;
|
||||
|
||||
always @ (*) begin
|
||||
decode_match = 1'b0;
|
||||
rdata = {XLEN{1'b0}};
|
||||
case (addr)
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Mandatory CSRs
|
||||
|
||||
MISA: if (CSR_M_MANDATORY) begin
|
||||
// WARL, so it is legal to be tied constant
|
||||
decode_match = 1'b1;
|
||||
rdata = {
|
||||
2'h1, // MXL: 32-bit
|
||||
{XLEN-28{1'b0}}, // WLRL
|
||||
|
||||
13'd0, // Z...N, no
|
||||
|EXTENSION_M,
|
||||
3'd0, // L...J, no
|
||||
1'b1, // Integer ISA
|
||||
5'd0, // H...D, no
|
||||
|EXTENSION_C,
|
||||
2'b0
|
||||
};
|
||||
end
|
||||
MVENDORID: if (CSR_M_MANDATORY) begin
|
||||
decode_match = !wen_soon; // MRO
|
||||
// I don't have a JEDEC ID. It is legal to tie this to 0 if non-commercial.
|
||||
rdata = {XLEN{1'b0}};
|
||||
end
|
||||
MARCHID: if (CSR_M_MANDATORY) begin
|
||||
decode_match = !wen_soon; // MRO
|
||||
// I don't have a RV foundation ID. It is legal to tie this to 0.
|
||||
rdata = {XLEN{1'b0}};
|
||||
end
|
||||
MIMPID: if (CSR_M_MANDATORY) begin
|
||||
decode_match = !wen_soon; // MRO
|
||||
// TODO put git SHA or something here
|
||||
rdata = {XLEN{1'b0}};
|
||||
end
|
||||
MHARTID: if (CSR_M_MANDATORY) begin
|
||||
decode_match = !wen_soon; // MRO
|
||||
// There is only one hart, and spec says this must be numbered 0.
|
||||
rdata = {XLEN{1'b0}};
|
||||
end
|
||||
|
||||
MSTATUS: if (CSR_M_MANDATORY || CSR_M_TRAP) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = {
|
||||
1'b0, // Never any dirty state besides GPRs
|
||||
8'd0, // (WPRI)
|
||||
1'b0, // TSR (Trap SRET), tied 0 if no S mode.
|
||||
1'b0, // TW (Timeout Wait), tied 0 if only M mode.
|
||||
1'b0, // TVM (trap virtual memory), tied 0 if no S mode.
|
||||
1'b0, // MXR (Make eXecutable Readable), tied 0 if not S mode.
|
||||
1'b0, // SUM, tied 0, we have no S or U mode
|
||||
1'b0, // MPRV (modify privilege), tied 0 if no U mode
|
||||
4'd0, // XS, FS always "off" (no extension state to clear!)
|
||||
2'b11, // MPP (M-mode previous privilege), we are always M-mode
|
||||
2'd0, // (WPRI)
|
||||
1'b0, // SPP, tied 0 if S mode not supported
|
||||
mstatus_mpie,
|
||||
3'd0, // No S, U
|
||||
mstatus_mie,
|
||||
3'd0 // No S, U
|
||||
};
|
||||
end
|
||||
|
||||
// MEDELEG, MIDELEG should not exist for M-only implementations. Will raise
|
||||
// illegal instruction exception if accessed.
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Trap-handling CSRs
|
||||
|
||||
// TODO bit of a hack but this is a 32 bit synthesised register with
|
||||
// set/clear/write/read, don't turn it on unless we really have to
|
||||
MSCRATCH: if (CSR_M_TRAP && CSR_M_MANDATORY) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = mscratch;
|
||||
end
|
||||
|
||||
MEPC: if (CSR_M_TRAP) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = mepc;
|
||||
end
|
||||
|
||||
MCAUSE: if (CSR_M_TRAP) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = {
|
||||
mcause_irq, // Sign bit is 1 for IRQ, 0 for exception
|
||||
{26{1'b0}}, // Padding
|
||||
mcause_code[4:0] // Enough for 16 external IRQs, which is all we have room for in mip/mie
|
||||
};
|
||||
end
|
||||
|
||||
MTVAL: if (CSR_M_TRAP) begin
|
||||
decode_match = 1'b1;
|
||||
// Hardwired to 0
|
||||
end
|
||||
|
||||
MIE: if (CSR_M_TRAP) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = mie;
|
||||
end
|
||||
|
||||
MIP: if (CSR_M_TRAP) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = mip;
|
||||
end
|
||||
|
||||
MTVEC: if (CSR_M_TRAP) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = {
|
||||
mtvec[XLEN-1:2], // BASE
|
||||
2'h1 // MODE = Vectored (Direct is useless, and we don't have CLIC)
|
||||
};
|
||||
end
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Counter CSRs
|
||||
|
||||
// Get the tied WARLs out the way first
|
||||
MHPMCOUNTER3: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER4: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER5: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER6: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER7: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER8: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER9: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER10: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER11: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER12: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER13: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER14: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER15: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER16: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER17: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER18: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER19: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER20: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER21: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER22: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER23: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER24: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER25: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER26: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER27: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER28: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER29: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER30: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER31: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
|
||||
MHPMCOUNTER3H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER4H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER5H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER6H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER7H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER8H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER9H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER10H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER11H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER12H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER13H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER14H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER15H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER16H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER17H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER18H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER19H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER20H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER21H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER22H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER23H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER24H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER25H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER26H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER27H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER28H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER29H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER30H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMCOUNTER31H: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
|
||||
MHPMEVENT3: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT4: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT5: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT6: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT7: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT8: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT9: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT10: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT11: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT12: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT13: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT14: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT15: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT16: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT17: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT18: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT19: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT20: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT21: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT22: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT23: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT24: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT25: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT26: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT27: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT28: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT29: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT30: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
MHPMEVENT31: if (DECODE_HPM && CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
|
||||
MCOUNTINHIBIT: if (CSR_COUNTER) begin decode_match = 1'b1; end
|
||||
// Phew...
|
||||
|
||||
MCYCLE: if (CSR_COUNTER) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = mcycle;
|
||||
end
|
||||
MTIME: if (CSR_COUNTER) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = mcycle; // Can be aliased as long as we tie MCOUNTINHIBIT[0] to 0
|
||||
end
|
||||
MINSTRET: if (CSR_COUNTER) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = minstret;
|
||||
end
|
||||
|
||||
MCYCLEH: if (CSR_COUNTER) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = mcycleh;
|
||||
end
|
||||
MTIMEH: if (CSR_COUNTER) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = mcycleh; // Can be aliased as long as we tie MCOUNTINHIBIT[0] to 0
|
||||
end
|
||||
MINSTRETH: if (CSR_COUNTER) begin
|
||||
decode_match = 1'b1;
|
||||
rdata = minstreth;
|
||||
end
|
||||
|
||||
default: begin end
|
||||
endcase
|
||||
end
|
||||
|
||||
// Flag an access error when a CSR read or write is upcoming but the address
// decode above did not match any implemented CSR.
wire csr_access_error = !decode_match && (ren_soon || wen_soon);
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Trap request generation
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// Track whether we are currently inside a trap; exception nesting is not
// permitted.
// TODO lockup condition?
reg in_trap;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		in_trap <= 1'b0;
	end else begin
		// Set when a trap entry is accepted (vld && rdy), hold while set,
		// and clear on trap exit. Exit wins if both occur together.
		in_trap <= !trap_exit && (in_trap || (trap_enter_vld && trap_enter_rdy));
	end
end
|
||||
|
||||
// Exception selection

// One request bit per exception source. Most-significant is lowest priority.
// FIXME: this is different from the priority order given in the spec, but will get us off the ground
wire [15:0] exception_req;

assign exception_req[0]     = except_instr_misaligned;
assign exception_req[1]     = except_instr_fault;
assign exception_req[2]     = except_instr_invalid || csr_access_error;
assign exception_req[3]     = except_breakpoint;
assign exception_req[4]     = except_load_misaligned;
assign exception_req[5]     = except_load_fault;
assign exception_req[6]     = except_store_misaligned;
assign exception_req[7]     = except_store_fault;
assign exception_req[10:8]  = 3'h0; // nonimplemented privileges
assign exception_req[11]    = except_ecall;
assign exception_req[15:12] = 4'h0; // reserved by spec

// Exceptions are ignored while already in a trap (no nesting).
wire       exception_req_any = !in_trap && (|exception_req);
wire [3:0] exception_req_num;

// Reduce the request vector to a single request number.
hazard5_priority_encode #(
	.W_REQ (16)
) except_priority (
	.req (exception_req),
	.gnt (exception_req_num)
);
|
||||
|
||||
// Interrupt masking and selection
|
||||
|
||||
// Registered copy of the 16 incoming IRQ lines, cleared by reset.
reg [15:0] irq_r;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		irq_r <= {16{1'b0}};
	end else begin
		irq_r <= irq;
	end
end
|
||||
|
||||
// Interrupt-pending value, built from the registered IRQ lines.
assign mip = {
	irq_r,   // [31:16] Our nonstandard bits for per-IRQ status
	4'h0,    // [15:12] Reserved
	|irq_r,  // [11]    Global pending bit for external IRQs
	11'h000  // [10:0]  All tied to 0: reserved bits, timer at [7] (FIXME)
	         //         and software interrupt at [3]
};
|
||||
|
||||
// The aggregate external-IRQ bit (bit 11) is for software information only,
// so mask it out before deciding whether to trap.
wire [31:0] mip_no_global = mip & ~(32'h1 << 11);

// An interrupt is requested when interrupts are globally enabled and any
// remaining pending bit is set; the upper 16 (per-IRQ) bits are additionally
// gated by mie_meie.
wire        irq_any = mstatus_mie && (|(mip_no_global & {{16{mie_meie}}, 16'hffff}));
wire [4:0]  irq_num;

// Reduce the pending vector to a single interrupt number.
hazard5_priority_encode #(
	.W_REQ (32)
) irq_priority (
	.req (mip_no_global),
	.gnt (irq_num)
);
|
||||
|
||||
// Byte offset of the selected vector-table entry (4 bytes per entry).
// Exceptions use entries 0..15; interrupts start at entry 16.
wire [11:0] mtvec_offs = exception_req_any ?
	{6'h0, exception_req_num, 2'b00} :
	({7'h0, irq_num} + 12'h10) << 2;

// Trap target and request. Exceptions take precedence over interrupts.
assign trap_addr         = mtvec | {20'h0, mtvec_offs};
assign trap_enter_vld    = (irq_any || exception_req_any) && CSR_M_TRAP;
assign trap_is_exception = exception_req_any;

// Next mcause value: the IRQ flag is set only when no exception is being taken.
assign mcause_irq_next  = !exception_req_any;
assign mcause_code_next = !exception_req_any ? {1'b0, irq_num} : exception_req_num;
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
`ifdef RISCV_FORMAL
always @ (posedge clk) begin
	// We disallow double exceptions -- this causes riscv-formal to complain that
	// loads/stores don't trap inside of traps. Therefore assume this doesn't happen
	if (in_trap) begin
		assume(!except_load_misaligned && !except_store_misaligned);
	end

	// A CSR write in the cycle after an accepted trap entry means something
	// is screwed up
	if ($past(trap_enter_vld && trap_enter_rdy)) begin
		assert(!wen);
	end

	// Trap entry and trap exit must never coincide
	assert(!(trap_enter_vld && trap_enter_rdy && trap_exit));

	// Should be impossible to get into the trap and exit it so quickly:
	if (in_trap && !$past(in_trap)) begin
		assert(!trap_exit);
	end

	// Should be impossible to get to another mret so soon after exiting:
	assert(!trap_exit || !$past(trap_exit));
end

`endif
|
||||
|
||||
endmodule
|
|
@ -0,0 +1,376 @@
|
|||
/******************************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2019 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*****************************************************************************/
|
||||
|
||||
// ----------------------------------------------------------------------------
// hazard5_decode: instruction decode (D) stage.
//
// - Expands the current instruction register (CIR) contents from the frontend
//   into a 32-bit instruction (via hazard5_instr_decompress).
// - Tracks the decode-stage program counter.
// - Raises an early jump request towards the frontend for JAL, and for
//   conditional branches whose instruction bit 31 (immediate sign bit) is set.
// - Decodes the instruction into control signals, which are registered into
//   the dx_* outputs for the X stage, with bubble insertion on flush/stall.
//
// NOTE(review): W_ALUSRC, W_ALUOP, W_MEMOP, W_MULOP, W_BCOND and W_EXCEPT are
// not declared in this file; presumably they come from hazard5_ops.vh, which
// is included after the port list that uses them -- confirm tool support.
// NOTE(review): dx_except is [2:0] while d_except is [W_EXCEPT-1:0]; confirm
// that W_EXCEPT == 3.
// ----------------------------------------------------------------------------

module hazard5_decode #(
	parameter EXTENSION_C = 1,   // compressed instruction extension
	parameter EXTENSION_M = 1,   // mul/div/mod instruction extension
	parameter HAVE_CSR = 0,
	parameter W_ADDR = 32,
	parameter W_DATA = 32,
	parameter RESET_VECTOR = 32'h0,
	parameter W_REGADDR = 5
) (
	input wire clk,
	input wire rst_n,

	input wire  [31:0]          fd_cir,
	input wire  [1:0]           fd_cir_vld,
	output wire [1:0]           df_cir_use,
	output wire                 df_cir_lock,
	output reg                  d_jump_req,
	output reg  [W_ADDR-1:0]    d_jump_target,
	output wire [W_ADDR-1:0]    d_pc, // FIXME only added for riscv-formal

	output wire                 d_stall,
	input wire                  x_stall,
	input wire                  flush_d_x,
	input wire                  f_jump_rdy,
	input wire                  f_jump_now,
	input wire  [W_ADDR-1:0]    f_jump_target,

	output reg  [W_REGADDR-1:0] d_rs1, // combinatorial
	output reg  [W_REGADDR-1:0] d_rs2, // combinatorial

	output reg  [W_DATA-1:0]    dx_imm,
	output reg  [W_REGADDR-1:0] dx_rs1,
	output reg  [W_REGADDR-1:0] dx_rs2,
	output reg  [W_REGADDR-1:0] dx_rd,
	output reg  [W_ALUSRC-1:0]  dx_alusrc_a,
	output reg  [W_ALUSRC-1:0]  dx_alusrc_b,
	output reg  [W_ALUOP-1:0]   dx_aluop,
	output reg  [W_MEMOP-1:0]   dx_memop,
	output reg  [W_MULOP-1:0]   dx_mulop,
	output reg                  dx_csr_ren,
	output reg                  dx_csr_wen,
	output reg  [1:0]           dx_csr_wtype,
	output reg                  dx_csr_w_imm,
	output reg  [W_BCOND-1:0]   dx_branchcond,
	output reg  [W_ADDR-1:0]    dx_jump_target,
	output reg                  dx_jump_is_regoffs,
	output reg                  dx_result_is_linkaddr,
	output reg  [W_ADDR-1:0]    dx_pc,
	output reg  [W_ADDR-1:0]    dx_mispredict_addr,
	output reg  [2:0]           dx_except
);


// TODO TODO factor this out in a cleaner way, e.g. separate out registers and stall logic.

`include "rv_opcodes.vh"
`include "hazard5_ops.vh"

// ============================================================================
// Expand compressed instructions
// ============================================================================
// Placed ahead of the PC/CIR control logic so that d_instr, d_instr_is_32bit
// and d_invalid are declared before their first use.

wire [31:0] d_instr;
wire        d_instr_is_32bit;
wire        d_invalid_16bit;
reg         d_invalid_32bit; // driven by the main decode table below
wire        d_invalid = d_invalid_16bit || d_invalid_32bit;

hazard5_instr_decompress #(
	.PASSTHROUGH(!EXTENSION_C)
) decomp (
	.instr_in       (fd_cir),
	.instr_is_32bit (d_instr_is_32bit),
	.instr_out      (d_instr),
	.invalid        (d_invalid_16bit)
);

// Decode various immediate formats
wire [31:0] d_imm_i = {{21{d_instr[31]}}, d_instr[30:20]};
wire [31:0] d_imm_s = {{21{d_instr[31]}}, d_instr[30:25], d_instr[11:7]};
wire [31:0] d_imm_b = {{20{d_instr[31]}}, d_instr[7], d_instr[30:25], d_instr[11:8], 1'b0};
wire [31:0] d_imm_u = {d_instr[31:12], {12{1'b0}}};
wire [31:0] d_imm_j = {{12{d_instr[31]}}, d_instr[19:12], d_instr[20], d_instr[30:21], 1'b0};

// ============================================================================
// PC/CIR control
// ============================================================================

// Starved: no valid halfwords, or only one valid halfword of a 32-bit instruction
wire d_starved = ~|fd_cir_vld || fd_cir_vld[0] && d_instr_is_32bit;
assign d_stall = x_stall ||
	d_starved || (d_jump_req && !f_jump_rdy);
assign df_cir_use =
	d_starved || d_stall ? 2'h0 :
	d_instr_is_32bit ? 2'h2 : 2'h1;

// CIR Locking is required if we successfully assert a jump request, but decode is stalled.
// (This only happens if decode stall is caused by X stall, not if fetch is starved!)
// The reason for this is that, if the CIR is not locked in, it can be trashed by
// incoming fetch data before the roadblock clears ahead of us, which will squash any other
// side effects this instruction may have besides jumping! This includes:
// - Linking for JAL
// - Mispredict recovery for branches
// Note that it is not possible to simply gate the jump request based on X stalling,
// because X stall is a function of hready, and jump request feeds haddr htrans etc.

// Note it is possible for d_jump_req and m_jump_req to be asserted
// simultaneously, hence checking flush:
wire jump_caused_by_d = d_jump_req && f_jump_rdy && !flush_d_x;
wire assert_cir_lock = jump_caused_by_d && d_stall;
wire deassert_cir_lock = !d_stall;
reg cir_lock_prev;

assign df_cir_lock = (cir_lock_prev && !deassert_cir_lock) || assert_cir_lock;

always @ (posedge clk or negedge rst_n)
	if (!rst_n)
		cir_lock_prev <= 1'b0;
	else
		cir_lock_prev <= df_cir_lock;

reg  [W_ADDR-1:0] pc;
wire [W_ADDR-1:0] pc_next = pc + (d_instr_is_32bit ? 32'h4 : 32'h2);
assign d_pc = pc;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		pc <= RESET_VECTOR;
	end else begin
		if ((f_jump_now && !assert_cir_lock) || (cir_lock_prev && deassert_cir_lock)) begin
			pc <= f_jump_target;
`ifdef FORMAL
			// Being cheeky above to save a 32 bit mux. Check that we never get an M target by mistake.
			if (cir_lock_prev && deassert_cir_lock)
				assert(f_jump_target == d_jump_target);
`endif
		end else if (!d_stall && !df_cir_lock) begin
			pc <= pc_next;
		end
	end
end

// If the current CIR is there due to locking, it is a jump which has already had primary effect.
wire jump_enable = !d_starved && !cir_lock_prev && !d_invalid;
reg [W_ADDR-1:0] d_jump_offs;

always @ (*) begin
	// JAL is major opcode 1101111,
	// branches are 1100011.
	// Bit 3 therefore distinguishes the J-type from the B-type immediate.
	case (d_instr[3])
	1'b1:    d_jump_offs = d_imm_j;
	default: d_jump_offs = d_imm_b;
	endcase

	d_jump_target = pc + d_jump_offs;

	// Branches only request an early jump when instruction bit 31 (the
	// immediate sign bit) is set; JAL requests one regardless.
	casez ({d_instr[31], d_instr})
	{1'b1, RV_BEQ }: d_jump_req = jump_enable;
	{1'b1, RV_BNE }: d_jump_req = jump_enable;
	{1'b1, RV_BLT }: d_jump_req = jump_enable;
	{1'b1, RV_BGE }: d_jump_req = jump_enable;
	{1'b1, RV_BLTU}: d_jump_req = jump_enable;
	{1'b1, RV_BGEU}: d_jump_req = jump_enable;
	{1'bz, RV_JAL }: d_jump_req = jump_enable;
	default: d_jump_req = 1'b0;
	endcase
end

// ============================================================================
// Decode X controls
// ============================================================================

// Combinatorials:
reg  [W_REGADDR-1:0] d_rd;
reg  [W_DATA-1:0]    d_imm;
reg  [W_ALUSRC-1:0]  d_alusrc_a;
reg  [W_ALUSRC-1:0]  d_alusrc_b;
reg  [W_ALUOP-1:0]   d_aluop;
reg  [W_MEMOP-1:0]   d_memop;
reg  [W_MULOP-1:0]   d_mulop;
reg  [W_BCOND-1:0]   d_branchcond;
reg                  d_jump_is_regoffs;
reg                  d_result_is_linkaddr;
reg                  d_csr_ren;
reg                  d_csr_wen;
reg  [1:0]           d_csr_wtype;
reg                  d_csr_w_imm;
reg  [W_EXCEPT-1:0]  d_except;

// Register number zero, at the configured register-address width
localparam X0 = {W_REGADDR{1'b0}};

always @ (*) begin
	// Assign some defaults
	d_rs1 = d_instr[19:15];
	d_rs2 = d_instr[24:20];
	d_rd  = d_instr[11: 7];
	d_imm = d_imm_i;
	d_alusrc_a = ALUSRCA_RS1;
	d_alusrc_b = ALUSRCB_RS2;
	d_aluop = ALUOP_ADD;
	d_memop = MEMOP_NONE;
	d_mulop = M_OP_MUL;
	d_csr_ren = 1'b0;
	d_csr_wen = 1'b0;
	d_csr_wtype = CSR_WTYPE_W;
	d_csr_w_imm = 1'b0;
	d_branchcond = BCOND_NEVER;
	d_jump_is_regoffs = 1'b0;
	d_result_is_linkaddr = 1'b0;
	d_invalid_32bit = 1'b0;
	d_except = EXCEPT_NONE;

	casez (d_instr)
	RV_BEQ:     begin d_rd = X0; d_aluop = ALUOP_SUB; d_branchcond = BCOND_ZERO;  end
	RV_BNE:     begin d_rd = X0; d_aluop = ALUOP_SUB; d_branchcond = BCOND_NZERO; end
	RV_BLT:     begin d_rd = X0; d_aluop = ALUOP_LT;  d_branchcond = BCOND_NZERO; end
	RV_BGE:     begin d_rd = X0; d_aluop = ALUOP_LT;  d_branchcond = BCOND_ZERO; end
	RV_BLTU:    begin d_rd = X0; d_aluop = ALUOP_LTU; d_branchcond = BCOND_NZERO; end
	RV_BGEU:    begin d_rd = X0; d_aluop = ALUOP_LTU; d_branchcond = BCOND_ZERO; end
	RV_JALR:    begin d_result_is_linkaddr = 1'b1; d_jump_is_regoffs = 1'b1; d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_branchcond = BCOND_ALWAYS; end
	RV_JAL:     begin d_result_is_linkaddr = 1'b1; d_rs2 = X0; d_rs1 = X0; end
	RV_LUI:     begin d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_rs1 = X0; end
	RV_AUIPC:   begin d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_alusrc_a = ALUSRCA_PC;  d_rs1 = X0; end
	RV_ADDI:    begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
	RV_SLLI:    begin d_aluop = ALUOP_SLL; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
	RV_SLTI:    begin d_aluop = ALUOP_LT;  d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
	RV_SLTIU:   begin d_aluop = ALUOP_LTU; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
	RV_XORI:    begin d_aluop = ALUOP_XOR; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
	RV_SRLI:    begin d_aluop = ALUOP_SRL; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
	RV_SRAI:    begin d_aluop = ALUOP_SRA; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
	RV_ORI:     begin d_aluop = ALUOP_OR;  d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
	RV_ANDI:    begin d_aluop = ALUOP_AND; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
	RV_ADD:     begin d_aluop = ALUOP_ADD; end
	RV_SUB:     begin d_aluop = ALUOP_SUB; end
	RV_SLL:     begin d_aluop = ALUOP_SLL; end
	RV_SLT:     begin d_aluop = ALUOP_LT;  end
	RV_SLTU:    begin d_aluop = ALUOP_LTU; end
	RV_XOR:     begin d_aluop = ALUOP_XOR; end
	RV_SRL:     begin d_aluop = ALUOP_SRL; end
	RV_SRA:     begin d_aluop = ALUOP_SRA; end
	RV_OR:      begin d_aluop = ALUOP_OR;  end
	RV_AND:     begin d_aluop = ALUOP_AND; end
	RV_LB:      begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LB;  end
	RV_LH:      begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LH;  end
	RV_LW:      begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LW;  end
	RV_LBU:     begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LBU; end
	RV_LHU:     begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LHU; end
	RV_SB:      begin d_aluop = ALUOP_ADD; d_imm = d_imm_s; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SB;  d_rd = X0; end
	RV_SH:      begin d_aluop = ALUOP_ADD; d_imm = d_imm_s; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SH;  d_rd = X0; end
	RV_SW:      begin d_aluop = ALUOP_ADD; d_imm = d_imm_s; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SW;  d_rd = X0; end
	RV_MUL:     if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MUL;    end else begin d_invalid_32bit = 1'b1; end
	RV_MULH:    if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULH;   end else begin d_invalid_32bit = 1'b1; end
	RV_MULHSU:  if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULHSU; end else begin d_invalid_32bit = 1'b1; end
	RV_MULHU:   if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULHU;  end else begin d_invalid_32bit = 1'b1; end
	RV_DIV:     if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_DIV;    end else begin d_invalid_32bit = 1'b1; end
	RV_DIVU:    if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_DIVU;   end else begin d_invalid_32bit = 1'b1; end
	RV_REM:     if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REM;    end else begin d_invalid_32bit = 1'b1; end
	RV_REMU:    if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REMU;   end else begin d_invalid_32bit = 1'b1; end
	RV_FENCE:   begin d_rd = X0; end  // NOP
	RV_FENCE_I: begin d_rd = X0; d_rs1 = X0; d_rs2 = X0; d_branchcond = BCOND_NZERO; d_imm[31] = 1'b1; end // Pretend we are recovering from a mispredicted-taken backward branch. Mispredict recovery flushes frontend.
	RV_CSRRW:   if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = 1'b1  ; d_csr_ren = |d_rd; d_csr_wtype = CSR_WTYPE_W; end else begin d_invalid_32bit = 1'b1; end
	RV_CSRRS:   if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_S; end else begin d_invalid_32bit = 1'b1; end
	RV_CSRRC:   if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_C; end else begin d_invalid_32bit = 1'b1; end
	RV_CSRRWI:  if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = 1'b1  ; d_csr_ren = |d_rd; d_csr_wtype = CSR_WTYPE_W; d_csr_w_imm = 1'b1; end else begin d_invalid_32bit = 1'b1; end
	RV_CSRRSI:  if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_S; d_csr_w_imm = 1'b1; end else begin d_invalid_32bit = 1'b1; end
	RV_CSRRCI:  if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_C; d_csr_w_imm = 1'b1; end else begin d_invalid_32bit = 1'b1; end
	RV_ECALL:   if (HAVE_CSR) begin d_except = EXCEPT_ECALL;  d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end
	RV_EBREAK:  if (HAVE_CSR) begin d_except = EXCEPT_EBREAK; d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end
	RV_MRET:    if (HAVE_CSR) begin d_except = EXCEPT_MRET;   d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end
	default:    begin d_invalid_32bit = 1'b1; end
	endcase
end


// Resettable D->X pipeline controls
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		{dx_rs1, dx_rs2, dx_rd} <= {(3 * W_REGADDR){1'b0}};
		dx_alusrc_a <= ALUSRCA_RS1;
		dx_alusrc_b <= ALUSRCB_RS2;
		dx_aluop <= ALUOP_ADD;
		dx_memop <= MEMOP_NONE;
		dx_mulop <= M_OP_MUL;
		dx_csr_ren <= 1'b0;
		dx_csr_wen <= 1'b0;
		dx_csr_wtype <= CSR_WTYPE_W;
		dx_csr_w_imm <= 1'b0;
		dx_branchcond <= BCOND_NEVER;
		dx_jump_is_regoffs <= 1'b0;
		dx_result_is_linkaddr <= 1'b0;
		dx_except <= EXCEPT_NONE;
	end else if (flush_d_x || (d_stall && !x_stall)) begin
		// Bubble insertion
		dx_branchcond <= BCOND_NEVER;
		dx_memop <= MEMOP_NONE;
		dx_rd <= X0;
		dx_except <= EXCEPT_NONE;
		dx_csr_ren <= 1'b0;
		dx_csr_wen <= 1'b0;
		// Don't start a multiply in a pipe bubble
		if (EXTENSION_M)
			dx_aluop <= ALUOP_ADD;
		// Also need to clear rs1, rs2, due to a nasty sequence of events:
		// Suppose we have a load, followed by a dependent branch, which is predicted taken
		// - branch will stall in D until AHB master becomes free
		// - on next cycle, prediction causes jump, and bubble is in X
		// - if X gets branch's rs1, rs2, it will cause spurious RAW stall
		// - on next cycle, branch will not progress into X due to RAW stall, but *will* be replaced in D due to jump
		// - branch mispredict now cannot be corrected
		dx_rs1 <= X0;
		dx_rs2 <= X0;
	end else if (!x_stall) begin
		// These ones can have side effects, so are neutralised for invalid
		// instructions (which instead raise an illegal-instruction exception)
		dx_rs1        <= d_invalid ? X0 : d_rs1;
		dx_rs2        <= d_invalid ? X0 : d_rs2;
		dx_rd         <= d_invalid ? X0 : d_rd;
		dx_memop      <= d_invalid ? MEMOP_NONE : d_memop;
		dx_branchcond <= d_invalid ? BCOND_NEVER : d_branchcond;
		dx_csr_ren    <= d_invalid ? 1'b0 : d_csr_ren;
		dx_csr_wen    <= d_invalid ? 1'b0 : d_csr_wen;
		dx_except     <= d_invalid ? EXCEPT_INSTR_ILLEGAL : d_except;
		dx_aluop      <= d_invalid && EXTENSION_M ? ALUOP_ADD : d_aluop;

		// These can't
		dx_alusrc_a <= d_alusrc_a;
		dx_alusrc_b <= d_alusrc_b;
		dx_mulop <= d_mulop;
		dx_jump_is_regoffs <= d_jump_is_regoffs;
		dx_result_is_linkaddr <= d_result_is_linkaddr;
		dx_csr_wtype <= d_csr_wtype;
		dx_csr_w_imm <= d_csr_w_imm;
	end
end

// No reset required on these; will be masked by the resettable pipeline controls until they're valid
always @ (posedge clk) begin
	if (!x_stall) begin
		dx_imm <= d_imm;
		dx_jump_target <= d_jump_target;
		dx_mispredict_addr <= pc_next;
		dx_pc <= pc;
	end
	if (flush_d_x) begin
		// The target of a late jump must be propagated *immediately* to X PC, as
		// mepc may sample X PC at any time due to IRQ, and must not capture
		// misprediction.
		// Also required for flush while X stalled (e.g. if a muldiv enters X while
		// a 1 cycle bus stall holds off the jump request in M)
		// (This deliberately overrides the dx_pc assignment above.)
		dx_pc <= f_jump_target;
`ifdef FORMAL
		// This should only be caused by late jumps
		assert(f_jump_now);
`endif
	end
end

endmodule
|
|
@ -0,0 +1,300 @@
|
|||
module hazard5_frontend #(
|
||||
parameter EXTENSION_C = 1,
|
||||
parameter W_ADDR = 32, // other sizes currently unsupported
|
||||
parameter W_DATA = 32, // other sizes currently unsupported
|
||||
parameter FIFO_DEPTH = 2, // power of 2, >= 1
|
||||
parameter RESET_VECTOR = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire rst_n,
|
||||
|
||||
// Fetch interface
|
||||
// addr_vld may be asserted at any time, but after assertion,
|
||||
// neither addr nor addr_vld may change until the cycle after addr_rdy.
|
||||
// There is no backpressure on the data interface; the front end
|
||||
// must ensure it does not request data it cannot receive.
|
||||
// addr_rdy and dat_vld may be functions of hready, and
|
||||
// may not be used to compute combinational outputs.
|
||||
output wire mem_size, // 1'b1 -> 32 bit access
|
||||
output wire [W_ADDR-1:0] mem_addr,
|
||||
output wire mem_addr_vld,
|
||||
input wire mem_addr_rdy,
|
||||
input wire [W_DATA-1:0] mem_data,
|
||||
input wire mem_data_vld,
|
||||
|
||||
// Jump/flush interface
|
||||
// Processor may assert vld at any time. The request will not go through
|
||||
// unless rdy is high. Processor *may* alter request during this time.
|
||||
// Inputs must not be a function of hready.
|
||||
input wire [W_ADDR-1:0] jump_target,
|
||||
input wire jump_target_vld,
|
||||
output wire jump_target_rdy,
|
||||
|
||||
// Interface to Decode
|
||||
// Note reg/wire distinction
|
||||
// => decode is providing live feedback on the CIR it is decoding,
|
||||
// which we fetched previously
|
||||
// This works OK because size is decoded from 2 LSBs of instruction, so cheap.
|
||||
output reg [31:0] cir,
|
||||
output reg [1:0] cir_vld, // number of valid halfwords in CIR
|
||||
input wire [1:0] cir_use, // number of halfwords D intends to consume
|
||||
// *may* be a function of hready
|
||||
input wire cir_lock // Lock-in current contents and level of CIR.
|
||||
// Assert simultaneously with a jump request,
|
||||
// if decode is going to stall. This stops the CIR
|
||||
// from being trashed by incoming fetch data;
|
||||
// jump instructions have other side effects besides jumping!
|
||||
);
|
||||
|
||||
`undef ASSERT
|
||||
`ifdef HAZARD5_FRONTEND_ASSERTIONS
|
||||
`define ASSERT(x) assert(x)
|
||||
`else
|
||||
`define ASSERT(x)
|
||||
`endif
|
||||
|
||||
// ISIM doesn't support some of this:
|
||||
// //synthesis translate_off
|
||||
// initial if (W_DATA != 32) begin $error("Frontend requires 32-bit databus"); end
|
||||
// initial if ((1 << $clog2(FIFO_DEPTH)) != FIFO_DEPTH) begin $error("Frontend FIFO depth must be power of 2"); end
|
||||
// initial if (~|FIFO_DEPTH) begin $error("Frontend FIFO depth must be > 0"); end
|
||||
// //synthesis translate_on
|
||||
|
||||
localparam W_BUNDLE = W_DATA / 2;
|
||||
parameter W_FIFO_LEVEL = $clog2(FIFO_DEPTH + 1);
|
||||
|
||||
// ============================================================================
|
||||
// Fetch Queue (FIFO)
|
||||
// ============================================================================
|
||||
// This is a little different from either a normal sync fifo or sync fwft fifo
|
||||
// so it's worth implementing from scratch
|
||||
|
||||
wire jump_now = jump_target_vld && jump_target_rdy;
|
||||
|
||||
reg [W_DATA-1:0] fifo_mem [0:FIFO_DEPTH];
|
||||
reg [FIFO_DEPTH-1:0] fifo_valid;
|
||||
|
||||
wire fifo_push;
|
||||
wire fifo_pop;
|
||||
|
||||
// Occupancy flags for the fetch FIFO, one bit per entry.
// All flags are cleared by reset and whenever a jump is accepted (jump_now);
// otherwise they change only on a push and/or a pop.
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
fifo_valid <= {FIFO_DEPTH{1'b0}};
|
||||
end else if (jump_now) begin
|
||||
fifo_valid <= {FIFO_DEPTH{1'b0}};
|
||||
end else if (fifo_push || fifo_pop) begin
|
||||
// Push: invert, shift left by one (a 0 enters at bit 0), invert back,
// so the flags move up one place and a 1 enters at bit 0.
// Pop: the result is then shifted right by one place.
fifo_valid <= ~(~fifo_valid << fifo_push) >> fifo_pop;
|
||||
end
|
||||
end
|
||||
|
||||
// Bus read data is the FIFO write data; entry 0 is the FIFO output.
// These nets are declared ahead of the shift process below, which reads
// fifo_wdata, so that every identifier is declared before its first use
// (tools that resolve names in source order reject a forward reference).
wire [W_DATA-1:0] fifo_wdata = mem_data;
|
||||
wire [W_DATA-1:0] fifo_rdata = fifo_mem[0];
|
||||
// fifo_mem is declared with FIFO_DEPTH + 1 entries. The extra entry at index
// FIFO_DEPTH is driven combinationally with the write data, so the
// fifo_mem[i + 1] read in the loop below has a source for the last real entry.
always @ (*) fifo_mem[FIFO_DEPTH] = fifo_wdata;
|
||||
|
||||
// FIFO data path. An entry is loaded when the FIFO pops, or on a push if the
// entry is currently invalid. It takes the data of the entry above it when
// that entry is valid, and the incoming write data otherwise.
// The data registers have no reset; validity is tracked in fifo_valid.
always @ (posedge clk) begin: fifo_data_shift
|
||||
integer i;
|
||||
for (i = 0; i < FIFO_DEPTH; i = i + 1) begin
|
||||
if (fifo_pop || (fifo_push && !fifo_valid[i])) begin
|
||||
// For i == FIFO_DEPTH - 1 the select fifo_valid[i + 1] is one bit past the
// declared range of fifo_valid. Both arms of the conditional then carry
// fifo_wdata (fifo_mem[FIFO_DEPTH] mirrors it), so the value loaded does
// not depend on how the out-of-range select evaluates.
fifo_mem[i] <= fifo_valid[i + 1] ? fifo_mem[i + 1] : fifo_wdata;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire fifo_full = fifo_valid[FIFO_DEPTH - 1];
|
||||
wire fifo_empty = !fifo_valid[0];
|
||||
wire fifo_almost_full = FIFO_DEPTH == 1 || (!fifo_valid[FIFO_DEPTH - 1] && fifo_valid[FIFO_DEPTH - 2]);
|
||||
|
||||
// ============================================================================
|
||||
// Fetch Request + State Logic
|
||||
// ============================================================================
|
||||
|
||||
// Keep track of some useful state of the memory interface
|
||||
|
||||
reg mem_addr_hold;
|
||||
reg [1:0] pending_fetches;
|
||||
reg [1:0] ctr_flush_pending;
|
||||
wire [1:0] pending_fetches_next = pending_fetches + (mem_addr_vld && !mem_addr_hold) - mem_data_vld;
|
||||
|
||||
wire cir_must_refill;
|
||||
// If fetch data is forwarded past the FIFO, ensure it is not also written to it.
|
||||
assign fifo_push = mem_data_vld && ~|ctr_flush_pending && !(cir_must_refill && fifo_empty);
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mem_addr_hold <= 1'b0;
|
||||
pending_fetches <= 2'h0;
|
||||
ctr_flush_pending <= 2'h0;
|
||||
end else begin
|
||||
`ASSERT(ctr_flush_pending <= pending_fetches);
|
||||
`ASSERT(pending_fetches < 2'd3);
|
||||
`ASSERT(!(mem_data_vld && !pending_fetches));
|
||||
// `ASSERT(!($past(mem_addr_hold) && $past(mem_addr_vld) && !$stable(mem_addr)));
|
||||
mem_addr_hold <= mem_addr_vld && !mem_addr_rdy;
|
||||
pending_fetches <= pending_fetches_next;
|
||||
if (jump_now) begin
|
||||
ctr_flush_pending <= pending_fetches - mem_data_vld;
|
||||
end else if (|ctr_flush_pending && mem_data_vld) begin
|
||||
ctr_flush_pending <= ctr_flush_pending - 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Fetch addr runs ahead of the PC, in word increments.
|
||||
reg [W_ADDR-1:0] fetch_addr;
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
fetch_addr <= RESET_VECTOR;
|
||||
end else begin
|
||||
if (jump_now) begin
|
||||
// Post-increment if jump request is going straight through
|
||||
fetch_addr <= {jump_target[W_ADDR-1:2] + (mem_addr_rdy && !mem_addr_hold), 2'b00};
|
||||
end else if (mem_addr_vld && mem_addr_rdy) begin
|
||||
fetch_addr <= fetch_addr + 32'h4;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Using the non-registered version of pending_fetches would improve FIFO
|
||||
// utilisation, but create a combinatorial path from hready to address phase!
|
||||
wire fetch_stall = fifo_full
|
||||
|| fifo_almost_full && |pending_fetches // TODO causes issue with depth 1: only one in flight, so bus rate halved.
|
||||
|| pending_fetches > 2'h1;
|
||||
|
||||
|
||||
// unaligned jump is handled in two different places:
|
||||
// - during address phase, offset may be applied to fetch_addr if hready was low when jump_target_vld was high
|
||||
// - during data phase, need to assemble CIR differently.
|
||||
|
||||
|
||||
wire unaligned_jump_now = EXTENSION_C && jump_now && jump_target[1];
|
||||
reg unaligned_jump_aph;
|
||||
reg unaligned_jump_dph;
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
unaligned_jump_aph <= 1'b0;
|
||||
unaligned_jump_dph <= 1'b0;
|
||||
end else if (EXTENSION_C) begin
|
||||
`ASSERT(!(unaligned_jump_aph && !unaligned_jump_dph));
|
||||
`ASSERT(!($past(jump_now && !jump_target[1]) && unaligned_jump_aph));
|
||||
`ASSERT(!($past(jump_now && !jump_target[1]) && unaligned_jump_dph));
|
||||
if (mem_addr_rdy || (jump_now && !unaligned_jump_now)) begin
|
||||
unaligned_jump_aph <= 1'b0;
|
||||
end
|
||||
if ((mem_data_vld && ~|ctr_flush_pending && !cir_lock)
|
||||
|| (jump_now && !unaligned_jump_now)) begin
|
||||
unaligned_jump_dph <= 1'b0;
|
||||
end
|
||||
if (fifo_pop) begin
|
||||
// Following a lock/unlock of the CIR, we may have an unaligned fetch in
|
||||
// the FIFO, rather than consuming straight from the bus.
|
||||
unaligned_jump_dph <= 1'b0;
|
||||
end
|
||||
if (unaligned_jump_now) begin
|
||||
unaligned_jump_dph <= 1'b1;
|
||||
unaligned_jump_aph <= !mem_addr_rdy;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Combinatorially generate the address-phase request
|
||||
|
||||
// Holdoff flag: set while rst_n is low, cleared at the first rising clock
// edge with rst_n high. mem_addr_vld is gated with !reset_holdoff, so no
// address-phase request is presented while this flag is still set.
reg reset_holdoff;
|
||||
always @ (posedge clk or negedge rst_n)
|
||||
if (!rst_n)
|
||||
reset_holdoff <= 1'b1;
|
||||
else
|
||||
reset_holdoff <= 1'b0;
|
||||
|
||||
reg [W_ADDR-1:0] mem_addr_r;
|
||||
reg mem_addr_vld_r;
|
||||
reg mem_size_r;
|
||||
|
||||
assign mem_addr = mem_addr_r;
|
||||
assign mem_addr_vld = mem_addr_vld_r && !reset_holdoff;
|
||||
assign mem_size = mem_size_r;
|
||||
|
||||
always @ (*) begin
|
||||
mem_addr_r = {W_ADDR{1'b0}};
|
||||
mem_addr_vld_r = 1'b1;
|
||||
mem_size_r = 1'b1; // almost all accesses are 32 bit
|
||||
case (1'b1)
|
||||
mem_addr_hold : begin mem_addr_r = {fetch_addr[W_ADDR-1:2], unaligned_jump_aph, 1'b0}; mem_size_r = !unaligned_jump_aph; end
|
||||
jump_target_vld : begin mem_addr_r = jump_target; mem_size_r = !unaligned_jump_now; end
|
||||
!fetch_stall : begin mem_addr_r = fetch_addr; end
|
||||
default : begin mem_addr_vld_r = 1'b0; end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign jump_target_rdy = !mem_addr_hold;
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// Instruction assembly yard
|
||||
// ============================================================================
|
||||
|
||||
// buf_level is the number of valid halfwords in {hwbuf, cir}.
|
||||
// cir_vld and hwbuf_vld are functions of this.
|
||||
reg [1:0] buf_level;
|
||||
reg [W_BUNDLE-1:0] hwbuf;
|
||||
reg hwbuf_vld;
|
||||
|
||||
wire [W_DATA-1:0] fetch_data = fifo_empty ? mem_data : fifo_rdata;
|
||||
wire fetch_data_vld = !fifo_empty || (mem_data_vld && ~|ctr_flush_pending);
|
||||
|
||||
// Shift any recycled instruction data down to backfill D's consumption
|
||||
// We don't care about anything which is invalid or will be overlaid with fresh data,
|
||||
// so choose these values in a way that minimises muxes
|
||||
wire [3*W_BUNDLE-1:0] instr_data_shifted =
|
||||
cir_use[1] ? {hwbuf, cir[W_BUNDLE +: W_BUNDLE], hwbuf} :
|
||||
cir_use[0] && EXTENSION_C ? {hwbuf, hwbuf, cir[W_BUNDLE +: W_BUNDLE]} :
|
||||
{hwbuf, cir};
|
||||
|
||||
// Saturating subtraction: on cir_lock deassertion,
|
||||
// buf_level will be 0 but cir_use will be positive!
|
||||
wire [1:0] cir_use_clipped = |buf_level ? cir_use : 2'h0;
|
||||
|
||||
wire [1:0] level_next_no_fetch = buf_level - cir_use_clipped;
|
||||
|
||||
// Overlay fresh fetch data onto the shifted/recycled instruction data
|
||||
// Again, if something won't be looked at, generate cheapest possible garbage.
|
||||
// Don't care if fetch data is valid or not, as will just retry next cycle (as long as flags set correctly)
|
||||
wire [3*W_BUNDLE-1:0] instr_data_plus_fetch =
|
||||
cir_lock || (level_next_no_fetch[1] && !unaligned_jump_dph) ? instr_data_shifted :
|
||||
unaligned_jump_dph && EXTENSION_C ? {instr_data_shifted[W_BUNDLE +: 2*W_BUNDLE], fetch_data[W_BUNDLE +: W_BUNDLE]} :
|
||||
level_next_no_fetch[0] && EXTENSION_C ? {fetch_data, instr_data_shifted[0 +: W_BUNDLE]} :
|
||||
{instr_data_shifted[2*W_BUNDLE +: W_BUNDLE], fetch_data};
|
||||
|
||||
assign cir_must_refill = !cir_lock && !level_next_no_fetch[1];
|
||||
assign fifo_pop = cir_must_refill && !fifo_empty;
|
||||
|
||||
wire [1:0] buf_level_next =
|
||||
jump_now || |ctr_flush_pending || cir_lock ? 2'h0 :
|
||||
fetch_data_vld && unaligned_jump_dph ? 2'h1 :
|
||||
buf_level + {cir_must_refill && fetch_data_vld, 1'b0} - cir_use_clipped;
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
buf_level <= 2'h0;
|
||||
hwbuf_vld <= 1'b0;
|
||||
cir_vld <= 2'h0;
|
||||
end else begin
|
||||
`ASSERT(cir_vld <= 2);
|
||||
`ASSERT(cir_use <= 2);
|
||||
`ASSERT(cir_use <= cir_vld);
|
||||
`ASSERT(cir_vld <= buf_level || $past(cir_lock));
|
||||
// Update CIR flags
|
||||
buf_level <= buf_level_next;
|
||||
hwbuf_vld <= &buf_level_next;
|
||||
if (!cir_lock)
|
||||
cir_vld <= buf_level_next & ~(buf_level_next >> 1'b1);
|
||||
// Update CIR contents
|
||||
end
|
||||
end
|
||||
|
||||
// No need to reset these as they will be written before first use
|
||||
always @ (posedge clk)
|
||||
{hwbuf, cir} <= instr_data_plus_fetch;
|
||||
|
||||
endmodule
|
|
@ -0,0 +1,114 @@
|
|||
// Compressed-instruction expander.
// Purely combinational: maps a 16-bit encoding in instr_in[15:0] to a 32-bit
// instruction on instr_out.
//   PASSTHROUGH    - if nonzero, instr_in is forwarded unchanged and always
//                    reported as 32-bit and valid.
//   instr_in       - instruction word; only bits [15:0] are examined when
//                    instr_in[1:0] != 2'b11.
//   instr_is_32bit - 1 when instr_in[1:0] == 2'b11 (or PASSTHROUGH).
//   instr_out      - instr_in itself for 32-bit instructions, otherwise the
//                    expanded instruction (32'h0 when nothing matches).
//   invalid        - 1 for the all-zeros halfword, for the encodings marked
//                    RESERVED / unsupported below, and for any 16-bit
//                    pattern that matches no case item.
// The RV_C_* match patterns and RV_NOZ_* base encodings are not defined in
// this module; presumably they come from rv_opcodes.vh -- confirm there.
module hazard5_instr_decompress #(
|
||||
parameter PASSTHROUGH = 0
|
||||
) (
|
||||
input wire [31:0] instr_in,
|
||||
output reg instr_is_32bit,
|
||||
output reg [31:0] instr_out,
|
||||
output reg invalid
|
||||
);
|
||||
|
||||
`include "rv_opcodes.vh"
|
||||
|
||||
localparam W_REGADDR = 5;
|
||||
|
||||
// Long-register formats: cr, ci, css
|
||||
// Short-register formats: ciw, cl, cs, cb, cj
|
||||
// The long forms carry a full 5-bit register number. The short forms carry
// 3 bits, prefixed here with 2'b01, which selects registers 8 to 15.
wire [W_REGADDR-1:0] rd_l = instr_in[11:7];
|
||||
wire [W_REGADDR-1:0] rs1_l = instr_in[11:7];
|
||||
wire [W_REGADDR-1:0] rs2_l = instr_in[6:2];
|
||||
wire [W_REGADDR-1:0] rd_s = {2'b01, instr_in[4:2]};
|
||||
wire [W_REGADDR-1:0] rs1_s = {2'b01, instr_in[9:7]};
|
||||
wire [W_REGADDR-1:0] rs2_s = {2'b01, instr_in[4:2]};
|
||||
|
||||
|
||||
// Immediates, already placed at the bit positions they occupy in the
// expanded 32-bit instruction, so they can simply be ORed in.
|
||||
|
||||
// ci format: instr_in[12] replicated into [31:25], instr_in[6:2] at [24:20].
wire [31:0] imm_ci = {{7{instr_in[12]}}, instr_in[6:2], {20{1'b0}}};
|
||||
|
||||
// cj format: jump offset bits shuffled into bits [31:12], with instr_in[12]
// as the sign bit at [31] and replicated into [20:12].
wire [31:0] imm_cj = {instr_in[12], instr_in[8], instr_in[10:9], instr_in[6], instr_in[7],
|
||||
instr_in[2], instr_in[11], instr_in[5:3], {9{instr_in[12]}}, {12{1'b0}}};
|
||||
|
||||
// cb format: branch offset split across [11:7] (first term) and [31:25]
// (second term), with instr_in[12] as the sign.
wire [31:0] imm_cb =
|
||||
{{20{1'b0}}, instr_in[11:10], instr_in[4:3], instr_in[12], {7{1'b0}}} |
|
||||
{{4{instr_in[12]}}, instr_in[6:5], instr_in[2], {25{1'b0}}};
|
||||
|
||||
generate
|
||||
if (PASSTHROUGH) begin
|
||||
always @ (*) begin
|
||||
instr_is_32bit = 1'b1;
|
||||
instr_out = instr_in;
|
||||
invalid = 1'b0;
|
||||
end
|
||||
end else begin
|
||||
// Fix: no null statement after "begin"; Verilog-2001/2005 sequential blocks
// do not accept one.
always @ (*) begin
|
||||
if (instr_in[1:0] == 2'b11) begin
|
||||
instr_is_32bit = 1'b1;
|
||||
instr_out = instr_in;
|
||||
invalid = 1'b0;
|
||||
end else begin
|
||||
// Defaults for the 16-bit path; individual case items override them.
instr_is_32bit = 1'b0;
|
||||
instr_out = 32'h0;
|
||||
invalid = 1'b0;
|
||||
casez (instr_in[15:0])
|
||||
16'h0: invalid = 1'b1;
|
||||
RV_C_ADDI4SPN: instr_out = RV_NOZ_ADDI | ({27'h0, rd_s} << RV_RD_LSB) | (5'h2 << RV_RS1_LSB)
|
||||
| ({instr_in[10:7], instr_in[12:11], instr_in[5], instr_in[6], 2'b00} << 20);
|
||||
RV_C_LW: instr_out = RV_NOZ_LW | ({27'h0, rd_s} << RV_RD_LSB) | (rs1_s << RV_RS1_LSB)
|
||||
| ({instr_in[5], instr_in[12:10], instr_in[6], 2'b00} << 20);
|
||||
RV_C_SW: instr_out = RV_NOZ_SW | (rs2_s << RV_RS2_LSB) | (rs1_s << RV_RS1_LSB)
|
||||
| ({instr_in[11:10], instr_in[6], 2'b00} << 7) | ({instr_in[5], instr_in[12]} << 25);
|
||||
RV_C_ADDI: instr_out = RV_NOZ_ADDI | (rd_l << RV_RD_LSB) | (rs1_l << RV_RS1_LSB) | imm_ci;
|
||||
RV_C_JAL: instr_out = RV_NOZ_JAL | (5'h1 << RV_RD_LSB) | imm_cj;
|
||||
RV_C_J: instr_out = RV_NOZ_JAL | (5'h0 << RV_RD_LSB) | imm_cj;
|
||||
RV_C_LI: instr_out = RV_NOZ_ADDI | (rd_l << RV_RD_LSB) | imm_ci;
|
||||
// This encoding is shared: register field 2 selects the stack-pointer
// adjust form, anything else is the load-upper-immediate form.
RV_C_LUI: begin
|
||||
if (rd_l == 5'h2) begin
|
||||
// addi16sp
|
||||
instr_out = RV_NOZ_ADDI | (5'h2 << RV_RD_LSB) | (5'h2 << RV_RS1_LSB) |
|
||||
({{3{instr_in[12]}}, instr_in[4:3], instr_in[5], instr_in[2], instr_in[6]} << 24);
|
||||
end else begin
|
||||
instr_out = RV_NOZ_LUI | (rd_l << RV_RD_LSB) | ({{15{instr_in[12]}}, instr_in[6:2]} << 12);
|
||||
end
|
||||
invalid = !{instr_in[12], instr_in[6:2]}; // RESERVED if imm == 0
|
||||
end
|
||||
RV_C_SLLI: instr_out = RV_NOZ_SLLI | (rs1_l << RV_RD_LSB) | (rs1_l << RV_RS1_LSB) | imm_ci;
|
||||
RV_C_SRAI: instr_out = RV_NOZ_SRAI | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | imm_ci;
|
||||
RV_C_SRLI: instr_out = RV_NOZ_SRLI | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | imm_ci;
|
||||
RV_C_ANDI: instr_out = RV_NOZ_ANDI | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | imm_ci;
|
||||
RV_C_AND: instr_out = RV_NOZ_AND | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | (rs2_s << RV_RS2_LSB);
|
||||
RV_C_OR: instr_out = RV_NOZ_OR | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | (rs2_s << RV_RS2_LSB);
|
||||
RV_C_XOR: instr_out = RV_NOZ_XOR | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | (rs2_s << RV_RS2_LSB);
|
||||
RV_C_SUB: instr_out = RV_NOZ_SUB | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | (rs2_s << RV_RS2_LSB);
|
||||
// Shared encoding: a nonzero rs2 field is an add, a zero rs2 field is a
// jump-and-link through rs1 (link register 1).
RV_C_ADD: begin
|
||||
if (rs2_l) begin
|
||||
instr_out = RV_NOZ_ADD | (rd_l << RV_RD_LSB) | (rs1_l << RV_RS1_LSB) | (rs2_l << RV_RS2_LSB);
|
||||
end else begin // jalr
|
||||
instr_out = RV_NOZ_JALR | (5'h1 << RV_RD_LSB) | (rs1_l << RV_RS1_LSB);
|
||||
invalid = !rs1_l; // EBREAK; not supported!
|
||||
end
|
||||
end
|
||||
// Shared encoding: a nonzero rs2 field is a register move (add with no
// rs1 field set), a zero rs2 field is a jump through rs1 with no link.
RV_C_MV: begin
|
||||
if (rs2_l) begin // mv
|
||||
instr_out = RV_NOZ_ADD | (rd_l << RV_RD_LSB) | (rs2_l << RV_RS2_LSB);
|
||||
end else begin // jr
|
||||
instr_out = RV_NOZ_JALR | (rs1_l << RV_RS1_LSB);
|
||||
invalid = !rs1_l; // RESERVED
|
||||
end
|
||||
end
|
||||
RV_C_LWSP: begin
|
||||
instr_out = RV_NOZ_LW | (rd_l << RV_RD_LSB) | (5'h2 << RV_RS1_LSB)
|
||||
| ({instr_in[3:2], instr_in[12], instr_in[6:4], 2'b00} << 20);
|
||||
invalid = !rd_l; // RESERVED
|
||||
end
|
||||
RV_C_SWSP: instr_out = RV_NOZ_SW | (rs2_l << RV_RS2_LSB) | (5'h2 << RV_RS1_LSB)
|
||||
| ({instr_in[11:9], 2'b00} << 7) | ({instr_in[8:7], instr_in[12]} << 25);
|
||||
RV_C_BEQZ: instr_out = RV_NOZ_BEQ | (rs1_s << RV_RS1_LSB) | imm_cb;
|
||||
RV_C_BNEZ: instr_out = RV_NOZ_BNE | (rs1_s << RV_RS1_LSB) | imm_cb;
|
||||
default: invalid = 1'b1;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
endmodule
|
|
@ -0,0 +1,74 @@
|
|||
|
||||
localparam W_ALUOP = 4;
|
||||
localparam W_ALUSRC = 2;
|
||||
localparam W_MEMOP = 4;
|
||||
localparam W_BCOND = 2;
|
||||
|
||||
// ALU operation selectors
|
||||
|
||||
localparam ALUOP_ADD = 4'h0;
|
||||
localparam ALUOP_SUB = 4'h1;
|
||||
localparam ALUOP_LT = 4'h2;
|
||||
localparam ALUOP_LTU = 4'h4;
|
||||
localparam ALUOP_AND = 4'h6;
|
||||
localparam ALUOP_OR = 4'h7;
|
||||
localparam ALUOP_XOR = 4'h8;
|
||||
localparam ALUOP_SRL = 4'h9;
|
||||
localparam ALUOP_SRA = 4'ha;
|
||||
localparam ALUOP_SLL = 4'hb;
|
||||
localparam ALUOP_MULDIV = 4'hc;
|
||||
|
||||
// Parameters to control ALU input muxes. Bypass mux paths are
|
||||
// controlled by X, so D has no parameters to choose these.
|
||||
|
||||
localparam ALUSRCA_RS1 = 2'h0;
|
||||
localparam ALUSRCA_PC = 2'h1;
|
||||
|
||||
localparam ALUSRCB_RS2 = 2'h0;
|
||||
localparam ALUSRCB_IMM = 2'h1;
|
||||
|
||||
localparam MEMOP_LW = 4'h0;
|
||||
localparam MEMOP_LH = 4'h1;
|
||||
localparam MEMOP_LB = 4'h2;
|
||||
localparam MEMOP_LHU = 4'h3;
|
||||
localparam MEMOP_LBU = 4'h4;
|
||||
localparam MEMOP_SW = 4'h5;
|
||||
localparam MEMOP_SH = 4'h6;
|
||||
localparam MEMOP_SB = 4'h7;
|
||||
localparam MEMOP_NONE = 4'h8;
|
||||
|
||||
localparam BCOND_NEVER = 2'h0;
|
||||
localparam BCOND_ALWAYS = 2'h1;
|
||||
localparam BCOND_ZERO = 2'h2;
|
||||
localparam BCOND_NZERO = 2'h3;
|
||||
|
||||
// CSR access types
|
||||
|
||||
localparam CSR_WTYPE_W = 2'h0;
|
||||
localparam CSR_WTYPE_S = 2'h1;
|
||||
localparam CSR_WTYPE_C = 2'h2;
|
||||
|
||||
// Exceptional condition signals which travel alongside (or instead of)
|
||||
// instructions in the pipeline. These are speculative and can be flushed
|
||||
// on e.g. branch mispredict
|
||||
|
||||
localparam W_EXCEPT = 3;
|
||||
localparam EXCEPT_NONE = 3'h0;
|
||||
localparam EXCEPT_ECALL = 3'h1;
|
||||
localparam EXCEPT_EBREAK = 3'h2;
|
||||
localparam EXCEPT_MRET = 3'h3; // separate, but handled similarly
|
||||
localparam EXCEPT_INSTR_ILLEGAL = 3'h4;
|
||||
localparam EXCEPT_INSTR_MISALIGN = 3'h5;
|
||||
localparam EXCEPT_INSTR_FAULT = 3'h6;
|
||||
|
||||
// Operations for M extension (these are just instr[14:12])
|
||||
|
||||
localparam W_MULOP = 3;
|
||||
localparam M_OP_MUL = 3'h0;
|
||||
localparam M_OP_MULH = 3'h1;
|
||||
localparam M_OP_MULHSU = 3'h2;
|
||||
localparam M_OP_MULHU = 3'h3;
|
||||
localparam M_OP_DIV = 3'h4;
|
||||
localparam M_OP_DIVU = 3'h5;
|
||||
localparam M_OP_REM = 3'h6;
|
||||
localparam M_OP_REMU = 3'h7;
|
|
@ -0,0 +1,95 @@
|
|||
/**********************************************************************
|
||||
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
|
||||
* Version 3, April 2008 *
|
||||
* *
|
||||
* Copyright (C) 2018 Luke Wren *
|
||||
* *
|
||||
* Everyone is permitted to copy and distribute verbatim or modified *
|
||||
* copies of this license document and accompanying software, and *
|
||||
* changing either is allowed. *
|
||||
* *
|
||||
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
|
||||
* *
|
||||
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
|
||||
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
|
||||
* *
|
||||
*********************************************************************/
|
||||
|
||||
// Register file
|
||||
// Single write port, dual read port
|
||||
|
||||
// FAKE_DUALPORT: if 1, implement regfile with pair of memories.
|
||||
// Write ports are ganged together, read ports operate independently.
|
||||
// This allows BRAM inference on FPGAs with single-read-port BRAMs.
|
||||
// (Looking at you iCE40)
|
||||
|
||||
// Ports:
//   clk, rst_n      - clock and active-low asynchronous reset (rst_n is only
//                     used by the RESET_REGS variant).
//   raddr1 / rdata1 - read port 1; rdata1 is registered on the clock edge.
//   raddr2 / rdata2 - read port 2; rdata2 is registered on the clock edge.
//   waddr, wdata,
//   wen             - write port; wdata is stored at waddr on a clock edge
//                     when wen is high.
// In every variant the write and the reads use nonblocking assignments in
// the same clocked block, so a read of the address being written in the same
// cycle returns the previous contents. No register index is special-cased
// inside this module.
module hazard5_regfile_1w2r #(
|
||||
parameter FAKE_DUALPORT = 0,
|
||||
parameter RESET_REGS = 0, // Unsupported for FAKE_DUALPORT
|
||||
parameter N_REGS = 16,
|
||||
parameter W_DATA = 32,
|
||||
// NOTE(review): the default address width is derived from W_DATA, not from
// N_REGS. With the defaults this gives a 5-bit address for a 16-entry memory;
// confirm that instantiations set N_REGS / W_ADDR consistently.
parameter W_ADDR = $clog2(W_DATA) // should be localparam. ISIM...
|
||||
) (
|
||||
input wire clk,
|
||||
input wire rst_n,
|
||||
|
||||
input wire [W_ADDR-1:0] raddr1,
|
||||
output reg [W_DATA-1:0] rdata1,
|
||||
|
||||
input wire [W_ADDR-1:0] raddr2,
|
||||
output reg [W_DATA-1:0] rdata2,
|
||||
|
||||
input wire [W_ADDR-1:0] waddr,
|
||||
input wire [W_DATA-1:0] wdata,
|
||||
input wire wen
|
||||
);
|
||||
|
||||
generate
|
||||
// Variant 1: two copies of the memory, written together, one per read port.
if (FAKE_DUALPORT) begin: fake_dualport
|
||||
reg [W_DATA-1:0] mem1 [0:N_REGS-1];
|
||||
reg [W_DATA-1:0] mem2 [0:N_REGS-1];
|
||||
|
||||
always @ (posedge clk) begin
|
||||
if (wen) begin
|
||||
mem1[waddr] <= wdata;
|
||||
mem2[waddr] <= wdata;
|
||||
end
|
||||
rdata1 <= mem1[raddr1];
|
||||
rdata2 <= mem2[raddr2];
|
||||
end
|
||||
// Variant 2: single memory whose entries are all cleared by rst_n.
end else if (RESET_REGS) begin: real_dualport_reset
|
||||
// This will presumably always be implemented with flops
|
||||
reg [W_DATA-1:0] mem [0:N_REGS-1];
|
||||
|
||||
integer i;
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
// Only the memory entries are cleared here; rdata1 and rdata2 are not
// assigned in the reset branch.
// It's best to ask nicely:
|
||||
// synthesis please_on
|
||||
for (i = 0; i < N_REGS; i = i + 1) begin
|
||||
mem[i] <= {W_DATA{1'b0}};
|
||||
end
|
||||
// synthesis please_off
|
||||
end else begin
|
||||
if (wen) begin
|
||||
mem[waddr] <= wdata;
|
||||
end
|
||||
rdata1 <= mem[raddr1];
|
||||
rdata2 <= mem[raddr2];
|
||||
end
|
||||
end
|
||||
// Variant 3: single memory, no reset, one write and two registered reads.
end else begin: real_dualport_noreset
|
||||
// This should be inference-compatible on FPGAs with dual-port BRAMs
|
||||
reg [W_DATA-1:0] mem [0:N_REGS-1];
|
||||
|
||||
always @ (posedge clk) begin
|
||||
if (wen) begin
|
||||
mem[waddr] <= wdata;
|
||||
end
|
||||
rdata1 <= mem[raddr1];
|
||||
rdata2 <= mem[raddr2];
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
endmodule
|
|
@ -0,0 +1,235 @@
|
|||
// ----------------------------------------------------------------------------
|
||||
// RVFI Instrumentation
|
||||
// ----------------------------------------------------------------------------
|
||||
// To be included into hazard5_cpu.v for use with riscv-formal.
|
||||
// Contains some state modelling to diagnose exactly what the core is doing,
|
||||
// and report this in a way RVFI understands.
|
||||
// We consider instructions to "retire" as they cross the M/W pipe register.
|
||||
//
|
||||
// All modelling signals prefixed with rvfm (riscv-formal monitor)
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Instruction monitor
|
||||
|
||||
// Diagnose whether X, M contain valid in-flight instructions, to produce
|
||||
// rvfi_valid signal.
|
||||
|
||||
// TODO fix all the redundant RVFI registers in a nice way
|
||||
|
||||
reg rvfm_x_valid, rvfm_m_valid;
|
||||
reg [31:0] rvfm_x_instr;
|
||||
reg [31:0] rvfm_m_instr;
|
||||
|
||||
wire rvfm_x_trap = x_trap_is_exception && x_trap_enter;
|
||||
reg rvfm_m_trap;
|
||||
reg rvfm_entered_intr;
|
||||
|
||||
reg rvfi_valid_r;
|
||||
reg [31:0] rvfi_insn_r;
|
||||
reg rvfi_trap_r;
|
||||
|
||||
assign rvfi_valid = rvfi_valid_r;
|
||||
assign rvfi_insn = rvfi_insn_r;
|
||||
assign rvfi_trap = rvfi_trap_r;
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
rvfm_x_valid <= 1'b0;
|
||||
rvfm_m_valid <= 1'b0;
|
||||
rvfm_m_trap <= 1'b0;
|
||||
rvfm_entered_intr <= 1'b0;
|
||||
rvfi_valid_r <= 1'b0;
|
||||
rvfi_trap_r <= 1'b0;
|
||||
rvfi_insn_r <= 32'h0;
|
||||
end else begin
|
||||
if (!x_stall) begin
|
||||
// Squash X instrs on IRQ entry -- these instructions will be reexecuted on return.
|
||||
rvfm_m_valid <= rvfm_x_valid && !(x_trap_enter && x_trap_enter_rdy && !rvfm_x_trap);
|
||||
rvfm_m_instr <= rvfm_x_instr;
|
||||
rvfm_x_valid <= 1'b0;
|
||||
rvfm_m_trap <= rvfm_x_trap;
|
||||
end else if (!m_stall) begin
|
||||
rvfm_m_valid <= 1'b0;
|
||||
end
|
||||
if (flush_d_x) begin
|
||||
rvfm_x_valid <= 1'b0;
|
||||
rvfm_m_valid <= rvfm_m_valid && m_stall;
|
||||
end else if (df_cir_use) begin
|
||||
rvfm_x_valid <= 1'b1;
|
||||
rvfm_x_instr <= {
|
||||
fd_cir[31:16] & {16{df_cir_use[1]}},
|
||||
fd_cir[15:0]
|
||||
};
|
||||
end
|
||||
rvfi_valid_r <= rvfm_m_valid && !m_stall;
|
||||
rvfi_insn_r <= rvfm_m_instr;
|
||||
rvfi_trap_r <= rvfm_m_trap;
|
||||
|
||||
// Take note of M-jump in pipe bubble in between instruction retires:
|
||||
rvfm_entered_intr <= (rvfm_entered_intr && !rvfi_valid)
|
||||
|| (m_jump_req && f_jump_now && !rvfm_m_valid);
|
||||
|
||||
// Sanity checks
|
||||
if (dx_rd != 5'h0)
|
||||
assert(rvfm_x_valid);
|
||||
if (xm_rd != 5'h0)
|
||||
assert(rvfm_m_valid);
|
||||
end
|
||||
end
|
||||
|
||||
// Hazard5 is an in-order core:
|
||||
reg [63:0] rvfm_retire_ctr;
|
||||
assign rvfi_order = rvfm_retire_ctr;
|
||||
always @ (posedge clk or negedge rst_n)
|
||||
if (!rst_n)
|
||||
rvfm_retire_ctr <= 0;
|
||||
else if (rvfi_valid)
|
||||
rvfm_retire_ctr <= rvfm_retire_ctr + 1;
|
||||
|
||||
assign rvfi_mode = 2'h3; // M-mode only
|
||||
assign rvfi_intr = rvfi_valid && rvfm_entered_intr;
|
||||
assign rvfi_halt = 1'b0; // TODO
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// PC and jump monitor
|
||||
|
||||
reg rvfm_dx_have_jumped;
|
||||
|
||||
reg [31:0] rvfm_xm_pc;
|
||||
reg [31:0] rvfm_xm_pc_next;
|
||||
|
||||
// Get a strange error from Yosys with $past() on this signal (possibly due to comb terms), so just flop it explicitly
|
||||
reg rvfm_past_df_cir_lock;
|
||||
always @ (posedge clk or negedge rst_n)
|
||||
if (!rst_n)
|
||||
rvfm_past_df_cir_lock <= 1'b0;
|
||||
else
|
||||
rvfm_past_df_cir_lock <= df_cir_lock;
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
rvfm_dx_have_jumped <= 0;
|
||||
rvfm_xm_pc <= 0;
|
||||
rvfm_xm_pc_next <= 0;
|
||||
end else begin
|
||||
if (!d_stall) begin
|
||||
rvfm_dx_have_jumped <= d_jump_req && f_jump_now || rvfm_past_df_cir_lock;
|
||||
end
|
||||
if (!x_stall) begin
|
||||
rvfm_xm_pc <= dx_pc;
|
||||
rvfm_xm_pc_next <= rvfm_dx_have_jumped ? dx_jump_target : dx_mispredict_addr;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg [31:0] rvfi_pc_rdata_r;
|
||||
reg [31:0] rvfi_pc_wdata_r;
|
||||
|
||||
assign rvfi_pc_rdata = rvfi_pc_rdata_r;
|
||||
assign rvfi_pc_wdata = rvfi_pc_wdata_r;
|
||||
|
||||
always @ (posedge clk) begin
|
||||
if (!m_stall) begin
|
||||
rvfi_pc_rdata_r <= rvfm_xm_pc;
|
||||
rvfi_pc_wdata_r <= m_jump_req ? m_jump_target : rvfm_xm_pc_next;
|
||||
end
|
||||
end
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Register file monitor:
|
||||
assign rvfi_rd_addr = mw_rd;
|
||||
assign rvfi_rd_wdata = mw_rd ? mw_result : 32'h0;
|
||||
|
||||
// Do not reimplement internal bypassing logic. Danger of implementing
|
||||
// it correctly here but incorrectly in core.
|
||||
|
||||
reg [31:0] rvfm_xm_rdata1;
|
||||
|
||||
always @ (posedge clk or negedge rst_n)
|
||||
if (!rst_n)
|
||||
rvfm_xm_rdata1 <= 32'h0;
|
||||
else if (!x_stall)
|
||||
rvfm_xm_rdata1 <= x_rs1_bypass;
|
||||
|
||||
reg [4:0] rvfi_rs1_addr_r;
|
||||
reg [4:0] rvfi_rs2_addr_r;
|
||||
reg [31:0] rvfi_rs1_rdata_r;
|
||||
reg [31:0] rvfi_rs2_rdata_r;
|
||||
|
||||
assign rvfi_rs1_addr = rvfi_rs1_addr_r;
|
||||
assign rvfi_rs2_addr = rvfi_rs2_addr_r;
|
||||
assign rvfi_rs1_rdata = rvfi_rs1_rdata_r;
|
||||
assign rvfi_rs2_rdata = rvfi_rs2_rdata_r;
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
rvfi_rs1_addr_r <= 5'h0;
|
||||
rvfi_rs2_addr_r <= 5'h0;
|
||||
rvfi_rs1_rdata_r <= 32'h0;
|
||||
rvfi_rs2_rdata_r <= 32'h0;
|
||||
end else begin
|
||||
rvfi_rs1_addr_r <= m_stall ? 5'h0 : xm_rs1;
|
||||
rvfi_rs2_addr_r <= m_stall ? 5'h0 : xm_rs2;
|
||||
rvfi_rs1_rdata_r <= rvfm_xm_rdata1;
|
||||
rvfi_rs2_rdata_r <= m_wdata;
|
||||
end
|
||||
end
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Load/store monitor: based on bus signals, NOT processor internals.
|
||||
// Marshal up a description of the current data phase, and then register this
|
||||
// into the RVFI signals.
|
||||
|
||||
`ifndef RISCV_FORMAL_ALIGNED_MEM
|
||||
initial $fatal;
|
||||
`endif
|
||||
|
||||
reg [31:0] rvfm_haddr_dph;
|
||||
reg rvfm_hwrite_dph;
|
||||
reg [1:0] rvfm_htrans_dph;
|
||||
reg [2:0] rvfm_hsize_dph;
|
||||
|
||||
always @ (posedge clk) begin
|
||||
if (ahblm_hready) begin
|
||||
rvfm_htrans_dph <= ahblm_htrans & {2{ahb_gnt_d}}; // Load/store only!
|
||||
rvfm_haddr_dph <= ahblm_haddr;
|
||||
rvfm_hwrite_dph <= ahblm_hwrite;
|
||||
rvfm_hsize_dph <= ahblm_hsize;
|
||||
end
|
||||
end
|
||||
|
||||
wire [3:0] rvfm_mem_bytemask_dph = (
|
||||
rvfm_hsize_dph == 3'h0 ? 4'h1 :
|
||||
rvfm_hsize_dph == 3'h1 ? 4'h3 :
|
||||
4'hf
|
||||
) << rvfm_haddr_dph[1:0];
|
||||
|
||||
reg [31:0] rvfi_mem_addr_r;
|
||||
reg [3:0] rvfi_mem_rmask_r;
|
||||
reg [31:0] rvfi_mem_rdata_r;
|
||||
reg [3:0] rvfi_mem_wmask_r;
|
||||
reg [31:0] rvfi_mem_wdata_r;
|
||||
|
||||
assign rvfi_mem_addr = rvfi_mem_addr_r;
|
||||
assign rvfi_mem_rmask = rvfi_mem_rmask_r;
|
||||
assign rvfi_mem_rdata = rvfi_mem_rdata_r;
|
||||
assign rvfi_mem_wmask = rvfi_mem_wmask_r;
|
||||
assign rvfi_mem_wdata = rvfi_mem_wdata_r;
|
||||
|
||||
always @ (posedge clk) begin
|
||||
if (ahblm_hready) begin
|
||||
// RVFI has an AXI-like concept of byte strobes, rather than AHB-like
|
||||
rvfi_mem_addr_r <= rvfm_haddr_dph & 32'hffff_fffc;
|
||||
{rvfi_mem_rmask_r, rvfi_mem_wmask_r} <= 0;
|
||||
if (rvfm_htrans_dph[1] && rvfm_hwrite_dph) begin
|
||||
rvfi_mem_wmask_r <= rvfm_mem_bytemask_dph;
|
||||
rvfi_mem_wdata_r <= ahblm_hwdata;
|
||||
end else if (rvfm_htrans_dph[1] && !rvfm_hwrite_dph) begin
|
||||
rvfi_mem_rmask_r <= rvfm_mem_bytemask_dph;
|
||||
rvfi_mem_rdata_r <= ahblm_hrdata;
|
||||
end
|
||||
end else begin
|
||||
// As far as RVFI is concerned nothing happens except final cycle of dphase
|
||||
{rvfi_mem_rmask_r, rvfi_mem_wmask_r} <= 0;
|
||||
end
|
||||
end
|
|
@ -0,0 +1,74 @@
|
|||
// Formal-verification wrapper: instantiates hazard5_cpu with the C and M
// extensions enabled and exposes its RVFI signals through the `RVFI_OUTPUTS
// port macro. The bus inputs hready and hrdata are declared with the
// `rvformal_rand_reg macro (defined outside this file; presumably an
// unconstrained value chosen by the solver -- confirm in riscv-formal).
// reset is active high and is inverted for the core's rst_n.
module rvfi_wrapper (
|
||||
input wire clock,
|
||||
input wire reset,
|
||||
`RVFI_OUTPUTS
|
||||
);
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Memory Interface
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
(* keep *) wire [31:0] haddr;
|
||||
(* keep *) wire hwrite;
|
||||
(* keep *) wire [1:0] htrans;
|
||||
(* keep *) wire [2:0] hsize;
|
||||
(* keep *) wire [2:0] hburst;
|
||||
(* keep *) wire [3:0] hprot;
|
||||
(* keep *) wire hmastlock;
|
||||
(* keep *) `rvformal_rand_reg hready;
|
||||
// NOTE(review): hresp has no driver in this wrapper and the assumption that
// would constrain it is commented out further down -- confirm how the
// formal flow treats it.
(* keep *) wire hresp;
|
||||
(* keep *) wire [31:0] hwdata;
|
||||
(* keep *) `rvformal_rand_reg [31:0] hrdata;
|
||||
|
||||
// AHB-lite requires: data phase of IDLE has no wait states
|
||||
// If the previous cycle had htrans == 2'b00 with hready high, constrain
// hready to be high in this cycle.
always @ (posedge clock)
|
||||
if ($past(htrans) == 2'b00 && $past(hready))
|
||||
assume(hready);
|
||||
|
||||
// Handling of bus faults is not tested
|
||||
// always assume(!hresp);
|
||||
|
||||
`ifdef RISCV_FORMAL_FAIRNESS
|
||||
|
||||
// Counts consecutive clock edges on which hready is low; cleared by reset
// and by any edge with hready high.
reg [7:0] bus_fairness_ctr;
|
||||
localparam MAX_STALL_LENGTH = 8;
|
||||
|
||||
always @ (posedge clock) begin
|
||||
if (reset)
|
||||
bus_fairness_ctr <= 8'h0;
|
||||
else if (hready)
|
||||
bus_fairness_ctr <= 8'h0;
|
||||
else
|
||||
// ~& is a reduction NAND: it is 1 unless every counter bit is set, so the
// counter increments by one and stops at 8'hff instead of wrapping.
bus_fairness_ctr <= bus_fairness_ctr + ~&bus_fairness_ctr;
|
||||
|
||||
// Bounds the length of a bus stall considered by the solver.
assume(bus_fairness_ctr <= MAX_STALL_LENGTH);
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Device Under Test
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
hazard5_cpu #(
|
||||
.RESET_VECTOR (0),
|
||||
.EXTENSION_C (1),
|
||||
.EXTENSION_M (1)
|
||||
) dut (
|
||||
.clk (clock),
|
||||
.rst_n (!reset),
|
||||
.ahblm_haddr (haddr),
|
||||
.ahblm_hwrite (hwrite),
|
||||
.ahblm_htrans (htrans),
|
||||
.ahblm_hsize (hsize),
|
||||
.ahblm_hburst (hburst),
|
||||
.ahblm_hprot (hprot),
|
||||
.ahblm_hmastlock (hmastlock),
|
||||
.ahblm_hready (hready),
|
||||
.ahblm_hresp (hresp),
|
||||
.ahblm_hwdata (hwdata),
|
||||
.ahblm_hrdata (hrdata),
|
||||
`RVFI_CONN
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -0,0 +1,147 @@
|
|||
// Position (least-significant bit) and width, in bits, of the rs1, rs2 and rd
// fields within a 32-bit instruction word.
localparam RV_RS1_LSB = 15;
|
||||
localparam RV_RS1_BITS = 5;
|
||||
localparam RV_RS2_LSB = 20;
|
||||
localparam RV_RS2_BITS = 5;
|
||||
localparam RV_RD_LSB = 7;
|
||||
localparam RV_RD_BITS = 5;
|
||||
|
||||
// Base ISA (some of these have since moved into separate Z* extensions, e.g. Zicsr and Zifencei)
|
||||
localparam RV_BEQ = 32'b?????????????????000?????1100011;
|
||||
localparam RV_BNE = 32'b?????????????????001?????1100011;
|
||||
localparam RV_BLT = 32'b?????????????????100?????1100011;
|
||||
localparam RV_BGE = 32'b?????????????????101?????1100011;
|
||||
localparam RV_BLTU = 32'b?????????????????110?????1100011;
|
||||
localparam RV_BGEU = 32'b?????????????????111?????1100011;
|
||||
localparam RV_JALR = 32'b?????????????????000?????1100111;
|
||||
localparam RV_JAL = 32'b?????????????????????????1101111;
|
||||
localparam RV_LUI = 32'b?????????????????????????0110111;
|
||||
localparam RV_AUIPC = 32'b?????????????????????????0010111;
|
||||
localparam RV_ADDI = 32'b?????????????????000?????0010011;
|
||||
localparam RV_SLLI = 32'b0000000??????????001?????0010011;
|
||||
localparam RV_SLTI = 32'b?????????????????010?????0010011;
|
||||
localparam RV_SLTIU = 32'b?????????????????011?????0010011;
|
||||
localparam RV_XORI = 32'b?????????????????100?????0010011;
|
||||
localparam RV_SRLI = 32'b0000000??????????101?????0010011;
|
||||
localparam RV_SRAI = 32'b0100000??????????101?????0010011;
|
||||
localparam RV_ORI = 32'b?????????????????110?????0010011;
|
||||
localparam RV_ANDI = 32'b?????????????????111?????0010011;
|
||||
localparam RV_ADD = 32'b0000000??????????000?????0110011;
|
||||
localparam RV_SUB = 32'b0100000??????????000?????0110011;
|
||||
localparam RV_SLL = 32'b0000000??????????001?????0110011;
|
||||
localparam RV_SLT = 32'b0000000??????????010?????0110011;
|
||||
localparam RV_SLTU = 32'b0000000??????????011?????0110011;
|
||||
localparam RV_XOR = 32'b0000000??????????100?????0110011;
|
||||
localparam RV_SRL = 32'b0000000??????????101?????0110011;
|
||||
localparam RV_SRA = 32'b0100000??????????101?????0110011;
|
||||
localparam RV_OR = 32'b0000000??????????110?????0110011;
|
||||
localparam RV_AND = 32'b0000000??????????111?????0110011;
|
||||
localparam RV_LB = 32'b?????????????????000?????0000011;
|
||||
localparam RV_LH = 32'b?????????????????001?????0000011;
|
||||
localparam RV_LW = 32'b?????????????????010?????0000011;
|
||||
localparam RV_LBU = 32'b?????????????????100?????0000011;
|
||||
localparam RV_LHU = 32'b?????????????????101?????0000011;
|
||||
localparam RV_SB = 32'b?????????????????000?????0100011;
|
||||
localparam RV_SH = 32'b?????????????????001?????0100011;
|
||||
localparam RV_SW = 32'b?????????????????010?????0100011;
|
||||
localparam RV_FENCE = 32'b?????????????????000?????0001111;
|
||||
localparam RV_FENCE_I = 32'b?????????????????001?????0001111;
|
||||
localparam RV_ECALL = 32'b00000000000000000000000001110011;
|
||||
localparam RV_EBREAK = 32'b00000000000100000000000001110011;
|
||||
localparam RV_CSRRW = 32'b?????????????????001?????1110011;
|
||||
localparam RV_CSRRS = 32'b?????????????????010?????1110011;
|
||||
localparam RV_CSRRC = 32'b?????????????????011?????1110011;
|
||||
localparam RV_CSRRWI = 32'b?????????????????101?????1110011;
|
||||
localparam RV_CSRRSI = 32'b?????????????????110?????1110011;
|
||||
localparam RV_CSRRCI = 32'b?????????????????111?????1110011;
|
||||
localparam RV_MRET = 32'b00110000001000000000000001110011;
|
||||
localparam RV_SYSTEM = 32'b?????????????????????????1110011;
|
||||
|
||||
// M extension
|
||||
localparam RV_MUL = 32'b0000001??????????000?????0110011;
|
||||
localparam RV_MULH = 32'b0000001??????????001?????0110011;
|
||||
localparam RV_MULHSU = 32'b0000001??????????010?????0110011;
|
||||
localparam RV_MULHU = 32'b0000001??????????011?????0110011;
|
||||
localparam RV_DIV = 32'b0000001??????????100?????0110011;
|
||||
localparam RV_DIVU = 32'b0000001??????????101?????0110011;
|
||||
localparam RV_REM = 32'b0000001??????????110?????0110011;
|
||||
localparam RV_REMU = 32'b0000001??????????111?????0110011;
|
||||
|
||||
// C Extension
|
||||
localparam RV_C_ADDI4SPN = 16'b000???????????00; // *** illegal if imm 0
|
||||
localparam RV_C_LW = 16'b010???????????00;
|
||||
localparam RV_C_SW = 16'b110???????????00;
|
||||
|
||||
localparam RV_C_ADDI = 16'b000???????????01;
|
||||
localparam RV_C_JAL = 16'b001???????????01;
|
||||
localparam RV_C_J = 16'b101???????????01;
|
||||
localparam RV_C_LI = 16'b010???????????01;
|
||||
// addi16sp when rd=2:
|
||||
localparam RV_C_LUI = 16'b011???????????01; // *** reserved if imm 0 (for both LUI and ADDI16SP)
|
||||
localparam RV_C_SRLI = 16'b100000????????01; // On RV32 imm[5] (instr[12]) must be 0, else reserved NSE.
|
||||
localparam RV_C_SRAI = 16'b100001????????01; // On RV32 imm[5] (instr[12]) must be 0, else reserved NSE.
|
||||
localparam RV_C_ANDI = 16'b100?10????????01;
|
||||
localparam RV_C_SUB = 16'b100011???00???01;
|
||||
localparam RV_C_XOR = 16'b100011???01???01;
|
||||
localparam RV_C_OR = 16'b100011???10???01;
|
||||
localparam RV_C_AND = 16'b100011???11???01;
|
||||
localparam RV_C_BEQZ = 16'b110???????????01;
|
||||
localparam RV_C_BNEZ = 16'b111???????????01;
|
||||
|
||||
localparam RV_C_SLLI = 16'b0000??????????10; // On RV32 imm[5] (instr[12]) must be 0, else reserved NSE.
|
||||
// jr if !rs2:
|
||||
localparam RV_C_MV = 16'b1000??????????10; // *** reserved if JR and !rs1 (instr[11:7])
|
||||
// jalr if !rs2:
|
||||
localparam RV_C_ADD = 16'b1001??????????10; // *** EBREAK if !instr[11:2]
|
||||
localparam RV_C_LWSP = 16'b010???????????10;
|
||||
localparam RV_C_SWSP = 16'b110???????????10;
|
||||
|
||||
// Copies provided here with 0 instead of ? so that these can be used to build 32-bit instructions in the decompressor
|
||||
|
||||
localparam RV_NOZ_BEQ = 32'b00000000000000000000000001100011;
|
||||
localparam RV_NOZ_BNE = 32'b00000000000000000001000001100011;
|
||||
localparam RV_NOZ_BLT = 32'b00000000000000000100000001100011;
|
||||
localparam RV_NOZ_BGE = 32'b00000000000000000101000001100011;
|
||||
localparam RV_NOZ_BLTU = 32'b00000000000000000110000001100011;
|
||||
localparam RV_NOZ_BGEU = 32'b00000000000000000111000001100011;
|
||||
localparam RV_NOZ_JALR = 32'b00000000000000000000000001100111;
|
||||
localparam RV_NOZ_JAL = 32'b00000000000000000000000001101111;
|
||||
localparam RV_NOZ_LUI = 32'b00000000000000000000000000110111;
|
||||
localparam RV_NOZ_AUIPC = 32'b00000000000000000000000000010111;
|
||||
localparam RV_NOZ_ADDI = 32'b00000000000000000000000000010011;
|
||||
localparam RV_NOZ_SLLI = 32'b00000000000000000001000000010011;
|
||||
localparam RV_NOZ_SLTI = 32'b00000000000000000010000000010011;
|
||||
localparam RV_NOZ_SLTIU = 32'b00000000000000000011000000010011;
|
||||
localparam RV_NOZ_XORI = 32'b00000000000000000100000000010011;
|
||||
localparam RV_NOZ_SRLI = 32'b00000000000000000101000000010011;
|
||||
localparam RV_NOZ_SRAI = 32'b01000000000000000101000000010011;
|
||||
localparam RV_NOZ_ORI = 32'b00000000000000000110000000010011;
|
||||
localparam RV_NOZ_ANDI = 32'b00000000000000000111000000010011;
|
||||
localparam RV_NOZ_ADD = 32'b00000000000000000000000000110011;
|
||||
localparam RV_NOZ_SUB = 32'b01000000000000000000000000110011;
|
||||
localparam RV_NOZ_SLL = 32'b00000000000000000001000000110011;
|
||||
localparam RV_NOZ_SLT = 32'b00000000000000000010000000110011;
|
||||
localparam RV_NOZ_SLTU = 32'b00000000000000000011000000110011;
|
||||
localparam RV_NOZ_XOR = 32'b00000000000000000100000000110011;
|
||||
localparam RV_NOZ_SRL = 32'b00000000000000000101000000110011;
|
||||
localparam RV_NOZ_SRA = 32'b01000000000000000101000000110011;
|
||||
localparam RV_NOZ_OR = 32'b00000000000000000110000000110011;
|
||||
localparam RV_NOZ_AND = 32'b00000000000000000111000000110011;
|
||||
localparam RV_NOZ_LB = 32'b00000000000000000000000000000011;
|
||||
localparam RV_NOZ_LH = 32'b00000000000000000001000000000011;
|
||||
localparam RV_NOZ_LW = 32'b00000000000000000010000000000011;
|
||||
localparam RV_NOZ_LBU = 32'b00000000000000000100000000000011;
|
||||
localparam RV_NOZ_LHU = 32'b00000000000000000101000000000011;
|
||||
localparam RV_NOZ_SB = 32'b00000000000000000000000000100011;
|
||||
localparam RV_NOZ_SH = 32'b00000000000000000001000000100011;
|
||||
localparam RV_NOZ_SW = 32'b00000000000000000010000000100011;
|
||||
localparam RV_NOZ_FENCE = 32'b00000000000000000000000000001111;
|
||||
localparam RV_NOZ_FENCE_I = 32'b00000000000000000001000000001111;
|
||||
localparam RV_NOZ_ECALL = 32'b00000000000000000000000001110011;
|
||||
localparam RV_NOZ_EBREAK = 32'b00000000000100000000000001110011;
|
||||
localparam RV_NOZ_CSRRW = 32'b00000000000000000001000001110011;
|
||||
localparam RV_NOZ_CSRRS = 32'b00000000000000000010000001110011;
|
||||
localparam RV_NOZ_CSRRC = 32'b00000000000000000011000001110011;
|
||||
localparam RV_NOZ_CSRRWI = 32'b00000000000000000101000001110011;
|
||||
localparam RV_NOZ_CSRRSI = 32'b00000000000000000110000001110011;
|
||||
localparam RV_NOZ_CSRRCI = 32'b00000000000000000111000001110011;
|
||||
localparam RV_NOZ_SYSTEM = 32'b00000000000000000000000001110011;
|
|
@ -0,0 +1,7 @@
|
|||
*.o
|
||||
*.out
|
||||
*.bin
|
||||
*.vcd
|
||||
*.elf
|
||||
*.dis
|
||||
*.log
|
|
@ -0,0 +1,241 @@
|
|||
#define COLLAPSE_WEAK_HANDLERS
|
||||
|
||||
#define IO_BASE 0x80000000
|
||||
#define IO_PRINT_CHAR (IO_BASE + 0x0)
|
||||
#define IO_PRINT_U32 (IO_BASE + 0x4)
|
||||
#define IO_EXIT (IO_BASE + 0x8)
|
||||
|
||||
// Provide trap vector table, reset handler and weak default trap handlers for
|
||||
// Hazard5. This is not a crt0: the reset handler calls an external _start
|
||||
|
||||
|
||||
.option push
|
||||
.option norelax
|
||||
.option norvc
|
||||
|
||||
.section .vectors
|
||||
|
||||
// Emit one vector table entry: align to 4 bytes, then jump to the handler
// whose symbol is given as \name (required argument).
.macro VEC name:req
|
||||
.p2align 2
|
||||
j \name
|
||||
.endm
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Vector table
|
||||
// Hazard5 requires 4k alignment of mtvec
|
||||
|
||||
.p2align 12
|
||||
.vector_table:
|
||||
|
||||
// Exceptions
|
||||
|
||||
VEC handle_instr_misalign
|
||||
VEC handle_instr_fault
|
||||
VEC handle_instr_illegal
|
||||
VEC handle_breakpoint
|
||||
VEC handle_load_misalign
|
||||
VEC handle_load_fault
|
||||
VEC handle_store_misalign
|
||||
VEC handle_store_fault
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC handle_ecall
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
|
||||
// Standard interrupts
|
||||
// Note: global EIRQ does not fire. Instead we have 16 separate vectors
|
||||
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC isr_machine_softirq
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC isr_machine_timer
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
VEC .halt
|
||||
|
||||
// External interrupts
|
||||
|
||||
VEC isr_irq0
|
||||
VEC isr_irq1
|
||||
VEC isr_irq2
|
||||
VEC isr_irq3
|
||||
VEC isr_irq4
|
||||
VEC isr_irq5
|
||||
VEC isr_irq6
|
||||
VEC isr_irq7
|
||||
VEC isr_irq8
|
||||
VEC isr_irq9
|
||||
VEC isr_irq10
|
||||
VEC isr_irq11
|
||||
VEC isr_irq12
|
||||
VEC isr_irq13
|
||||
VEC isr_irq14
|
||||
VEC isr_irq15
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Reset handler
|
||||
|
||||
|
||||
// Reset handler: set up the stack and trap vector base, build a minimal
// argc/argv frame on the stack, then call the external _start.
.reset_handler:
	la sp, __stack_top
	la t0, .vector_table
	csrw mtvec, t0

	// newlib _start expects argc, argv on the stack. Leave stack 16-byte aligned.
	addi sp, sp, -16
	// argc = 1
	li a0, 1
	sw a0, (sp)
	// argv[0] = progname
	la a0, progname
	sw a0, 4(sp)
	// argv[1] = NULL: argv must be NULL-terminated, and this slot is not
	// written anywhere else before _start reads the frame.
	sw zero, 8(sp)

	jal _start
	// _start is not expected to return; park the core if it does
	j .halt
|
||||
|
||||
// _exit: write the exit code (a0) to the IO_EXIT register.
// NOTE(review): presumably the testbench ends the simulation on this write.
.global _exit
_exit:
	li a1, IO_EXIT
	sw a0, (a1)
	// _exit must never return. Without this jump, execution would fall through
	// into _sbrk below if the store above did not stop the simulation.
	j .halt
|
||||
|
||||
// _sbrk: bump allocator over the word stored at heap_ptr.
//   in:  a0 = increment in bytes
//   out: a0 = value of heap_ptr before the increment
// Clobbers a1 and a2. No upper bound is checked against the stack.
.global _sbrk
|
||||
_sbrk:
|
||||
la a1, heap_ptr
|
||||
lw a2, (a1)
|
||||
add a0, a0, a2
|
||||
sw a0, (a1)
|
||||
mv a0, a2
|
||||
ret
|
||||
|
||||
heap_ptr:
|
||||
.word _end
|
||||
|
||||
.global .halt
|
||||
.halt:
|
||||
j .halt
|
||||
|
||||
progname:
|
||||
.asciz "hazard5-testbench"
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Weak handler/ISR symbols
|
||||
|
||||
// Routine to print out trap name, trap address, and some core registers
|
||||
// (x8..x15, ra, sp). The default handlers are all patched into this routine,
|
||||
// so the CPU will print some basic diagnostics on any unhandled trap
|
||||
// (assuming the processor is not internally completely broken)
|
||||
|
||||
// Print the NUL-terminated string at x28, one byte at a time, by storing each
// byte to IO_PRINT_CHAR.
// argument in x28, return in x27, trashes x28...x30

// Realign before emitting code: the progname string directly above has an
// arbitrary length, and these instructions are assembled under .option norvc.
.p2align 2
_tb_puts:
	li x29, IO_PRINT_CHAR
1:
	lbu x30, (x28)
	addi x28, x28, 1
	// Stop at the terminating NUL without printing it
	beqz x30, 2f
	sw x30, (x29)
	j 1b
2:
	jr x27
|
||||
|
||||
// Print the label string \str via _tb_puts, then store register \reg to the
// address held in x31. Clobbers x27..x30 through _tb_puts; the caller must
// have loaded x31 beforehand.
.macro print_reg str reg
|
||||
la x28, \str
|
||||
jal x27, _tb_puts
|
||||
sw \reg, (x31)
|
||||
.endm
|
||||
|
||||
// Common body of all default trap handlers. Entered with x31 pointing at the
// NUL-terminated trap name. Prints a banner, the trap name, mepc and a set of
// registers, then writes -1 to IO_EXIT. Does not return.
_weak_handler_name_in_x31:
|
||||
la x28, _str_unhandled_trap
|
||||
jal x27, _tb_puts
|
||||
// Trap name: x31 is consumed here, before it is reused as an IO address below
mv x28, x31
|
||||
jal x27, _tb_puts
|
||||
la x28, _str_at_mepc
|
||||
jal x27, _tb_puts
|
||||
// From here on x31 holds IO_PRINT_U32, which the print_reg macro stores to
li x31, IO_PRINT_U32
|
||||
csrr x28, mepc
|
||||
sw x28, (x31)
|
||||
print_reg _str_s0 s0
|
||||
print_reg _str_s1 s1
|
||||
print_reg _str_a0 a0
|
||||
print_reg _str_a1 a1
|
||||
print_reg _str_a2 a2
|
||||
print_reg _str_a3 a3
|
||||
print_reg _str_a4 a4
|
||||
print_reg _str_a5 a5
|
||||
print_reg _str_ra ra
|
||||
print_reg _str_sp sp
|
||||
// Report failure: exit code -1
li x31, IO_EXIT
|
||||
li x30, -1
|
||||
sw x30, (x31)
|
||||
// Should be unreachable:
|
||||
j .halt
|
||||
|
||||
_str_unhandled_trap: .asciz "*** Unhandled trap ***\n"
|
||||
_str_at_mepc: .asciz " @ mepc = "
|
||||
_str_s0: .asciz "s0: "
|
||||
_str_s1: .asciz "s1: "
|
||||
_str_a0: .asciz "a0: "
|
||||
_str_a1: .asciz "a1: "
|
||||
_str_a2: .asciz "a2: "
|
||||
_str_a3: .asciz "a3: "
|
||||
_str_a4: .asciz "a4: "
|
||||
_str_a5: .asciz "a5: "
|
||||
_str_ra: .asciz "ra: "
|
||||
_str_sp: .asciz "sp: "
|
||||
|
||||
// Provide a default weak handler for each trap, which calls into the above
|
||||
// diagnostic routine with the trap name (a null-terminated string) in x31
|
||||
|
||||
// Define \name as a 4-byte-aligned, global, weak function symbol, so a
// definition elsewhere with the same name overrides it. The default body
// loads the address of the handler's own name string into x31 and jumps to
// the shared diagnostic routine. The name string is emitted directly after
// the jump; the .p2align at the top of the next expansion restores alignment.
.macro weak_handler name:req
|
||||
.p2align 2
|
||||
.global \name
|
||||
.type \name,%function
|
||||
.weak \name
|
||||
\name:
|
||||
la x31, _str_\name
|
||||
j _weak_handler_name_in_x31
|
||||
_str_\name:
|
||||
.asciz "\name"
|
||||
.endm
|
||||
|
||||
weak_handler handle_instr_misalign
|
||||
weak_handler handle_instr_fault
|
||||
weak_handler handle_instr_illegal
|
||||
weak_handler handle_breakpoint
|
||||
weak_handler handle_load_misalign
|
||||
weak_handler handle_load_fault
|
||||
weak_handler handle_store_misalign
|
||||
weak_handler handle_store_fault
|
||||
weak_handler handle_ecall
|
||||
weak_handler isr_machine_softirq
|
||||
weak_handler isr_machine_timer
|
||||
weak_handler isr_irq0
|
||||
weak_handler isr_irq1
|
||||
weak_handler isr_irq2
|
||||
weak_handler isr_irq3
|
||||
weak_handler isr_irq4
|
||||
weak_handler isr_irq5
|
||||
weak_handler isr_irq6
|
||||
weak_handler isr_irq7
|
||||
weak_handler isr_irq8
|
||||
weak_handler isr_irq9
|
||||
weak_handler isr_irq10
|
||||
weak_handler isr_irq11
|
||||
weak_handler isr_irq12
|
||||
weak_handler isr_irq13
|
||||
weak_handler isr_irq14
|
||||
weak_handler isr_irq15
|
||||
|
||||
// You can relax now
|
||||
.option pop
|
|
@ -0,0 +1,253 @@
|
|||
/* Script for -z combreloc: combine and sort reloc sections */
|
||||
/* Copyright (C) 2014-2017 Free Software Foundation, Inc.
|
||||
Copying and distribution of this script, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
notice and this notice are preserved. */
|
||||
|
||||
/* Modified from GCC default for hazard5 testbench */
|
||||
|
||||
/* Single 16 MiB writable and executable region starting at address 0.
   __stack_top is defined further down as the end of this region. */
MEMORY
|
||||
{
|
||||
RAM (wx) : ORIGIN = 0x0, LENGTH = 16M
|
||||
}
|
||||
|
||||
OUTPUT_FORMAT("elf32-littleriscv", "elf32-littleriscv",
|
||||
"elf32-littleriscv")
|
||||
OUTPUT_ARCH(riscv)
|
||||
ENTRY(_start)
|
||||
SEARCH_DIR("/opt/riscv/riscv32-unknown-elf/lib");
|
||||
SECTIONS
|
||||
{
|
||||
/* Read-only sections, merged into text segment: */
|
||||
PROVIDE (__executable_start = ORIGIN(RAM)); . = ORIGIN(RAM);
|
||||
.interp : { *(.interp) }
|
||||
.note.gnu.build-id : { *(.note.gnu.build-id) }
|
||||
.hash : { *(.hash) }
|
||||
.gnu.hash : { *(.gnu.hash) }
|
||||
.dynsym : { *(.dynsym) }
|
||||
.dynstr : { *(.dynstr) }
|
||||
.gnu.version : { *(.gnu.version) }
|
||||
.gnu.version_d : { *(.gnu.version_d) }
|
||||
.gnu.version_r : { *(.gnu.version_r) }
|
||||
.rela.dyn :
|
||||
{
|
||||
*(.rela.init)
|
||||
*(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
|
||||
*(.rela.fini)
|
||||
*(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
|
||||
*(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
|
||||
*(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
|
||||
*(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
|
||||
*(.rela.ctors)
|
||||
*(.rela.dtors)
|
||||
*(.rela.got)
|
||||
*(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*)
|
||||
*(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*)
|
||||
*(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*)
|
||||
*(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*)
|
||||
*(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
|
||||
PROVIDE_HIDDEN (__rela_iplt_start = .);
|
||||
*(.rela.iplt)
|
||||
PROVIDE_HIDDEN (__rela_iplt_end = .);
|
||||
}
|
||||
.rela.plt :
|
||||
{
|
||||
*(.rela.plt)
|
||||
}
|
||||
.init :
|
||||
{
|
||||
KEEP (*(SORT_NONE(.init)))
|
||||
}
|
||||
.plt : { *(.plt) }
|
||||
.iplt : { *(.iplt) }
|
||||
.text :
  {
    /* Trap vector table and startup code come first in .text. KEEP stops
       --gc-sections from discarding the section when no retained input
       section references one of its symbols. */
    KEEP (*(.vectors))
    *(.text.unlikely .text.*_unlikely .text.unlikely.*)
    *(.text.exit .text.exit.*)
    *(.text.startup .text.startup.*)
    *(.text.hot .text.hot.*)
    *(.text .stub .text.* .gnu.linkonce.t.*)
    /* .gnu.warning sections are handled specially by elf32.em.  */
    *(.gnu.warning)
  }
|
||||
.fini :
|
||||
{
|
||||
KEEP (*(SORT_NONE(.fini)))
|
||||
}
|
||||
PROVIDE (__etext = .);
|
||||
PROVIDE (_etext = .);
|
||||
PROVIDE (etext = .);
|
||||
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
|
||||
.rodata1 : { *(.rodata1) }
|
||||
.sdata2 :
|
||||
{
|
||||
*(.sdata2 .sdata2.* .gnu.linkonce.s2.*)
|
||||
}
|
||||
.sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
|
||||
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) }
|
||||
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) }
|
||||
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
|
||||
.gcc_except_table.*) }
|
||||
.gnu_extab : ONLY_IF_RO { *(.gnu_extab*) }
|
||||
/* These sections are generated by the Sun/Oracle C++ compiler. */
|
||||
.exception_ranges : ONLY_IF_RO { *(.exception_ranges
|
||||
.exception_ranges*) }
|
||||
/* Adjust the address for the data segment. We want to adjust up to
|
||||
the same address within the page on the next page up. */
|
||||
. = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
|
||||
/* Exception handling */
|
||||
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) }
|
||||
.gnu_extab : ONLY_IF_RW { *(.gnu_extab) }
|
||||
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
|
||||
.exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
|
||||
/* Thread Local Storage sections */
|
||||
.tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
|
||||
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
|
||||
.preinit_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__preinit_array_start = .);
|
||||
KEEP (*(.preinit_array))
|
||||
PROVIDE_HIDDEN (__preinit_array_end = .);
|
||||
}
|
||||
.init_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__init_array_start = .);
|
||||
KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
|
||||
KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
|
||||
PROVIDE_HIDDEN (__init_array_end = .);
|
||||
}
|
||||
.fini_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__fini_array_start = .);
|
||||
KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
|
||||
KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
|
||||
PROVIDE_HIDDEN (__fini_array_end = .);
|
||||
}
|
||||
.ctors :
|
||||
{
|
||||
/* gcc uses crtbegin.o to find the start of
|
||||
the constructors, so we make sure it is
|
||||
first. Because this is a wildcard, it
|
||||
doesn't matter if the user does not
|
||||
actually link against crtbegin.o; the
|
||||
linker won't look for a file to match a
|
||||
wildcard. The wildcard also means that it
|
||||
doesn't matter which directory crtbegin.o
|
||||
is in. */
|
||||
KEEP (*crtbegin.o(.ctors))
|
||||
KEEP (*crtbegin?.o(.ctors))
|
||||
/* We don't want to include the .ctor section from
|
||||
the crtend.o file until after the sorted ctors.
|
||||
The .ctor section from the crtend file contains the
|
||||
end of ctors marker and it must be last */
|
||||
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
|
||||
KEEP (*(SORT(.ctors.*)))
|
||||
KEEP (*(.ctors))
|
||||
}
|
||||
.dtors :
|
||||
{
|
||||
KEEP (*crtbegin.o(.dtors))
|
||||
KEEP (*crtbegin?.o(.dtors))
|
||||
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
|
||||
KEEP (*(SORT(.dtors.*)))
|
||||
KEEP (*(.dtors))
|
||||
}
|
||||
.jcr : { KEEP (*(.jcr)) }
|
||||
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
|
||||
.dynamic : { *(.dynamic) }
|
||||
. = DATA_SEGMENT_RELRO_END (0, .);
|
||||
.data :
|
||||
{
|
||||
*(.data .data.* .gnu.linkonce.d.*)
|
||||
SORT(CONSTRUCTORS)
|
||||
}
|
||||
.data1 : { *(.data1) }
|
||||
.got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
|
||||
/* We want the small data sections together, so single-instruction offsets
|
||||
can access them all, and initialized data all before uninitialized, so
|
||||
we can shorten the on-disk segment size. */
|
||||
.sdata :
|
||||
{
|
||||
__global_pointer$ = . + 0x800;
|
||||
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*)
|
||||
*(.sdata .sdata.* .gnu.linkonce.s.*)
|
||||
}
|
||||
_edata = .; PROVIDE (edata = .);
|
||||
. = .;
|
||||
__bss_start = .;
|
||||
.sbss :
|
||||
{
|
||||
*(.dynsbss)
|
||||
*(.sbss .sbss.* .gnu.linkonce.sb.*)
|
||||
*(.scommon)
|
||||
}
|
||||
.bss :
|
||||
{
|
||||
*(.dynbss)
|
||||
*(.bss .bss.* .gnu.linkonce.b.*)
|
||||
*(COMMON)
|
||||
/* Align here to ensure that the .bss section occupies space up to
|
||||
_end. Align after .bss to ensure correct alignment even if the
|
||||
.bss section disappears because there are no input sections.
|
||||
FIXME: Why do we need it? When there is no .bss section, we don't
|
||||
pad the .data section. */
|
||||
. = ALIGN(. != 0 ? 32 / 8 : 1);
|
||||
}
|
||||
. = ALIGN(32 / 8);
|
||||
. = SEGMENT_START("ldata-segment", .);
|
||||
. = ALIGN(32 / 8);
|
||||
__bss_end = .;
|
||||
_end = .; PROVIDE (end = .);
|
||||
.noload (NOLOAD):
|
||||
{
|
||||
*(.noload .noload.*)
|
||||
}
|
||||
. = DATA_SEGMENT_END (.);
|
||||
|
||||
PROVIDE(__stack_top = ORIGIN(RAM) + LENGTH(RAM));
|
||||
|
||||
/* Stabs debugging sections. */
|
||||
.stab 0 : { *(.stab) }
|
||||
.stabstr 0 : { *(.stabstr) }
|
||||
.stab.excl 0 : { *(.stab.excl) }
|
||||
.stab.exclstr 0 : { *(.stab.exclstr) }
|
||||
.stab.index 0 : { *(.stab.index) }
|
||||
.stab.indexstr 0 : { *(.stab.indexstr) }
|
||||
.comment 0 : { *(.comment) }
|
||||
/* DWARF debug sections.
|
||||
Symbols in the DWARF debugging sections are relative to the beginning
|
||||
of the section so we begin them at 0. */
|
||||
/* DWARF 1 */
|
||||
.debug 0 : { *(.debug) }
|
||||
.line 0 : { *(.line) }
|
||||
/* GNU DWARF 1 extensions */
|
||||
.debug_srcinfo 0 : { *(.debug_srcinfo) }
|
||||
.debug_sfnames 0 : { *(.debug_sfnames) }
|
||||
/* DWARF 1.1 and DWARF 2 */
|
||||
.debug_aranges 0 : { *(.debug_aranges) }
|
||||
.debug_pubnames 0 : { *(.debug_pubnames) }
|
||||
/* DWARF 2 */
|
||||
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
|
||||
.debug_abbrev 0 : { *(.debug_abbrev) }
|
||||
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
|
||||
.debug_frame 0 : { *(.debug_frame) }
|
||||
.debug_str 0 : { *(.debug_str) }
|
||||
.debug_loc 0 : { *(.debug_loc) }
|
||||
.debug_macinfo 0 : { *(.debug_macinfo) }
|
||||
/* SGI/MIPS DWARF 2 extensions */
|
||||
.debug_weaknames 0 : { *(.debug_weaknames) }
|
||||
.debug_funcnames 0 : { *(.debug_funcnames) }
|
||||
.debug_typenames 0 : { *(.debug_typenames) }
|
||||
.debug_varnames 0 : { *(.debug_varnames) }
|
||||
/* DWARF 3 */
|
||||
.debug_pubtypes 0 : { *(.debug_pubtypes) }
|
||||
.debug_ranges 0 : { *(.debug_ranges) }
|
||||
/* DWARF Extension. */
|
||||
.debug_macro 0 : { *(.debug_macro) }
|
||||
.debug_addr 0 : { *(.debug_addr) }
|
||||
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
|
||||
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
ifndef SRCS
|
||||
$(error Must define list of test sources as SRCS)
|
||||
endif
|
||||
|
||||
ifndef APP
|
||||
$(error Must define application name as APP)
|
||||
endif
|
||||
|
||||
CCFLAGS ?=
|
||||
LDSCRIPT ?= ../common/memmap.ld
|
||||
CROSS_PREFIX ?= riscv32-unknown-elf-
|
||||
TBDIR ?= ../tb_cxxrtl
|
||||
INCDIR ?= ../common
|
||||
MAX_CYCLES ?= 100000
|
||||
|
||||
###############################################################################
|
||||
|
||||
.SUFFIXES:
|
||||
.PHONY: all run view bin tb clean clean_tb
|
||||
|
||||
all: run
|
||||
|
||||
run: $(APP).bin
|
||||
$(TBDIR)/tb $(APP).bin $(APP)_run.vcd --cycles $(MAX_CYCLES)
|
||||
|
||||
view: run
|
||||
gtkwave $(APP)_run.vcd
|
||||
|
||||
bin: $(APP).bin
|
||||
|
||||
tb:
|
||||
$(MAKE) -C $(TBDIR) tb
|
||||
|
||||
clean:
|
||||
rm -f $(APP).elf $(APP).bin $(APP).dis $(APP)_run.vcd
|
||||
|
||||
clean_tb: clean
|
||||
$(MAKE) -C $(TBDIR) clean
|
||||
|
||||
###############################################################################
|
||||
|
||||
$(APP).bin: $(APP).elf
|
||||
$(CROSS_PREFIX)objcopy -O binary $^ $@
|
||||
$(CROSS_PREFIX)objdump -h $(APP).elf > $(APP).dis
|
||||
$(CROSS_PREFIX)objdump -d $(APP).elf >> $(APP).dis
|
||||
|
||||
# Relink when any source, the linker script, or a header in this directory or
# in $(INCDIR) changes. '%' is a pattern-rule stem, not a glob character, so
# the header prerequisites must be written with '*'.
$(APP).elf: $(SRCS) $(LDSCRIPT) $(wildcard *.h $(addsuffix /*.h,$(INCDIR)))
	$(CROSS_PREFIX)gcc $(CCFLAGS) $(SRCS) -T $(LDSCRIPT) $(addprefix -I,$(INCDIR)) -o $(APP).elf
|
|
@ -0,0 +1,33 @@
|
|||
#ifndef _TB_CXXRTL_IO_H
|
||||
#define _TB_CXXRTL_IO_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define IO_BASE 0x80000000
|
||||
|
||||
// Register layout of the testbench IO block mapped at IO_BASE: three
// consecutive 32-bit registers at offsets 0x0, 0x4 and 0x8. All are volatile
// so that every access is actually performed.
// NOTE(review): the effect of each write is defined by the testbench, which
// is not visible here -- the per-field notes are inferred from the names.
struct io_hw {
|
||||
// offset 0x0: written by tb_putc()
volatile uint32_t print_char;
|
||||
// offset 0x4: written by tb_put_u32()
volatile uint32_t print_u32;
|
||||
// offset 0x8: written by tb_exit()
volatile uint32_t exit;
|
||||
};
|
||||
|
||||
#define mm_io ((struct io_hw *const)IO_BASE)
|
||||
|
||||
static inline void tb_putc(char c) {
|
||||
mm_io->print_char = (uint32_t)c;
|
||||
}
|
||||
|
||||
// Write a NUL-terminated string to the testbench, one character at a time.
// The terminating NUL itself is not written.
static inline void tb_puts(const char *s) {
	for (; *s != '\0'; ++s) {
		tb_putc(*s);
	}
}
|
||||
|
||||
static inline void tb_put_u32(uint32_t x) {
|
||||
mm_io->print_u32 = x;
|
||||
}
|
||||
|
||||
static inline void tb_exit(uint32_t ret) {
|
||||
mm_io->exit = ret;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,45 @@
|
|||
APP := coremark
|
||||
MAX_CYCLES := 100000000
|
||||
|
||||
CROSS_PREFIX ?= riscv32-unknown-elf-
|
||||
TBDIR ?= ../tb_cxxrtl
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
||||
.SUFFIXES:
|
||||
.PHONY: all run waves view bin tb clean clean_tb
|
||||
|
||||
all: run
|
||||
|
||||
run: $(APP).bin
|
||||
$(TBDIR)/tb $(APP).bin --cycles $(MAX_CYCLES)
|
||||
|
||||
# Run with waveform dumping enabled. The VCD name is built from $(APP); no
# variable called APPNAME is defined in this Makefile.
waves: $(APP).bin
	$(TBDIR)/tb $(APP).bin $(APP)_run.vcd --cycles $(MAX_CYCLES)

# Viewing needs the VCD, which only the 'waves' target produces ('run' does
# not pass a VCD path to the testbench).
view: waves
	gtkwave $(APP)_run.vcd
|
||||
|
||||
bin: $(APP).bin
|
||||
|
||||
tb:
|
||||
$(MAKE) -C $(TBDIR) tb
|
||||
|
||||
clean:
|
||||
rm -f $(APP).elf $(APP).bin $(APP).dis $(APP)_run.vcd
|
||||
rm -rf dist/build/
|
||||
|
||||
clean_tb: clean
|
||||
$(MAKE) -C $(TBDIR) clean
|
||||
|
||||
###############################################################################
|
||||
|
||||
$(APP).bin: $(APP).elf
|
||||
$(CROSS_PREFIX)objcopy -O binary $^ $@
|
||||
$(CROSS_PREFIX)objdump -h $(APP).elf > $(APP).dis
|
||||
$(CROSS_PREFIX)objdump -d $(APP).elf >> $(APP).dis
|
||||
|
||||
# Build CoreMark in dist/ and copy the resulting ELF here. $(MAKE) is used for
# the recursive call so that command-line flags and the jobserver are passed
# on to the sub-make, matching the other recursive calls in this Makefile.
$(APP).elf:
	$(MAKE) -C dist
	cp dist/build/coremark.elf $(APP).elf
|
|
@ -0,0 +1 @@
|
|||
build
|
|
@ -0,0 +1,100 @@
|
|||
# COREMARK® ACCEPTABLE USE AGREEMENT
|
||||
|
||||
This ACCEPTABLE USE AGREEMENT (this “Agreement”) is offered by Embedded Microprocessor Benchmark Consortium, a California nonprofit corporation (“Licensor”), to users of its CoreMark® software (“Licensee”) exclusively on the following terms.
|
||||
|
||||
Licensor offers benchmarking software (“Software”) pursuant to an open source license, but carefully controls use of its benchmarks and their associated goodwill. Licensor has registered its trademark in one of the benchmarks available through the Software, COREMARK, Ser. No. 85/487,290; Reg. No. 4,179,307 (the “Trademark”), and promotes the use of a standard metric as a benchmark for assessing the performance of embedded systems. Solely on the terms described herein, Licensee may use and display the Trademark in connection with the generation of data regarding measurement and analysis of computer and embedded system benchmarking via the Software (the “Licensed Use”).
|
||||
|
||||
## Article 1 – License Grant.
|
||||
1.1. License. Subject to the terms and conditions of this Agreement, Licensor hereby grants to Licensee, and Licensee hereby accepts from Licensor, a personal, non-exclusive, royalty-free, revocable right and license to use and display the Trademark during the term of this Agreement (the “Term”), solely and exclusively in connection with the Licensed Use. During the Term, Licensee (i) shall not modify or otherwise create derivative works of the Trademark, and (ii) may use the Trademark only to the extent permitted under this License. Neither Licensee nor any affiliate or agent thereof shall otherwise use the Trademark without the prior express written consent of Licensor, which may be withheld in its sole and absolute discretion. All rights not expressly granted to Licensee hereunder shall remain the exclusive property of Licensor.
|
||||
|
||||
1.2. Modifications to the Software. Licensee shall not use the Trademark in connection with any use of a modified, derivative, or otherwise altered copy of the Software.
|
||||
|
||||
1.3. Licensor’s Use. Nothing in this Agreement shall preclude Licensor or any of its successors or assigns from using or permitting other entities to use the Trademark, whether or not such entity directly or indirectly competes or conflicts with Licensee’s Licensed Use in any manner.
|
||||
|
||||
1.4. Term and Termination. This Agreement is perpetual unless terminated by either of the parties. Licensee may terminate this Agreement for convenience, without cause or liability, for any reason or for no reason whatsoever, upon ten (10) business days written notice. Licensor may terminate this Agreement effective immediately upon notice of breach. Upon termination, Licensee shall immediately remove all implementations of the Trademark from the Licensed Use, and delete all digitals files and records of all materials related to the Trademark.
|
||||
|
||||
## Article 2 – Ownership.
|
||||
2.1. Ownership. Licensee acknowledges and agrees that Licensor is the owner of all right, title, and interest in and to the Trademark, and all such right, title, and interest shall remain with Licensor. Licensee shall not contest, dispute, challenge, oppose, or seek to cancel Licensor’s right, title, and interest in and to the Trademark. Licensee shall not prosecute any application for registration of the Trademark. Licensee shall display appropriate notices regarding ownership of the Trademark in connection with the Licensed Use.
|
||||
|
||||
2.2. Goodwill. Licensee acknowledges that Licensee shall not acquire any right, title, or interest in the Trademark by virtue of this Agreement other than the license granted hereunder, and disclaims any such right, title, interest, or ownership. All goodwill and reputation generated by Licensee’s use of the Trademark shall inure to the exclusive benefit of Licensor. Licensee shall not by any act or omission use the Trademark in any manner that disparages or reflects adversely on Licensor or its Licensed Use or reputation. Licensee shall not take any action that would interfere with or prejudice Licensor’s ownership or registration of the Trademark, the validity of the Trademark or the validity of the license granted by this Agreement. If Licensor determines and notifies Licensee that any act taken in connection with the Licensed Use (i) is inaccurate, unlawful or offensive to good taste; (ii) fails to provide for proper trademark notices, or (iii) otherwise violates Licensee’s obligations under this Agreement, the license granted under this Agreement shall terminate.
|
||||
|
||||
## Article 3 – Indemnification.
|
||||
3.1. Indemnification Generally. Licensee agrees to indemnify, defend, and hold harmless (collectively “indemnify” or “indemnification”) Licensor, including Licensor’s members, managers, officers, and employees (collectively “Related Persons”), from and against, and pay or reimburse Licensor and such Related Persons for, any and all third-party actions, claims, demands, proceedings, investigations, inquiries (collectively, “Claims”), and any and all liabilities, obligations, fines, deficiencies, costs, expenses, royalties, losses, and damages (including reasonable outside counsel fees and expenses) associated with such Claims, to the extent that such Claim arises out of (i) Licensee’s material breach of this Agreement, or (ii) any allegation(s) that Licensee’s actions infringe or violate any third-party intellectual property right, including without limitation, any U.S. copyright, patent, or trademark, or are otherwise found to be tortious or criminal (whether or not such indemnified person is a named party in a legal proceeding).
|
||||
|
||||
3.2. Notice and Defense of Claims. Licensor shall promptly notify Licensee of any Claim for which indemnification is sought, following actual knowledge of such Claim, provided however that the failure to give such notice shall not relieve Licensee of its obligations hereunder except to the extent that Licensee is materially prejudiced by such failure. In the event that any third-party Claim is brought, Licensee shall have the right and option to undertake and control the defense of such action with counsel of its choice, provided however that (i) Licensor at its own expense may participate and appear on an equal footing with Licensee in the defense of any such Claim, (ii) Licensor may undertake and control such defense in the event of the material failure of Licensee to undertake and control the same; and (iii) the defense of any Claim relating to the intellectual property rights of Licensor or its licensors and any related counterclaims shall be solely controlled by Licensor with counsel of its choice. Licensee shall not consent to judgment or concede or settle or compromise any Claim without the prior written approval of Licensor (whose approval shall not be unreasonably withheld), unless such concession or settlement or compromise includes a full and unconditional release of Licensor and any applicable Related Persons from all liabilities in respect of such Claim.
|
||||
|
||||
## Article 4 – Miscellaneous.
|
||||
4.1. Relationship of the Parties. This Agreement does not create a partnership, franchise, joint venture, agency, fiduciary, or employment relationship between the parties.
|
||||
|
||||
4.2. No Third-Party Beneficiaries. Except for the rights of Related Persons under Article 3 (Indemnification), there are no third-party beneficiaries to this Agreement.
|
||||
|
||||
4.3. Assignment. Licensee’s rights hereunder are non-assignable, and may not be sublicensed.
|
||||
|
||||
4.4. Equitable Relief. Licensee acknowledges that the remedies available at law for any breach of this Agreement will, by their nature, be inadequate. Accordingly, Licensor may obtain injunctive relief or other equitable relief to restrain a breach or threatened breach of this Agreement or to specifically enforce this Agreement, without proving that any monetary damages have been sustained, and without the requirement of posting of a bond prior to obtaining such equitable relief.
|
||||
|
||||
4.5. Governing Law. This Agreement will be interpreted, construed, and enforced in all respects in accordance with the laws of the State of California, without reference to its conflict of law principles.
|
||||
|
||||
4.6. Attorneys’ Fees. If any legal action, arbitration or other proceeding is brought for the enforcement of this Agreement, or because of an alleged dispute, breach, default, or misrepresentation in connection with any of the provisions of this Agreement, the successful or prevailing party shall be entitled to recover its reasonable attorneys’ fees and other reasonable costs incurred in that action or proceeding, in addition to any other relief to which it may be entitled.
|
||||
|
||||
4.7. Amendment; Waiver. This Agreement may not be amended, nor may any rights under it be waived, except in writing by Licensor.
|
||||
|
||||
4.8. Severability. If any provision of this Agreement is held by a court of competent jurisdiction to be contrary to law, the provision shall be modified by the court and interpreted so as best to accomplish the objectives of the original provision to the fullest extent
|
||||
permitted by law, and the remaining provisions of this Agreement shall remain in effect.
|
||||
|
||||
4.9. Entire Agreement. This Agreement constitutes the entire agreement between the parties and supersedes all prior and contemporaneous agreements, proposals or representations, written or oral, concerning its subject matter.
|
||||
|
||||
|
||||
# Apache License
|
||||
|
||||
Version 2.0, January 2004
|
||||
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
## TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
|
||||
|
||||
You must give any other recipients of the Work or Derivative Works a copy of this License; and
|
||||
You must cause any modified files to carry prominent notices stating that You changed the files; and
|
||||
You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
|
||||
If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
|
@ -0,0 +1,144 @@
|
|||
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Original Author: Shay Gal-on
|
||||
|
||||
# Edits here:
|
||||
PORT_DIR := barebones
|
||||
OPATH := build/
|
||||
|
||||
# Make sure the default target is to simply build and run the benchmark.
|
||||
RSTAMP = v1.0
|
||||
|
||||
.PHONY: run score
|
||||
run: $(OUTFILE) rerun score
|
||||
|
||||
score:
|
||||
@echo "Check run1.log and run2.log for results."
|
||||
@echo "See README.md for run and reporting rules."
|
||||
|
||||
ifndef PORT_DIR
|
||||
# Ports for a couple of common self hosted platforms
|
||||
UNAME=$(shell if command -v uname 2> /dev/null; then uname ; fi)
|
||||
ifneq (,$(findstring CYGWIN,$(UNAME)))
|
||||
PORT_DIR=cygwin
|
||||
endif
|
||||
ifneq (,$(findstring Darwin,$(UNAME)))
|
||||
PORT_DIR=macos
|
||||
endif
|
||||
ifneq (,$(findstring FreeBSD,$(UNAME)))
|
||||
PORT_DIR=freebsd
|
||||
endif
|
||||
ifneq (,$(findstring Linux,$(UNAME)))
|
||||
PORT_DIR=linux
|
||||
endif
|
||||
endif
|
||||
ifndef PORT_DIR
|
||||
$(error PLEASE define PORT_DIR! (e.g. make PORT_DIR=simple))
|
||||
endif
|
||||
vpath %.c $(PORT_DIR)
|
||||
vpath %.h $(PORT_DIR)
|
||||
vpath %.mak $(PORT_DIR)
|
||||
include $(PORT_DIR)/core_portme.mak
|
||||
|
||||
ifndef ITERATIONS
|
||||
ITERATIONS=0
|
||||
endif
|
||||
ifdef REBUILD
|
||||
FORCE_REBUILD=force_rebuild
|
||||
endif
|
||||
|
||||
CFLAGS += -DITERATIONS=$(ITERATIONS)
|
||||
|
||||
CORE_FILES = core_list_join core_main core_matrix core_state core_util
|
||||
ORIG_SRCS = $(addsuffix .c,$(CORE_FILES))
|
||||
SRCS = $(ORIG_SRCS) $(PORT_SRCS)
|
||||
OBJS = $(addprefix $(OPATH),$(addsuffix $(OEXT),$(CORE_FILES)) $(PORT_OBJS))
|
||||
OUTNAME = coremark$(EXE)
|
||||
OUTFILE = $(OPATH)$(OUTNAME)
|
||||
LOUTCMD = $(OFLAG) $(OUTFILE) $(LFLAGS_END)
|
||||
OUTCMD = $(OUTFLAG) $(OUTFILE) $(LFLAGS_END)
|
||||
|
||||
HEADERS = coremark.h
|
||||
CHECK_FILES = $(ORIG_SRCS) $(HEADERS)
|
||||
|
||||
$(OPATH):
|
||||
$(MKDIR) $(OPATH)
|
||||
|
||||
.PHONY: compile link
|
||||
ifdef SEPARATE_COMPILE
|
||||
$(OPATH)$(PORT_DIR):
|
||||
$(MKDIR) $(OPATH)$(PORT_DIR)
|
||||
|
||||
compile: $(OPATH) $(OPATH)$(PORT_DIR) $(OBJS) $(HEADERS)
|
||||
link: compile
|
||||
$(LD) $(LFLAGS) $(XLFLAGS) $(OBJS) $(LOUTCMD)
|
||||
|
||||
else
|
||||
|
||||
compile: $(OPATH) $(SRCS) $(HEADERS)
|
||||
$(CC) $(CFLAGS) $(XCFLAGS) $(SRCS) $(OUTCMD)
|
||||
link: compile
|
||||
@echo "Link performed along with compile"
|
||||
|
||||
endif
|
||||
|
||||
$(OUTFILE): $(SRCS) $(HEADERS) Makefile core_portme.mak $(EXTRA_DEPENDS) $(FORCE_REBUILD)
|
||||
$(MAKE) port_prebuild
|
||||
$(MAKE) link
|
||||
$(MAKE) port_postbuild
|
||||
|
||||
.PHONY: rerun
|
||||
rerun:
|
||||
$(MAKE) XCFLAGS="$(XCFLAGS) -DPERFORMANCE_RUN=1" load run1.log
|
||||
$(MAKE) XCFLAGS="$(XCFLAGS) -DVALIDATION_RUN=1" load run2.log
|
||||
|
||||
PARAM1=$(PORT_PARAMS) 0x0 0x0 0x66 $(ITERATIONS)
|
||||
PARAM2=$(PORT_PARAMS) 0x3415 0x3415 0x66 $(ITERATIONS)
|
||||
PARAM3=$(PORT_PARAMS) 8 8 8 $(ITERATIONS)
|
||||
|
||||
run1.log-PARAM=$(PARAM1) 7 1 2000
|
||||
run2.log-PARAM=$(PARAM2) 7 1 2000
|
||||
run3.log-PARAM=$(PARAM3) 7 1 1200
|
||||
|
||||
run1.log run2.log run3.log: load
|
||||
$(MAKE) port_prerun
|
||||
$(RUN) $(OUTFILE) $($(@)-PARAM) > $(OPATH)$@
|
||||
$(MAKE) port_postrun
|
||||
|
||||
.PHONY: gen_pgo_data
|
||||
gen_pgo_data: run3.log
|
||||
|
||||
.PHONY: load
|
||||
load: $(OUTFILE)
|
||||
$(MAKE) port_preload
|
||||
$(LOAD) $(OUTFILE)
|
||||
$(MAKE) port_postload
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -f $(OUTFILE) $(OPATH)*.log *.info $(OPATH)index.html $(PORT_CLEAN)
|
||||
|
||||
.PHONY: force_rebuild
|
||||
force_rebuild:
|
||||
echo "Forcing Rebuild"
|
||||
|
||||
.PHONY: check
|
||||
check:
|
||||
md5sum -c coremark.md5
|
||||
|
||||
ifdef ETC
|
||||
# Targets related to testing and releasing CoreMark. Not part of the general release!
|
||||
include Makefile.internal
|
||||
endif
|
|
@ -0,0 +1,161 @@
|
|||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
#include "coremark.h"
|
||||
#include "core_portme.h"
|
||||
|
||||
#if VALIDATION_RUN
|
||||
volatile ee_s32 seed1_volatile = 0x3415;
|
||||
volatile ee_s32 seed2_volatile = 0x3415;
|
||||
volatile ee_s32 seed3_volatile = 0x66;
|
||||
#endif
|
||||
#if PERFORMANCE_RUN
|
||||
volatile ee_s32 seed1_volatile = 0x0;
|
||||
volatile ee_s32 seed2_volatile = 0x0;
|
||||
volatile ee_s32 seed3_volatile = 0x66;
|
||||
#endif
|
||||
#if PROFILE_RUN
|
||||
volatile ee_s32 seed1_volatile = 0x8;
|
||||
volatile ee_s32 seed2_volatile = 0x8;
|
||||
volatile ee_s32 seed3_volatile = 0x8;
|
||||
#endif
|
||||
volatile ee_s32 seed4_volatile = ITERATIONS;
|
||||
volatile ee_s32 seed5_volatile = 0;
|
||||
/* Porting : Timing functions
|
||||
How to capture time and convert to seconds must be ported to whatever is
|
||||
supported by the platform. e.g. Read value from on board RTC, read value from
|
||||
cpu clock cycles performance counter etc. Sample implementation for standard
|
||||
time.h and windows.h definitions included.
|
||||
*/
|
||||
|
||||
#define read_csr(csrname) ({ \
|
||||
uint32_t __csr_tmp_u32; \
|
||||
__asm__ volatile ("csrr %0, " #csrname : "=r" (__csr_tmp_u32)); \
|
||||
__csr_tmp_u32; \
|
||||
})
|
||||
|
||||
CORETIMETYPE
|
||||
barebones_clock()
|
||||
{
|
||||
return read_csr(mcycle);
|
||||
// #error \
|
||||
// "You must implement a method to measure time in barebones_clock()! This function should return current time.\n"
|
||||
}
|
||||
/* Define : TIMER_RES_DIVIDER
|
||||
Divider to trade off timer resolution and total time that can be
|
||||
measured.
|
||||
|
||||
Use lower values to increase resolution, but make sure that overflow
|
||||
does not occur. If there are issues with the return value overflowing,
|
||||
increase this value.
|
||||
*/
|
||||
#define GETMYTIME(_t) (*_t = barebones_clock())
|
||||
#define MYTIMEDIFF(fin, ini) ((fin) - (ini))
|
||||
#define TIMER_RES_DIVIDER 1
|
||||
#define SAMPLE_TIME_IMPLEMENTATION 1
|
||||
// #define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER)
|
||||
#define EE_TICKS_PER_SEC (1000 * 1000)
|
||||
|
||||
/** Define Host specific (POSIX), or target specific global time variables. */
|
||||
static CORETIMETYPE start_time_val, stop_time_val;
|
||||
|
||||
/* Function : start_time
|
||||
This function will be called right before starting the timed portion of
|
||||
the benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the
|
||||
example code) or zeroing some system parameters - e.g. setting the cpu clocks
|
||||
cycles to 0.
|
||||
*/
|
||||
void
|
||||
start_time(void)
|
||||
{
|
||||
GETMYTIME(&start_time_val);
|
||||
}
|
||||
/* Function : stop_time
|
||||
This function will be called right after ending the timed portion of the
|
||||
benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the
|
||||
example code) or other system parameters - e.g. reading the current value of
|
||||
cpu cycles counter.
|
||||
*/
|
||||
void
|
||||
stop_time(void)
|
||||
{
|
||||
GETMYTIME(&stop_time_val);
|
||||
}
|
||||
/* Function : get_time
|
||||
Return an abstract "ticks" number that signifies time on the system.
|
||||
|
||||
Actual value returned may be cpu cycles, milliseconds or any other
|
||||
value, as long as it can be converted to seconds by <time_in_secs>. This
|
||||
methodology is taken to accommodate any hardware or simulated platform. The
|
||||
sample implementation returns millisecs by default, and the resolution is
|
||||
controlled by <TIMER_RES_DIVIDER>
|
||||
*/
|
||||
CORE_TICKS
|
||||
get_time(void)
|
||||
{
|
||||
CORE_TICKS elapsed
|
||||
= (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
|
||||
return elapsed;
|
||||
}
|
||||
/* Function : time_in_secs
|
||||
Convert the value returned by get_time to seconds.
|
||||
|
||||
The <secs_ret> type is used to accommodate systems with no support for
|
||||
floating point. Default implementation implemented by the EE_TICKS_PER_SEC
|
||||
macro above.
|
||||
*/
|
||||
secs_ret
|
||||
time_in_secs(CORE_TICKS ticks)
|
||||
{
|
||||
secs_ret retval = ((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
|
||||
return retval;
|
||||
}
|
||||
|
||||
ee_u32 default_num_contexts = 1;
|
||||
|
||||
/* Function : portable_init
|
||||
Target specific initialization code
|
||||
Test for some common mistakes.
|
||||
*/
|
||||
void
|
||||
portable_init(core_portable *p, int *argc, char *argv[])
|
||||
{
|
||||
ee_printf("IO setup.\n");
|
||||
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *))
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR! Please define ee_ptr_int to a type that holds a "
|
||||
"pointer!\n");
|
||||
}
|
||||
if (sizeof(ee_u32) != 4)
|
||||
{
|
||||
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
|
||||
}
|
||||
p->portable_id = 1;
|
||||
}
|
||||
/* Function : portable_fini
|
||||
Target specific final code
|
||||
*/
|
||||
void
|
||||
portable_fini(core_portable *p)
|
||||
{
|
||||
p->portable_id = 0;
|
||||
}
|
|
@ -0,0 +1,212 @@
|
|||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
/* Topic : Description
|
||||
This file contains configuration constants required to execute on
|
||||
different platforms
|
||||
*/
|
||||
#ifndef CORE_PORTME_H
|
||||
#define CORE_PORTME_H
|
||||
/************************/
|
||||
/* Data types and settings */
|
||||
/************************/
|
||||
/* Configuration : HAS_FLOAT
|
||||
Define to 1 if the platform supports floating point.
|
||||
*/
|
||||
#ifndef HAS_FLOAT
|
||||
#define HAS_FLOAT 1
|
||||
#endif
|
||||
/* Configuration : HAS_TIME_H
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef HAS_TIME_H
|
||||
#define HAS_TIME_H 1
|
||||
#endif
|
||||
/* Configuration : USE_CLOCK
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef USE_CLOCK
|
||||
#define USE_CLOCK 1
|
||||
#endif
|
||||
/* Configuration : HAS_STDIO
|
||||
Define to 1 if the platform has stdio.h.
|
||||
*/
|
||||
#ifndef HAS_STDIO
|
||||
#define HAS_STDIO 0
|
||||
#endif
|
||||
/* Configuration : HAS_PRINTF
|
||||
Define to 1 if the platform has stdio.h and implements the printf
|
||||
function.
|
||||
*/
|
||||
#ifndef HAS_PRINTF
|
||||
#define HAS_PRINTF 0
|
||||
#endif
|
||||
|
||||
/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
|
||||
Initialize these strings per platform
|
||||
*/
|
||||
#ifndef COMPILER_VERSION
|
||||
#ifdef __GNUC__
|
||||
#define COMPILER_VERSION "GCC"__VERSION__
|
||||
#else
|
||||
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
|
||||
#endif
|
||||
#endif
|
||||
#ifndef COMPILER_FLAGS
|
||||
#define COMPILER_FLAGS \
|
||||
FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
|
||||
#endif
|
||||
#ifndef MEM_LOCATION
|
||||
#define MEM_LOCATION "STACK"
|
||||
#endif
|
||||
|
||||
/* Data Types :
|
||||
To avoid compiler issues, define the data types that need to be used for
|
||||
8b, 16b and 32b in <core_portme.h>.
|
||||
|
||||
*Important* :
|
||||
ee_ptr_int needs to be the data type used to hold pointers, otherwise
|
||||
coremark may fail!!!
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
typedef signed short ee_s16;
|
||||
typedef unsigned short ee_u16;
|
||||
typedef signed int ee_s32;
|
||||
typedef double ee_f32;
|
||||
typedef unsigned char ee_u8;
|
||||
typedef unsigned int ee_u32;
|
||||
typedef ee_u32 ee_ptr_int;
|
||||
typedef size_t ee_size_t;
|
||||
#define NULL ((void *)0)
|
||||
/* align_mem :
|
||||
This macro is used to align an offset to point to a 32b value. It is
|
||||
used in the Matrix algorithm to initialize the input memory blocks.
|
||||
*/
|
||||
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3))
|
||||
|
||||
/* Configuration : CORE_TICKS
|
||||
Define type of return from the timing functions.
|
||||
*/
|
||||
#define CORETIMETYPE ee_u32
|
||||
typedef ee_u32 CORE_TICKS;
|
||||
|
||||
/* Configuration : SEED_METHOD
|
||||
Defines method to get seed values that cannot be computed at compile
|
||||
time.
|
||||
|
||||
Valid values :
|
||||
SEED_ARG - from command line.
|
||||
SEED_FUNC - from a system function.
|
||||
SEED_VOLATILE - from volatile variables.
|
||||
*/
|
||||
#ifndef SEED_METHOD
|
||||
#define SEED_METHOD SEED_VOLATILE
|
||||
#endif
|
||||
|
||||
/* Configuration : MEM_METHOD
|
||||
Defines method to get a block of memory.
|
||||
|
||||
Valid values :
|
||||
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
|
||||
MEM_STATIC - to use a static memory array.
|
||||
MEM_STACK - to allocate the data block on the stack (NYI).
|
||||
*/
|
||||
#ifndef MEM_METHOD
|
||||
#define MEM_METHOD MEM_STACK
|
||||
#endif
|
||||
|
||||
/* Configuration : MULTITHREAD
|
||||
Define for parallel execution
|
||||
|
||||
Valid values :
|
||||
1 - only one context (default).
|
||||
N>1 - will execute N copies in parallel.
|
||||
|
||||
Note :
|
||||
If this flag is defined to more than 1, an implementation for launching
|
||||
parallel contexts must be defined.
|
||||
|
||||
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK>
|
||||
to enable them.
|
||||
|
||||
It is valid to have a different implementation of <core_start_parallel>
|
||||
and <core_end_parallel> in <core_portme.c>, to fit a particular architecture.
|
||||
*/
|
||||
#ifndef MULTITHREAD
|
||||
#define MULTITHREAD 1
|
||||
#define USE_PTHREAD 0
|
||||
#define USE_FORK 0
|
||||
#define USE_SOCKET 0
|
||||
#endif
|
||||
|
||||
/* Configuration : MAIN_HAS_NOARGC
|
||||
Needed if platform does not support getting arguments to main.
|
||||
|
||||
Valid values :
|
||||
0 - argc/argv to main is supported
|
||||
1 - argc/argv to main is not supported
|
||||
|
||||
Note :
|
||||
This flag only matters if MULTITHREAD has been defined to a value
|
||||
greater than 1.
|
||||
*/
|
||||
#ifndef MAIN_HAS_NOARGC
|
||||
#define MAIN_HAS_NOARGC 0
|
||||
#endif
|
||||
|
||||
/* Configuration : MAIN_HAS_NORETURN
|
||||
Needed if platform does not support returning a value from main.
|
||||
|
||||
Valid values :
|
||||
0 - main returns an int, and return value will be 0.
|
||||
1 - platform does not support returning a value from main
|
||||
*/
|
||||
#ifndef MAIN_HAS_NORETURN
|
||||
#define MAIN_HAS_NORETURN 0
|
||||
#endif
|
||||
|
||||
/* Variable : default_num_contexts
|
||||
Not used for this simple port, must contain the value 1.
|
||||
*/
|
||||
extern ee_u32 default_num_contexts;
|
||||
|
||||
typedef struct CORE_PORTABLE_S
|
||||
{
|
||||
ee_u8 portable_id;
|
||||
} core_portable;
|
||||
|
||||
/* target specific init/fini */
|
||||
void portable_init(core_portable *p, int *argc, char *argv[]);
|
||||
void portable_fini(core_portable *p);
|
||||
|
||||
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) \
|
||||
&& !defined(VALIDATION_RUN)
|
||||
#if (TOTAL_DATA_SIZE == 1200)
|
||||
#define PROFILE_RUN 1
|
||||
#elif (TOTAL_DATA_SIZE == 2000)
|
||||
#define PERFORMANCE_RUN 1
|
||||
#else
|
||||
#define VALIDATION_RUN 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int ee_printf(const char *fmt, ...);
|
||||
|
||||
#endif /* CORE_PORTME_H */
|
|
@ -0,0 +1,83 @@
|
|||
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Original Author: Shay Gal-on
|
||||
|
||||
#File : core_portme.mak
|
||||
|
||||
# Use this flag to define how to get an executable (e.g -o)
|
||||
OUTFLAG= -o
|
||||
|
||||
CC = riscv32-unknown-elf-gcc
|
||||
LD = riscv32-unknown-elf-gcc
|
||||
AS = riscv32-unknown-elf-gcc
|
||||
# Flag : CFLAGS
|
||||
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
|
||||
PORT_CFLAGS = -O3 -g
|
||||
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
|
||||
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
|
||||
#Flag : LFLAGS_END
|
||||
# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
|
||||
# Note : On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
|
||||
SEPARATE_COMPILE=1
|
||||
# Flag : SEPARATE_COMPILE
|
||||
# You must also define below how to create an object file, and how to link.
|
||||
OBJOUT = -o
|
||||
LFLAGS = -T ../../common/memmap.ld
|
||||
ASFLAGS = -c
|
||||
OFLAG = -o
|
||||
COUT = -c
|
||||
|
||||
LFLAGS_END =
|
||||
# Flag : PORT_SRCS
|
||||
# Port specific source files can be added here
|
||||
# You may also need cvt.c if the fcvt functions are not provided as intrinsics by your compiler!
|
||||
PORT_SRCS = $(PORT_DIR)/core_portme.c $(PORT_DIR)/ee_printf.c $(PORT_DIR)/init.S
|
||||
PORT_OBJS = $(addsuffix $(OEXT),$(patsubst %.c,%,$(patsubst %.S,%,$(PORT_SRCS))))
|
||||
vpath %.c $(PORT_DIR)
|
||||
vpath %.s $(PORT_DIR)
|
||||
vpath %.S $(PORT_DIR)
|
||||
|
||||
# Flag : LOAD
|
||||
# For a simple port, we assume self hosted compile and run, no load needed.
|
||||
|
||||
# Flag : RUN
|
||||
# For a simple port, we assume self hosted compile and run, simple invocation of the executable
|
||||
|
||||
LOAD = echo "Please set LOAD to the process of loading the executable to the flash"
|
||||
# Placeholder run command: the hint must name RUN (the variable to override), not LOAD.
RUN = echo "Please set RUN to the process of running the executable (e.g. via jtag, or board reset)"
|
||||
|
||||
OEXT = .o
|
||||
EXE = .elf
|
||||
|
||||
$(OPATH)$(PORT_DIR)/%$(OEXT) : %.c
|
||||
$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
|
||||
|
||||
$(OPATH)%$(OEXT) : %.c
|
||||
$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
|
||||
|
||||
$(OPATH)$(PORT_DIR)/%$(OEXT) : %.s
|
||||
$(AS) $(ASFLAGS) $< $(OBJOUT) $@
|
||||
|
||||
$(OPATH)$(PORT_DIR)/%$(OEXT) : %.S
|
||||
$(AS) $(ASFLAGS) $< $(OBJOUT) $@
|
||||
|
||||
# Target : port_pre% and port_post%
|
||||
# For the purpose of this simple port, no pre or post steps needed.
|
||||
|
||||
.PHONY : port_prebuild port_postbuild port_prerun port_postrun port_preload port_postload
|
||||
port_pre% port_post% :
|
||||
|
||||
MKDIR = mkdir -p
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
#include <math.h>
|
||||
#define CVTBUFSIZE 80
|
||||
static char CVTBUF[CVTBUFSIZE];
|
||||
|
||||
static char *
|
||||
cvt(double arg, int ndigits, int *decpt, int *sign, char *buf, int eflag)
|
||||
{
|
||||
int r2;
|
||||
double fi, fj;
|
||||
char * p, *p1;
|
||||
|
||||
if (ndigits < 0)
|
||||
ndigits = 0;
|
||||
if (ndigits >= CVTBUFSIZE - 1)
|
||||
ndigits = CVTBUFSIZE - 2;
|
||||
r2 = 0;
|
||||
*sign = 0;
|
||||
p = &buf[0];
|
||||
if (arg < 0)
|
||||
{
|
||||
*sign = 1;
|
||||
arg = -arg;
|
||||
}
|
||||
arg = modf(arg, &fi);
|
||||
p1 = &buf[CVTBUFSIZE];
|
||||
|
||||
if (fi != 0)
|
||||
{
|
||||
p1 = &buf[CVTBUFSIZE];
|
||||
while (fi != 0)
|
||||
{
|
||||
fj = modf(fi / 10, &fi);
|
||||
*--p1 = (int)((fj + .03) * 10) + '0';
|
||||
r2++;
|
||||
}
|
||||
while (p1 < &buf[CVTBUFSIZE])
|
||||
*p++ = *p1++;
|
||||
}
|
||||
else if (arg > 0)
|
||||
{
|
||||
while ((fj = arg * 10) < 1)
|
||||
{
|
||||
arg = fj;
|
||||
r2--;
|
||||
}
|
||||
}
|
||||
p1 = &buf[ndigits];
|
||||
if (eflag == 0)
|
||||
p1 += r2;
|
||||
*decpt = r2;
|
||||
if (p1 < &buf[0])
|
||||
{
|
||||
buf[0] = '\0';
|
||||
return buf;
|
||||
}
|
||||
while (p <= p1 && p < &buf[CVTBUFSIZE])
|
||||
{
|
||||
arg *= 10;
|
||||
arg = modf(arg, &fj);
|
||||
*p++ = (int)fj + '0';
|
||||
}
|
||||
if (p1 >= &buf[CVTBUFSIZE])
|
||||
{
|
||||
buf[CVTBUFSIZE - 1] = '\0';
|
||||
return buf;
|
||||
}
|
||||
p = p1;
|
||||
*p1 += 5;
|
||||
while (*p1 > '9')
|
||||
{
|
||||
*p1 = '0';
|
||||
if (p1 > buf)
|
||||
++*--p1;
|
||||
else
|
||||
{
|
||||
*p1 = '1';
|
||||
(*decpt)++;
|
||||
if (eflag == 0)
|
||||
{
|
||||
if (p > buf)
|
||||
*p = '0';
|
||||
p++;
|
||||
}
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
return buf;
|
||||
}
|
||||
|
||||
char *
|
||||
ecvt(double arg, int ndigits, int *decpt, int *sign)
|
||||
{
|
||||
return cvt(arg, ndigits, decpt, sign, CVTBUF, 1);
|
||||
}
|
||||
|
||||
/* ecvtbuf: same conversion as ecvt, but the digits are written into the
 * caller-supplied buf instead of the shared static buffer. */
char *
ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
{
    char *text = cvt(arg, ndigits, decpt, sign, buf, 1);

    return text;
}
|
||||
|
||||
char *
|
||||
fcvt(double arg, int ndigits, int *decpt, int *sign)
|
||||
{
|
||||
return cvt(arg, ndigits, decpt, sign, CVTBUF, 0);
|
||||
}
|
||||
|
||||
/* fcvtbuf: same conversion as fcvt, but the digits are written into the
 * caller-supplied buf instead of the shared static buffer. */
char *
fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
{
    char *text = cvt(arg, ndigits, decpt, sign, buf, 0);

    return text;
}
|
|
@ -0,0 +1,703 @@
|
|||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
#include <coremark.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#define ZEROPAD (1 << 0) /* Pad with zero */
|
||||
#define SIGN (1 << 1) /* Unsigned/signed long */
|
||||
#define PLUS (1 << 2) /* Show plus */
|
||||
#define SPACE (1 << 3) /* Spacer */
|
||||
#define LEFT (1 << 4) /* Left justified */
|
||||
#define HEX_PREP (1 << 5) /* 0x */
|
||||
#define UPPERCASE (1 << 6) /* 'ABCDEF' */
|
||||
|
||||
#define is_digit(c) ((c) >= '0' && (c) <= '9')
|
||||
|
||||
static char * digits = "0123456789abcdefghijklmnopqrstuvwxyz";
|
||||
static char * upper_digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
static ee_size_t strnlen(const char *s, ee_size_t count);
|
||||
|
||||
static ee_size_t
|
||||
strnlen(const char *s, ee_size_t count)
|
||||
{
|
||||
const char *sc;
|
||||
for (sc = s; *sc != '\0' && count--; ++sc)
|
||||
;
|
||||
return sc - s;
|
||||
}
|
||||
|
||||
/* skip_atoi: parse the run of decimal digits at *s and return its value.
 * On return *s points at the first non-digit character. No overflow
 * checking is performed. */
static int
skip_atoi(const char **s)
{
    int value = 0;

    for (; is_digit(**s); ++(*s))
        value = value * 10 + **s - '0';
    return value;
}
|
||||
|
||||
static char *
|
||||
number(char *str, long num, int base, int size, int precision, int type)
|
||||
{
|
||||
char c, sign, tmp[66];
|
||||
char *dig = digits;
|
||||
int i;
|
||||
|
||||
if (type & UPPERCASE)
|
||||
dig = upper_digits;
|
||||
if (type & LEFT)
|
||||
type &= ~ZEROPAD;
|
||||
if (base < 2 || base > 36)
|
||||
return 0;
|
||||
|
||||
c = (type & ZEROPAD) ? '0' : ' ';
|
||||
sign = 0;
|
||||
if (type & SIGN)
|
||||
{
|
||||
if (num < 0)
|
||||
{
|
||||
sign = '-';
|
||||
num = -num;
|
||||
size--;
|
||||
}
|
||||
else if (type & PLUS)
|
||||
{
|
||||
sign = '+';
|
||||
size--;
|
||||
}
|
||||
else if (type & SPACE)
|
||||
{
|
||||
sign = ' ';
|
||||
size--;
|
||||
}
|
||||
}
|
||||
|
||||
if (type & HEX_PREP)
|
||||
{
|
||||
if (base == 16)
|
||||
size -= 2;
|
||||
else if (base == 8)
|
||||
size--;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
|
||||
if (num == 0)
|
||||
tmp[i++] = '0';
|
||||
else
|
||||
{
|
||||
while (num != 0)
|
||||
{
|
||||
tmp[i++] = dig[((unsigned long)num) % (unsigned)base];
|
||||
num = ((unsigned long)num) / (unsigned)base;
|
||||
}
|
||||
}
|
||||
|
||||
if (i > precision)
|
||||
precision = i;
|
||||
size -= precision;
|
||||
if (!(type & (ZEROPAD | LEFT)))
|
||||
while (size-- > 0)
|
||||
*str++ = ' ';
|
||||
if (sign)
|
||||
*str++ = sign;
|
||||
|
||||
if (type & HEX_PREP)
|
||||
{
|
||||
if (base == 8)
|
||||
*str++ = '0';
|
||||
else if (base == 16)
|
||||
{
|
||||
*str++ = '0';
|
||||
*str++ = digits[33];
|
||||
}
|
||||
}
|
||||
|
||||
if (!(type & LEFT))
|
||||
while (size-- > 0)
|
||||
*str++ = c;
|
||||
while (i < precision--)
|
||||
*str++ = '0';
|
||||
while (i-- > 0)
|
||||
*str++ = tmp[i];
|
||||
while (size-- > 0)
|
||||
*str++ = ' ';
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
static char *
|
||||
eaddr(char *str, unsigned char *addr, int size, int precision, int type)
|
||||
{
|
||||
char tmp[24];
|
||||
char *dig = digits;
|
||||
int i, len;
|
||||
|
||||
if (type & UPPERCASE)
|
||||
dig = upper_digits;
|
||||
len = 0;
|
||||
for (i = 0; i < 6; i++)
|
||||
{
|
||||
if (i != 0)
|
||||
tmp[len++] = ':';
|
||||
tmp[len++] = dig[addr[i] >> 4];
|
||||
tmp[len++] = dig[addr[i] & 0x0F];
|
||||
}
|
||||
|
||||
if (!(type & LEFT))
|
||||
while (len < size--)
|
||||
*str++ = ' ';
|
||||
for (i = 0; i < len; ++i)
|
||||
*str++ = tmp[i];
|
||||
while (len < size--)
|
||||
*str++ = ' ';
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
static char *
|
||||
iaddr(char *str, unsigned char *addr, int size, int precision, int type)
|
||||
{
|
||||
char tmp[24];
|
||||
int i, n, len;
|
||||
|
||||
len = 0;
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (i != 0)
|
||||
tmp[len++] = '.';
|
||||
n = addr[i];
|
||||
|
||||
if (n == 0)
|
||||
tmp[len++] = digits[0];
|
||||
else
|
||||
{
|
||||
if (n >= 100)
|
||||
{
|
||||
tmp[len++] = digits[n / 100];
|
||||
n = n % 100;
|
||||
tmp[len++] = digits[n / 10];
|
||||
n = n % 10;
|
||||
}
|
||||
else if (n >= 10)
|
||||
{
|
||||
tmp[len++] = digits[n / 10];
|
||||
n = n % 10;
|
||||
}
|
||||
|
||||
tmp[len++] = digits[n];
|
||||
}
|
||||
}
|
||||
|
||||
if (!(type & LEFT))
|
||||
while (len < size--)
|
||||
*str++ = ' ';
|
||||
for (i = 0; i < len; ++i)
|
||||
*str++ = tmp[i];
|
||||
while (len < size--)
|
||||
*str++ = ' ';
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
#if HAS_FLOAT
|
||||
|
||||
char * ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf);
|
||||
char * fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf);
|
||||
static void ee_bufcpy(char *d, char *s, int count);
|
||||
|
||||
/* ee_bufcpy: copy count bytes from ps to pd (regions must not overlap in a
 * way that matters for a forward copy).
 *
 * A zero or negative count copies nothing. Counting down instead of
 * comparing against an end pointer keeps a negative count from turning
 * into a runaway copy. */
void
ee_bufcpy(char *pd, char *ps, int count)
{
    while (count-- > 0)
        *pd++ = *ps++;
}
|
||||
|
||||
static void
|
||||
parse_float(double value, char *buffer, char fmt, int precision)
|
||||
{
|
||||
int decpt, sign, exp, pos;
|
||||
char *digits = NULL;
|
||||
char cvtbuf[80];
|
||||
int capexp = 0;
|
||||
int magnitude;
|
||||
|
||||
if (fmt == 'G' || fmt == 'E')
|
||||
{
|
||||
capexp = 1;
|
||||
fmt += 'a' - 'A';
|
||||
}
|
||||
|
||||
if (fmt == 'g')
|
||||
{
|
||||
digits = ecvtbuf(value, precision, &decpt, &sign, cvtbuf);
|
||||
magnitude = decpt - 1;
|
||||
if (magnitude < -4 || magnitude > precision - 1)
|
||||
{
|
||||
fmt = 'e';
|
||||
precision -= 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt = 'f';
|
||||
precision -= decpt;
|
||||
}
|
||||
}
|
||||
|
||||
if (fmt == 'e')
|
||||
{
|
||||
digits = ecvtbuf(value, precision + 1, &decpt, &sign, cvtbuf);
|
||||
|
||||
if (sign)
|
||||
*buffer++ = '-';
|
||||
*buffer++ = *digits;
|
||||
if (precision > 0)
|
||||
*buffer++ = '.';
|
||||
ee_bufcpy(buffer, digits + 1, precision);
|
||||
buffer += precision;
|
||||
*buffer++ = capexp ? 'E' : 'e';
|
||||
|
||||
if (decpt == 0)
|
||||
{
|
||||
if (value == 0.0)
|
||||
exp = 0;
|
||||
else
|
||||
exp = -1;
|
||||
}
|
||||
else
|
||||
exp = decpt - 1;
|
||||
|
||||
if (exp < 0)
|
||||
{
|
||||
*buffer++ = '-';
|
||||
exp = -exp;
|
||||
}
|
||||
else
|
||||
*buffer++ = '+';
|
||||
|
||||
buffer[2] = (exp % 10) + '0';
|
||||
exp = exp / 10;
|
||||
buffer[1] = (exp % 10) + '0';
|
||||
exp = exp / 10;
|
||||
buffer[0] = (exp % 10) + '0';
|
||||
buffer += 3;
|
||||
}
|
||||
else if (fmt == 'f')
|
||||
{
|
||||
digits = fcvtbuf(value, precision, &decpt, &sign, cvtbuf);
|
||||
if (sign)
|
||||
*buffer++ = '-';
|
||||
if (*digits)
|
||||
{
|
||||
if (decpt <= 0)
|
||||
{
|
||||
*buffer++ = '0';
|
||||
*buffer++ = '.';
|
||||
for (pos = 0; pos < -decpt; pos++)
|
||||
*buffer++ = '0';
|
||||
while (*digits)
|
||||
*buffer++ = *digits++;
|
||||
}
|
||||
else
|
||||
{
|
||||
pos = 0;
|
||||
while (*digits)
|
||||
{
|
||||
if (pos++ == decpt)
|
||||
*buffer++ = '.';
|
||||
*buffer++ = *digits++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*buffer++ = '0';
|
||||
if (precision > 0)
|
||||
{
|
||||
*buffer++ = '.';
|
||||
for (pos = 0; pos < precision; pos++)
|
||||
*buffer++ = '0';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*buffer = '\0';
|
||||
}
|
||||
|
||||
/* decimal_point: make sure the number text in buffer contains a '.'.
 *
 * If the mantissa already has a decimal point nothing changes. Otherwise a
 * '.' is inserted in front of the exponent marker ('e' / 'E'), or appended
 * when there is no exponent. The buffer must have room for one extra
 * character. */
static void
decimal_point(char *buffer)
{
    /* Walk the mantissa; stop at the exponent marker or the end. */
    while (*buffer)
    {
        if (*buffer == '.')
            return;
        if (*buffer == 'e' || *buffer == 'E')
            break;
        buffer++;
    }

    if (*buffer)
    {
        /* Shift the exponent part, INCLUDING the marker at index 0 and the
         * terminating NUL at index n, one place to the right. Stopping at
         * n > 0 would leave the marker in place to be overwritten by the
         * '.' below. */
        int n = strnlen(buffer, 256);
        while (n >= 0)
        {
            buffer[n + 1] = buffer[n];
            n--;
        }

        *buffer = '.';
    }
    else
    {
        *buffer++ = '.';
        *buffer   = '\0';
    }
}
|
||||
|
||||
/* cropzeros: strip trailing zeros from the fractional part of the number
 * text in buffer, in place.
 *
 * A decimal point left without digits is removed as well. Any exponent
 * suffix ("e+003") is kept and moved down to follow the shortened
 * mantissa. Text without a '.' is left untouched. */
static void
cropzeros(char *buffer)
{
    char *tail;

    while (*buffer && *buffer != '.')
        buffer++;
    if (*buffer++)
    {
        /* Find the end of the fraction: exponent marker or end of string. */
        while (*buffer && *buffer != 'e' && *buffer != 'E')
            buffer++;
        tail = buffer--;
        while (*buffer == '0')
            buffer--;
        if (*buffer == '.')
            buffer--;
        /* Close the gap by copying the exponent (or just the NUL) down;
         * zero-filling here would erase the exponent marker too. */
        while ((*++buffer = *tail++) != '\0')
            ;
    }
}
|
||||
|
||||
static char *
|
||||
flt(char *str, double num, int size, int precision, char fmt, int flags)
|
||||
{
|
||||
char tmp[80];
|
||||
char c, sign;
|
||||
int n, i;
|
||||
|
||||
// Left align means no zero padding
|
||||
if (flags & LEFT)
|
||||
flags &= ~ZEROPAD;
|
||||
|
||||
// Determine padding and sign char
|
||||
c = (flags & ZEROPAD) ? '0' : ' ';
|
||||
sign = 0;
|
||||
if (flags & SIGN)
|
||||
{
|
||||
if (num < 0.0)
|
||||
{
|
||||
sign = '-';
|
||||
num = -num;
|
||||
size--;
|
||||
}
|
||||
else if (flags & PLUS)
|
||||
{
|
||||
sign = '+';
|
||||
size--;
|
||||
}
|
||||
else if (flags & SPACE)
|
||||
{
|
||||
sign = ' ';
|
||||
size--;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the precision value
|
||||
if (precision < 0)
|
||||
precision = 6; // Default precision: 6
|
||||
|
||||
// Convert floating point number to text
|
||||
parse_float(num, tmp, fmt, precision);
|
||||
|
||||
if ((flags & HEX_PREP) && precision == 0)
|
||||
decimal_point(tmp);
|
||||
if (fmt == 'g' && !(flags & HEX_PREP))
|
||||
cropzeros(tmp);
|
||||
|
||||
n = strnlen(tmp, 256);
|
||||
|
||||
// Output number with alignment and padding
|
||||
size -= n;
|
||||
if (!(flags & (ZEROPAD | LEFT)))
|
||||
while (size-- > 0)
|
||||
*str++ = ' ';
|
||||
if (sign)
|
||||
*str++ = sign;
|
||||
if (!(flags & LEFT))
|
||||
while (size-- > 0)
|
||||
*str++ = c;
|
||||
for (i = 0; i < n; i++)
|
||||
*str++ = tmp[i];
|
||||
while (size-- > 0)
|
||||
*str++ = ' ';
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static int
|
||||
ee_vsprintf(char *buf, const char *fmt, va_list args)
|
||||
{
|
||||
int len;
|
||||
unsigned long num;
|
||||
int i, base;
|
||||
char * str;
|
||||
char * s;
|
||||
|
||||
int flags; // Flags to number()
|
||||
|
||||
int field_width; // Width of output field
|
||||
int precision; // Min. # of digits for integers; max number of chars for
|
||||
// from string
|
||||
int qualifier; // 'h', 'l', or 'L' for integer fields
|
||||
|
||||
for (str = buf; *fmt; fmt++)
|
||||
{
|
||||
if (*fmt != '%')
|
||||
{
|
||||
*str++ = *fmt;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Process flags
|
||||
flags = 0;
|
||||
repeat:
|
||||
fmt++; // This also skips first '%'
|
||||
switch (*fmt)
|
||||
{
|
||||
case '-':
|
||||
flags |= LEFT;
|
||||
goto repeat;
|
||||
case '+':
|
||||
flags |= PLUS;
|
||||
goto repeat;
|
||||
case ' ':
|
||||
flags |= SPACE;
|
||||
goto repeat;
|
||||
case '#':
|
||||
flags |= HEX_PREP;
|
||||
goto repeat;
|
||||
case '0':
|
||||
flags |= ZEROPAD;
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
// Get field width
|
||||
field_width = -1;
|
||||
if (is_digit(*fmt))
|
||||
field_width = skip_atoi(&fmt);
|
||||
else if (*fmt == '*')
|
||||
{
|
||||
fmt++;
|
||||
field_width = va_arg(args, int);
|
||||
if (field_width < 0)
|
||||
{
|
||||
field_width = -field_width;
|
||||
flags |= LEFT;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the precision
|
||||
precision = -1;
|
||||
if (*fmt == '.')
|
||||
{
|
||||
++fmt;
|
||||
if (is_digit(*fmt))
|
||||
precision = skip_atoi(&fmt);
|
||||
else if (*fmt == '*')
|
||||
{
|
||||
++fmt;
|
||||
precision = va_arg(args, int);
|
||||
}
|
||||
if (precision < 0)
|
||||
precision = 0;
|
||||
}
|
||||
|
||||
// Get the conversion qualifier
|
||||
qualifier = -1;
|
||||
if (*fmt == 'l' || *fmt == 'L')
|
||||
{
|
||||
qualifier = *fmt;
|
||||
fmt++;
|
||||
}
|
||||
|
||||
// Default base
|
||||
base = 10;
|
||||
|
||||
switch (*fmt)
|
||||
{
|
||||
case 'c':
|
||||
if (!(flags & LEFT))
|
||||
while (--field_width > 0)
|
||||
*str++ = ' ';
|
||||
*str++ = (unsigned char)va_arg(args, int);
|
||||
while (--field_width > 0)
|
||||
*str++ = ' ';
|
||||
continue;
|
||||
|
||||
case 's':
|
||||
s = va_arg(args, char *);
|
||||
if (!s)
|
||||
s = "<NULL>";
|
||||
len = strnlen(s, precision);
|
||||
if (!(flags & LEFT))
|
||||
while (len < field_width--)
|
||||
*str++ = ' ';
|
||||
for (i = 0; i < len; ++i)
|
||||
*str++ = *s++;
|
||||
while (len < field_width--)
|
||||
*str++ = ' ';
|
||||
continue;
|
||||
|
||||
case 'p':
|
||||
if (field_width == -1)
|
||||
{
|
||||
field_width = 2 * sizeof(void *);
|
||||
flags |= ZEROPAD;
|
||||
}
|
||||
str = number(str,
|
||||
(unsigned long)va_arg(args, void *),
|
||||
16,
|
||||
field_width,
|
||||
precision,
|
||||
flags);
|
||||
continue;
|
||||
|
||||
case 'A':
|
||||
flags |= UPPERCASE;
|
||||
|
||||
case 'a':
|
||||
if (qualifier == 'l')
|
||||
str = eaddr(str,
|
||||
va_arg(args, unsigned char *),
|
||||
field_width,
|
||||
precision,
|
||||
flags);
|
||||
else
|
||||
str = iaddr(str,
|
||||
va_arg(args, unsigned char *),
|
||||
field_width,
|
||||
precision,
|
||||
flags);
|
||||
continue;
|
||||
|
||||
// Integer number formats - set up the flags and "break"
|
||||
case 'o':
|
||||
base = 8;
|
||||
break;
|
||||
|
||||
case 'X':
|
||||
flags |= UPPERCASE;
|
||||
|
||||
case 'x':
|
||||
base = 16;
|
||||
break;
|
||||
|
||||
case 'd':
|
||||
case 'i':
|
||||
flags |= SIGN;
|
||||
|
||||
case 'u':
|
||||
break;
|
||||
|
||||
#if HAS_FLOAT
|
||||
|
||||
case 'f':
|
||||
str = flt(str,
|
||||
va_arg(args, double),
|
||||
field_width,
|
||||
precision,
|
||||
*fmt,
|
||||
flags | SIGN);
|
||||
continue;
|
||||
|
||||
#endif
|
||||
|
||||
default:
|
||||
if (*fmt != '%')
|
||||
*str++ = '%';
|
||||
if (*fmt)
|
||||
*str++ = *fmt;
|
||||
else
|
||||
--fmt;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (qualifier == 'l')
|
||||
num = va_arg(args, unsigned long);
|
||||
else if (flags & SIGN)
|
||||
num = va_arg(args, int);
|
||||
else
|
||||
num = va_arg(args, unsigned int);
|
||||
|
||||
str = number(str, num, base, field_width, precision, flags);
|
||||
}
|
||||
|
||||
*str = '\0';
|
||||
return str - buf;
|
||||
}
|
||||
|
||||
#include "tb_cxxrtl_io.h"
|
||||
|
||||
/* uart_send_char: emit one character of benchmark output.
 *
 * This port has no UART driver; the character is handed to tb_putc() from
 * tb_cxxrtl_io.h.
 *
 * On a port with a real UART the usual sequence would be:
 *
 *     while (*UART_CONTROL_ADDRESS != UART_READY);
 *     *UART_DATA_ADDRESS = c;
 *     while (*UART_CONTROL_ADDRESS != UART_READY);
 *
 * i.e. wait until the UART is ready, write the character, then wait until
 * it is done. Check the UART sample code for the platform or the board
 * documentation. */
void
uart_send_char(char c)
{
    tb_putc(c);
}
|
||||
|
||||
/* ee_printf: printf replacement for the benchmark.
 *
 * Formats into a 1024-byte stack buffer with ee_vsprintf() and sends the
 * result one character at a time through uart_send_char(). The formatted
 * text must fit in that buffer; no bounds checking is performed.
 *
 * Returns the number of characters sent. */
int
ee_printf(const char *fmt, ...)
{
    char    text[1024];
    char *  cursor;
    va_list ap;
    int     sent = 0;

    va_start(ap, fmt);
    ee_vsprintf(text, fmt, ap);
    va_end(ap);

    for (cursor = text; *cursor; ++cursor)
    {
        uart_send_char(*cursor);
        ++sent;
    }

    return sent;
}
|
|
@ -0,0 +1 @@
|
|||
../../../common/init.S
|
|
@ -0,0 +1 @@
|
|||
../../../common/tb_cxxrtl_io.h
|
|
@ -0,0 +1,595 @@
|
|||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/*
|
||||
Topic: Description
|
||||
Benchmark using a linked list.
|
||||
|
||||
Linked list is a common data structure used in many applications.
|
||||
|
||||
For our purposes, this will exercise the memory units of the processor.
|
||||
In particular, usage of the list pointers to find and alter data.
|
||||
|
||||
We are not using Malloc since some platforms do not support this
|
||||
library.
|
||||
|
||||
Instead, the memory block being passed in is used to create a list,
|
||||
and the benchmark takes care not to add more items than can be
|
||||
accommodated by the memory block. The porting layer will make sure
|
||||
that we have a valid memory block.
|
||||
|
||||
All operations are done in place, without using any extra memory.
|
||||
|
||||
The list itself contains list pointers and pointers to data items.
|
||||
Data items contain the following:
|
||||
|
||||
idx - An index that captures the initial order of the list.
|
||||
data - Variable data initialized based on the input parameters. The 16b
|
||||
are divided as follows: o Upper 8b are backup of original data. o Bit 7
|
||||
indicates if the lower 7 bits are to be used as is or calculated. o Bits 0-2
|
||||
indicate type of operation to perform to get a 7b value. o Bits 3-6 provide
|
||||
input for the operation.
|
||||
|
||||
*/
|
||||
|
||||
/* local functions */
|
||||
|
||||
list_head *core_list_find(list_head *list, list_data *info);
|
||||
list_head *core_list_reverse(list_head *list);
|
||||
list_head *core_list_remove(list_head *item);
|
||||
list_head *core_list_undo_remove(list_head *item_removed,
|
||||
list_head *item_modified);
|
||||
list_head *core_list_insert_new(list_head * insert_point,
|
||||
list_data * info,
|
||||
list_head **memblock,
|
||||
list_data **datablock,
|
||||
list_head * memblock_end,
|
||||
list_data * datablock_end);
|
||||
typedef ee_s32 (*list_cmp)(list_data *a, list_data *b, core_results *res);
|
||||
list_head *core_list_mergesort(list_head * list,
|
||||
list_cmp cmp,
|
||||
core_results *res);
|
||||
|
||||
ee_s16
|
||||
calc_func(ee_s16 *pdata, core_results *res)
|
||||
{
|
||||
ee_s16 data = *pdata;
|
||||
ee_s16 retval;
|
||||
ee_u8 optype
|
||||
= (data >> 7)
|
||||
& 1; /* bit 7 indicates if the function result has been cached */
|
||||
if (optype) /* if cached, use cache */
|
||||
return (data & 0x007f);
|
||||
else
|
||||
{ /* otherwise calculate and cache the result */
|
||||
ee_s16 flag = data & 0x7; /* bits 0-2 is type of function to perform */
|
||||
ee_s16 dtype
|
||||
= ((data >> 3)
|
||||
& 0xf); /* bits 3-6 is specific data for the operation */
|
||||
dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
|
||||
switch (flag)
|
||||
{
|
||||
case 0:
|
||||
if (dtype < 0x22) /* set min period for bit corruption */
|
||||
dtype = 0x22;
|
||||
retval = core_bench_state(res->size,
|
||||
res->memblock[3],
|
||||
res->seed1,
|
||||
res->seed2,
|
||||
dtype,
|
||||
res->crc);
|
||||
if (res->crcstate == 0)
|
||||
res->crcstate = retval;
|
||||
break;
|
||||
case 1:
|
||||
retval = core_bench_matrix(&(res->mat), dtype, res->crc);
|
||||
if (res->crcmatrix == 0)
|
||||
res->crcmatrix = retval;
|
||||
break;
|
||||
default:
|
||||
retval = data;
|
||||
break;
|
||||
}
|
||||
res->crc = crcu16(retval, res->crc);
|
||||
retval &= 0x007f;
|
||||
*pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
|
||||
return retval;
|
||||
}
|
||||
}
|
||||
/* Function: cmp_complex
|
||||
Compare the data item in a list cell.
|
||||
|
||||
Can be used by mergesort.
|
||||
*/
|
||||
ee_s32
|
||||
cmp_complex(list_data *a, list_data *b, core_results *res)
|
||||
{
|
||||
ee_s16 val1 = calc_func(&(a->data16), res);
|
||||
ee_s16 val2 = calc_func(&(b->data16), res);
|
||||
return val1 - val2;
|
||||
}
|
||||
|
||||
/* Function: cmp_idx
|
||||
Compare the idx item in a list cell, and regen the data.
|
||||
|
||||
Can be used by mergesort.
|
||||
*/
|
||||
ee_s32
|
||||
cmp_idx(list_data *a, list_data *b, core_results *res)
|
||||
{
|
||||
if (res == NULL)
|
||||
{
|
||||
a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16 >> 8));
|
||||
b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16 >> 8));
|
||||
}
|
||||
return a->idx - b->idx;
|
||||
}
|
||||
|
||||
void
|
||||
copy_info(list_data *to, list_data *from)
|
||||
{
|
||||
to->data16 = from->data16;
|
||||
to->idx = from->idx;
|
||||
}
|
||||
|
||||
/* Benchmark for linked list:
|
||||
- Try to find multiple data items.
|
||||
- List sort
|
||||
- Operate on data from list (crc)
|
||||
- Single remove/reinsert
|
||||
* At the end of this function, the list is back to original state
|
||||
*/
|
||||
ee_u16
|
||||
core_bench_list(core_results *res, ee_s16 finder_idx)
|
||||
{
|
||||
ee_u16 retval = 0;
|
||||
ee_u16 found = 0, missed = 0;
|
||||
list_head *list = res->list;
|
||||
ee_s16 find_num = res->seed3;
|
||||
list_head *this_find;
|
||||
list_head *finder, *remover;
|
||||
list_data info;
|
||||
ee_s16 i;
|
||||
|
||||
info.idx = finder_idx;
|
||||
/* find <find_num> values in the list, and change the list each time
|
||||
* (reverse and cache if value found) */
|
||||
for (i = 0; i < find_num; i++)
|
||||
{
|
||||
info.data16 = (i & 0xff);
|
||||
this_find = core_list_find(list, &info);
|
||||
list = core_list_reverse(list);
|
||||
if (this_find == NULL)
|
||||
{
|
||||
missed++;
|
||||
retval += (list->next->info->data16 >> 8) & 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
found++;
|
||||
if (this_find->info->data16 & 0x1) /* use found value */
|
||||
retval += (this_find->info->data16 >> 9) & 1;
|
||||
/* and cache next item at the head of the list (if any) */
|
||||
if (this_find->next != NULL)
|
||||
{
|
||||
finder = this_find->next;
|
||||
this_find->next = finder->next;
|
||||
finder->next = list->next;
|
||||
list->next = finder;
|
||||
}
|
||||
}
|
||||
if (info.idx >= 0)
|
||||
info.idx++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List find %d: [%d,%d,%d]\n", i, retval, missed, found);
|
||||
#endif
|
||||
}
|
||||
retval += found * 4 - missed;
|
||||
/* sort the list by data content and remove one item*/
|
||||
if (finder_idx > 0)
|
||||
list = core_list_mergesort(list, cmp_complex, res);
|
||||
remover = core_list_remove(list->next);
|
||||
/* CRC data content of list from location of index N forward, and then undo
|
||||
* remove */
|
||||
finder = core_list_find(list, &info);
|
||||
if (!finder)
|
||||
finder = list->next;
|
||||
while (finder)
|
||||
{
|
||||
retval = crc16(list->info->data16, retval);
|
||||
finder = finder->next;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List sort 1: %04x\n", retval);
|
||||
#endif
|
||||
remover = core_list_undo_remove(remover, list->next);
|
||||
/* sort the list by index, in effect returning the list to original state */
|
||||
list = core_list_mergesort(list, cmp_idx, NULL);
|
||||
/* CRC data content of list */
|
||||
finder = list->next;
|
||||
while (finder)
|
||||
{
|
||||
retval = crc16(list->info->data16, retval);
|
||||
finder = finder->next;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List sort 2: %04x\n", retval);
|
||||
#endif
|
||||
return retval;
|
||||
}
|
||||
/* Function: core_list_init
|
||||
Initialize list with data.
|
||||
|
||||
Parameters:
|
||||
blksize - Size of memory to be initialized.
|
||||
memblock - Pointer to memory block.
|
||||
seed - Actual values chosen depend on the seed parameter.
|
||||
The seed parameter MUST be supplied from a source that cannot be
|
||||
determined at compile time
|
||||
|
||||
Returns:
|
||||
Pointer to the head of the list.
|
||||
|
||||
*/
|
||||
list_head *
|
||||
core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed)
|
||||
{
|
||||
/* calculated pointers for the list */
|
||||
ee_u32 per_item = 16 + sizeof(struct list_data_s);
|
||||
ee_u32 size = (blksize / per_item)
|
||||
- 2; /* to accomodate systems with 64b pointers, and make sure
|
||||
same code is executed, set max list elements */
|
||||
list_head *memblock_end = memblock + size;
|
||||
list_data *datablock = (list_data *)(memblock_end);
|
||||
list_data *datablock_end = datablock + size;
|
||||
/* some useful variables */
|
||||
ee_u32 i;
|
||||
list_head *finder, *list = memblock;
|
||||
list_data info;
|
||||
|
||||
/* create a fake items for the list head and tail */
|
||||
list->next = NULL;
|
||||
list->info = datablock;
|
||||
list->info->idx = 0x0000;
|
||||
list->info->data16 = (ee_s16)0x8080;
|
||||
memblock++;
|
||||
datablock++;
|
||||
info.idx = 0x7fff;
|
||||
info.data16 = (ee_s16)0xffff;
|
||||
core_list_insert_new(
|
||||
list, &info, &memblock, &datablock, memblock_end, datablock_end);
|
||||
|
||||
/* then insert size items */
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
ee_u16 datpat = ((ee_u16)(seed ^ i) & 0xf);
|
||||
ee_u16 dat
|
||||
= (datpat << 3) | (i & 0x7); /* alternate between algorithms */
|
||||
info.data16 = (dat << 8) | dat; /* fill the data with actual data and
|
||||
upper bits with rebuild value */
|
||||
core_list_insert_new(
|
||||
list, &info, &memblock, &datablock, memblock_end, datablock_end);
|
||||
}
|
||||
/* and now index the list so we know initial seed order of the list */
|
||||
finder = list->next;
|
||||
i = 1;
|
||||
while (finder->next != NULL)
|
||||
{
|
||||
if (i < size / 5) /* first 20% of the list in order */
|
||||
finder->info->idx = i++;
|
||||
else
|
||||
{
|
||||
ee_u16 pat = (ee_u16)(i++ ^ seed); /* get a pseudo random number */
|
||||
finder->info->idx = 0x3fff
|
||||
& (((i & 0x07) << 8)
|
||||
| pat); /* make sure the mixed items end up
|
||||
after the ones in sequence */
|
||||
}
|
||||
finder = finder->next;
|
||||
}
|
||||
list = core_list_mergesort(list, cmp_idx, NULL);
|
||||
#if CORE_DEBUG
|
||||
ee_printf("Initialized list:\n");
|
||||
finder = list;
|
||||
while (finder)
|
||||
{
|
||||
ee_printf(
|
||||
"[%04x,%04x]", finder->info->idx, (ee_u16)finder->info->data16);
|
||||
finder = finder->next;
|
||||
}
|
||||
ee_printf("\n");
|
||||
#endif
|
||||
return list;
|
||||
}
|
||||
|
||||
/* Function: core_list_insert
|
||||
Insert an item to the list
|
||||
|
||||
Parameters:
|
||||
insert_point - where to insert the item.
|
||||
info - data for the cell.
|
||||
memblock - pointer for the list header
|
||||
datablock - pointer for the list data
|
||||
memblock_end - end of region for list headers
|
||||
datablock_end - end of region for list data
|
||||
|
||||
Returns:
|
||||
Pointer to new item.
|
||||
*/
|
||||
list_head *
|
||||
core_list_insert_new(list_head * insert_point,
|
||||
list_data * info,
|
||||
list_head **memblock,
|
||||
list_data **datablock,
|
||||
list_head * memblock_end,
|
||||
list_data * datablock_end)
|
||||
{
|
||||
list_head *newitem;
|
||||
|
||||
if ((*memblock + 1) >= memblock_end)
|
||||
return NULL;
|
||||
if ((*datablock + 1) >= datablock_end)
|
||||
return NULL;
|
||||
|
||||
newitem = *memblock;
|
||||
(*memblock)++;
|
||||
newitem->next = insert_point->next;
|
||||
insert_point->next = newitem;
|
||||
|
||||
newitem->info = *datablock;
|
||||
(*datablock)++;
|
||||
copy_info(newitem->info, info);
|
||||
|
||||
return newitem;
|
||||
}
|
||||
|
||||
/* Function: core_list_remove
|
||||
Remove an item from the list.
|
||||
|
||||
Operation:
|
||||
For a singly linked list, remove by copying the data from the next item
|
||||
over to the current cell, and unlinking the next item.
|
||||
|
||||
Note:
|
||||
since there is always a fake item at the end of the list, no need to
|
||||
check for NULL.
|
||||
|
||||
Returns:
|
||||
Removed item.
|
||||
*/
|
||||
list_head *
|
||||
core_list_remove(list_head *item)
|
||||
{
|
||||
list_data *tmp;
|
||||
list_head *ret = item->next;
|
||||
/* swap data pointers */
|
||||
tmp = item->info;
|
||||
item->info = ret->info;
|
||||
ret->info = tmp;
|
||||
/* and eliminate item */
|
||||
item->next = item->next->next;
|
||||
ret->next = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Function: core_list_undo_remove
|
||||
Undo a remove operation.
|
||||
|
||||
Operation:
|
||||
Since we want each iteration of the benchmark to be exactly the same,
|
||||
we need to be able to undo a remove.
|
||||
Link the removed item back into the list, and switch the info items.
|
||||
|
||||
Parameters:
|
||||
item_removed - Return value from the <core_list_remove>
|
||||
item_modified - List item that was modified during <core_list_remove>
|
||||
|
||||
Returns:
|
||||
The item that was linked back to the list.
|
||||
|
||||
*/
|
||||
list_head *
|
||||
core_list_undo_remove(list_head *item_removed, list_head *item_modified)
|
||||
{
|
||||
list_data *tmp;
|
||||
/* swap data pointers */
|
||||
tmp = item_removed->info;
|
||||
item_removed->info = item_modified->info;
|
||||
item_modified->info = tmp;
|
||||
/* and insert item */
|
||||
item_removed->next = item_modified->next;
|
||||
item_modified->next = item_removed;
|
||||
return item_removed;
|
||||
}
|
||||
|
||||
/* Function: core_list_find
|
||||
Find an item in the list
|
||||
|
||||
Operation:
|
||||
Find an item by idx (if not 0) or specific data value
|
||||
|
||||
Parameters:
|
||||
list - list head
|
||||
info - idx or data to find
|
||||
|
||||
Returns:
|
||||
Found item, or NULL if not found.
|
||||
*/
|
||||
list_head *
|
||||
core_list_find(list_head *list, list_data *info)
|
||||
{
|
||||
if (info->idx >= 0)
|
||||
{
|
||||
while (list && (list->info->idx != info->idx))
|
||||
list = list->next;
|
||||
return list;
|
||||
}
|
||||
else
|
||||
{
|
||||
while (list && ((list->info->data16 & 0xff) != info->data16))
|
||||
list = list->next;
|
||||
return list;
|
||||
}
|
||||
}
|
||||
/* Function: core_list_reverse
|
||||
Reverse a list
|
||||
|
||||
Operation:
|
||||
Rearrange the pointers so the list is reversed.
|
||||
|
||||
Parameters:
|
||||
list - list head
|
||||
info - idx or data to find
|
||||
|
||||
Returns:
|
||||
Found item, or NULL if not found.
|
||||
*/
|
||||
|
||||
list_head *
|
||||
core_list_reverse(list_head *list)
|
||||
{
|
||||
list_head *next = NULL, *tmp;
|
||||
while (list)
|
||||
{
|
||||
tmp = list->next;
|
||||
list->next = next;
|
||||
next = list;
|
||||
list = tmp;
|
||||
}
|
||||
return next;
|
||||
}
|
||||
/* Function: core_list_mergesort
|
||||
Sort the list in place without recursion.
|
||||
|
||||
Description:
|
||||
Use mergesort, as for linked list this is a realistic solution.
|
||||
Also, since this is aimed at embedded, care was taken to use iterative
|
||||
rather then recursive algorithm. The sort can either return the list to
|
||||
original order (by idx) , or use the data item to invoke other other
|
||||
algorithms and change the order of the list.
|
||||
|
||||
Parameters:
|
||||
list - list to be sorted.
|
||||
cmp - cmp function to use
|
||||
|
||||
Returns:
|
||||
New head of the list.
|
||||
|
||||
Note:
|
||||
We have a special header for the list that will always be first,
|
||||
but the algorithm could theoretically modify where the list starts.
|
||||
|
||||
*/
|
||||
list_head *
|
||||
core_list_mergesort(list_head *list, list_cmp cmp, core_results *res)
|
||||
{
|
||||
list_head *p, *q, *e, *tail;
|
||||
ee_s32 insize, nmerges, psize, qsize, i;
|
||||
|
||||
insize = 1;
|
||||
|
||||
while (1)
|
||||
{
|
||||
p = list;
|
||||
list = NULL;
|
||||
tail = NULL;
|
||||
|
||||
nmerges = 0; /* count number of merges we do in this pass */
|
||||
|
||||
while (p)
|
||||
{
|
||||
nmerges++; /* there exists a merge to be done */
|
||||
/* step `insize' places along from p */
|
||||
q = p;
|
||||
psize = 0;
|
||||
for (i = 0; i < insize; i++)
|
||||
{
|
||||
psize++;
|
||||
q = q->next;
|
||||
if (!q)
|
||||
break;
|
||||
}
|
||||
|
||||
/* if q hasn't fallen off end, we have two lists to merge */
|
||||
qsize = insize;
|
||||
|
||||
/* now we have two lists; merge them */
|
||||
while (psize > 0 || (qsize > 0 && q))
|
||||
{
|
||||
|
||||
/* decide whether next element of merge comes from p or q */
|
||||
if (psize == 0)
|
||||
{
|
||||
/* p is empty; e must come from q. */
|
||||
e = q;
|
||||
q = q->next;
|
||||
qsize--;
|
||||
}
|
||||
else if (qsize == 0 || !q)
|
||||
{
|
||||
/* q is empty; e must come from p. */
|
||||
e = p;
|
||||
p = p->next;
|
||||
psize--;
|
||||
}
|
||||
else if (cmp(p->info, q->info, res) <= 0)
|
||||
{
|
||||
/* First element of p is lower (or same); e must come from
|
||||
* p. */
|
||||
e = p;
|
||||
p = p->next;
|
||||
psize--;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First element of q is lower; e must come from q. */
|
||||
e = q;
|
||||
q = q->next;
|
||||
qsize--;
|
||||
}
|
||||
|
||||
/* add the next element to the merged list */
|
||||
if (tail)
|
||||
{
|
||||
tail->next = e;
|
||||
}
|
||||
else
|
||||
{
|
||||
list = e;
|
||||
}
|
||||
tail = e;
|
||||
}
|
||||
|
||||
/* now p has stepped `insize' places along, and q has too */
|
||||
p = q;
|
||||
}
|
||||
|
||||
tail->next = NULL;
|
||||
|
||||
/* If we have done only one merge, we're finished. */
|
||||
if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
|
||||
return list;
|
||||
|
||||
/* Otherwise repeat, merging lists twice the size */
|
||||
insize *= 2;
|
||||
}
|
||||
#if COMPILER_REQUIRES_SORT_RETURN
|
||||
return list;
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,442 @@
|
|||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
/* File: core_main.c
|
||||
This file contains the framework to acquire a block of memory, seed
|
||||
initial parameters, run the benchmark and report the results.
|
||||
*/
|
||||
#include "coremark.h"
|
||||
|
||||
/* Function: iterate
|
||||
Run the benchmark for a specified number of iterations.
|
||||
|
||||
Operation:
|
||||
For each type of benchmarked algorithm:
|
||||
a - Initialize the data block for the algorithm.
|
||||
b - Execute the algorithm N times.
|
||||
|
||||
Returns:
|
||||
NULL.
|
||||
*/
|
||||
static ee_u16 list_known_crc[] = { (ee_u16)0xd4b0,
|
||||
(ee_u16)0x3340,
|
||||
(ee_u16)0x6a79,
|
||||
(ee_u16)0xe714,
|
||||
(ee_u16)0xe3c1 };
|
||||
static ee_u16 matrix_known_crc[] = { (ee_u16)0xbe52,
|
||||
(ee_u16)0x1199,
|
||||
(ee_u16)0x5608,
|
||||
(ee_u16)0x1fd7,
|
||||
(ee_u16)0x0747 };
|
||||
static ee_u16 state_known_crc[] = { (ee_u16)0x5e47,
|
||||
(ee_u16)0x39bf,
|
||||
(ee_u16)0xe5a4,
|
||||
(ee_u16)0x8e3a,
|
||||
(ee_u16)0x8d84 };
|
||||
void *
|
||||
iterate(void *pres)
|
||||
{
|
||||
ee_u32 i;
|
||||
ee_u16 crc;
|
||||
core_results *res = (core_results *)pres;
|
||||
ee_u32 iterations = res->iterations;
|
||||
res->crc = 0;
|
||||
res->crclist = 0;
|
||||
res->crcmatrix = 0;
|
||||
res->crcstate = 0;
|
||||
|
||||
for (i = 0; i < iterations; i++)
|
||||
{
|
||||
crc = core_bench_list(res, 1);
|
||||
res->crc = crcu16(crc, res->crc);
|
||||
crc = core_bench_list(res, -1);
|
||||
res->crc = crcu16(crc, res->crc);
|
||||
if (i == 0)
|
||||
res->crclist = res->crc;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Seed access helpers.
 *
 * get_seed(x)    - seed number x, cast to ee_s16.
 * get_seed_32(x) - seed number x as the full ee_s32 value.
 *
 * With SEED_METHOD == SEED_ARG both are macros around get_seed_args() and
 * expand to uses of the identifiers `argc` and `argv`, so they can only be
 * used where variables with those names are in scope (main() declares them
 * in both of its variants). Otherwise get_seed_32() is an ordinary function
 * declared here and defined elsewhere, and get_seed() wraps it.
 *
 * Note that the get_seed() expansions are not wrapped in parentheses.
 */
#if (SEED_METHOD == SEED_ARG)
|
||||
ee_s32 get_seed_args(int i, int argc, char *argv[]);
|
||||
#define get_seed(x) (ee_s16) get_seed_args(x, argc, argv)
|
||||
#define get_seed_32(x) get_seed_args(x, argc, argv)
|
||||
#else /* via function or volatile */
|
||||
ee_s32 get_seed_32(int i);
|
||||
#define get_seed(x) (ee_s16) get_seed_32(x)
|
||||
#endif
|
||||
|
||||
/* static_memblk: data area of TOTAL_DATA_SIZE bytes, only present when
 * MEM_METHOD == MEM_STATIC; main() uses it as results[0].memblock[0].
 * mem_name: printable names that main() indexes with MEM_METHOD when it
 * reports the memory location. */
#if (MEM_METHOD == MEM_STATIC)
|
||||
ee_u8 static_memblk[TOTAL_DATA_SIZE];
|
||||
#endif
|
||||
char *mem_name[3] = { "Static", "Heap", "Stack" };
|
||||
/* Function: main
|
||||
Main entry routine for the benchmark.
|
||||
This function is responsible for the following steps:
|
||||
|
||||
1 - Initialize input seeds from a source that cannot be determined at
|
||||
compile time. 2 - Initialize memory block for use. 3 - Run and time the
|
||||
benchmark. 4 - Report results, testing the validity of the output if the
|
||||
seeds are known.
|
||||
|
||||
Arguments:
|
||||
1 - first seed : Any value
|
||||
2 - second seed : Must be identical to first for iterations to be
|
||||
identical 3 - third seed : Any value, should be at least an order of
|
||||
magnitude less then the input size, but bigger then 32. 4 - Iterations :
|
||||
Special, if set to 0, iterations will be automatically determined such that
|
||||
the benchmark will run between 10 to 100 secs
|
||||
|
||||
*/
|
||||
|
||||
#if MAIN_HAS_NOARGC
|
||||
MAIN_RETURN_TYPE
|
||||
main(void)
|
||||
{
|
||||
int argc = 0;
|
||||
char *argv[1];
|
||||
#else
|
||||
MAIN_RETURN_TYPE
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
#endif
|
||||
ee_u16 i, j = 0, num_algorithms = 0;
|
||||
ee_s16 known_id = -1, total_errors = 0;
|
||||
ee_u16 seedcrc = 0;
|
||||
CORE_TICKS total_time;
|
||||
core_results results[MULTITHREAD];
|
||||
#if (MEM_METHOD == MEM_STACK)
|
||||
ee_u8 stack_memblock[TOTAL_DATA_SIZE * MULTITHREAD];
|
||||
#endif
|
||||
/* first call any initializations needed */
|
||||
portable_init(&(results[0].port), &argc, argv);
|
||||
/* First some checks to make sure benchmark will run ok */
|
||||
if (sizeof(struct list_head_s) > 128)
|
||||
{
|
||||
ee_printf("list_head structure too big for comparable data!\n");
|
||||
return MAIN_RETURN_VAL;
|
||||
}
|
||||
results[0].seed1 = get_seed(1);
|
||||
results[0].seed2 = get_seed(2);
|
||||
results[0].seed3 = get_seed(3);
|
||||
results[0].iterations = get_seed_32(4);
|
||||
#if CORE_DEBUG
|
||||
results[0].iterations = 1;
|
||||
#endif
|
||||
results[0].execs = get_seed_32(5);
|
||||
if (results[0].execs == 0)
|
||||
{ /* if not supplied, execute all algorithms */
|
||||
results[0].execs = ALL_ALGORITHMS_MASK;
|
||||
}
|
||||
/* put in some default values based on one seed only for easy testing */
|
||||
if ((results[0].seed1 == 0) && (results[0].seed2 == 0)
|
||||
&& (results[0].seed3 == 0))
|
||||
{ /* perfromance run */
|
||||
results[0].seed1 = 0;
|
||||
results[0].seed2 = 0;
|
||||
results[0].seed3 = 0x66;
|
||||
}
|
||||
if ((results[0].seed1 == 1) && (results[0].seed2 == 0)
|
||||
&& (results[0].seed3 == 0))
|
||||
{ /* validation run */
|
||||
results[0].seed1 = 0x3415;
|
||||
results[0].seed2 = 0x3415;
|
||||
results[0].seed3 = 0x66;
|
||||
}
|
||||
#if (MEM_METHOD == MEM_STATIC)
|
||||
results[0].memblock[0] = (void *)static_memblk;
|
||||
results[0].size = TOTAL_DATA_SIZE;
|
||||
results[0].err = 0;
|
||||
#if (MULTITHREAD > 1)
|
||||
#error "Cannot use a static data area with multiple contexts!"
|
||||
#endif
|
||||
#elif (MEM_METHOD == MEM_MALLOC)
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
{
|
||||
ee_s32 malloc_override = get_seed(7);
|
||||
if (malloc_override != 0)
|
||||
results[i].size = malloc_override;
|
||||
else
|
||||
results[i].size = TOTAL_DATA_SIZE;
|
||||
results[i].memblock[0] = portable_malloc(results[i].size);
|
||||
results[i].seed1 = results[0].seed1;
|
||||
results[i].seed2 = results[0].seed2;
|
||||
results[i].seed3 = results[0].seed3;
|
||||
results[i].err = 0;
|
||||
results[i].execs = results[0].execs;
|
||||
}
|
||||
#elif (MEM_METHOD == MEM_STACK)
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
{
|
||||
results[i].memblock[0] = stack_memblock + i * TOTAL_DATA_SIZE;
|
||||
results[i].size = TOTAL_DATA_SIZE;
|
||||
results[i].seed1 = results[0].seed1;
|
||||
results[i].seed2 = results[0].seed2;
|
||||
results[i].seed3 = results[0].seed3;
|
||||
results[i].err = 0;
|
||||
results[i].execs = results[0].execs;
|
||||
}
|
||||
#else
|
||||
#error "Please define a way to initialize a memory block."
|
||||
#endif
|
||||
/* Data init */
|
||||
/* Find out how space much we have based on number of algorithms */
|
||||
for (i = 0; i < NUM_ALGORITHMS; i++)
|
||||
{
|
||||
if ((1 << (ee_u32)i) & results[0].execs)
|
||||
num_algorithms++;
|
||||
}
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
results[i].size = results[i].size / num_algorithms;
|
||||
/* Assign pointers */
|
||||
for (i = 0; i < NUM_ALGORITHMS; i++)
|
||||
{
|
||||
ee_u32 ctx;
|
||||
if ((1 << (ee_u32)i) & results[0].execs)
|
||||
{
|
||||
for (ctx = 0; ctx < MULTITHREAD; ctx++)
|
||||
results[ctx].memblock[i + 1]
|
||||
= (char *)(results[ctx].memblock[0]) + results[0].size * j;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
/* call inits */
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
{
|
||||
if (results[i].execs & ID_LIST)
|
||||
{
|
||||
results[i].list = core_list_init(
|
||||
results[0].size, results[i].memblock[1], results[i].seed1);
|
||||
}
|
||||
if (results[i].execs & ID_MATRIX)
|
||||
{
|
||||
core_init_matrix(results[0].size,
|
||||
results[i].memblock[2],
|
||||
(ee_s32)results[i].seed1
|
||||
| (((ee_s32)results[i].seed2) << 16),
|
||||
&(results[i].mat));
|
||||
}
|
||||
if (results[i].execs & ID_STATE)
|
||||
{
|
||||
core_init_state(
|
||||
results[0].size, results[i].seed1, results[i].memblock[3]);
|
||||
}
|
||||
}
|
||||
|
||||
/* automatically determine number of iterations if not set */
|
||||
if (results[0].iterations == 0)
|
||||
{
|
||||
secs_ret secs_passed = 0;
|
||||
ee_u32 divisor;
|
||||
results[0].iterations = 1;
|
||||
while (secs_passed < (secs_ret)1)
|
||||
{
|
||||
results[0].iterations *= 10;
|
||||
start_time();
|
||||
iterate(&results[0]);
|
||||
stop_time();
|
||||
secs_passed = time_in_secs(get_time());
|
||||
}
|
||||
/* now we know it executes for at least 1 sec, set actual run time at
|
||||
* about 10 secs */
|
||||
divisor = (ee_u32)secs_passed;
|
||||
if (divisor == 0) /* some machines cast float to int as 0 since this
|
||||
conversion is not defined by ANSI, but we know at
|
||||
least one second passed */
|
||||
divisor = 1;
|
||||
results[0].iterations *= 1 + 10 / divisor;
|
||||
}
|
||||
/* perform actual benchmark */
|
||||
start_time();
|
||||
#if (MULTITHREAD > 1)
|
||||
if (default_num_contexts > MULTITHREAD)
|
||||
{
|
||||
default_num_contexts = MULTITHREAD;
|
||||
}
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
{
|
||||
results[i].iterations = results[0].iterations;
|
||||
results[i].execs = results[0].execs;
|
||||
core_start_parallel(&results[i]);
|
||||
}
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
{
|
||||
core_stop_parallel(&results[i]);
|
||||
}
|
||||
#else
|
||||
iterate(&results[0]);
|
||||
#endif
|
||||
stop_time();
|
||||
total_time = get_time();
|
||||
/* get a function of the input to report */
|
||||
seedcrc = crc16(results[0].seed1, seedcrc);
|
||||
seedcrc = crc16(results[0].seed2, seedcrc);
|
||||
seedcrc = crc16(results[0].seed3, seedcrc);
|
||||
seedcrc = crc16(results[0].size, seedcrc);
|
||||
|
||||
switch (seedcrc)
|
||||
{ /* test known output for common seeds */
|
||||
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
|
||||
known_id = 0;
|
||||
ee_printf("6k performance run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per
|
||||
algorithm */
|
||||
known_id = 1;
|
||||
ee_printf("6k validation run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm
|
||||
*/
|
||||
known_id = 2;
|
||||
ee_printf("Profile generation run parameters for coremark.\n");
|
||||
break;
|
||||
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
|
||||
known_id = 3;
|
||||
ee_printf("2K performance run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per
|
||||
algorithm */
|
||||
known_id = 4;
|
||||
ee_printf("2K validation run parameters for coremark.\n");
|
||||
break;
|
||||
default:
|
||||
total_errors = -1;
|
||||
break;
|
||||
}
|
||||
if (known_id >= 0)
|
||||
{
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
{
|
||||
results[i].err = 0;
|
||||
if ((results[i].execs & ID_LIST)
|
||||
&& (results[i].crclist != list_known_crc[known_id]))
|
||||
{
|
||||
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",
|
||||
i,
|
||||
results[i].crclist,
|
||||
list_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
if ((results[i].execs & ID_MATRIX)
|
||||
&& (results[i].crcmatrix != matrix_known_crc[known_id]))
|
||||
{
|
||||
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",
|
||||
i,
|
||||
results[i].crcmatrix,
|
||||
matrix_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
if ((results[i].execs & ID_STATE)
|
||||
&& (results[i].crcstate != state_known_crc[known_id]))
|
||||
{
|
||||
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",
|
||||
i,
|
||||
results[i].crcstate,
|
||||
state_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
total_errors += results[i].err;
|
||||
}
|
||||
}
|
||||
total_errors += check_data_types();
|
||||
/* and report results */
|
||||
ee_printf("CoreMark Size : %lu\n", (long unsigned)results[0].size);
|
||||
ee_printf("Total ticks : %lu\n", (long unsigned)total_time);
|
||||
#if HAS_FLOAT
|
||||
ee_printf("Total time (secs): %f\n", time_in_secs(total_time));
|
||||
if (time_in_secs(total_time) > 0)
|
||||
ee_printf("Iterations/Sec : %f\n",
|
||||
default_num_contexts * results[0].iterations
|
||||
/ time_in_secs(total_time));
|
||||
#else
|
||||
ee_printf("Total time (secs): %d\n", time_in_secs(total_time));
|
||||
if (time_in_secs(total_time) > 0)
|
||||
ee_printf("Iterations/Sec : %d\n",
|
||||
default_num_contexts * results[0].iterations
|
||||
/ time_in_secs(total_time));
|
||||
#endif
|
||||
if (time_in_secs(total_time) < 10)
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR! Must execute for at least 10 secs for a valid result!\n");
|
||||
total_errors++;
|
||||
}
|
||||
|
||||
ee_printf("Iterations : %lu\n",
|
||||
(long unsigned)default_num_contexts * results[0].iterations);
|
||||
ee_printf("Compiler version : %s\n", COMPILER_VERSION);
|
||||
ee_printf("Compiler flags : %s\n", COMPILER_FLAGS);
|
||||
#if (MULTITHREAD > 1)
|
||||
ee_printf("Parallel %s : %d\n", PARALLEL_METHOD, default_num_contexts);
|
||||
#endif
|
||||
ee_printf("Memory location : %s\n", MEM_LOCATION);
|
||||
/* output for verification */
|
||||
ee_printf("seedcrc : 0x%04x\n", seedcrc);
|
||||
if (results[0].execs & ID_LIST)
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crclist : 0x%04x\n", i, results[i].crclist);
|
||||
if (results[0].execs & ID_MATRIX)
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crcmatrix : 0x%04x\n", i, results[i].crcmatrix);
|
||||
if (results[0].execs & ID_STATE)
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crcstate : 0x%04x\n", i, results[i].crcstate);
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crcfinal : 0x%04x\n", i, results[i].crc);
|
||||
if (total_errors == 0)
|
||||
{
|
||||
ee_printf(
|
||||
"Correct operation validated. See README.md for run and reporting "
|
||||
"rules.\n");
|
||||
#if HAS_FLOAT
|
||||
if (known_id == 3)
|
||||
{
|
||||
ee_printf("CoreMark 1.0 : %f / %s %s",
|
||||
default_num_contexts * results[0].iterations
|
||||
/ time_in_secs(total_time),
|
||||
COMPILER_VERSION,
|
||||
COMPILER_FLAGS);
|
||||
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
|
||||
ee_printf(" / %s", MEM_LOCATION);
|
||||
#else
|
||||
ee_printf(" / %s", mem_name[MEM_METHOD]);
|
||||
#endif
|
||||
|
||||
#if (MULTITHREAD > 1)
|
||||
ee_printf(" / %d:%s", default_num_contexts, PARALLEL_METHOD);
|
||||
#endif
|
||||
ee_printf("\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (total_errors > 0)
|
||||
ee_printf("Errors detected\n");
|
||||
if (total_errors < 0)
|
||||
ee_printf(
|
||||
"Cannot validate operation for these seed values, please compare "
|
||||
"with results on a known platform.\n");
|
||||
|
||||
#if (MEM_METHOD == MEM_MALLOC)
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
portable_free(results[i].memblock[0]);
|
||||
#endif
|
||||
/* And last call any target specific code for finalizing */
|
||||
portable_fini(&(results[0].port));
|
||||
|
||||
return MAIN_RETURN_VAL;
|
||||
}
|
|
@ -0,0 +1,359 @@
|
|||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/*
|
||||
Topic: Description
|
||||
Matrix manipulation benchmark
|
||||
|
||||
This very simple algorithm forms the basis of many more complex
|
||||
algorithms.
|
||||
|
||||
The tight inner loop is the focus of many optimizations (compiler as
|
||||
well as hardware based) and is thus relevant for embedded processing.
|
||||
|
||||
The total available data space will be divided into 3 parts:
|
||||
NxN Matrix A - initialized with small values (upper 3/4 of the bits all zero).
|
||||
NxN Matrix B - initialized with medium values (upper half of the bits all zero).
|
||||
NxN Matrix C - used for the result.
|
||||
|
||||
The actual values for A and B must be derived based on input that is not
|
||||
available at compile time.
|
||||
*/
|
||||
/* Forward declarations of the matrix routines that matrix_test() calls
 * (and of matrix_test() itself, which core_bench_matrix() calls).
 * In all of them N is the matrix dimension, A and B are input matrices of
 * MATDAT elements and C is the result matrix of MATRES elements. */
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val);
|
||||
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval);
|
||||
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val);
|
||||
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
|
||||
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
|
||||
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
|
||||
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val);
|
||||
|
||||
/* Helper macros for the matrix benchmark.
 *
 * matrix_test_next(x)     - x + 1 (the argument is not parenthesized in
 *                           the expansion).
 * matrix_clip(x, y)       - keep the low 8 bits of x when y is non-zero,
 *                           otherwise the low 16 bits.
 * matrix_big(x)           - x with the bits of 0xf000 set.
 * bit_extract(x, from, to) - shift x right by `from` and keep the low `to`
 *                           bits of the result, i.e. `to` acts as a field
 *                           width rather than as an end position.
 */
#define matrix_test_next(x) (x + 1)
|
||||
#define matrix_clip(x, y) ((y) ? (x)&0x0ff : (x)&0x0ffff)
|
||||
#define matrix_big(x) (0xf000 | (x))
|
||||
#define bit_extract(x, from, to) (((x) >> (from)) & (~(0xffffffff << (to))))
|
||||
|
||||
#if CORE_DEBUG
|
||||
void
|
||||
printmat(MATDAT *A, ee_u32 N, char *name)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
if (j != 0)
|
||||
ee_printf(",");
|
||||
ee_printf("%d", A[i * N + j]);
|
||||
}
|
||||
ee_printf("\n");
|
||||
}
|
||||
}
|
||||
void
|
||||
printmatC(MATRES *C, ee_u32 N, char *name)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
if (j != 0)
|
||||
ee_printf(",");
|
||||
ee_printf("%d", C[i * N + j]);
|
||||
}
|
||||
ee_printf("\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/* Function: core_bench_matrix
|
||||
Benchmark function
|
||||
|
||||
Iterate <matrix_test> N times,
|
||||
changing the matrix values slightly by a constant amount each time.
|
||||
*/
|
||||
ee_u16
|
||||
core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc)
|
||||
{
|
||||
ee_u32 N = p->N;
|
||||
MATRES *C = p->C;
|
||||
MATDAT *A = p->A;
|
||||
MATDAT *B = p->B;
|
||||
MATDAT val = (MATDAT)seed;
|
||||
|
||||
crc = crc16(matrix_test(N, C, A, B, val), crc);
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Function: matrix_test
|
||||
Perform matrix manipulation.
|
||||
|
||||
Parameters:
|
||||
N - Dimensions of the matrix.
|
||||
C - memory for result matrix.
|
||||
A - input matrix
|
||||
B - operator matrix (not changed during operations)
|
||||
|
||||
Returns:
|
||||
A CRC value that captures all results calculated in the function.
|
||||
In particular, crc of the value calculated on the result matrix
|
||||
after each step by <matrix_sum>.
|
||||
|
||||
Operation:
|
||||
|
||||
1 - Add a constant value to all elements of a matrix.
|
||||
2 - Multiply a matrix by a constant.
|
||||
3 - Multiply a matrix by a vector.
|
||||
4 - Multiply a matrix by a matrix.
|
||||
5 - Add a constant value to all elements of a matrix.
|
||||
|
||||
After the last step, matrix A is back to original contents.
|
||||
*/
|
||||
ee_s16
|
||||
matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val)
|
||||
{
|
||||
ee_u16 crc = 0;
|
||||
MATDAT clipval = matrix_big(val);
|
||||
|
||||
matrix_add_const(N, A, val); /* make sure data changes */
|
||||
#if CORE_DEBUG
|
||||
printmat(A, N, "matrix_add_const");
|
||||
#endif
|
||||
matrix_mul_const(N, C, A, val);
|
||||
crc = crc16(matrix_sum(N, C, clipval), crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C, N, "matrix_mul_const");
|
||||
#endif
|
||||
matrix_mul_vect(N, C, A, B);
|
||||
crc = crc16(matrix_sum(N, C, clipval), crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C, N, "matrix_mul_vect");
|
||||
#endif
|
||||
matrix_mul_matrix(N, C, A, B);
|
||||
crc = crc16(matrix_sum(N, C, clipval), crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C, N, "matrix_mul_matrix");
|
||||
#endif
|
||||
matrix_mul_matrix_bitextract(N, C, A, B);
|
||||
crc = crc16(matrix_sum(N, C, clipval), crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C, N, "matrix_mul_matrix_bitextract");
|
||||
#endif
|
||||
|
||||
matrix_add_const(N, A, -val); /* return matrix to initial value */
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Function : matrix_init
|
||||
Initialize the memory block for matrix benchmarking.
|
||||
|
||||
Parameters:
|
||||
blksize - Size of memory to be initialized.
|
||||
memblk - Pointer to memory block.
|
||||
seed - Actual values chosen depend on the seed parameter.
|
||||
p - pointers to <mat_params> containing initialized matrixes.
|
||||
|
||||
Returns:
|
||||
Matrix dimensions.
|
||||
|
||||
Note:
|
||||
The seed parameter MUST be supplied from a source that cannot be
|
||||
determined at compile time
|
||||
*/
|
||||
ee_u32
|
||||
core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p)
|
||||
{
|
||||
ee_u32 N = 0;
|
||||
MATDAT *A;
|
||||
MATDAT *B;
|
||||
ee_s32 order = 1;
|
||||
MATDAT val;
|
||||
ee_u32 i = 0, j = 0;
|
||||
if (seed == 0)
|
||||
seed = 1;
|
||||
while (j < blksize)
|
||||
{
|
||||
i++;
|
||||
j = i * i * 2 * 4;
|
||||
}
|
||||
N = i - 1;
|
||||
A = (MATDAT *)align_mem(memblk);
|
||||
B = A + N * N;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
seed = ((order * seed) % 65536);
|
||||
val = (seed + order);
|
||||
val = matrix_clip(val, 0);
|
||||
B[i * N + j] = val;
|
||||
val = (val + order);
|
||||
val = matrix_clip(val, 1);
|
||||
A[i * N + j] = val;
|
||||
order++;
|
||||
}
|
||||
}
|
||||
|
||||
p->A = A;
|
||||
p->B = B;
|
||||
p->C = (MATRES *)align_mem(B + N * N);
|
||||
p->N = N;
|
||||
#if CORE_DEBUG
|
||||
printmat(A, N, "A");
|
||||
printmat(B, N, "B");
|
||||
#endif
|
||||
return N;
|
||||
}
|
||||
|
||||
/* Function: matrix_sum
|
||||
Calculate a function that depends on the values of elements in the
|
||||
matrix.
|
||||
|
||||
For each element, accumulate into a temporary variable.
|
||||
|
||||
As long as this value is under the parameter clipval,
|
||||
add 1 to the result if the element is bigger then the previous.
|
||||
|
||||
Otherwise, reset the accumulator and add 10 to the result.
|
||||
*/
|
||||
ee_s16
|
||||
matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval)
|
||||
{
|
||||
MATRES tmp = 0, prev = 0, cur = 0;
|
||||
ee_s16 ret = 0;
|
||||
ee_u32 i, j;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
cur = C[i * N + j];
|
||||
tmp += cur;
|
||||
if (tmp > clipval)
|
||||
{
|
||||
ret += 10;
|
||||
tmp = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
ret += (cur > prev) ? 1 : 0;
|
||||
}
|
||||
prev = cur;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_const
|
||||
Multiply a matrix by a constant.
|
||||
This could be used as a scaler for instance.
|
||||
*/
|
||||
void
|
||||
matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
C[i * N + j] = (MATRES)A[i * N + j] * (MATRES)val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_add_const
|
||||
Add a constant value to all elements of a matrix.
|
||||
*/
|
||||
void
|
||||
matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
A[i * N + j] += val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_vect
|
||||
Multiply a matrix by a vector.
|
||||
This is common in many simple filters (e.g. fir where a vector of
|
||||
coefficients is applied to the matrix.)
|
||||
*/
|
||||
void
|
||||
matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
C[i] = 0;
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
C[i] += (MATRES)A[i * N + j] * (MATRES)B[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_matrix
|
||||
Multiply a matrix by a matrix.
|
||||
Basic code is used in many algorithms, mostly with minor changes such as
|
||||
scaling.
|
||||
*/
|
||||
void
|
||||
matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
|
||||
{
|
||||
ee_u32 i, j, k;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
C[i * N + j] = 0;
|
||||
for (k = 0; k < N; k++)
|
||||
{
|
||||
C[i * N + j] += (MATRES)A[i * N + k] * (MATRES)B[k * N + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_matrix_bitextract
|
||||
Multiply a matrix by a matrix, and extract some bits from the result.
|
||||
Basic code is used in many algorithms, mostly with minor changes such as
|
||||
scaling.
|
||||
*/
|
||||
void
|
||||
matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
|
||||
{
|
||||
ee_u32 i, j, k;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
C[i * N + j] = 0;
|
||||
for (k = 0; k < N; k++)
|
||||
{
|
||||
MATRES tmp = (MATRES)A[i * N + k] * (MATRES)B[k * N + j];
|
||||
C[i * N + j] += bit_extract(tmp, 2, 4) * bit_extract(tmp, 5, 7);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,330 @@
|
|||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/* local functions */
|
||||
enum CORE_STATE core_state_transition(ee_u8 **instr, ee_u32 *transition_count);
|
||||
|
||||
/*
|
||||
Topic: Description
|
||||
Simple state machines like this one are used in many embedded products.
|
||||
|
||||
For more complex state machines, sometimes a state transition table
|
||||
implementation is used instead, trading speed of direct coding for ease of
|
||||
maintenance.
|
||||
|
||||
Since the main goal of using a state machine in CoreMark is to exercise
|
||||
the switch/if behaviour, we are using a small Moore machine.
|
||||
|
||||
In particular, this machine tests type of string input,
|
||||
trying to determine whether the input is a number or something else.
|
||||
(see core_state.png).
|
||||
*/
|
||||
|
||||
/* Function: core_bench_state
|
||||
Benchmark function
|
||||
|
||||
Go over the input twice, once direct, and once after introducing some
|
||||
corruption.
|
||||
*/
|
||||
ee_u16
|
||||
core_bench_state(ee_u32 blksize,
|
||||
ee_u8 *memblock,
|
||||
ee_s16 seed1,
|
||||
ee_s16 seed2,
|
||||
ee_s16 step,
|
||||
ee_u16 crc)
|
||||
{
|
||||
ee_u32 final_counts[NUM_CORE_STATES];
|
||||
ee_u32 track_counts[NUM_CORE_STATES];
|
||||
ee_u8 *p = memblock;
|
||||
ee_u32 i;
|
||||
|
||||
#if CORE_DEBUG
|
||||
ee_printf("State Bench: %d,%d,%d,%04x\n", seed1, seed2, step, crc);
|
||||
#endif
|
||||
for (i = 0; i < NUM_CORE_STATES; i++)
|
||||
{
|
||||
final_counts[i] = track_counts[i] = 0;
|
||||
}
|
||||
/* run the state machine over the input */
|
||||
while (*p != 0)
|
||||
{
|
||||
enum CORE_STATE fstate = core_state_transition(&p, track_counts);
|
||||
final_counts[fstate]++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("%d,", fstate);
|
||||
}
|
||||
ee_printf("\n");
|
||||
#else
|
||||
}
|
||||
#endif
|
||||
p = memblock;
|
||||
while (p < (memblock + blksize))
|
||||
{ /* insert some corruption */
|
||||
if (*p != ',')
|
||||
*p ^= (ee_u8)seed1;
|
||||
p += step;
|
||||
}
|
||||
p = memblock;
|
||||
/* run the state machine over the input again */
|
||||
while (*p != 0)
|
||||
{
|
||||
enum CORE_STATE fstate = core_state_transition(&p, track_counts);
|
||||
final_counts[fstate]++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("%d,", fstate);
|
||||
}
|
||||
ee_printf("\n");
|
||||
#else
|
||||
}
|
||||
#endif
|
||||
p = memblock;
|
||||
while (p < (memblock + blksize))
|
||||
{ /* undo corruption is seed1 and seed2 are equal */
|
||||
if (*p != ',')
|
||||
*p ^= (ee_u8)seed2;
|
||||
p += step;
|
||||
}
|
||||
/* end timing */
|
||||
for (i = 0; i < NUM_CORE_STATES; i++)
|
||||
{
|
||||
crc = crcu32(final_counts[i], crc);
|
||||
crc = crcu32(track_counts[i], crc);
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Default initialization patterns */
|
||||
static ee_u8 *intpat[4]
|
||||
= { (ee_u8 *)"5012", (ee_u8 *)"1234", (ee_u8 *)"-874", (ee_u8 *)"+122" };
|
||||
static ee_u8 *floatpat[4] = { (ee_u8 *)"35.54400",
|
||||
(ee_u8 *)".1234500",
|
||||
(ee_u8 *)"-110.700",
|
||||
(ee_u8 *)"+0.64400" };
|
||||
static ee_u8 *scipat[4] = { (ee_u8 *)"5.500e+3",
|
||||
(ee_u8 *)"-.123e-2",
|
||||
(ee_u8 *)"-87e+832",
|
||||
(ee_u8 *)"+0.6e-12" };
|
||||
static ee_u8 *errpat[4] = { (ee_u8 *)"T0.3e-1F",
|
||||
(ee_u8 *)"-T.T++Tq",
|
||||
(ee_u8 *)"1T3.4e4z",
|
||||
(ee_u8 *)"34.0e-T^" };
|
||||
|
||||
/* Function: core_init_state
|
||||
Initialize the input data for the state machine.
|
||||
|
||||
Populate the input with several predetermined strings, interspersed.
|
||||
Actual patterns chosen depend on the seed parameter.
|
||||
|
||||
Note:
|
||||
The seed parameter MUST be supplied from a source that cannot be
|
||||
determined at compile time
|
||||
*/
|
||||
void
|
||||
core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p)
|
||||
{
|
||||
ee_u32 total = 0, next = 0, i;
|
||||
ee_u8 *buf = 0;
|
||||
#if CORE_DEBUG
|
||||
ee_u8 *start = p;
|
||||
ee_printf("State: %d,%d\n", size, seed);
|
||||
#endif
|
||||
size--;
|
||||
next = 0;
|
||||
while ((total + next + 1) < size)
|
||||
{
|
||||
if (next > 0)
|
||||
{
|
||||
for (i = 0; i < next; i++)
|
||||
*(p + total + i) = buf[i];
|
||||
*(p + total + i) = ',';
|
||||
total += next + 1;
|
||||
}
|
||||
seed++;
|
||||
switch (seed & 0x7)
|
||||
{
|
||||
case 0: /* int */
|
||||
case 1: /* int */
|
||||
case 2: /* int */
|
||||
buf = intpat[(seed >> 3) & 0x3];
|
||||
next = 4;
|
||||
break;
|
||||
case 3: /* float */
|
||||
case 4: /* float */
|
||||
buf = floatpat[(seed >> 3) & 0x3];
|
||||
next = 8;
|
||||
break;
|
||||
case 5: /* scientific */
|
||||
case 6: /* scientific */
|
||||
buf = scipat[(seed >> 3) & 0x3];
|
||||
next = 8;
|
||||
break;
|
||||
case 7: /* invalid */
|
||||
buf = errpat[(seed >> 3) & 0x3];
|
||||
next = 8;
|
||||
break;
|
||||
default: /* Never happen, just to make some compilers happy */
|
||||
break;
|
||||
}
|
||||
}
|
||||
size++;
|
||||
while (total < size)
|
||||
{ /* fill the rest with 0 */
|
||||
*(p + total) = 0;
|
||||
total++;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("State Input: %s\n", start);
|
||||
#endif
|
||||
}
|
||||
|
||||
static ee_u8
|
||||
ee_isdigit(ee_u8 c)
|
||||
{
|
||||
ee_u8 retval;
|
||||
retval = ((c >= '0') & (c <= '9')) ? 1 : 0;
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* Function: core_state_transition
|
||||
Actual state machine.
|
||||
|
||||
The state machine will continue scanning until either:
|
||||
1 - an invalid input is detcted.
|
||||
2 - a valid number has been detected.
|
||||
|
||||
The input pointer is updated to point to the end of the token, and the
|
||||
end state is returned (either specific format determined or invalid).
|
||||
*/
|
||||
|
||||
enum CORE_STATE
|
||||
core_state_transition(ee_u8 **instr, ee_u32 *transition_count)
|
||||
{
|
||||
ee_u8 * str = *instr;
|
||||
ee_u8 NEXT_SYMBOL;
|
||||
enum CORE_STATE state = CORE_START;
|
||||
for (; *str && state != CORE_INVALID; str++)
|
||||
{
|
||||
NEXT_SYMBOL = *str;
|
||||
if (NEXT_SYMBOL == ',') /* end of this input */
|
||||
{
|
||||
str++;
|
||||
break;
|
||||
}
|
||||
switch (state)
|
||||
{
|
||||
case CORE_START:
|
||||
if (ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INT;
|
||||
}
|
||||
else if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
|
||||
{
|
||||
state = CORE_S1;
|
||||
}
|
||||
else if (NEXT_SYMBOL == '.')
|
||||
{
|
||||
state = CORE_FLOAT;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INVALID]++;
|
||||
}
|
||||
transition_count[CORE_START]++;
|
||||
break;
|
||||
case CORE_S1:
|
||||
if (ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INT;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
else if (NEXT_SYMBOL == '.')
|
||||
{
|
||||
state = CORE_FLOAT;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
break;
|
||||
case CORE_INT:
|
||||
if (NEXT_SYMBOL == '.')
|
||||
{
|
||||
state = CORE_FLOAT;
|
||||
transition_count[CORE_INT]++;
|
||||
}
|
||||
else if (!ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_FLOAT:
|
||||
if (NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e')
|
||||
{
|
||||
state = CORE_S2;
|
||||
transition_count[CORE_FLOAT]++;
|
||||
}
|
||||
else if (!ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_FLOAT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_S2:
|
||||
if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
|
||||
{
|
||||
state = CORE_EXPONENT;
|
||||
transition_count[CORE_S2]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_S2]++;
|
||||
}
|
||||
break;
|
||||
case CORE_EXPONENT:
|
||||
if (ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_SCIENTIFIC;
|
||||
transition_count[CORE_EXPONENT]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_EXPONENT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_SCIENTIFIC:
|
||||
if (!ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INVALID]++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
*instr = str;
|
||||
return state;
|
||||
}
|
|
@ -0,0 +1,249 @@
|
|||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/* Function: get_seed
|
||||
Get a value that cannot be determined at compile time.
|
||||
|
||||
Since different embedded systems and compilers are used, 3 different
|
||||
methods are provided: 1 - Using a volatile variable. This method is only
|
||||
valid if the compiler is forced to generate code that reads the value of a
|
||||
volatile variable from memory at run time. Please note, if using this method,
|
||||
you would need to modify core_portme.c to generate training profile. 2 -
|
||||
Command line arguments. This is the preferred method if command line
|
||||
arguments are supported. 3 - System function. If none of the first 2 methods
|
||||
is available on the platform, a system function which is not a stub can be
|
||||
used.
|
||||
|
||||
e.g. read the value on GPIO pins connected to switches, or invoke
|
||||
special simulator functions.
|
||||
*/
|
||||
#if (SEED_METHOD == SEED_VOLATILE)
|
||||
extern volatile ee_s32 seed1_volatile;
|
||||
extern volatile ee_s32 seed2_volatile;
|
||||
extern volatile ee_s32 seed3_volatile;
|
||||
extern volatile ee_s32 seed4_volatile;
|
||||
extern volatile ee_s32 seed5_volatile;
|
||||
ee_s32
|
||||
get_seed_32(int i)
|
||||
{
|
||||
ee_s32 retval;
|
||||
switch (i)
|
||||
{
|
||||
case 1:
|
||||
retval = seed1_volatile;
|
||||
break;
|
||||
case 2:
|
||||
retval = seed2_volatile;
|
||||
break;
|
||||
case 3:
|
||||
retval = seed3_volatile;
|
||||
break;
|
||||
case 4:
|
||||
retval = seed4_volatile;
|
||||
break;
|
||||
case 5:
|
||||
retval = seed5_volatile;
|
||||
break;
|
||||
default:
|
||||
retval = 0;
|
||||
break;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
#elif (SEED_METHOD == SEED_ARG)
|
||||
ee_s32
|
||||
parseval(char *valstring)
|
||||
{
|
||||
ee_s32 retval = 0;
|
||||
ee_s32 neg = 1;
|
||||
int hexmode = 0;
|
||||
if (*valstring == '-')
|
||||
{
|
||||
neg = -1;
|
||||
valstring++;
|
||||
}
|
||||
if ((valstring[0] == '0') && (valstring[1] == 'x'))
|
||||
{
|
||||
hexmode = 1;
|
||||
valstring += 2;
|
||||
}
|
||||
/* first look for digits */
|
||||
if (hexmode)
|
||||
{
|
||||
while (((*valstring >= '0') && (*valstring <= '9'))
|
||||
|| ((*valstring >= 'a') && (*valstring <= 'f')))
|
||||
{
|
||||
ee_s32 digit = *valstring - '0';
|
||||
if (digit > 9)
|
||||
digit = 10 + *valstring - 'a';
|
||||
retval *= 16;
|
||||
retval += digit;
|
||||
valstring++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while ((*valstring >= '0') && (*valstring <= '9'))
|
||||
{
|
||||
ee_s32 digit = *valstring - '0';
|
||||
retval *= 10;
|
||||
retval += digit;
|
||||
valstring++;
|
||||
}
|
||||
}
|
||||
/* now add qualifiers */
|
||||
if (*valstring == 'K')
|
||||
retval *= 1024;
|
||||
if (*valstring == 'M')
|
||||
retval *= 1024 * 1024;
|
||||
|
||||
retval *= neg;
|
||||
return retval;
|
||||
}
|
||||
|
||||
ee_s32
|
||||
get_seed_args(int i, int argc, char *argv[])
|
||||
{
|
||||
if (argc > i)
|
||||
return parseval(argv[i]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif (SEED_METHOD == SEED_FUNC)
|
||||
/* If using OS based function, you must define and implement the functions below
|
||||
* in core_portme.h and core_portme.c ! */
|
||||
ee_s32
|
||||
get_seed_32(int i)
|
||||
{
|
||||
ee_s32 retval;
|
||||
switch (i)
|
||||
{
|
||||
case 1:
|
||||
retval = portme_sys1();
|
||||
break;
|
||||
case 2:
|
||||
retval = portme_sys2();
|
||||
break;
|
||||
case 3:
|
||||
retval = portme_sys3();
|
||||
break;
|
||||
case 4:
|
||||
retval = portme_sys4();
|
||||
break;
|
||||
case 5:
|
||||
retval = portme_sys5();
|
||||
break;
|
||||
default:
|
||||
retval = 0;
|
||||
break;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Function: crc*
|
||||
Service functions to calculate 16b CRC code.
|
||||
|
||||
*/
|
||||
ee_u16
|
||||
crcu8(ee_u8 data, ee_u16 crc)
|
||||
{
|
||||
ee_u8 i = 0, x16 = 0, carry = 0;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1));
|
||||
data >>= 1;
|
||||
|
||||
if (x16 == 1)
|
||||
{
|
||||
crc ^= 0x4002;
|
||||
carry = 1;
|
||||
}
|
||||
else
|
||||
carry = 0;
|
||||
crc >>= 1;
|
||||
if (carry)
|
||||
crc |= 0x8000;
|
||||
else
|
||||
crc &= 0x7fff;
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
ee_u16
|
||||
crcu16(ee_u16 newval, ee_u16 crc)
|
||||
{
|
||||
crc = crcu8((ee_u8)(newval), crc);
|
||||
crc = crcu8((ee_u8)((newval) >> 8), crc);
|
||||
return crc;
|
||||
}
|
||||
ee_u16
|
||||
crcu32(ee_u32 newval, ee_u16 crc)
|
||||
{
|
||||
crc = crc16((ee_s16)newval, crc);
|
||||
crc = crc16((ee_s16)(newval >> 16), crc);
|
||||
return crc;
|
||||
}
|
||||
ee_u16
|
||||
crc16(ee_s16 newval, ee_u16 crc)
|
||||
{
|
||||
return crcu16((ee_u16)newval, crc);
|
||||
}
|
||||
|
||||
ee_u8
|
||||
check_data_types()
|
||||
{
|
||||
ee_u8 retval = 0;
|
||||
if (sizeof(ee_u8) != 1)
|
||||
{
|
||||
ee_printf("ERROR: ee_u8 is not an 8b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_u16) != 2)
|
||||
{
|
||||
ee_printf("ERROR: ee_u16 is not a 16b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_s16) != 2)
|
||||
{
|
||||
ee_printf("ERROR: ee_s16 is not a 16b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_s32) != 4)
|
||||
{
|
||||
ee_printf("ERROR: ee_s32 is not a 32b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_u32) != 4)
|
||||
{
|
||||
ee_printf("ERROR: ee_u32 is not a 32b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_ptr_int) != sizeof(int *))
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n");
|
||||
retval++;
|
||||
}
|
||||
if (retval > 0)
|
||||
{
|
||||
ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n");
|
||||
}
|
||||
return retval;
|
||||
}
|
|
@ -0,0 +1,183 @@
|
|||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
/* Topic: Description
|
||||
This file contains declarations of the various benchmark functions.
|
||||
*/
|
||||
|
||||
/* Configuration: TOTAL_DATA_SIZE
|
||||
Define total size for data algorithms will operate on
|
||||
*/
|
||||
#ifndef TOTAL_DATA_SIZE
/* Parenthesized so the macro expands as a single value inside larger
   expressions such as x / TOTAL_DATA_SIZE. */
#define TOTAL_DATA_SIZE (2 * 1000)
#endif
|
||||
|
||||
#define SEED_ARG 0
|
||||
#define SEED_FUNC 1
|
||||
#define SEED_VOLATILE 2
|
||||
|
||||
#define MEM_STATIC 0
|
||||
#define MEM_MALLOC 1
|
||||
#define MEM_STACK 2
|
||||
|
||||
#include "core_portme.h"
|
||||
|
||||
#if HAS_STDIO
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
#if HAS_PRINTF
|
||||
#define ee_printf printf
|
||||
#endif
|
||||
|
||||
/* Actual benchmark execution in iterate */
|
||||
void *iterate(void *pres);
|
||||
|
||||
/* Typedef: secs_ret
|
||||
For machines that have floating point support, get number of seconds as
|
||||
a double. Otherwise an unsigned int.
|
||||
*/
|
||||
#if HAS_FLOAT
|
||||
typedef double secs_ret;
|
||||
#else
|
||||
typedef ee_u32 secs_ret;
|
||||
#endif
|
||||
|
||||
#if MAIN_HAS_NORETURN
|
||||
#define MAIN_RETURN_VAL
|
||||
#define MAIN_RETURN_TYPE void
|
||||
#else
|
||||
#define MAIN_RETURN_VAL 0
|
||||
#define MAIN_RETURN_TYPE int
|
||||
#endif
|
||||
|
||||
void start_time(void);
|
||||
void stop_time(void);
|
||||
CORE_TICKS get_time(void);
|
||||
secs_ret time_in_secs(CORE_TICKS ticks);
|
||||
|
||||
/* Misc useful functions */
|
||||
ee_u16 crcu8(ee_u8 data, ee_u16 crc);
|
||||
ee_u16 crc16(ee_s16 newval, ee_u16 crc);
|
||||
ee_u16 crcu16(ee_u16 newval, ee_u16 crc);
|
||||
ee_u16 crcu32(ee_u32 newval, ee_u16 crc);
|
||||
ee_u8 check_data_types(void);
|
||||
void * portable_malloc(ee_size_t size);
|
||||
void portable_free(void *p);
|
||||
ee_s32 parseval(char *valstring);
|
||||
|
||||
/* Algorithm IDS */
|
||||
#define ID_LIST (1 << 0)
|
||||
#define ID_MATRIX (1 << 1)
|
||||
#define ID_STATE (1 << 2)
|
||||
#define ALL_ALGORITHMS_MASK (ID_LIST | ID_MATRIX | ID_STATE)
|
||||
#define NUM_ALGORITHMS 3
|
||||
|
||||
/* list data structures */
|
||||
typedef struct list_data_s
|
||||
{
|
||||
ee_s16 data16;
|
||||
ee_s16 idx;
|
||||
} list_data;
|
||||
|
||||
typedef struct list_head_s
|
||||
{
|
||||
struct list_head_s *next;
|
||||
struct list_data_s *info;
|
||||
} list_head;
|
||||
|
||||
/*matrix benchmark related stuff */
|
||||
#define MATDAT_INT 1
|
||||
#if MATDAT_INT
|
||||
typedef ee_s16 MATDAT;
|
||||
typedef ee_s32 MATRES;
|
||||
#else
|
||||
typedef ee_f16 MATDAT;
|
||||
typedef ee_f32 MATRES;
|
||||
#endif
|
||||
|
||||
typedef struct MAT_PARAMS_S
|
||||
{
|
||||
int N;
|
||||
MATDAT *A;
|
||||
MATDAT *B;
|
||||
MATRES *C;
|
||||
} mat_params;
|
||||
|
||||
/* state machine related stuff */
|
||||
/* List of all the possible states for the FSM */
|
||||
typedef enum CORE_STATE
|
||||
{
|
||||
CORE_START = 0,
|
||||
CORE_INVALID,
|
||||
CORE_S1,
|
||||
CORE_S2,
|
||||
CORE_INT,
|
||||
CORE_FLOAT,
|
||||
CORE_EXPONENT,
|
||||
CORE_SCIENTIFIC,
|
||||
NUM_CORE_STATES
|
||||
} core_state_e;
|
||||
|
||||
/* Helper structure to hold results */
|
||||
typedef struct RESULTS_S
|
||||
{
|
||||
/* inputs */
|
||||
ee_s16 seed1; /* Initializing seed */
|
||||
ee_s16 seed2; /* Initializing seed */
|
||||
ee_s16 seed3; /* Initializing seed */
|
||||
void * memblock[4]; /* Pointer to safe memory location */
|
||||
ee_u32 size; /* Size of the data */
|
||||
ee_u32 iterations; /* Number of iterations to execute */
|
||||
ee_u32 execs; /* Bitmask of operations to execute */
|
||||
struct list_head_s *list;
|
||||
mat_params mat;
|
||||
/* outputs */
|
||||
ee_u16 crc;
|
||||
ee_u16 crclist;
|
||||
ee_u16 crcmatrix;
|
||||
ee_u16 crcstate;
|
||||
ee_s16 err;
|
||||
/* Multithread specific */
|
||||
core_portable port;
|
||||
} core_results;
|
||||
|
||||
/* Multicore execution handling */
|
||||
#if (MULTITHREAD > 1)
|
||||
ee_u8 core_start_parallel(core_results *res);
|
||||
ee_u8 core_stop_parallel(core_results *res);
|
||||
#endif
|
||||
|
||||
/* list benchmark functions */
|
||||
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed);
|
||||
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx);
|
||||
|
||||
/* state benchmark functions */
|
||||
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p);
|
||||
ee_u16 core_bench_state(ee_u32 blksize,
|
||||
ee_u8 *memblock,
|
||||
ee_s16 seed1,
|
||||
ee_s16 seed2,
|
||||
ee_s16 step,
|
||||
ee_u16 crc);
|
||||
|
||||
/* matrix benchmark functions */
|
||||
ee_u32 core_init_matrix(ee_u32 blksize,
|
||||
void * memblk,
|
||||
ee_s32 seed,
|
||||
mat_params *p);
|
||||
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc);
|
|
@ -0,0 +1,6 @@
|
|||
SRCS := ../common/init.S src/dhrystone_main.c src/dhrystone.c src/util.c
|
||||
APP := dhrystone
|
||||
CCFLAGS := -O3 -fno-inline
|
||||
MAX_CYCLES := 1000000
|
||||
|
||||
include ../common/src_only_app.mk
|
|
@ -0,0 +1,29 @@
|
|||
Copyright (c) 2012-2015, The Regents of the University of California (Regents).
|
||||
All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. Neither the name of the Regents nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
|
||||
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
|
||||
OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
|
||||
BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
|
||||
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
|
||||
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||
|
||||
dhrystone.c, dhrystone_main.c and dhrystone.h were found at
|
||||
https://github.com/riscv/riscv-tests. Note: the util files here are not the
|
||||
ones found there. They're stub files I wrote for running against the Hazard5
|
||||
CXXRTL testbench.
|
|
@ -0,0 +1,181 @@
|
|||
// See LICENSE for license details.
|
||||
|
||||
#pragma GCC optimize ("no-inline")
|
||||
|
||||
#include "dhrystone.h"
|
||||
|
||||
#ifndef REG
|
||||
#define REG
|
||||
/* REG becomes defined as empty */
|
||||
/* i.e. no register variables */
|
||||
#else
|
||||
#undef REG
|
||||
#define REG register
|
||||
#endif
|
||||
|
||||
extern int Int_Glob;
|
||||
extern char Ch_1_Glob;
|
||||
|
||||
|
||||
/* Proc_6: store an enumeration value derived from Enum_Val_Par through */
/* Enum_Ref_Par. The target first receives a copy of Enum_Val_Par, is */
/* overwritten with Ident_4 when Func_3 (Enum_Val_Par) returns zero, and */
/* is then set again by the switch below for every case except Ident_4. */
/* No return type is written and no value is returned. */
Proc_6 (Enum_Val_Par, Enum_Ref_Par)
|
||||
/*********************************/
|
||||
/* executed once */
|
||||
/* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
|
||||
|
||||
Enumeration Enum_Val_Par;
|
||||
Enumeration *Enum_Ref_Par;
|
||||
{
|
||||
*Enum_Ref_Par = Enum_Val_Par;
|
||||
if (! Func_3 (Enum_Val_Par))
|
||||
/* then, not executed */
|
||||
*Enum_Ref_Par = Ident_4;
|
||||
switch (Enum_Val_Par)
|
||||
{
|
||||
case Ident_1:
|
||||
*Enum_Ref_Par = Ident_1;
|
||||
break;
|
||||
case Ident_2:
|
||||
/* the result for Ident_2 depends on the global Int_Glob */
if (Int_Glob > 100)
|
||||
/* then */
|
||||
*Enum_Ref_Par = Ident_1;
|
||||
else *Enum_Ref_Par = Ident_4;
|
||||
break;
|
||||
case Ident_3: /* executed */
|
||||
*Enum_Ref_Par = Ident_2;
|
||||
break;
|
||||
/* Ident_4 keeps whatever was stored before the switch */
case Ident_4: break;
|
||||
case Ident_5:
|
||||
*Enum_Ref_Par = Ident_3;
|
||||
break;
|
||||
} /* switch */
|
||||
} /* Proc_6 */
|
||||
|
||||
|
||||
/* Proc_7: store Int_1_Par_Val + 2 + Int_2_Par_Val through Int_Par_Ref. */
/* No return type is written and no value is returned. */
Proc_7 (Int_1_Par_Val, Int_2_Par_Val, Int_Par_Ref)
|
||||
/**********************************************/
|
||||
/* executed three times */
|
||||
/* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */
|
||||
/* Int_Par_Ref becomes 7 */
|
||||
/* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
|
||||
/* Int_Par_Ref becomes 17 */
|
||||
/* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
|
||||
/* Int_Par_Ref becomes 18 */
|
||||
One_Fifty Int_1_Par_Val;
|
||||
One_Fifty Int_2_Par_Val;
|
||||
One_Fifty *Int_Par_Ref;
|
||||
{
|
||||
One_Fifty Int_Loc;
|
||||
|
||||
/* the sum is formed in two steps through the local Int_Loc */
Int_Loc = Int_1_Par_Val + 2;
|
||||
*Int_Par_Ref = Int_2_Par_Val + Int_Loc;
|
||||
} /* Proc_7 */
|
||||
|
||||
|
||||
/* Proc_8: write a fixed pattern into both arrays at positions derived */
/* from Int_Loc = Int_1_Par_Val + 5, then set the global Int_Glob to 5. */
/* The highest indices touched are Arr_1_Par_Ref [Int_Loc+30] and */
/* Arr_2_Par_Ref [Int_Loc+20] [Int_Loc]. */
/* No return type is written and no value is returned. */
Proc_8 (Arr_1_Par_Ref, Arr_2_Par_Ref, Int_1_Par_Val, Int_2_Par_Val)
|
||||
/*********************************************************************/
|
||||
/* executed once */
|
||||
/* Int_1_Par_Val == 3 */
|
||||
/* Int_2_Par_Val == 7 */
|
||||
Arr_1_Dim Arr_1_Par_Ref;
|
||||
Arr_2_Dim Arr_2_Par_Ref;
|
||||
int Int_1_Par_Val;
|
||||
int Int_2_Par_Val;
|
||||
{
|
||||
REG One_Fifty Int_Index;
|
||||
REG One_Fifty Int_Loc;
|
||||
|
||||
Int_Loc = Int_1_Par_Val + 5;
|
||||
Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val;
|
||||
Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc];
|
||||
Arr_1_Par_Ref [Int_Loc+30] = Int_Loc;
|
||||
/* two iterations: columns Int_Loc and Int_Loc+1 of row Int_Loc */
for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index)
|
||||
Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc;
|
||||
Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1;
|
||||
Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc];
|
||||
Int_Glob = 5;
|
||||
} /* Proc_8 */
|
||||
|
||||
|
||||
/*
 * Func_1: compares two characters.
 *
 *   Ch_1_Par_Val, Ch_2_Par_Val - characters to compare.
 *
 * Returns Ident_1 when the characters differ. When they are equal it stores
 * the character in the global Ch_1_Glob and returns Ident_2.
 * The copies through Ch_1_Loc / Ch_2_Loc are part of the benchmark's
 * statement mix and must stay.
 */
Enumeration Func_1 (Ch_1_Par_Val, Ch_2_Par_Val)
|
||||
/*************************************************/
|
||||
/* executed three times */
|
||||
/* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */
|
||||
/* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */
|
||||
/* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */
|
||||
|
||||
Capital_Letter Ch_1_Par_Val;
|
||||
Capital_Letter Ch_2_Par_Val;
|
||||
{
|
||||
Capital_Letter Ch_1_Loc;
|
||||
Capital_Letter Ch_2_Loc;
|
||||
|
||||
Ch_1_Loc = Ch_1_Par_Val;
|
||||
Ch_2_Loc = Ch_1_Loc;
|
||||
if (Ch_2_Loc != Ch_2_Par_Val)
|
||||
/* then, executed */
|
||||
return (Ident_1);
|
||||
else /* not executed */
|
||||
{
|
||||
Ch_1_Glob = Ch_1_Loc;
|
||||
return (Ident_2);
|
||||
}
|
||||
} /* Func_1 */
|
||||
|
||||
|
||||
/*
 * Func_2: string-comparison kernel of the benchmark.
 *
 *   Str_1_Par_Ref, Str_2_Par_Ref - NUL-terminated strings (Str_30 buffers).
 *
 * Returns true only when strcmp(Str_1_Par_Ref, Str_2_Par_Ref) > 0 (or when
 * Ch_Loc ends up as 'R', which the code below never produces); in the
 * strcmp > 0 case it also stores Int_Loc + 7 into the global Int_Glob.
 * Otherwise returns false.
 *
 * NOTE(review): the while loop only advances when Func_1 returns Ident_1,
 * i.e. when Str_1_Par_Ref[2] != Str_2_Par_Ref[3]; if those characters were
 * equal, Int_Loc would never change and the loop would not terminate. Ch_Loc
 * is assigned only inside that loop body. The benchmark inputs quoted below
 * satisfy the condition.
 */
Boolean Func_2 (Str_1_Par_Ref, Str_2_Par_Ref)
|
||||
/*************************************************/
|
||||
/* executed once */
|
||||
/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
|
||||
/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
|
||||
|
||||
Str_30 Str_1_Par_Ref;
|
||||
Str_30 Str_2_Par_Ref;
|
||||
{
|
||||
REG One_Thirty Int_Loc;
|
||||
Capital_Letter Ch_Loc;
|
||||
|
||||
Int_Loc = 2;
|
||||
while (Int_Loc <= 2) /* loop body executed once */
|
||||
if (Func_1 (Str_1_Par_Ref[Int_Loc],
|
||||
Str_2_Par_Ref[Int_Loc+1]) == Ident_1)
|
||||
/* then, executed */
|
||||
{
|
||||
Ch_Loc = 'A';
|
||||
Int_Loc += 1;
|
||||
} /* if, while */
|
||||
if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
|
||||
/* then, not executed */
|
||||
Int_Loc = 7;
|
||||
if (Ch_Loc == 'R')
|
||||
/* then, not executed */
|
||||
return (true);
|
||||
else /* executed */
|
||||
{
|
||||
if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
|
||||
/* then, not executed */
|
||||
{
|
||||
Int_Loc += 7;
|
||||
Int_Glob = Int_Loc;
|
||||
return (true);
|
||||
}
|
||||
else /* executed */
|
||||
return (false);
|
||||
} /* if Ch_Loc */
|
||||
} /* Func_2 */
|
||||
|
||||
|
||||
/*
 * Func_3: returns true when Enum_Par_Val equals Ident_3, false otherwise.
 *
 *   Enum_Par_Val - enumeration value to test.
 *
 * The copy into Enum_Loc and the explicit if/else are part of the
 * benchmark's statement mix. No globals are touched.
 */
Boolean Func_3 (Enum_Par_Val)
|
||||
/***************************/
|
||||
/* executed once */
|
||||
/* Enum_Par_Val == Ident_3 */
|
||||
Enumeration Enum_Par_Val;
|
||||
{
|
||||
Enumeration Enum_Loc;
|
||||
|
||||
Enum_Loc = Enum_Par_Val;
|
||||
if (Enum_Loc == Ident_3)
|
||||
/* then, executed */
|
||||
return (true);
|
||||
else /* not executed */
|
||||
return (false);
|
||||
} /* Func_3 */
|
|
@ -0,0 +1,477 @@
|
|||
// See LICENSE for license details.
|
||||
|
||||
#ifndef _DHRYSTONE_H
|
||||
#define _DHRYSTONE_H
|
||||
|
||||
/****************** "DHRYSTONE" Benchmark Program ***************************/
|
||||
#define Version "C, Version 2.2"
|
||||
/* File: dhry_1.c (part 2 of 3)
|
||||
* Author: Reinhold P. Weicker
|
||||
* Siemens Nixdorf, Paderborn/Germany
|
||||
* weicker@specbench.org
|
||||
* Date: May 25, 1988
|
||||
* Modified: Steven Pemberton, CWI, Amsterdam; Steven.Pemberton@cwi.nl
|
||||
* Date: October, 1993; March 1995
|
||||
* Included both files into one source, that gets compiled
|
||||
* in two passes. Made program auto-compiling, and auto-running,
|
||||
* and generally made it much easier to use.
|
||||
*
|
||||
* Original Version (in Ada) published in
|
||||
* "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
|
||||
* pp. 1013 - 1030, together with the statistics
|
||||
* on which the distribution of statements etc. is based.
|
||||
*
|
||||
* In this C version, the following C library functions are used:
|
||||
* - strcpy, strcmp (inside the measurement loop)
|
||||
* - printf, scanf (outside the measurement loop)
|
||||
* In addition, Berkeley UNIX system calls "times ()" or "time ()"
|
||||
* are used for execution time measurement. For measurements
|
||||
* on other systems, these calls have to be changed.
|
||||
*
|
||||
* Collection of Results:
|
||||
* Reinhold Weicker (address see above) and
|
||||
*
|
||||
* Rick Richardson
|
||||
* PC Research. Inc.
|
||||
* 94 Apple Orchard Drive
|
||||
* Tinton Falls, NJ 07724
|
||||
* Phone: (201) 389-8963 (9-17 EST)
|
||||
* Usenet: ...!uunet!pcrat!rick
|
||||
*
|
||||
* Please send results to Rick Richardson and/or Reinhold Weicker.
|
||||
* Complete information should be given on hardware and software used.
|
||||
* Hardware information includes: Machine type, CPU, type and size
|
||||
* of caches; for microprocessors: clock frequency, memory speed
|
||||
* (number of wait states).
|
||||
* Software information includes: Compiler (and runtime library)
|
||||
* manufacturer and version, compilation switches, OS version.
|
||||
* The Operating System version may give an indication about the compiler;
|
||||
* Dhrystone itself performs no OS calls in the measurement loop.
|
||||
*
|
||||
* The complete output generated by the program should be mailed
|
||||
* such that at least some checks for correctness can be made.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Defines: The following "Defines" are possible:
|
||||
* -DREG (default: Not defined)
|
||||
* As an approximation to what an average C programmer
|
||||
* might do, causes the "register" storage class to be applied
|
||||
* - for local variables, if they are used (dynamically)
|
||||
* five or more times
|
||||
* - for parameters if they are used (dynamically)
|
||||
* six or more times
|
||||
* Note that an optimal "register" strategy is
|
||||
* compiler-dependent, and that "register" declarations
|
||||
* do not necessarily lead to faster execution.
|
||||
* -DNOSTRUCTASSIGN (default: Not defined)
|
||||
* Define if the C compiler does not support
|
||||
* assignment of structures.
|
||||
* -DNOENUM (default: Not defined)
|
||||
* Define if the C compiler does not support
|
||||
* enumeration types.
|
||||
* -DTIMES (default)
|
||||
* -DTIME
|
||||
* The "times" function of UNIX (returning process times)
|
||||
* or the "time" function (returning wallclock time)
|
||||
* is used for measurement.
|
||||
* For single user machines, "time ()" is adequate. For
|
||||
* multi-user machines where you cannot get single-user
|
||||
* access, use the "times ()" function. If you have
|
||||
* neither, use a stopwatch in the dead of night.
|
||||
* "printf"s are provided marking the points "Start Timer"
|
||||
* and "Stop Timer". DO NOT use the UNIX "time(1)"
|
||||
* command, as this will measure the total time to
|
||||
* run this program, which will (erroneously) include
|
||||
* the time to allocate storage (malloc) and to perform
|
||||
* the initialization.
|
||||
* -DHZ=nnn
|
||||
* In Berkeley UNIX, the function "times" returns process
|
||||
* time in 1/HZ seconds, with HZ = 60 for most systems.
|
||||
* CHECK YOUR SYSTEM DESCRIPTION BEFORE YOU JUST APPLY
|
||||
* A VALUE.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* History: Version C/2.1 was made for two reasons:
|
||||
*
|
||||
* 1) There was an obvious need for a common C version of
|
||||
* Dhrystone, since C is at present the most popular system
|
||||
* programming language for the class of processors
|
||||
* (microcomputers, minicomputers) where Dhrystone is used most.
|
||||
* There should be, as far as possible, only one C version of
|
||||
* Dhrystone such that results can be compared without
|
||||
* restrictions. In the past, the C versions distributed
|
||||
* by Rick Richardson (Version 1.1) and by Reinhold Weicker
|
||||
* had small (though not significant) differences.
|
||||
*
|
||||
* 2) As far as it is possible without changes to the Dhrystone
|
||||
* statistics, optimizing compilers should be prevented from
|
||||
* removing significant statements.
|
||||
*
|
||||
* This C version has been developed in cooperation with
|
||||
* Rick Richardson (Tinton Falls, NJ), it incorporates many
|
||||
* ideas from the "Version 1.1" distributed previously by
|
||||
* him over the UNIX network Usenet.
|
||||
* I also thank Chaim Benedelac (National Semiconductor),
|
||||
* David Ditzel (SUN), Earl Killian and John Mashey (MIPS),
|
||||
* Alan Smith and Rafael Saavedra-Barrera (UC at Berkeley)
|
||||
* for their help with comments on earlier versions of the
|
||||
* benchmark.
|
||||
*
|
||||
* Changes: In the initialization part, this version follows mostly
|
||||
* Rick Richardson's version distributed via Usenet, not the
|
||||
* version distributed earlier via floppy disk by Reinhold Weicker.
|
||||
* As a concession to older compilers, names have been made
|
||||
* unique within the first 8 characters.
|
||||
* Inside the measurement loop, this version follows the
|
||||
* version previously distributed by Reinhold Weicker.
|
||||
*
|
||||
* At several places in the benchmark, code has been added,
|
||||
* but within the measurement loop only in branches that
|
||||
* are not executed. The intention is that optimizing compilers
|
||||
* should be prevented from moving code out of the measurement
|
||||
* loop, or from removing code altogether. Since the statements
|
||||
* that are executed within the measurement loop have NOT been
|
||||
* changed, the numbers defining the "Dhrystone distribution"
|
||||
* (distribution of statements, operand types and locality)
|
||||
* still hold. Except for sophisticated optimizing compilers,
|
||||
* execution times for this version should be the same as
|
||||
* for previous versions.
|
||||
*
|
||||
* Since it has proven difficult to subtract the time for the
|
||||
* measurement loop overhead in a correct way, the loop check
|
||||
* has been made a part of the benchmark. This does have
|
||||
* an impact - though a very minor one - on the distribution
|
||||
* statistics which have been updated for this version.
|
||||
*
|
||||
* All changes within the measurement loop are described
|
||||
* and discussed in the companion paper "Rationale for
|
||||
* Dhrystone version 2".
|
||||
*
|
||||
* Because of the self-imposed limitation that the order and
|
||||
* distribution of the executed statements should not be
|
||||
* changed, there are still cases where optimizing compilers
|
||||
* may not generate code for some statements. To a certain
|
||||
* degree, this is unavoidable for small synthetic benchmarks.
|
||||
* Users of the benchmark are advised to check code listings
|
||||
* whether code is generated for all statements of Dhrystone.
|
||||
*
|
||||
* Version 2.1 is identical to version 2.0 distributed via
|
||||
* the UNIX network Usenet in March 1988 except that it corrects
|
||||
* some minor deficiencies that were found by users of version 2.0.
|
||||
* The only change within the measurement loop is that a
|
||||
* non-executed "else" part was added to the "if" statement in
|
||||
* Func_3, and a non-executed "else" part removed from Proc_3.
|
||||
*
|
||||
* Version C/2.2, Steven Pemberton, October 1993
|
||||
* Functionally, identical to version 2.1; the changes are in
|
||||
* how you compile and use it:
|
||||
* - Everything is in one file now, but compiled in 2 passes
|
||||
* - Compile (and run) by running the file through the shell: "sh dhry.c"
|
||||
* - Uses the system definition of HZ if one can be found
|
||||
* - HZ must be defined, otherwise it won't compile (no defaults here)
|
||||
* - The (uninteresting) output is printed to stderr (dhry2 > /dev/null)
|
||||
* - The number of loops is passed as a parameter, rather than read
|
||||
* (dhry2 500000)
|
||||
* - If the number of loops is insufficient to get a good result,
|
||||
* it repeats it with loops*10 until it is enough (rather than just
|
||||
* stopping)
|
||||
* - Output says which sort of clock it is using, and the HZ value
|
||||
* - You can use -DREG instead of the -DREG=register of previous versions
|
||||
* - Some stylistic cleanups.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Compilation model and measurement (IMPORTANT):
|
||||
*
|
||||
* The following "ground rules" apply for measurements:
|
||||
* - Separate compilation
|
||||
* - No procedure merging
|
||||
* - Otherwise, compiler optimizations are allowed but should be indicated
|
||||
* - Default results are those without register declarations
|
||||
* See the companion paper "Rationale for Dhrystone Version 2" for a more
|
||||
* detailed discussion of these ground rules.
|
||||
*
|
||||
* For 16-Bit processors (e.g. 80186, 80286), times for all compilation
|
||||
* models ("small", "medium", "large" etc.) should be given if possible,
|
||||
* together with a definition of these models for the compiler system used.
|
||||
*
|
||||
**************************************************************************
|
||||
*
|
||||
* Dhrystone (C version) statistics:
|
||||
*
|
||||
* [Comment from the first distribution, updated for version 2.
|
||||
* Note that because of language differences, the numbers are slightly
|
||||
* different from the Ada version.]
|
||||
*
|
||||
* The following program contains statements of a high level programming
|
||||
* language (here: C) in a distribution considered representative:
|
||||
*
|
||||
* assignments 52 (51.0 %)
|
||||
* control statements 33 (32.4 %)
|
||||
* procedure, function calls 17 (16.7 %)
|
||||
*
|
||||
* 103 statements are dynamically executed. The program is balanced with
|
||||
* respect to the three aspects:
|
||||
*
|
||||
* - statement type
|
||||
* - operand type
|
||||
* - operand locality
|
||||
* operand global, local, parameter, or constant.
|
||||
*
|
||||
* The combination of these three aspects is balanced only approximately.
|
||||
*
|
||||
* 1. Statement Type:
|
||||
* ----------------- number
|
||||
*
|
||||
* V1 = V2 9
|
||||
* (incl. V1 = F(..)
|
||||
* V = Constant 12
|
||||
* Assignment, 7
|
||||
* with array element
|
||||
* Assignment, 6
|
||||
* with record component
|
||||
* --
|
||||
* 34 34
|
||||
*
|
||||
* X = Y +|-|"&&"|"|" Z 5
|
||||
* X = Y +|-|"==" Constant 6
|
||||
* X = X +|- 1 3
|
||||
* X = Y *|/ Z 2
|
||||
* X = Expression, 1
|
||||
* two operators
|
||||
* X = Expression, 1
|
||||
* three operators
|
||||
* --
|
||||
* 18 18
|
||||
*
|
||||
* if .... 14
|
||||
* with "else" 7
|
||||
* without "else" 7
|
||||
* executed 3
|
||||
* not executed 4
|
||||
* for ... 7 | counted every time
|
||||
* while ... 4 | the loop condition
|
||||
* do ... while 1 | is evaluated
|
||||
* switch ... 1
|
||||
* break 1
|
||||
* declaration with 1
|
||||
* initialization
|
||||
* --
|
||||
* 34 34
|
||||
*
|
||||
* P (...) procedure call 11
|
||||
* user procedure 10
|
||||
* library procedure 1
|
||||
* X = F (...)
|
||||
* function call 6
|
||||
* user function 5
|
||||
* library function 1
|
||||
* --
|
||||
* 17 17
|
||||
* ---
|
||||
* 103
|
||||
*
|
||||
* The average number of parameters in procedure or function calls
|
||||
* is 1.82 (not counting the function values as implicit parameters).
|
||||
*
|
||||
* 2. Operators
|
||||
* ------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* Arithmetic 32 50.8
|
||||
*
|
||||
* + 21 33.3
|
||||
* - 7 11.1
|
||||
* * 3 4.8
|
||||
* / (int div) 1 1.6
|
||||
*
|
||||
* Comparison 27 42.8
|
||||
*
|
||||
* == 9 14.3
|
||||
* /= 4 6.3
|
||||
* > 1 1.6
|
||||
* < 3 4.8
|
||||
* >= 1 1.6
|
||||
* <= 9 14.3
|
||||
*
|
||||
* Logic 4 6.3
|
||||
*
|
||||
* && (AND-THEN) 1 1.6
|
||||
* | (OR) 1 1.6
|
||||
* ! (NOT) 2 3.2
|
||||
*
|
||||
* -- -----
|
||||
* 63 100.1
|
||||
*
|
||||
*
|
||||
* 3. Operand Type (counted once per operand reference):
|
||||
* ---------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* Integer 175 72.3 %
|
||||
* Character 45 18.6 %
|
||||
* Pointer 12 5.0 %
|
||||
* String30 6 2.5 %
|
||||
* Array 2 0.8 %
|
||||
* Record 2 0.8 %
|
||||
* --- -------
|
||||
* 242 100.0 %
|
||||
*
|
||||
* When there is an access path leading to the final operand (e.g. a record
|
||||
* component), only the final data type on the access path is counted.
|
||||
*
|
||||
*
|
||||
* 4. Operand Locality:
|
||||
* -------------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* local variable 114 47.1 %
|
||||
* global variable 22 9.1 %
|
||||
* parameter 45 18.6 %
|
||||
* value 23 9.5 %
|
||||
* reference 22 9.1 %
|
||||
* function result 6 2.5 %
|
||||
* constant 55 22.7 %
|
||||
* --- -------
|
||||
* 242 100.0 %
|
||||
*
|
||||
* The program does not compute anything meaningful, but it is syntactically
|
||||
* and semantically correct. All variables have a value assigned to them
|
||||
* before they are used as a source operand.
|
||||
*
|
||||
* There has been no explicit effort to account for the effects of a
|
||||
* cache, or to balance the use of long or short displacements for code or
|
||||
* data.
|
||||
*
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
/* Compiler and system dependent definitions: */
|
||||
|
||||
/* variables for time measurement: */
|
||||
|
||||
/*
 * Timer selection.
 *
 * Exactly one branch defines the timing interface used by the driver:
 *   CLOCK_TYPE      - human-readable name of the clock source (printed).
 *   HZ              - ticks per second of that clock.
 *   Too_Small_Time  - minimum elapsed ticks for a run to be accepted.
 *   Start_Timer()   - stores the current tick count in Begin_Time.
 *   Stop_Timer()    - stores the current tick count in End_Time.
 *
 * Branch priority: TIME, then MSC_CLOCK, then __riscv, then times(2).
 * Begin_Time / End_Time (and read_csr for the __riscv branch) must be
 * provided by the including file.
 */
#ifdef TIME

#define CLOCK_TYPE "time()"
#undef HZ
#define HZ (1) /* time() returns time in seconds */
extern long time(); /* see library function "time" */
#define Too_Small_Time 2 /* Measurements should last at least 2 seconds */
#define Start_Timer() Begin_Time = time ( (long *) 0)
#define Stop_Timer() End_Time = time ( (long *) 0)

#else

#ifdef MSC_CLOCK /* Use Microsoft C hi-res clock */

#undef HZ
#undef TIMES
#include <time.h>
#define HZ CLK_TCK
#define CLOCK_TYPE "MSC clock()"
extern clock_t clock();
#define Too_Small_Time (2*HZ)
#define Start_Timer() Begin_Time = clock()
#define Stop_Timer() End_Time = clock()

#elif defined(__riscv)

/* Drop any command-line HZ first so the definition below is not an */
/* incompatible macro redefinition (the other branches do the same). */
#undef HZ
#define HZ 1000000
#define Too_Small_Time 1
#define CLOCK_TYPE "rdcycle()"
#define Start_Timer() Begin_Time = read_csr(mcycle)
#define Stop_Timer() End_Time = read_csr(mcycle)

#else
/* Use times(2) time function unless */
/* explicitly defined otherwise */
#define CLOCK_TYPE "times()"
#include <sys/types.h>
#include <sys/times.h>
/* HZ may come from the command line (-DHZ=xxx); otherwise try the system */
/* header. Previously a command-line HZ selected a deliberate syntax error, */
/* which made the documented -DHZ=xxx escape hatch unusable. */
#ifndef HZ /* Added by SP 900619 */
#include <sys/param.h> /* If your system doesn't have this, use -DHZ=xxx */
#endif /* HZ */
#ifndef HZ
#error You must define HZ (for example -DHZ=60)
#endif /* HZ */
#ifndef PASS2
struct tms time_info;
#endif
/*extern int times ();*/
/* see library function "times" */
#define Too_Small_Time (2*HZ)
/* Measurements should last at least about 2 seconds */
#define Start_Timer() times(&time_info); Begin_Time=(long)time_info.tms_utime
#define Stop_Timer() times(&time_info); End_Time = (long)time_info.tms_utime

#endif /* MSC_CLOCK */
#endif /* TIME */
|
||||
|
||||
|
||||
#define Mic_secs_Per_Second 1000000
|
||||
#define NUMBER_OF_RUNS 500 /* Default number of runs */
|
||||
|
||||
#ifdef NOSTRUCTASSIGN
|
||||
#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
|
||||
#else
|
||||
#define structassign(d, s) d = s
|
||||
#endif
|
||||
|
||||
#ifdef NOENUM
|
||||
#define Ident_1 0
|
||||
#define Ident_2 1
|
||||
#define Ident_3 2
|
||||
#define Ident_4 3
|
||||
#define Ident_5 4
|
||||
typedef int Enumeration;
|
||||
#else
|
||||
typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
|
||||
Enumeration;
|
||||
#endif
|
||||
/* for boolean and enumeration types in Ada, Pascal */
|
||||
|
||||
/* General definitions: */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
/* for strcpy, strcmp */
|
||||
|
||||
#define Null 0
|
||||
/* Value of a Null pointer */
|
||||
#define true 1
|
||||
#define false 0
|
||||
|
||||
typedef int One_Thirty;
|
||||
typedef int One_Fifty;
|
||||
typedef char Capital_Letter;
|
||||
typedef int Boolean;
|
||||
typedef char Str_30 [31];
|
||||
typedef int Arr_1_Dim [50];
|
||||
typedef int Arr_2_Dim [50] [50];
|
||||
|
||||
/*
 * Rec_Type / Rec_Pointer: the record type used by the benchmark, modelled
 * as a variant record (a discriminant plus a union of three layouts).
 * The driver allocates two of these and links them through Ptr_Comp.
 */
typedef struct record
|
||||
{
|
||||
/* Link to another record of the same type. */
struct record *Ptr_Comp;
|
||||
/* Discriminant; the benchmark only ever uses the var_1 layout below. */
Enumeration Discr;
|
||||
union {
|
||||
/* Layout 1: enumeration, integer and a 30-character string (+ NUL). */
struct {
|
||||
Enumeration Enum_Comp;
|
||||
int Int_Comp;
|
||||
char Str_Comp [31];
|
||||
} var_1;
|
||||
/* Layout 2: enumeration and a 30-character string (+ NUL). */
struct {
|
||||
Enumeration E_Comp_2;
|
||||
char Str_2_Comp [31];
|
||||
} var_2;
|
||||
/* Layout 3: two characters. */
struct {
|
||||
char Ch_1_Comp;
|
||||
char Ch_2_Comp;
|
||||
} var_3;
|
||||
} variant;
|
||||
} Rec_Type, *Rec_Pointer;
|
||||
|
||||
#endif
|
|
@ -0,0 +1,332 @@
|
|||
// See LICENSE for license details.
|
||||
|
||||
//**************************************************************************
|
||||
// Dhrystone benchmark
|
||||
//--------------------------------------------------------------------------
|
||||
//
|
||||
// This is the classic Dhrystone synthetic integer benchmark.
|
||||
//
|
||||
|
||||
#pragma GCC optimize ("no-inline")
|
||||
|
||||
#include "dhrystone.h"
|
||||
|
||||
void debug_printf(const char* str, ...);
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#include <alloca.h>
|
||||
|
||||
/* Global Variables: */
|
||||
|
||||
Rec_Pointer Ptr_Glob,
|
||||
Next_Ptr_Glob;
|
||||
int Int_Glob;
|
||||
Boolean Bool_Glob;
|
||||
char Ch_1_Glob,
|
||||
Ch_2_Glob;
|
||||
int Arr_1_Glob [50];
|
||||
int Arr_2_Glob [50] [50];
|
||||
|
||||
Enumeration Func_1 ();
|
||||
/* forward declaration necessary since Enumeration may not simply be int */
|
||||
|
||||
#ifndef REG
|
||||
Boolean Reg = false;
|
||||
#define REG
|
||||
/* REG becomes defined as empty */
|
||||
/* i.e. no register variables */
|
||||
#else
|
||||
Boolean Reg = true;
|
||||
#undef REG
|
||||
#define REG register
|
||||
#endif
|
||||
|
||||
Boolean Done;
|
||||
|
||||
long Begin_Time,
|
||||
End_Time,
|
||||
User_Time;
|
||||
long Microseconds,
|
||||
Dhrystones_Per_Second;
|
||||
|
||||
/* end of variables for time measurement */
|
||||
|
||||
|
||||
int main (int argc, char** argv)
|
||||
/*****/
|
||||
/* main program, corresponds to procedures */
|
||||
/* Main and Proc_0 in the Ada version */
|
||||
{
|
||||
One_Fifty Int_1_Loc;
|
||||
REG One_Fifty Int_2_Loc;
|
||||
One_Fifty Int_3_Loc;
|
||||
REG char Ch_Index;
|
||||
Enumeration Enum_Loc;
|
||||
Str_30 Str_1_Loc;
|
||||
Str_30 Str_2_Loc;
|
||||
REG int Run_Index;
|
||||
REG int Number_Of_Runs;
|
||||
|
||||
/* Arguments */
|
||||
Number_Of_Runs = NUMBER_OF_RUNS;
|
||||
|
||||
/* Initializations */
|
||||
|
||||
Next_Ptr_Glob = (Rec_Pointer) alloca (sizeof (Rec_Type));
|
||||
Ptr_Glob = (Rec_Pointer) alloca (sizeof (Rec_Type));
|
||||
|
||||
Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
|
||||
Ptr_Glob->Discr = Ident_1;
|
||||
Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
|
||||
Ptr_Glob->variant.var_1.Int_Comp = 40;
|
||||
strcpy (Ptr_Glob->variant.var_1.Str_Comp,
|
||||
"DHRYSTONE PROGRAM, SOME STRING");
|
||||
strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
|
||||
|
||||
Arr_2_Glob [8][7] = 10;
|
||||
/* Was missing in published program. Without this statement, */
|
||||
/* Arr_2_Glob [8][7] would have an undefined value. */
|
||||
/* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
|
||||
/* overflow may occur for this array element. */
|
||||
|
||||
debug_printf("\n");
|
||||
debug_printf("Dhrystone Benchmark, Version %s\n", Version);
|
||||
if (Reg)
|
||||
{
|
||||
debug_printf("Program compiled with 'register' attribute\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
debug_printf("Program compiled without 'register' attribute\n");
|
||||
}
|
||||
debug_printf("Using %s, HZ=%d\n", CLOCK_TYPE, HZ);
|
||||
debug_printf("\n");
|
||||
|
||||
Done = false;
|
||||
while (!Done) {
|
||||
debug_printf("Trying %d runs through Dhrystone:\n", Number_Of_Runs);
|
||||
|
||||
/***************/
|
||||
/* Start timer */
|
||||
/***************/
|
||||
|
||||
setStats(1);
|
||||
Start_Timer();
|
||||
|
||||
for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
|
||||
{
|
||||
|
||||
Proc_5();
|
||||
Proc_4();
|
||||
/* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
|
||||
Int_1_Loc = 2;
|
||||
Int_2_Loc = 3;
|
||||
strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
|
||||
Enum_Loc = Ident_2;
|
||||
Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
|
||||
/* Bool_Glob == 1 */
|
||||
while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
|
||||
{
|
||||
Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
|
||||
/* Int_3_Loc == 7 */
|
||||
Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
|
||||
/* Int_3_Loc == 7 */
|
||||
Int_1_Loc += 1;
|
||||
} /* while */
|
||||
/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
|
||||
Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
|
||||
/* Int_Glob == 5 */
|
||||
Proc_1 (Ptr_Glob);
|
||||
for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
|
||||
/* loop body executed twice */
|
||||
{
|
||||
if (Enum_Loc == Func_1 (Ch_Index, 'C'))
|
||||
/* then, not executed */
|
||||
{
|
||||
Proc_6 (Ident_1, &Enum_Loc);
|
||||
strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
|
||||
Int_2_Loc = Run_Index;
|
||||
Int_Glob = Run_Index;
|
||||
}
|
||||
}
|
||||
/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
|
||||
Int_2_Loc = Int_2_Loc * Int_1_Loc;
|
||||
Int_1_Loc = Int_2_Loc / Int_3_Loc;
|
||||
Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
|
||||
/* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
|
||||
Proc_2 (&Int_1_Loc);
|
||||
/* Int_1_Loc == 5 */
|
||||
|
||||
} /* loop "for Run_Index" */
|
||||
|
||||
/**************/
|
||||
/* Stop timer */
|
||||
/**************/
|
||||
|
||||
Stop_Timer();
|
||||
setStats(0);
|
||||
|
||||
User_Time = End_Time - Begin_Time;
|
||||
|
||||
if (User_Time < Too_Small_Time)
|
||||
{
|
||||
debug_printf("Measured time too small to obtain meaningful results\n");
|
||||
Number_Of_Runs = Number_Of_Runs * 10;
|
||||
debug_printf("\n");
|
||||
} else Done = true;
|
||||
}
|
||||
|
||||
debug_printf("Final values of the variables used in the benchmark:\n");
|
||||
debug_printf("\n");
|
||||
debug_printf("Int_Glob: %d\n", Int_Glob);
|
||||
debug_printf(" should be: %d\n", 5);
|
||||
debug_printf("Bool_Glob: %d\n", Bool_Glob);
|
||||
debug_printf(" should be: %d\n", 1);
|
||||
debug_printf("Ch_1_Glob: %c\n", Ch_1_Glob);
|
||||
debug_printf(" should be: %c\n", 'A');
|
||||
debug_printf("Ch_2_Glob: %c\n", Ch_2_Glob);
|
||||
debug_printf(" should be: %c\n", 'B');
|
||||
debug_printf("Arr_1_Glob[8]: %d\n", Arr_1_Glob[8]);
|
||||
debug_printf(" should be: %d\n", 7);
|
||||
debug_printf("Arr_2_Glob[8][7]: %d\n", Arr_2_Glob[8][7]);
|
||||
debug_printf(" should be: Number_Of_Runs + 10\n");
|
||||
debug_printf("Ptr_Glob->\n");
|
||||
debug_printf(" Ptr_Comp: %d\n", (long) Ptr_Glob->Ptr_Comp);
|
||||
debug_printf(" should be: (implementation-dependent)\n");
|
||||
debug_printf(" Discr: %d\n", Ptr_Glob->Discr);
|
||||
debug_printf(" should be: %d\n", 0);
|
||||
debug_printf(" Enum_Comp: %d\n", Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
debug_printf(" should be: %d\n", 2);
|
||||
debug_printf(" Int_Comp: %d\n", Ptr_Glob->variant.var_1.Int_Comp);
|
||||
debug_printf(" should be: %d\n", 17);
|
||||
debug_printf(" Str_Comp: %s\n", Ptr_Glob->variant.var_1.Str_Comp);
|
||||
debug_printf(" should be: DHRYSTONE PROGRAM, SOME STRING\n");
|
||||
debug_printf("Next_Ptr_Glob->\n");
|
||||
debug_printf(" Ptr_Comp: %d\n", (long) Next_Ptr_Glob->Ptr_Comp);
|
||||
debug_printf(" should be: (implementation-dependent), same as above\n");
|
||||
debug_printf(" Discr: %d\n", Next_Ptr_Glob->Discr);
|
||||
debug_printf(" should be: %d\n", 0);
|
||||
debug_printf(" Enum_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
debug_printf(" should be: %d\n", 1);
|
||||
debug_printf(" Int_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
|
||||
debug_printf(" should be: %d\n", 18);
|
||||
debug_printf(" Str_Comp: %s\n",
|
||||
Next_Ptr_Glob->variant.var_1.Str_Comp);
|
||||
debug_printf(" should be: DHRYSTONE PROGRAM, SOME STRING\n");
|
||||
debug_printf("Int_1_Loc: %d\n", Int_1_Loc);
|
||||
debug_printf(" should be: %d\n", 5);
|
||||
debug_printf("Int_2_Loc: %d\n", Int_2_Loc);
|
||||
debug_printf(" should be: %d\n", 13);
|
||||
debug_printf("Int_3_Loc: %d\n", Int_3_Loc);
|
||||
debug_printf(" should be: %d\n", 7);
|
||||
debug_printf("Enum_Loc: %d\n", Enum_Loc);
|
||||
debug_printf(" should be: %d\n", 1);
|
||||
debug_printf("Str_1_Loc: %s\n", Str_1_Loc);
|
||||
debug_printf(" should be: DHRYSTONE PROGRAM, 1'ST STRING\n");
|
||||
debug_printf("Str_2_Loc: %s\n", Str_2_Loc);
|
||||
debug_printf(" should be: DHRYSTONE PROGRAM, 2'ND STRING\n");
|
||||
debug_printf("\n");
|
||||
|
||||
|
||||
Microseconds = ((User_Time / Number_Of_Runs) * Mic_secs_Per_Second) / HZ;
|
||||
Dhrystones_Per_Second = (HZ * Number_Of_Runs) / User_Time;
|
||||
|
||||
debug_printf("Microseconds for one run through Dhrystone: %ld\n", Microseconds);
|
||||
debug_printf("Dhrystones per Second: %ld\n", Dhrystones_Per_Second);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Proc_1: record-manipulation kernel of the benchmark.
 *
 *   Ptr_Val_Par - record to operate on; its Ptr_Comp must point at a second
 *                 valid record (called Next_Record below).
 *
 * Copies *Ptr_Glob into the next record, sets Int_Comp of both records to 5,
 * lets Proc_3 update the next record's link, and then either
 *   - (Discr == Ident_1) sets the next record's Int_Comp to 6, derives its
 *     Enum_Comp via Proc_6, relinks it to Ptr_Glob->Ptr_Comp and updates its
 *     Int_Comp via Proc_7, or
 *   - copies the next record back over *Ptr_Val_Par.
 * No value is returned (K&R definition with implicit int return type).
 */
Proc_1 (Ptr_Val_Par)
|
||||
/******************/
|
||||
|
||||
REG Rec_Pointer Ptr_Val_Par;
|
||||
/* executed once */
|
||||
{
|
||||
REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
|
||||
/* == Next_Ptr_Glob */
|
||||
/* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
|
||||
/* corresponds to "rename" in Ada, "with" in Pascal */
|
||||
|
||||
/* Whole-record copy; expands to an assignment or memcpy (see structassign). */
structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob);
|
||||
Ptr_Val_Par->variant.var_1.Int_Comp = 5;
|
||||
Next_Record->variant.var_1.Int_Comp
|
||||
= Ptr_Val_Par->variant.var_1.Int_Comp;
|
||||
Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
|
||||
Proc_3 (&Next_Record->Ptr_Comp);
|
||||
/* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
|
||||
== Ptr_Glob->Ptr_Comp */
|
||||
if (Next_Record->Discr == Ident_1)
|
||||
/* then, executed */
|
||||
{
|
||||
Next_Record->variant.var_1.Int_Comp = 6;
|
||||
Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp,
|
||||
&Next_Record->variant.var_1.Enum_Comp);
|
||||
Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
|
||||
Proc_7 (Next_Record->variant.var_1.Int_Comp, 10,
|
||||
&Next_Record->variant.var_1.Int_Comp);
|
||||
}
|
||||
else /* not executed */
|
||||
structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
|
||||
} /* Proc_1 */
|
||||
|
||||
|
||||
/*
 * Proc_2: updates *Int_Par_Ref in place.
 *
 *   Int_Par_Ref - in/out integer.
 *
 * When the global Ch_1_Glob is 'A', stores (*Int_Par_Ref + 10 - 1) - Int_Glob
 * back through Int_Par_Ref and leaves the do/while loop after one pass.
 *
 * NOTE(review): Enum_Loc is assigned only inside the if; when Ch_1_Glob is
 * not 'A' the loop condition reads it uninitialised. The benchmark driver
 * calls Proc_5 (which sets Ch_1_Glob to 'A') on every run before this.
 */
Proc_2 (Int_Par_Ref)
|
||||
/******************/
|
||||
/* executed once */
|
||||
/* *Int_Par_Ref == 1, becomes 5 (1 + 10 - 1 - Int_Glob, Int_Glob == 5) */
|
||||
|
||||
One_Fifty *Int_Par_Ref;
|
||||
{
|
||||
One_Fifty Int_Loc;
|
||||
Enumeration Enum_Loc;
|
||||
|
||||
Int_Loc = *Int_Par_Ref + 10;
|
||||
do /* executed once */
|
||||
if (Ch_1_Glob == 'A')
|
||||
/* then, executed */
|
||||
{
|
||||
Int_Loc -= 1;
|
||||
*Int_Par_Ref = Int_Loc - Int_Glob;
|
||||
Enum_Loc = Ident_1;
|
||||
} /* if */
|
||||
while (Enum_Loc != Ident_1); /* true */
|
||||
} /* Proc_2 */
|
||||
|
||||
|
||||
/*
 * Proc_3: pointer-update kernel of the benchmark.
 *
 *   Ptr_Ref_Par - output; receives Ptr_Glob->Ptr_Comp when Ptr_Glob is
 *                 non-null, otherwise it is left unchanged.
 *
 * Afterwards Ptr_Glob's Int_Comp is set via Proc_7 (10, Int_Glob, ...).
 * NOTE(review): that Proc_7 call is outside the null check, so it takes the
 * address of a member of *Ptr_Glob even when Ptr_Glob is Null; the driver
 * always initialises Ptr_Glob before the measurement loop.
 */
Proc_3 (Ptr_Ref_Par)
|
||||
/******************/
|
||||
/* executed once */
|
||||
/* Ptr_Ref_Par becomes Ptr_Glob */
|
||||
|
||||
Rec_Pointer *Ptr_Ref_Par;
|
||||
|
||||
{
|
||||
if (Ptr_Glob != Null)
|
||||
/* then, executed */
|
||||
*Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
|
||||
Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
|
||||
} /* Proc_3 */
|
||||
|
||||
|
||||
/*
 * Proc_4: global-variable kernel without parameters.
 *
 * ORs (Ch_1_Glob == 'A') into the global Bool_Glob using bitwise '|', then
 * sets the global Ch_2_Glob to 'B'. No value is returned.
 */
Proc_4 () /* without parameters */
|
||||
/*******/
|
||||
/* executed once */
|
||||
{
|
||||
Boolean Bool_Loc;
|
||||
|
||||
Bool_Loc = Ch_1_Glob == 'A';
|
||||
Bool_Glob = Bool_Loc | Bool_Glob;
|
||||
Ch_2_Glob = 'B';
|
||||
} /* Proc_4 */
|
||||
|
||||
|
||||
/*
 * Proc_5: resets two globals at the start of each benchmark run:
 * Ch_1_Glob becomes 'A' and Bool_Glob becomes false. No value is returned.
 */
Proc_5 () /* without parameters */
|
||||
/*******/
|
||||
/* executed once */
|
||||
{
|
||||
Ch_1_Glob = 'A';
|
||||
Bool_Glob = false;
|
||||
} /* Proc_5 */
|
|
@ -0,0 +1,15 @@
|
|||
#include "util.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tb_cxxrtl_io.h"
|
||||
|
||||
#define PRINTF_BUF_SIZE 256
|
||||
void debug_printf(const char* fmt, ...) {
|
||||
char buf[PRINTF_BUF_SIZE];
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
vsnprintf(buf, PRINTF_BUF_SIZE, fmt, args);
|
||||
tb_puts(buf);
|
||||
va_end(args);
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
#ifndef _UTIL_H
|
||||
#define _UTIL_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define setStats(x)
|
||||
|
||||
// Read the CSR named `csrname` with a `csrr` instruction and yield its value
// as a uint32_t. Implemented as a GNU statement expression, so it can be used
// wherever an expression is expected; `csrname` is pasted into the assembly
// text verbatim.
#define read_csr(csrname) ({ \
	uint32_t csr_readback_; \
	__asm__ volatile ("csrr %0, " #csrname : "=r" (csr_readback_)); \
	csr_readback_; \
})
|
||||
|
||||
#endif
|
|
@ -0,0 +1,4 @@
|
|||
SRCS := ../common/init.S main.c
|
||||
APP := hellow
|
||||
|
||||
include ../common/src_only_app.mk
|
|
@ -0,0 +1,6 @@
|
|||
#include "tb_cxxrtl_io.h"
|
||||
|
||||
/*
 * Smoke test: print a greeting through the testbench and ask it to stop
 * with exit code 123.
 *
 * Declared as `int main(void)` (prototyped, standard return type) rather
 * than `void main()`. The return statement is only reached if tb_exit()
 * returns.
 */
int main(void) {
	tb_puts("Hello world from Hazard5 + CXXRTL!\n");
	tb_exit(123);
	return 0;
}
|
|
@ -0,0 +1 @@
|
|||
tmp
|
|
@ -0,0 +1,19 @@
|
|||
TEST = I-ADD-01
|
||||
TEST_ARCH = rv32i
|
||||
BIN_ARCH = rv32i
|
||||
SIM_EXEC = ../tb_cxxrtl/tb
|
||||
|
||||
CROSS_PREFIX = /opt/riscv/bin/riscv32-unknown-elf-
|
||||
|
||||
TEST_BIN_NAME := $(TEST_ARCH)-$(TEST)-on-$(BIN_ARCH)
|
||||
TEST_SRC := riscv-compliance/riscv-test-suite/$(TEST_ARCH)/src/$(TEST).S
|
||||
TEST_VEC := riscv-compliance/riscv-test-suite/$(TEST_ARCH)/references/$(TEST).reference_output
|
||||
|
||||
.PHONY: all
|
||||
all:
|
||||
mkdir -p tmp
|
||||
$(CROSS_PREFIX)gcc -I include -T memmap.ld -nostartfiles -march=$(BIN_ARCH) $(TEST_SRC) -o tmp/$(TEST_BIN_NAME).elf
|
||||
$(CROSS_PREFIX)objdump -d tmp/$(TEST_BIN_NAME).elf > tmp/$(TEST_BIN_NAME).dis
|
||||
$(CROSS_PREFIX)objcopy -O binary tmp/$(TEST_BIN_NAME).elf tmp/$(TEST_BIN_NAME).bin
|
||||
$(SIM_EXEC) tmp/$(TEST_BIN_NAME).bin --dump 0x10000 0x10100 | tee tmp/$(TEST_BIN_NAME).log
|
||||
./compare_testvec tmp/$(TEST_BIN_NAME).log $(TEST_VEC)
|
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/env python3

"""Compare a simulator memory dump against a reference output vector.

Usage: compare_testvec <simulator log> <reference output>

The simulator log is scanned for a line starting with "Dumping memory"; the
space-separated hex bytes that follow are packed little-endian into 32-bit
words. Each reference line is a run of 8-digit hex words, taken in reverse
order within the line. Every reference word is printed next to the
corresponding dumped word, followed by "Test PASSED." or "Test FAILED.".

The exit status is 0 on a full match and 1 otherwise, so a calling Makefile
can detect a failing test.
"""

import sys


def read_model_words(log_path):
    """Return the dumped memory in `log_path` as a list of 32-bit words."""
    model_bytes = []
    in_testdata = False
    with open(log_path) as log:
        for line in log:
            if line.startswith("Dumping memory"):
                in_testdata = True
                continue
            if in_testdata:
                try:
                    # The generator appends bytes one by one, so a partial
                    # final line contributes its bytes before the blank or
                    # non-hex token ends the dump.
                    model_bytes.extend(int(x, 16) for x in line.split(" "))
                except ValueError:
                    break
    words = []
    for i in range(len(model_bytes) // 4):
        b0, b1, b2, b3 = model_bytes[i * 4:i * 4 + 4]
        words.append(b0 | b1 << 8 | b2 << 16 | b3 << 24)
    return words


def read_gold_words(ref_path):
    """Return the reference words in `ref_path`, in memory order."""
    gold = []
    with open(ref_path) as ref:
        for line in ref:
            line_contents = []
            for index in range(0, len(line.strip()), 8):
                line_contents.append(int(line[index:index + 8], 16))
            # Words are reversed within each line to get memory order.
            gold.extend(reversed(line_contents))
    return gold


def compare_words(model, gold):
    """Print a per-word comparison; return True if every gold word matches."""
    all_match = True
    for i, g in enumerate(gold):
        if i >= len(model):
            # The dump ended before the reference did: report, don't crash.
            print("{:03x}: -------- (gate) \033[1;31m!=\033[0;0m {:08x} (gold)".format(i * 4, g))
            all_match = False
            continue
        if g == model[i]:
            eq_str = "\033[1;32m==\033[0;0m"
        else:
            eq_str = "\033[1;31m!=\033[0;0m"
            all_match = False
        print("{:03x}: {:08x} (gate) {} {:08x} (gold)".format(i * 4, model[i], eq_str, g))
    return all_match


def main(argv):
    """Run the comparison for `argv` = [prog, log, reference]; return exit status."""
    if len(argv) != 3:
        sys.stderr.write("Usage: {} <simulator log> <reference output>\n".format(argv[0] if argv else "compare_testvec"))
        return 2
    model = read_model_words(argv[1])
    gold = read_gold_words(argv[2])
    if compare_words(model, gold):
        print("Test PASSED.")
        return 0
    print("Test FAILED.")
    return 1


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
|
@ -0,0 +1,17 @@
|
|||
#ifndef _COMPLIANCE_IO_H_
|
||||
#define _COMPLIANCE_IO_H_
|
||||
|
||||
#define RVTEST_IO_INIT
|
||||
#define RVTEST_IO_WRITE_STR(_SP, _STR)
|
||||
#define RVTEST_IO_CHECK()
|
||||
|
||||
|
||||
// Put this info into a label name so that it can be seen in the disassembly (holy hack batman)
|
||||
#define LABEL_ASSERT_(reg, val, line) assert_ ## reg ## _ ## val ## _l ## line:
|
||||
#define LABEL_ASSERT(reg, val, line) LABEL_ASSERT_(reg, val, line)
|
||||
|
||||
#define RVTEST_IO_ASSERT_GPR_EQ(_SP, _R, _I) LABEL_ASSERT(_R, xxx, __LINE__) nop
|
||||
#define RVTEST_IO_ASSERT_SFPR_EQ(_F, _R, _I)
|
||||
#define RVTEST_IO_ASSERT_DFPR_EQ(_D, _R, _I)
|
||||
|
||||
#endif // _COMPLIANCE_IO_H_
|
|
@ -0,0 +1,30 @@
|
|||
#ifndef _COMPLIANCE_TEST_H_
|
||||
#define _COMPLIANCE_TEST_H_
|
||||
|
||||
#define RV_COMPLIANCE_RV32M
|
||||
|
||||
#define RV_COMPLIANCE_CODE_BEGIN
|
||||
|
||||
#define RV_COMPLIANCE_CODE_END
|
||||
|
||||
#define MM_IO_EXIT 0x80000008
|
||||
|
||||
.macro RV_COMPLIANCE_HALT
|
||||
.option push
|
||||
.option norelax
|
||||
_write_io_exit:
|
||||
li a0, MM_IO_EXIT
|
||||
sw zero, 0(a0)
|
||||
// Note we should never reach this next instruction (assuming the
|
||||
// processor is working correctly!)
|
||||
_end_of_test:
|
||||
j _end_of_test
|
||||
.option pop
|
||||
.endm
|
||||
|
||||
#define RV_COMPLIANCE_DATA_BEGIN .section .testdata, "a"
|
||||
|
||||
#define RV_COMPLIANCE_DATA_END
|
||||
|
||||
|
||||
#endif // _COMPLIANCE_TEST_H_
|
|
@ -0,0 +1 @@
|
|||
../riscv-compliance/riscv-test-env/riscv_test_macros.h
|
|
@ -0,0 +1,43 @@
|
|||
MEMORY
|
||||
{
|
||||
RAM (wx) : ORIGIN = 0x0, LENGTH = 64k
|
||||
RESULT (w) : ORIGIN = ORIGIN(RAM) + LENGTH(RAM), LENGTH = 64k
|
||||
}
|
||||
|
||||
OUTPUT_FORMAT("elf32-littleriscv", "elf32-littleriscv", "elf32-littleriscv")
|
||||
OUTPUT_ARCH(riscv)
|
||||
ENTRY(_start)
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
.text : {
|
||||
/* Padding in place of vector table (by default CPU reset vector points to
|
||||
immediately after vector table) */
|
||||
. = ORIGIN(RAM) + 0xc0;
|
||||
PROVIDE (_start = .);
|
||||
*(.text*)
|
||||
. = ALIGN(4);
|
||||
} > RAM
|
||||
|
||||
.rodata : {
|
||||
*(.rodata*)
|
||||
. = ALIGN(4);
|
||||
} > RAM
|
||||
|
||||
.data : {
|
||||
*(.data*)
|
||||
. = ALIGN(4);
|
||||
} > RAM
|
||||
.bss : {
|
||||
*(.bss .bss.*)
|
||||
. = ALIGN(4);
|
||||
} > RAM
|
||||
/* Link testout section to upper memory region */
|
||||
.testdata :
|
||||
{
|
||||
PROVIDE(__testdata_start = .);
|
||||
*(.testdata)
|
||||
} > RESULT
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1 @@
|
|||
rvcpp
|
|
@ -0,0 +1,11 @@
|
|||
# Build the rvcpp executable from every .cpp file in this directory.
SRCS=$(wildcard *.cpp)
EXECUTABLE:=rvcpp

# Disable the built-in suffix rules; only the explicit targets below are used.
.SUFFIXES:
.PHONY: all clean

all:
	g++ -std=c++17 -O3 -Wall -Wno-parentheses $(SRCS) -o $(EXECUTABLE)

# Note the $: a bare "(EXECUTABLE)" is handed to the shell as-is and never
# removes the binary.
clean:
	rm -f $(EXECUTABLE)
|
|
@ -0,0 +1,136 @@
|
|||
#ifndef _MEM_H
|
||||
#define _MEM_H
|
||||
|
||||
#include "rv_types.h"
|
||||
|
||||
struct MemBase32 {
|
||||
virtual uint8_t r8(ux_t addr) {return 0;}
|
||||
virtual void w8(ux_t addr, uint8_t data) {}
|
||||
virtual uint16_t r16(ux_t addr) {return 0;}
|
||||
virtual void w16(ux_t addr, uint16_t data) {}
|
||||
virtual uint32_t r32(ux_t addr) {return 0;}
|
||||
virtual void w32(ux_t addr, uint32_t data) {}
|
||||
};
|
||||
|
||||
struct FlatMem32: MemBase32 {
|
||||
uint32_t size;
|
||||
uint32_t *mem;
|
||||
|
||||
FlatMem32(uint32_t size_) {
|
||||
assert(size_ % sizeof(uint32_t) == 0);
|
||||
size = size_;
|
||||
mem = new uint32_t[size >> 2];
|
||||
for (uint64_t i = 0; i < size >> 2; ++i)
|
||||
mem[i] = 0;
|
||||
}
|
||||
|
||||
~FlatMem32() {
|
||||
delete mem;
|
||||
}
|
||||
|
||||
virtual uint8_t r8(ux_t addr) {
|
||||
assert(addr < size);
|
||||
return mem[addr >> 2] >> 8 * (addr & 0x3) & 0xffu;
|
||||
}
|
||||
|
||||
virtual void w8(ux_t addr, uint8_t data) {
|
||||
assert(addr < size);
|
||||
mem[addr >> 2] &= ~(0xffu << 8 * (addr & 0x3));
|
||||
mem[addr >> 2] |= (uint32_t)data << 8 * (addr & 0x3);
|
||||
}
|
||||
|
||||
virtual uint16_t r16(ux_t addr) {
|
||||
assert(addr < size && addr + 1 < size); // careful of ~0u
|
||||
assert(addr % 2 == 0);
|
||||
return mem[addr >> 2] >> 8 * (addr & 0x2) & 0xffffu;
|
||||
}
|
||||
|
||||
virtual void w16(ux_t addr, uint16_t data) {
|
||||
assert(addr < size && addr + 1 < size);
|
||||
assert(addr % 2 == 0);
|
||||
mem[addr >> 2] &= ~(0xffffu << 8 * (addr & 0x2));
|
||||
mem[addr >> 2] |= (uint32_t)data << 8 * (addr & 0x2);
|
||||
}
|
||||
|
||||
virtual uint32_t r32(ux_t addr) {
|
||||
assert(addr < size && addr + 3 < size);
|
||||
assert(addr % 4 == 0);
|
||||
return mem[addr >> 2];
|
||||
}
|
||||
|
||||
virtual void w32(ux_t addr, uint32_t data) {
|
||||
assert(addr < size && addr + 3 < size);
|
||||
assert(addr % 4 == 0);
|
||||
mem[addr >> 2] = data;
|
||||
}
|
||||
};
|
||||
|
||||
struct TBExitException {
|
||||
ux_t exitcode;
|
||||
TBExitException(ux_t code): exitcode(code) {}
|
||||
};
|
||||
|
||||
struct TBMemIO: MemBase32 {
|
||||
virtual void w32(ux_t addr, uint32_t data) {
|
||||
switch (addr) {
|
||||
case 0x0:
|
||||
printf("%c", (char)data);
|
||||
break;
|
||||
case 0x4:
|
||||
printf("%08x\n", data);
|
||||
break;
|
||||
case 0x8:
|
||||
throw TBExitException(data);
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct MemMap32: MemBase32 {
|
||||
std::vector<std::tuple<uint32_t, uint32_t, MemBase32*> > memmap;
|
||||
|
||||
void add(uint32_t base, uint32_t size, MemBase32 *mem) {
|
||||
memmap.push_back(std::make_tuple(base, size, mem));
|
||||
}
|
||||
|
||||
std::tuple <uint32_t, MemBase32*> map_addr(uint32_t addr) {
|
||||
for (auto&& [base, size, mem] : memmap) {
|
||||
if (addr >= base && addr < base + size)
|
||||
return std::make_tuple(addr - base, mem);
|
||||
}
|
||||
throw;
|
||||
}
|
||||
|
||||
// perhaps some templatey-ness required
|
||||
virtual uint8_t r8(ux_t addr) {
|
||||
auto [offset, mem] = map_addr(addr);
|
||||
return mem->r8(offset);
|
||||
}
|
||||
|
||||
virtual void w8(ux_t addr, uint8_t data) {
|
||||
auto [offset, mem] = map_addr(addr);
|
||||
mem->w8(offset, data);
|
||||
}
|
||||
|
||||
virtual uint16_t r16(ux_t addr) {
|
||||
auto [offset, mem] = map_addr(addr);
|
||||
return mem->r16(offset);
|
||||
}
|
||||
|
||||
virtual void w16(ux_t addr, uint16_t data) {
|
||||
auto [offset, mem] = map_addr(addr);
|
||||
mem->w16(offset, data);
|
||||
}
|
||||
|
||||
virtual uint32_t r32(ux_t addr) {
|
||||
auto [offset, mem] = map_addr(addr);
|
||||
return mem->r32(offset);
|
||||
}
|
||||
|
||||
virtual void w32(ux_t addr, uint32_t data) {
|
||||
auto [offset, mem] = map_addr(addr);
|
||||
mem->w32(offset, data);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,440 @@
|
|||
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <tuple>
#include <vector>

#include "rv_types.h"
#include "mem.h"
|
||||
|
||||
// Minimal RISC-V interpreter, supporting RV32IM only
|
||||
|
||||
// Use unsigned arithmetic everywhere, with explicit sign extension as required.
|
||||
// Sign-extend `bits` from bit position `sign_bit` to the full register width.
// Bits above `sign_bit` in the input are discarded. When `sign_bit` is
// already the top bit (or beyond) the input is returned unchanged.
static inline ux_t sext(ux_t bits, int sign_bit) {
	if (sign_bit >= XLEN - 1)
		return bits;

	const ux_t sign_mask = 1u << sign_bit;
	const ux_t field_mask = (sign_mask << 1) - 1;
	// Subtracting twice the sign bit's weight turns it into a negative weight.
	return (bits & field_mask) - ((bits & sign_mask) << 1);
}
|
||||
|
||||
// I-type immediate: instr[31:20], sign-extended from instr[31].
static inline ux_t imm_i(uint32_t instr) {
	const uint32_t field = instr >> 20;
	// instr[31] lands on bit 12; subtracting it sign-extends the 12-bit field.
	const uint32_t sign_weight = instr >> 19 & 0x1000;
	return field - sign_weight;
}
|
||||
|
||||
// S-type immediate: {instr[31:25], instr[11:7]}, sign-extended from instr[31].
static inline ux_t imm_s(uint32_t instr) {
	const uint32_t upper = instr >> 20 & 0xfe0u;       // instr[31:25] -> imm[11:5]
	const uint32_t lower = instr >> 7 & 0x1fu;         // instr[11:7]  -> imm[4:0]
	const uint32_t sign_weight = instr >> 19 & 0x1000u; // instr[31] at bit 12
	return upper + lower - sign_weight;
}
|
||||
|
||||
// U-type immediate: instr[31:12] kept in place, with the low 12 bits cleared.
static inline ux_t imm_u(uint32_t instr) {
	const uint32_t low12_mask = 0xfffu;
	return instr & ~low12_mask;
}
|
||||
|
||||
// B-type immediate: {instr[31], instr[7], instr[30:25], instr[11:8], 0},
// sign-extended from instr[31].
static inline ux_t imm_b(uint32_t instr) {
	const uint32_t imm_4_1 = instr >> 7 & 0x1e;       // instr[11:8]
	const uint32_t imm_10_5 = instr >> 20 & 0x7e0;    // instr[30:25]
	const uint32_t imm_11 = instr << 4 & 0x800;       // instr[7]
	const uint32_t sign_weight = instr >> 19 & 0x1000; // instr[31] at bit 12
	return imm_4_1 + imm_10_5 + imm_11 - sign_weight;
}
|
||||
|
||||
// J-type immediate: {instr[31], instr[19:12], instr[20], instr[30:21], 0},
// sign-extended from instr[31].
static inline ux_t imm_j(uint32_t instr) {
	const uint32_t imm_10_1 = instr >> 20 & 0x7fe;        // instr[30:21]
	const uint32_t imm_11 = instr >> 9 & 0x800;           // instr[20]
	const uint32_t imm_19_12 = instr & 0xff000;           // instr[19:12]
	const uint32_t sign_weight = instr >> 11 & 0x100000;  // instr[31] at bit 20
	return imm_10_1 + imm_11 + imm_19_12 - sign_weight;
}
|
||||
|
||||
struct RVCSR {
|
||||
enum {
|
||||
WRITE = 0,
|
||||
WRITE_SET = 1,
|
||||
WRITE_CLEAR = 2
|
||||
};
|
||||
|
||||
enum {
|
||||
MSCRATCH = 0x340,
|
||||
MCYCLE = 0xb00,
|
||||
MTIME = 0xb01,
|
||||
MINSTRET = 0xb02
|
||||
};
|
||||
|
||||
ux_t mcycle;
|
||||
ux_t mscratch;
|
||||
|
||||
RVCSR(): mcycle(0), mscratch(0) {}
|
||||
|
||||
void step() {++mcycle;}
|
||||
|
||||
ux_t read(uint16_t addr, bool side_effect=true) {
|
||||
if (addr == MCYCLE || addr == MTIME || addr == MINSTRET)
|
||||
return mcycle;
|
||||
else if (addr == MSCRATCH)
|
||||
return mscratch;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
void write(uint16_t addr, ux_t data, uint op=WRITE) {
|
||||
if (op == WRITE_CLEAR)
|
||||
data = read(addr, false) & ~data;
|
||||
else if (op == WRITE_SET)
|
||||
data = read(addr, false) | data;
|
||||
if (addr == MCYCLE)
|
||||
mcycle = data;
|
||||
else if (addr == MSCRATCH)
|
||||
mscratch = data;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
struct RVCore {
|
||||
std::array<ux_t, 32> regs;
|
||||
ux_t pc;
|
||||
RVCSR csr;
|
||||
|
||||
RVCore(ux_t reset_vector=0xc0) {
|
||||
std::fill(std::begin(regs), std::end(regs), 0);
|
||||
pc = reset_vector;
|
||||
}
|
||||
|
||||
enum {
|
||||
OPC_LOAD = 0b00'000,
|
||||
OPC_MISC_MEM = 0b00'011,
|
||||
OPC_OP_IMM = 0b00'100,
|
||||
OPC_AUIPC = 0b00'101,
|
||||
OPC_STORE = 0b01'000,
|
||||
OPC_OP = 0b01'100,
|
||||
OPC_LUI = 0b01'101,
|
||||
OPC_BRANCH = 0b11'000,
|
||||
OPC_JALR = 0b11'001,
|
||||
OPC_JAL = 0b11'011,
|
||||
OPC_SYSTEM = 0b11'100
|
||||
};
|
||||
|
||||
void step(MemBase32 &mem) {
|
||||
uint32_t instr = mem.r32(pc);
|
||||
std::optional<ux_t> rd_wdata;
|
||||
std::optional<ux_t> pc_wdata;
|
||||
uint regnum_rs1 = instr >> 15 & 0x1f;
|
||||
uint regnum_rs2 = instr >> 20 & 0x1f;
|
||||
uint regnum_rd = instr >> 7 & 0x1f;
|
||||
ux_t rs1 = regs[regnum_rs1];
|
||||
ux_t rs2 = regs[regnum_rs2];
|
||||
bool instr_invalid = false;
|
||||
|
||||
uint opc = instr >> 2 & 0x1f;
|
||||
uint funct3 = instr >> 12 & 0x7;
|
||||
uint funct7 = instr >> 25 & 0x7f;
|
||||
|
||||
switch (opc) {
|
||||
|
||||
case OPC_OP: {
|
||||
if (funct7 == 0b00'00000) {
|
||||
if (funct3 == 0b000)
|
||||
rd_wdata = rs1 + rs2;
|
||||
else if (funct3 == 0b001)
|
||||
rd_wdata = rs1 << (rs2 & 0x1f);
|
||||
else if (funct3 == 0b010)
|
||||
rd_wdata = (sx_t)rs1 < (sx_t)rs2;
|
||||
else if (funct3 == 0b011)
|
||||
rd_wdata = rs1 < rs2;
|
||||
else if (funct3 == 0b100)
|
||||
rd_wdata = rs1 ^ rs2;
|
||||
else if (funct3 == 0b101)
|
||||
rd_wdata = rs1 >> (rs2 & 0x1f);
|
||||
else if (funct3 == 0b110)
|
||||
rd_wdata = rs1 | rs2;
|
||||
else if (funct3 == 0b111)
|
||||
rd_wdata = rs1 & rs2;
|
||||
else
|
||||
instr_invalid = true;
|
||||
}
|
||||
else if (funct7 == 0b01'00000) {
|
||||
if (funct3 == 0b000)
|
||||
rd_wdata = rs1 - rs2;
|
||||
else if (funct3 == 0b101)
|
||||
rd_wdata = (sx_t)rs1 >> (rs2 & 0x1f);
|
||||
else
|
||||
instr_invalid = true;
|
||||
}
|
||||
else if (funct7 == 0b00'00001) {
|
||||
if (funct3 < 0b100) {
|
||||
sdx_t mul_op_a = rs1;
|
||||
sdx_t mul_op_b = rs2;
|
||||
if (funct3 != 0b011)
|
||||
mul_op_a -= (mul_op_a & (1 << XLEN - 1)) << 1;
|
||||
if (funct3 < 0b010)
|
||||
mul_op_b -= (mul_op_b & (1 << XLEN - 1)) << 1;
|
||||
sdx_t mul_result = mul_op_a * mul_op_b;
|
||||
if (funct3 == 0b000)
|
||||
rd_wdata = mul_result;
|
||||
else
|
||||
rd_wdata = mul_result >> XLEN;
|
||||
}
|
||||
else {
|
||||
asm volatile("" : : : "memory");
|
||||
if (funct3 == 0b100) {
|
||||
if (rs2 == 0)
|
||||
rd_wdata = -1;
|
||||
else if (rs2 == ~0u)
|
||||
rd_wdata = -rs1;
|
||||
else
|
||||
rd_wdata = (sx_t)rs1 / (sx_t)rs2;
|
||||
}
|
||||
else if (funct3 == 0b101) {
|
||||
rd_wdata = rs2 ? rs1 / rs2 : ~0ul;
|
||||
}
|
||||
else if (funct3 == 0b110) {
|
||||
if (rs2 == 0)
|
||||
rd_wdata = rs1;
|
||||
else if (rs2 == ~0u) // potential overflow of division
|
||||
rd_wdata = 0;
|
||||
else
|
||||
rd_wdata = (sx_t)rs1 % (sx_t)rs2;
|
||||
}
|
||||
else if (funct3 == 0b111) {
|
||||
rd_wdata = rs2 ? rs1 % rs2 : rs1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
instr_invalid = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OPC_OP_IMM: {
|
||||
ux_t imm = imm_i(instr);
|
||||
if (funct3 == 0b000)
|
||||
rd_wdata = rs1 + imm;
|
||||
else if (funct3 == 0b010)
|
||||
rd_wdata = !!((sx_t)rs1 < (sx_t)imm);
|
||||
else if (funct3 == 0b011)
|
||||
rd_wdata = !!(rs1 < imm);
|
||||
else if (funct3 == 0b100)
|
||||
rd_wdata = rs1 ^ imm;
|
||||
else if (funct3 == 0b110)
|
||||
rd_wdata = rs1 | imm;
|
||||
else if (funct3 == 0b111)
|
||||
rd_wdata = rs1 & imm;
|
||||
else if (funct3 == 0b001 || funct3 == 0b101) {
|
||||
// shamt is regnum_rs2
|
||||
if (funct7 == 0b00'00000 && funct3 == 0b001) {
|
||||
rd_wdata = rs1 << regnum_rs2;
|
||||
}
|
||||
else if (funct7 == 0b00'00000 && funct3 == 0b101) {
|
||||
rd_wdata = rs1 >> regnum_rs2;
|
||||
}
|
||||
else if (funct7 == 0b01'00000 && funct3 == 0b101) {
|
||||
rd_wdata = (sx_t)rs1 >> regnum_rs2;
|
||||
}
|
||||
else {
|
||||
instr_invalid = true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
instr_invalid = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OPC_BRANCH: {
|
||||
ux_t target = pc + imm_b(instr);
|
||||
bool taken = false;
|
||||
if ((funct3 & 0b110) == 0b000)
|
||||
taken = rs1 == rs2;
|
||||
else if ((funct3 & 0b110) == 0b100)
|
||||
taken = (sx_t)rs1 < (sx_t) rs2;
|
||||
else if ((funct3 & 0b110) == 0b110)
|
||||
taken = rs1 < rs2;
|
||||
else
|
||||
instr_invalid = true;
|
||||
if (!instr_invalid && funct3 & 0b001)
|
||||
taken = !taken;
|
||||
if (taken)
|
||||
pc_wdata = target;
|
||||
break;
|
||||
}
|
||||
|
||||
case OPC_LOAD: {
|
||||
ux_t load_addr = rs1 + imm_i(instr);
|
||||
if (funct3 == 0b000)
|
||||
rd_wdata = sext(mem.r8(load_addr), 7);
|
||||
else if (funct3 == 0b001)
|
||||
rd_wdata = sext(mem.r16(load_addr), 15);
|
||||
else if (funct3 == 0b010)
|
||||
rd_wdata = mem.r32(load_addr);
|
||||
else if (funct3 == 0b100)
|
||||
rd_wdata = mem.r8(load_addr);
|
||||
else if (funct3 == 0b101)
|
||||
rd_wdata = mem.r16(load_addr);
|
||||
else
|
||||
instr_invalid = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case OPC_STORE: {
|
||||
ux_t store_addr = rs1 + imm_s(instr);
|
||||
if (funct3 == 0b000)
|
||||
mem.w8(store_addr, rs2 & 0xffu);
|
||||
else if (funct3 == 0b001)
|
||||
mem.w16(store_addr, rs2 & 0xffffu);
|
||||
else if (funct3 == 0b010)
|
||||
mem.w32(store_addr, rs2);
|
||||
else
|
||||
instr_invalid = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case OPC_JAL:
|
||||
rd_wdata = pc + 4;
|
||||
pc_wdata = pc + imm_j(instr);
|
||||
break;
|
||||
|
||||
case OPC_JALR:
|
||||
rd_wdata = pc + 4;
|
||||
pc_wdata = (rs1 + imm_i(instr)) & -2u;
|
||||
break;
|
||||
|
||||
case OPC_LUI:
|
||||
rd_wdata = imm_u(instr);
|
||||
break;
|
||||
|
||||
case OPC_AUIPC:
|
||||
rd_wdata = pc + imm_u(instr);
|
||||
break;
|
||||
|
||||
case OPC_SYSTEM: {
|
||||
uint16_t csr_addr = instr >> 20;
|
||||
if (funct3 >= 0b001 && funct3 <= 0b011) {
|
||||
// csrrw, csrrs, csrrc
|
||||
uint write_op = funct3 - 0b001;
|
||||
if (write_op != RVCSR::WRITE || regnum_rd != 0)
|
||||
rd_wdata = csr.read(csr_addr);
|
||||
if (write_op == RVCSR::WRITE || regnum_rs1 != 0)
|
||||
csr.write(csr_addr, rs1, write_op);
|
||||
}
|
||||
else if (funct3 >= 0b101 && funct3 <= 0b111) {
|
||||
// csrrwi, csrrsi, csrrci
|
||||
uint write_op = funct3 - 0b101;
|
||||
if (write_op != RVCSR::WRITE || regnum_rd != 0)
|
||||
rd_wdata = csr.read(csr_addr);
|
||||
if (write_op == RVCSR::WRITE || regnum_rs1 != 0)
|
||||
csr.write(csr_addr, regnum_rs1, write_op);
|
||||
}
|
||||
else {
|
||||
instr_invalid = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
instr_invalid = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (instr_invalid)
|
||||
printf("Invalid instr %08x at %08x\n", instr, pc);
|
||||
|
||||
if (pc_wdata)
|
||||
pc = *pc_wdata;
|
||||
else
|
||||
pc = pc + 4;
|
||||
if (rd_wdata && regnum_rd != 0)
|
||||
regs[regnum_rd] = *rd_wdata;
|
||||
csr.step();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Command-line usage text printed by exit_help(). The synopsis line lists
// every option described below it, including --memsize.
const char *help_str =
"Usage: tb binfile [--dump start end] [--cycles n] [--memsize n]\n"
"    binfile          : Binary to load into start of memory\n"
"    --dump start end : Print out memory contents between start and end (exclusive)\n"
"                       after execution finishes. Can be passed multiple times.\n"
"    --cycles n       : Maximum number of cycles to run before exiting.\n"
"    --memsize n      : Memory size in units of 1024 bytes, default is 16 MB\n"
;
|
||||
|
||||
// Write an optional error message followed by the usage text to stderr,
// then terminate the process with exit status -1.
void exit_help(std::string errtext = "") {
	std::cerr << errtext;
	std::cerr << help_str;
	exit(-1);
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc < 2)
|
||||
exit_help();
|
||||
|
||||
std::vector<std::tuple<uint32_t, uint32_t>> dump_ranges;
|
||||
int64_t max_cycles = 100000;
|
||||
uint32_t ramsize = 16 * (1 << 20);
|
||||
|
||||
for (int i = 2; i < argc; ++i) {
|
||||
std::string s(argv[i]);
|
||||
if (s == "--dump") {
|
||||
if (argc - i < 3)
|
||||
exit_help("Option --dump requires 2 arguments\n");
|
||||
dump_ranges.push_back(std::make_tuple(
|
||||
std::stoul(argv[i + 1], 0, 0),
|
||||
std::stoul(argv[i + 2], 0, 0)
|
||||
));
|
||||
i += 2;
|
||||
}
|
||||
else if (s == "--cycles") {
|
||||
if (argc - i < 2)
|
||||
exit_help("Option --cycles requires an argument\n");
|
||||
max_cycles = std::stol(argv[i + 1], 0, 0);
|
||||
i += 1;
|
||||
}
|
||||
else if (s == "--memsize") {
|
||||
if (argc - i < 2)
|
||||
exit_help("Option --memsize requires an argument\n");
|
||||
ramsize = 1024 * std::stol(argv[i + 1], 0, 0);
|
||||
i += 1;
|
||||
}
|
||||
else {
|
||||
std::cerr << "Unrecognised argument " << s << "\n";
|
||||
exit_help("");
|
||||
}
|
||||
}
|
||||
|
||||
FlatMem32 ram(ramsize);
|
||||
TBMemIO io;
|
||||
MemMap32 mem;
|
||||
mem.add(0, ramsize, &ram);
|
||||
mem.add(0x80000000u, 12, &io);
|
||||
|
||||
std::ifstream fd(argv[1], std::ios::binary | std::ios::ate);
|
||||
std::streamsize bin_size = fd.tellg();
|
||||
if (bin_size > ramsize) {
|
||||
std::cerr << "Binary file (" << bin_size << " bytes) is larger than memory (" << ramsize << " bytes)\n";
|
||||
return -1;
|
||||
}
|
||||
fd.seekg(0, std::ios::beg);
|
||||
fd.read((char*)ram.mem, bin_size);
|
||||
|
||||
RVCore core;
|
||||
|
||||
int64_t cyc;
|
||||
try {
|
||||
for (cyc = 0; cyc < max_cycles; ++cyc)
|
||||
core.step(mem);
|
||||
}
|
||||
catch (TBExitException e) {
|
||||
printf("CPU requested halt. Exit code %d\n", e.exitcode);
|
||||
printf("Ran for %ld cycles\n", cyc + 1);
|
||||
}
|
||||
|
||||
for (auto [start, end] : dump_ranges) {
|
||||
printf("Dumping memory from %08x to %08x:\n", start, end);
|
||||
for (uint32_t i = 0; i < end - start; ++i)
|
||||
printf("%02x%c", mem.r8(start + i), i % 16 == 15 ? '\n' : ' ');
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
#ifndef _RV_TYPES
#define _RV_TYPES

// Fixed-width integer types used by the typedefs below. Included here so the
// header does not depend on what the including file pulled in first.
#include <cstdint>

// Register width in bits.
enum {XLEN = 32};

// Unsigned and signed views of a register-width value.
typedef uint32_t ux_t;
typedef int32_t sx_t;
typedef unsigned int uint;

// Signed double-width type, wide enough for a full XLEN x XLEN product.
typedef int64_t sdx_t;

#endif
|
|
@ -0,0 +1,491 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Minimal RISC-V interpreter, supporting RV32I + Zicsr only, with trace disassembly
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
XLEN = 32
|
||||
XLEN_MASK = (1 << XLEN) - 1
|
||||
|
||||
def extract(bits, msb, lsb):
    """Return bits[msb:lsb] (both ends inclusive) as a non-negative integer."""
    keep_up_to_msb = (1 << (msb + 1)) - 1
    return (bits & keep_up_to_msb) >> lsb
|
||||
|
||||
def sext(bits, sign_bit):
    """Interpret the low `sign_bit + 1` bits of `bits` as a two's-complement
    number and return it as a (possibly negative) Python int."""
    sign_mask = 1 << sign_bit
    field = bits & ((1 << (sign_bit + 1)) - 1)
    # Subtracting twice the sign bit's weight turns it into a negative weight.
    return field - ((bits & sign_mask) << 1)
|
||||
|
||||
def concat_extract(bits, msb_lsb_pairs, signed=True):
    """Concatenate several bit fields of `bits` into one value.

    `msb_lsb_pairs` lists (msb, lsb) ranges, most-significant field first.
    With `signed` set, the concatenated value is sign-extended from its top
    bit.
    """
    value = 0
    total_width = 0
    for msb, lsb in msb_lsb_pairs:
        field_width = msb - lsb + 1
        value = (value << field_width) | extract(bits, msb, lsb)
        total_width += field_width
    if not signed:
        return value
    return sext(value, total_width - 1)
|
||||
|
||||
# Note these handy functions are not used much in the main loop, because CPython is unable
|
||||
# to inline them. This and similar changes result in a ~3x performance increase. :(
|
||||
def imm_i(instr):
    """I-type immediate: instr[31:20] as a signed value."""
    # Equivalent to: concat_extract(instr, ((31, 20),))
    field = instr >> 20
    # instr[31] lands on bit 12; subtracting it sign-extends the 12-bit field.
    sign_weight = instr >> 19 & 0x1000
    return field - sign_weight
|
||||
|
||||
def imm_s(instr):
    """S-type immediate: {instr[31:25], instr[11:7]} as a signed value."""
    # Equivalent to: concat_extract(instr, ((31, 25), (11, 7)))
    upper = instr >> 20 & 0xfe0
    lower = instr >> 7 & 0x1f
    sign_weight = instr >> 19 & 0x1000
    return upper + lower - sign_weight
|
||||
|
||||
def imm_u(instr):
    """U-type immediate: instr[31:12] << 12 as a signed value."""
    # Equivalent to: concat_extract(instr, ((31, 12),)) << 12
    # The parentheses matter: binary `-` binds tighter than `&`, so without
    # them the sign correction is folded into the mask and the result is
    # never negative.
    return (instr & 0xfffff000) - (instr << 1 & 0x100000000)
|
||||
|
||||
def imm_b(instr):
    """B-type immediate as a signed byte offset (always even)."""
    # Fields most-significant first: imm[12], imm[11], imm[10:5], imm[4:1].
    field_order = ((31, 31), (7, 7), (30, 25), (11, 8))
    halfword_offset = concat_extract(instr, field_order)
    return halfword_offset << 1
|
||||
|
||||
def imm_j(instr):
    """J-type immediate as a signed byte offset (always even)."""
    # Fields most-significant first: imm[20], imm[19:12], imm[11], imm[10:1].
    field_order = ((31, 31), (19, 12), (20, 20), (30, 21))
    halfword_offset = concat_extract(instr, field_order)
    return halfword_offset << 1
|
||||
|
||||
|
||||
class FlatMemory:
    """Byte-addressed RAM of `size` bytes, stored as a list of 32-bit words."""

    def __init__(self, size):
        self.size = size
        # Round up so every byte address below `size` has a backing word,
        # even when `size` is not a multiple of 4.
        self.mem = [0] * ((size + 3) >> 2)

    # Reads are unsigned. Writes allow signed or unsigned values and convert
    # implicitly to unsigned. Multi-byte accesses are little-endian.

    def get8(self, addr):
        """Return the byte at `addr`."""
        assert(addr >= 0 and addr < self.size)
        return self.mem[addr >> 2] >> (addr & 0x3) * 8 & 0xff

    def put8(self, addr, data):
        """Store the low 8 bits of `data` at `addr`."""
        assert(addr >= 0 and addr < self.size)
        assert(data >= -1 << 7 and data < 1 << 8)
        shift = 8 * (addr & 0x3)
        self.mem[addr >> 2] &= ~(0xff << shift)
        self.mem[addr >> 2] |= (data & 0xff) << shift

    def get16(self, addr):
        """Return the halfword at `addr`.

        Bit 0 of `addr` is ignored, so an odd address reads the aligned
        halfword that contains it.
        """
        # Bounds-checked like the other accessors, so a bad address fails
        # here rather than with an IndexError or a negative-index wraparound.
        assert(addr >= 0 and addr + 1 < self.size)
        return self.mem[addr >> 2] >> (addr & 0x2) * 8 & 0xffff

    def put16(self, addr, data):
        """Store the low 16 bits of `data` at `addr`, one byte at a time."""
        assert(data >= -1 << 15 and data < 1 << 16)
        for i in range(2):
            self.put8(addr + i, data >> 8 * i & 0xff)

    def get32(self, addr):
        """Return the word containing `addr` (the low two address bits are ignored)."""
        assert(addr >= 0 and addr + 3 < self.size)
        return self.mem[addr >> 2]

    def put32(self, addr, data):
        """Store the low 32 bits of `data` in the word containing `addr`."""
        assert(data >= -1 << 31 and data < 1 << 32)
        assert(addr >= 0 and addr + 3 < self.size)
        self.mem[addr >> 2] = data & 0xffff_ffff

    def loadbin(self, data, offs):
        """Copy `data` into memory starting at byte offset `offs`.

        `data` is a bytes-like object or a file object opened in binary
        mode, which is read to its end.
        """
        if not isinstance(data, (bytes, bytearray, memoryview)):
            # must be fh; objects without a `mode` attribute are assumed binary
            assert("b" in getattr(data, "mode", "b"))
            data = data.read()
        # `<=`: an image that ends exactly at the last byte still fits.
        assert(offs >= 0 and offs + len(data) <= self.size)
        for i, b in enumerate(data):
            self.put8(offs + i, b)
|
||||
|
||||
class TBExit(Exception):
    """Raised when the program writes to the testbench exit register; the
    written value is passed as the exception argument."""
|
||||
|
||||
class MemWithTBIO(FlatMemory):
    """FlatMemory plus the testbench IO registers at TB_IO_BASE.

    Only 32-bit writes reach the IO registers; every other access is handled
    by FlatMemory.
    """

    TB_IO_BASE       = 0x80000000
    TB_IO_PRINT_CHAR = TB_IO_BASE + 0x0
    TB_IO_PRINT_INT  = TB_IO_BASE + 0x4
    TB_IO_EXIT       = TB_IO_BASE + 0x8

    def __init__(self, size, io_log_fmt="IO: {}\n"):
        """`io_log_fmt` is the str.format template wrapped around each
        character or integer the program prints."""
        super().__init__(size)
        self.io_log_fmt = io_log_fmt

    def put32(self, addr, data):
        """Write a word to RAM, or trigger the IO action mapped at `addr`.

        Raises TBExit(data) on a write to the exit register.
        """
        if addr < self.TB_IO_BASE:
            super().put32(addr, data)
        elif addr == self.TB_IO_PRINT_CHAR:
            # Only the low byte is a character. chr() on the raw value raises
            # ValueError for negative data (which put32 accepts) and for
            # anything above 0x10ffff.
            sys.stdout.write(self.io_log_fmt.format(chr(data & 0xff)))
        elif addr == self.TB_IO_PRINT_INT:
            # Mask to 32 bits so negative values print as two's complement
            # instead of with a leading minus sign.
            sys.stdout.write(self.io_log_fmt.format(f"{data & 0xffff_ffff:08x}"))
        elif addr == self.TB_IO_EXIT:
            raise TBExit(data)
        else:
            print(f"Unknown IO address {addr:08x}")
|
||||
|
||||
class RVCSR:
    """Minimal CSR file: a free-running counter and a scratch register.

    mcycle, mtime and minstret all read back the same counter; only mcycle
    and mscratch are writable.
    """

    # Write operations accepted by write()
    WRITE       = 0
    WRITE_SET   = 1
    WRITE_CLEAR = 2

    # Implemented CSR addresses
    MSCRATCH    = 0x340
    MCYCLE      = 0xb00
    MTIME       = 0xb01
    MINSTRET    = 0xb02

    def __init__(self):
        self.mcycle = 0
        self.mscratch = 0

    def step(self):
        """Advance the counter by one."""
        self.mcycle += 1

    def read(self, addr, side_effect=True):
        """Return the value of CSR `addr`, or None if it is not implemented.

        `side_effect` is accepted but not consulted by this implementation.
        """
        # Close your eyes
        if addr in (RVCSR.MCYCLE, RVCSR.MTIME, RVCSR.MINSTRET):
            return self.mcycle
        elif addr == RVCSR.MSCRATCH:
            return self.mscratch
        else:
            return None

    def write(self, addr, data, op=0):
        """Update CSR `addr`.

        For WRITE_SET / WRITE_CLEAR, `data` is a bit mask that is ORed into /
        cleared from the current value. Writes to anything other than mcycle
        and mscratch are ignored.
        """
        if op in (RVCSR.WRITE_CLEAR, RVCSR.WRITE_SET):
            current = self.read(addr, side_effect=False)
            if current is None:
                # Unimplemented CSR: there is nothing to modify, and a plain
                # write to it is ignored as well. Combining None with the
                # mask would raise TypeError.
                return
            if op == RVCSR.WRITE_CLEAR:
                data = current & ~data
            else:
                data = current | data
        if addr == RVCSR.MCYCLE:
            self.mcycle = data
        elif addr == RVCSR.MSCRATCH:
            self.mscratch = data
|
||||
|
||||
class RVCore:
    """Instruction-level model of a 32-bit RISC-V hart.

    Holds 32 integer registers, a program counter, a memory object and an
    RVCSR instance. Register values are kept as sign-extended Python ints
    (see the writeback at the end of step()), so unsigned operations mask
    with XLEN_MASK before use.
    """

    def __init__(self, mem, reset_vector=0xc0):
        """Create a core attached to `mem`, with the PC at `reset_vector`."""
        self.regs = [0] * 32
        self.mem = mem
        self.pc = reset_vector
        self.csr = RVCSR()

    def step(self, instr=None, log=True):
        """Execute one instruction, then advance the PC and the CSR counter.

        instr: 32-bit instruction word to execute. When None, the word is
            fetched from self.mem.mem at the current PC.
        log: when true, print one trace line (address, encoding,
            disassembly, register writeback and PC writeback).

        Unrecognised encodings are reported on stdout and otherwise behave
        as no-ops. Exceptions raised by the memory object during a store
        propagate to the caller before the PC or counter is updated.
        """
        # Remember the address of this instruction: self.pc is overwritten
        # before the invalid-instruction report is printed.
        instr_pc = self.pc
        if instr is None:
            instr = self.mem.mem[instr_pc >> 2]
        regnum_rs1 = instr >> 15 & 0x1f
        regnum_rs2 = instr >> 20 & 0x1f
        regnum_rd = instr >> 7 & 0x1f
        rs1 = self.regs[regnum_rs1]
        rs2 = self.regs[regnum_rs2]

        rd_wdata = None
        pc_wdata = None
        log_disasm = None
        instr_invalid = False

        opc = instr >> 2 & 0x1f
        funct3 = instr >> 12 & 0x7
        funct7 = instr >> 25 & 0x7f
        OPC_LOAD = 0b00_000
        OPC_MISC_MEM = 0b00_011
        OPC_OP_IMM = 0b00_100
        OPC_AUIPC = 0b00_101
        OPC_STORE = 0b01_000
        OPC_OP = 0b01_100
        OPC_LUI = 0b01_101
        OPC_BRANCH = 0b11_000
        OPC_JALR = 0b11_001
        OPC_JAL = 0b11_011
        OPC_SYSTEM = 0b11_100

        if opc == OPC_OP:
            if log: log_reg_str = f" x{regnum_rd}, x{regnum_rs1}, x{regnum_rs2}"
            if funct7 == 0b00_00000:
                if funct3 == 0b000:
                    if log: log_disasm = "add" + log_reg_str
                    rd_wdata = rs1 + rs2
                elif funct3 == 0b001:
                    if log: log_disasm = "sll" + log_reg_str
                    rd_wdata = rs1 << (rs2 & 0x1f)
                elif funct3 == 0b010:
                    if log: log_disasm = "slt" + log_reg_str
                    rd_wdata = rs1 < rs2
                elif funct3 == 0b011:
                    if log: log_disasm = "sltu" + log_reg_str
                    rd_wdata = (rs1 & XLEN_MASK) < (rs2 & XLEN_MASK)
                elif funct3 == 0b100:
                    if log: log_disasm = "xor" + log_reg_str
                    rd_wdata = rs1 ^ rs2
                elif funct3 == 0b101:
                    if log: log_disasm = "srl" + log_reg_str
                    rd_wdata = (rs1 & XLEN_MASK) >> (rs2 & 0x1f)
                elif funct3 == 0b110:
                    if log: log_disasm = "or" + log_reg_str
                    rd_wdata = rs1 | rs2
                elif funct3 == 0b111:
                    if log: log_disasm = "and" + log_reg_str
                    rd_wdata = rs1 & rs2
                else:
                    instr_invalid = True
            elif funct7 == 0b01_00000:
                if funct3 == 0b000:
                    if log: log_disasm = "sub" + log_reg_str
                    rd_wdata = rs1 - rs2
                elif funct3 == 0b101:
                    if log: log_disasm = "sra" + log_reg_str
                    # Registers are held sign-extended, so Python's
                    # arithmetic >> gives the arithmetic shift directly.
                    rd_wdata = rs1 >> (rs2 & 0x1f)
                else:
                    instr_invalid = True
            elif funct7 == 0b00_00001:
                if funct3 < 0b100:
                    if log:
                        mul_instr_name = {0b000: "mul", 0b001: "mulh", 0b010: "mulhsu", 0b011: "mulhu"}[funct3]
                        log_disasm = f"{mul_instr_name} x{regnum_rd}, x{regnum_rs1}, x{regnum_rs2}"
                    # Operand signedness: mulhu treats both as unsigned,
                    # mulhsu treats only the second operand as unsigned.
                    mul_op_a = rs1 & XLEN_MASK if funct3 == 0b011 else rs1
                    mul_op_b = rs2 & XLEN_MASK if funct3 in (0b010, 0b011) else rs2
                    mul_result = mul_op_a * mul_op_b
                    if funct3 != 0b000:
                        mul_result >>= 32
                    rd_wdata = sext(mul_result, XLEN - 1)
                else:
                    if log:
                        div_instr_name = {0b100: "div", 0b101: "divu", 0b110: "rem", 0b111: "remu"}[funct3]
                        log_disasm = f"{div_instr_name} x{regnum_rd}, x{regnum_rs1}, x{regnum_rs2}"
                    # Division by zero yields all-ones (quotient) or the
                    # dividend (remainder) instead of trapping.
                    if funct3 == 0b100:
                        rd_wdata = -1 if rs2 == 0 else int(rs1 / rs2)
                    elif funct3 == 0b101:
                        rd_wdata = -1 if rs2 == 0 else sext((rs1 & XLEN_MASK) // (rs2 & XLEN_MASK), XLEN - 1)
                    elif funct3 == 0b110:
                        rd_wdata = rs1 if rs2 == 0 else rs1 - int(rs1 / rs2) * rs2
                    elif funct3 == 0b111:
                        rd_wdata = rs1 if rs2 == 0 else sext((rs1 & XLEN_MASK) % (rs2 & XLEN_MASK), XLEN - 1)
                    else:
                        instr_invalid = True
            else:
                instr_invalid = True

        elif opc == OPC_OP_IMM:
            imm = (instr >> 20) - (instr >> 19 & 0x1000) # imm_i(instr)
            if funct3 == 0b000:
                if log: log_disasm = f"addi x{regnum_rd}, x{regnum_rs1}, {imm}"
                rd_wdata = rs1 + imm
            elif funct3 == 0b010:
                if log: log_disasm = f"slti x{regnum_rd}, x{regnum_rs1}, {imm}"
                rd_wdata = 1 * (rs1 < imm)
            elif funct3 == 0b011:
                if log: log_disasm = f"sltiu x{regnum_rd}, x{regnum_rs1}, {imm & XLEN_MASK}"
                rd_wdata = 1 * ((rs1 & XLEN_MASK) < (imm & XLEN_MASK))
            elif funct3 == 0b100:
                if log: log_disasm = f"xori x{regnum_rd}, x{regnum_rs1}, 0x{imm & XLEN_MASK:x}"
                rd_wdata = rs1 ^ imm
            elif funct3 == 0b110:
                if log: log_disasm = f"ori x{regnum_rd}, x{regnum_rs1}, 0x{imm & XLEN_MASK:x}"
                rd_wdata = rs1 | imm
            elif funct3 == 0b111:
                if log: log_disasm = f"andi x{regnum_rd}, x{regnum_rs1}, 0x{imm & XLEN_MASK:x}"
                rd_wdata = rs1 & imm
            elif funct3 == 0b001 or funct3 == 0b101:
                # shamt is regnum_rs2
                if funct7 == 0b00_00000 and funct3 == 0b001:
                    if log: log_disasm = f"slli x{regnum_rd}, x{regnum_rs1}, {regnum_rs2}"
                    rd_wdata = rs1 << regnum_rs2
                elif funct7 == 0b00_00000 and funct3 == 0b101:
                    if log: log_disasm = f"srli x{regnum_rd}, x{regnum_rs1}, {regnum_rs2}"
                    rd_wdata = (rs1 & XLEN_MASK) >> regnum_rs2
                elif funct7 == 0b01_00000 and funct3 == 0b101:
                    if log: log_disasm = f"srai x{regnum_rd}, x{regnum_rs1}, {regnum_rs2}"
                    rd_wdata = rs1 >> regnum_rs2
                else:
                    instr_invalid = True
            else:
                instr_invalid = True

        elif opc == OPC_JAL:
            rd_wdata = self.pc + 4
            # pc_wdata = self.pc + imm_j(instr)
            pc_wdata = self.pc + (instr >> 20 & 0x7fe) + (instr >> 9 & 0x800) + (instr & 0xff000) - (instr >> 11 & 0x100000)
            if log: log_disasm = f"jal x{regnum_rd}, {pc_wdata & XLEN_MASK:08x}"

        elif opc == OPC_JALR:
            imm = imm_i(instr)
            if log: log_disasm = f"jalr x{regnum_rd}, x{regnum_rs1}, {imm}"
            rd_wdata = self.pc + 4
            # JALR clears LSB always
            pc_wdata = (rs1 + imm) & -2

        elif opc == OPC_BRANCH:
            # target = self.pc + imm_b(instr)
            target = self.pc + (instr >> 7 & 0x1e) + (instr >> 20 & 0x7e0) + (instr << 4 & 0x800) - (instr >> 19 & 0x1000)
            taken = False
            # Mask for display so a wrapped target is not printed with a
            # leading minus sign.
            if log: log_branch_str = f" x{regnum_rs1}, x{regnum_rs2}, {target & XLEN_MASK:08x}"
            if funct3 == 0b000:
                if log: log_disasm = "beq" + log_branch_str
                taken = rs1 == rs2
            elif funct3 == 0b001:
                if log: log_disasm = "bne" + log_branch_str
                taken = rs1 != rs2
            elif funct3 == 0b100:
                if log: log_disasm = "blt" + log_branch_str
                taken = rs1 < rs2
            elif funct3 == 0b101:
                if log: log_disasm = "bge" + log_branch_str
                taken = rs1 >= rs2
            elif funct3 == 0b110:
                if log: log_disasm = "bltu" + log_branch_str
                taken = (rs1 & XLEN_MASK) < (rs2 & XLEN_MASK)
            elif funct3 == 0b111:
                if log: log_disasm = "bgeu" + log_branch_str
                taken = (rs1 & XLEN_MASK) >= (rs2 & XLEN_MASK)
            else:
                instr_invalid = True
            if taken:
                pc_wdata = target

        elif opc == OPC_LOAD:
            imm = imm_i(instr)
            if log: log_load_str = f" x{regnum_rd}, {imm}(x{regnum_rs1})"
            load_addr = imm + rs1 & XLEN_MASK
            if funct3 == 0b000:
                if log: log_disasm = "lb" + log_load_str
                rd_wdata = self.mem.get8(load_addr)
                # Sign-extend: subtract 2**8 when bit 7 is set.
                rd_wdata -= rd_wdata << 1 & 0x100
            elif funct3 == 0b001:
                if log: log_disasm = "lh" + log_load_str
                rd_wdata = self.mem.get16(load_addr)
                rd_wdata -= rd_wdata << 1 & 0x10000
            elif funct3 == 0b010:
                if log: log_disasm = "lw" + log_load_str
                rd_wdata = self.mem.get32(load_addr)
                rd_wdata -= rd_wdata << 1 & 0x100000000
            elif funct3 == 0b100:
                if log: log_disasm = "lbu" + log_load_str
                rd_wdata = self.mem.get8(load_addr)
            elif funct3 == 0b101:
                if log: log_disasm = "lhu" + log_load_str
                rd_wdata = self.mem.get16(load_addr)
            else:
                instr_invalid = True

        elif opc == OPC_STORE:
            imm = imm_s(instr)
            if log: log_store_str = f" x{regnum_rs2}, {imm}(x{regnum_rs1})"
            store_addr = imm + rs1 & XLEN_MASK
            if funct3 == 0b000:
                if log: log_disasm = "sb" + log_store_str
                self.mem.put8(store_addr, rs2 & (1 << 8) - 1)
            elif funct3 == 0b001:
                if log: log_disasm = "sh" + log_store_str
                self.mem.put16(store_addr, rs2 & (1 << 16) - 1)
            elif funct3 == 0b010:
                if log: log_disasm = "sw" + log_store_str
                self.mem.put32(store_addr, rs2)
            else:
                instr_invalid = True

        elif opc == OPC_LUI:
            imm = imm_u(instr)
            if log: log_disasm = f"lui x{regnum_rd}, 0x{(imm & XLEN_MASK) >> 12:05x}"
            rd_wdata = imm

        elif opc == OPC_AUIPC:
            imm = imm_u(instr)
            if log: log_disasm = f"auipc x{regnum_rd}, 0x{(imm & XLEN_MASK) >> 12:05x}"
            rd_wdata = self.pc + imm

        elif opc == OPC_SYSTEM:
            csr_addr = extract(instr, 31, 20)
            if funct3 == 0b000 and funct7 == 0b00_00000:
                # ecall/ebreak are distinguished by bit 20, which lands in
                # the rs2 field of the decode above.
                if regnum_rs2 == 0:
                    if log: log_disasm = "*UNHANDLED* ecall"
                elif regnum_rs2 == 1:
                    if log: log_disasm = "*UNHANDLED* ebreak"
                else:
                    instr_invalid = True
            elif funct3 in (0b001, 0b010, 0b011):
                # Register forms: the source operand is rs1 (bits 19:15).
                # Bits 24:20 belong to the CSR address, not to a register.
                if log:
                    instr_name = {0b001: "csrrw", 0b010: "csrrs", 0b011: "csrrc"}[funct3]
                    log_disasm = f"{instr_name} x{regnum_rd}, 0x{csr_addr:x}, x{regnum_rs1}"
                csr_write_op = funct3 - 0b001
                if csr_write_op != RVCSR.WRITE or regnum_rd != 0:
                    rd_wdata = self.csr.read(csr_addr)
                if csr_write_op == RVCSR.WRITE or regnum_rs1 != 0:
                    self.csr.write(csr_addr, rs1, op=csr_write_op)
            elif funct3 in (0b101, 0b110, 0b111):
                # Immediate forms: bits 19:15 hold a 5-bit zero-extended
                # immediate, which is the value written.
                csr_uimm = regnum_rs1
                if log:
                    instr_name = {0b101: "csrrwi", 0b110: "csrrsi", 0b111: "csrrci"}[funct3]
                    log_disasm = f"{instr_name} x{regnum_rd}, 0x{csr_addr:x}, 0x{csr_uimm:x}"
                csr_write_op = funct3 - 0b101
                if csr_write_op != RVCSR.WRITE or regnum_rd != 0:
                    rd_wdata = self.csr.read(csr_addr)
                if csr_write_op == RVCSR.WRITE or csr_uimm != 0:
                    self.csr.write(csr_addr, csr_uimm, op=csr_write_op)
            else:
                instr_invalid = True

        elif opc == OPC_MISC_MEM:
            if instr == 0b0000_0000_0000_00000_001_00000_0001111:
                if log: log_disasm = "fence.i"
            elif (instr & 0b1111_0000_0000_11111_111_11111_1111111) == 0b0000_0000_0000_00000_000_00000_0001111:
                if log: log_disasm = f"fence {extract(instr, 27, 24):04b}, {extract(instr, 23, 20):04b}"
            else:
                instr_invalid = True

        else:
            # Major opcode not modelled here.
            instr_invalid = True

        if log:
            log_str = f"{instr_pc:08x}: ({instr:08x}) {log_disasm if log_disasm is not None else '':<25}"
            if rd_wdata is not None and regnum_rd != 0:
                log_str += f" : x{regnum_rd:<2} <- {rd_wdata & XLEN_MASK:08x}"
            else:
                log_str += " : " + 15 * " "
            if pc_wdata is not None:
                log_str += f" : pc <- {pc_wdata & XLEN_MASK:08x}"
            else:
                log_str += " :"
            print(log_str)

        if rd_wdata is not None and regnum_rd != 0:
            # Wrap to 32 bits and store sign-extended.
            self.regs[regnum_rd] = (rd_wdata & 0xffffffff) - (rd_wdata << 1 & 0x100000000)

        # Keep the PC a non-negative 32-bit value so that it can never be
        # used as a negative (wrap-around) list index on the next fetch.
        if pc_wdata is None:
            self.pc = (instr_pc + 4) & XLEN_MASK
        else:
            self.pc = pc_wdata & XLEN_MASK

        if instr_invalid:
            print(f"Invalid instruction at {instr_pc:08x}: {instr:08x}")

        self.csr.step()
|
||||
|
||||
|
||||
|
||||
def anyint(x):
    """Parse an integer literal, inferring the radix from its prefix.

    Accepts the same spellings as Python source: plain decimal, or a 0x /
    0o / 0b prefix. Intended for use as an argparse `type=` callable.
    """
    value = int(x, base=0)
    return value
|
||||
|
||||
def main(argv):
    """Command-line entry point: load a binary, run it, optionally dump memory.

    argv: list of argument strings, without the program name.

    The binary is loaded at address 0 and the core is stepped for at most
    --cycles instructions, or until the program raises TBExit through the
    testbench IO region. Each --dump START END pair prints the bytes in
    [START, END) after the run.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("binfile")
    parser.add_argument("--memsize", default = 1 << 24, type = anyint)
    parser.add_argument("--cycles", default = int(1e4), type = anyint)
    parser.add_argument("--dump", nargs=2, action="append", type=anyint)
    parser.add_argument("--quiet", "-q", action="store_true")
    args = parser.parse_args(argv)

    # In quiet mode IO output is emitted raw, without the "IO: " decoration.
    if args.quiet:
        mem = MemWithTBIO(args.memsize, io_log_fmt="{}")
    else:
        mem = MemWithTBIO(args.memsize)
    # Close the image file as soon as it has been copied into memory.
    with open(args.binfile, "rb") as binfile:
        mem.loadbin(binfile, 0)

    rv = RVCore(mem)
    # Counted from 1 and initialised up front, so the summary below is
    # well-defined even when --cycles is 0 and the loop body never runs.
    cycles_run = 0
    try:
        for cycles_run in range(1, args.cycles + 1):
            rv.step(log=not args.quiet)
    except TBExit as e:
        print(f"Processor halted simulation with exit code {e}")
    except BrokenPipeError as e:
        # Output was piped into something like `head`; exit quietly.
        sys.exit(0)
    print(f"Ran for {cycles_run} cycles")

    for start, end in args.dump or []:
        print(f"Dumping memory from {start:08x} to {end:08x}:")
        for i, addr in enumerate(range(start, end)):
            # 16 bytes per output line
            sep = "\n" if i % 16 == 15 else " "
            sys.stdout.write(f"{mem.get8(addr):02x}{sep}")
        print("")
|
||||
|
||||
# Run as a script: hand everything after the program name to main().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
|
|
@ -0,0 +1,2 @@
|
|||
tb
|
||||
dut.cpp
|
|
@ -0,0 +1,33 @@
|
|||
# Top-level module handed to Yosys, and preprocessor defines for tb.cpp.
TOP      := hazard5_cpu_2port
CDEFINES := DUAL_PORT

# Core parameters, applied to $(TOP) with chparam in the Yosys script below.
CPU_RESET_VECTOR := 32'hc0
EXTENSION_C      := 1
EXTENSION_M      := 1
MULDIV_UNROLL    := 1
MUL_FAST         := 0
REDUCED_BYPASS   := 0

.PHONY: clean tb all

all: tb

# Yosys script: read the RTL, override parameters, flatten, emit C++ model.
SYNTH_CMD += read_verilog -I ../../hdl $(shell listfiles ../../hdl/hazard5.f);
SYNTH_CMD += chparam -set EXTENSION_C $(EXTENSION_C) $(TOP);
SYNTH_CMD += chparam -set EXTENSION_M $(EXTENSION_M) $(TOP);
SYNTH_CMD += chparam -set CSR_COUNTER 1 $(TOP);
SYNTH_CMD += chparam -set RESET_VECTOR $(CPU_RESET_VECTOR) $(TOP);
SYNTH_CMD += chparam -set REDUCED_BYPASS $(REDUCED_BYPASS) $(TOP);
SYNTH_CMD += chparam -set MULDIV_UNROLL $(MULDIV_UNROLL) $(TOP);
SYNTH_CMD += chparam -set MUL_FAST $(MUL_FAST) $(TOP);
SYNTH_CMD += prep -flatten -top $(TOP); async2sync;
SYNTH_CMD += write_cxxrtl dut.cpp

# Redirection order matters: stderr is pointed at the original stdout first,
# then stdout is sent to the log, so only stdout ends up in cxxrtl.log.
# NOTE(review): this rule has no prerequisites, so dut.cpp is not regenerated
# after RTL changes unless `make clean` is run first.
dut.cpp:
	yosys -p "$(SYNTH_CMD)" 2>&1 > cxxrtl.log

clean::
	rm -f dut.cpp cxxrtl.log tb

# tb.cpp pulls in dut.cpp with #include, so only tb.cpp is compiled.
tb: dut.cpp
	clang++ -O3 -std=c++14 $(addprefix -D,$(CDEFINES)) -I $(shell yosys-config --datdir)/include tb.cpp -o tb
|
|
@ -0,0 +1,217 @@
|
|||
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
// iostream formatting is painful; printf is used for formatted output instead
#include <stdio.h>

// Device-under-test model generated by CXXRTL:
#include "dut.cpp"
#include <backends/cxxrtl/cxxrtl_vcd.h>
|
||||
|
||||
// Size in bytes of the flat memory backing the simulated bus (16 MiB),
// and the memory itself. Addresses start at 0.
static const unsigned int MEM_SIZE = 1u << 24;
uint8_t mem[MEM_SIZE];

// Base address of the testbench IO registers, and each register's byte
// offset from that base.
static const unsigned int IO_BASE = 0x80000000;
enum {
	IO_PRINT_CHAR = 0x0, // write: print the data as a character
	IO_PRINT_U32  = 0x4, // write: print the data as 8 hex digits
	IO_EXIT       = 0x8  // write: stop the simulation, data is the exit code
};

// Usage text printed by exit_help().
const char *help_str =
"Usage: tb binfile [vcdfile] [--dump start end] [--cycles n]\n"
" binfile : Binary to load into start of memory\n"
" vcdfile : Path to dump waveforms to\n"
" --dump start end : Print out memory contents between start and end (exclusive)\n"
" after execution finishes. Can be passed multiple times.\n"
" --cycles n : Maximum number of cycles to run before exiting.\n"
;
|
||||
|
||||
// Write an optional error message followed by the usage text to stderr,
// then terminate the process with exit status -1. Does not return.
void exit_help(std::string errtext = "") {
	std::cerr << errtext;
	std::cerr << help_str;
	exit(-1);
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
if (argc < 2)
|
||||
exit_help();
|
||||
|
||||
bool dump_waves = false;
|
||||
std::string waves_path;
|
||||
std::vector<std::pair<uint32_t, uint32_t>> dump_ranges;
|
||||
int64_t max_cycles = 100000;
|
||||
|
||||
for (int i = 2; i < argc; ++i) {
|
||||
std::string s(argv[i]);
|
||||
if (i == 2 && s.rfind("--", 0) != 0) {
|
||||
// Optional positional argument: vcdfile
|
||||
dump_waves = true;
|
||||
waves_path = s;
|
||||
}
|
||||
else if (s == "--dump") {
|
||||
if (argc - i < 3)
|
||||
exit_help("Option --dump requires 2 arguments\n");
|
||||
dump_ranges.push_back(std::pair<uint32_t, uint32_t>(
|
||||
std::stoul(argv[i + 1], 0, 0),
|
||||
std::stoul(argv[i + 2], 0, 0)
|
||||
));;
|
||||
i += 2;
|
||||
}
|
||||
else if (s == "--cycles") {
|
||||
if (argc - i < 2)
|
||||
exit_help("Option --cycles requires an argument\n");
|
||||
max_cycles = std::stol(argv[i + 1], 0, 0);
|
||||
i += 1;
|
||||
}
|
||||
else {
|
||||
std::cerr << "Unrecognised argument " << s << "\n";
|
||||
exit_help("");
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DUAL_PORT
|
||||
cxxrtl_design::p_hazard5__cpu__2port top;
|
||||
#else
|
||||
cxxrtl_design::p_hazard5__cpu__1port top;
|
||||
#endif
|
||||
|
||||
std::fill(std::begin(mem), std::end(mem), 0);
|
||||
|
||||
std::ifstream fd(argv[1], std::ios::binary | std::ios::ate);
|
||||
std::streamsize bin_size = fd.tellg();
|
||||
if (bin_size > MEM_SIZE) {
|
||||
std::cerr << "Binary file (" << bin_size << " bytes) is larger than memory (" << MEM_SIZE << " bytes)\n";
|
||||
return -1;
|
||||
}
|
||||
fd.seekg(0, std::ios::beg);
|
||||
fd.read((char*)mem, bin_size);
|
||||
|
||||
std::ofstream waves_fd;
|
||||
cxxrtl::vcd_writer vcd;
|
||||
if (dump_waves) {
|
||||
waves_fd.open(waves_path);
|
||||
cxxrtl::debug_items all_debug_items;
|
||||
top.debug_info(all_debug_items);
|
||||
vcd.timescale(1, "us");
|
||||
vcd.add(all_debug_items);
|
||||
}
|
||||
|
||||
bool bus_trans = false;
|
||||
bool bus_write = false;
|
||||
#ifdef DUAL_PORT
|
||||
bool bus_trans_i = false;
|
||||
uint32_t bus_addr_i = 0;
|
||||
#endif
|
||||
uint32_t bus_addr = 0;
|
||||
uint8_t bus_size = 0;
|
||||
// Never generate bus stalls
|
||||
#ifdef DUAL_PORT
|
||||
top.p_i__hready.set<bool>(true);
|
||||
top.p_d__hready.set<bool>(true);
|
||||
#else
|
||||
top.p_ahblm__hready.set<bool>(true);
|
||||
#endif
|
||||
|
||||
// Reset + initial clock pulse
|
||||
top.step();
|
||||
top.p_clk.set<bool>(true);
|
||||
top.step();
|
||||
top.p_clk.set<bool>(false);
|
||||
top.p_rst__n.set<bool>(true);
|
||||
top.step();
|
||||
|
||||
for (int64_t cycle = 0; cycle < max_cycles; ++cycle) {
|
||||
top.p_clk.set<bool>(false);
|
||||
top.step();
|
||||
if (dump_waves)
|
||||
vcd.sample(cycle * 2);
|
||||
top.p_clk.set<bool>(true);
|
||||
top.step();
|
||||
// Handle current data phase, then move current address phase to data phase
|
||||
uint32_t rdata = 0;
|
||||
if (bus_trans && bus_write) {
|
||||
#ifdef DUAL_PORT
|
||||
uint32_t wdata = top.p_d__hwdata.get<uint32_t>();
|
||||
#else
|
||||
uint32_t wdata = top.p_ahblm__hwdata.get<uint32_t>();
|
||||
#endif
|
||||
if (bus_addr <= MEM_SIZE) {
|
||||
unsigned int n_bytes = 1u << bus_size;
|
||||
// Note we are relying on hazard5's byte lane replication
|
||||
for (unsigned int i = 0; i < n_bytes; ++i) {
|
||||
mem[bus_addr + i] = wdata >> (8 * i) & 0xffu;
|
||||
}
|
||||
}
|
||||
else if (bus_addr == IO_BASE + IO_PRINT_CHAR) {
|
||||
putchar(wdata);
|
||||
}
|
||||
else if (bus_addr == IO_BASE + IO_PRINT_U32) {
|
||||
printf("%08x\n", wdata);
|
||||
}
|
||||
else if (bus_addr == IO_BASE + IO_EXIT) {
|
||||
printf("CPU requested halt. Exit code %d\n", wdata);
|
||||
printf("Ran for %ld cycles\n", cycle + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (bus_trans && !bus_write) {
|
||||
if (bus_addr <= MEM_SIZE) {
|
||||
bus_addr &= ~0x3u;
|
||||
rdata =
|
||||
(uint32_t)mem[bus_addr] |
|
||||
mem[bus_addr + 1] << 8 |
|
||||
mem[bus_addr + 2] << 16 |
|
||||
mem[bus_addr + 3] << 24;
|
||||
}
|
||||
}
|
||||
#ifdef DUAL_PORT
|
||||
top.p_d__hrdata.set<uint32_t>(rdata);
|
||||
if (bus_trans_i) {
|
||||
bus_addr_i &= ~0x3u;
|
||||
top.p_i__hrdata.set<uint32_t>(
|
||||
(uint32_t)mem[bus_addr_i] |
|
||||
mem[bus_addr_i + 1] << 8 |
|
||||
mem[bus_addr_i + 2] << 16 |
|
||||
mem[bus_addr_i + 3] << 24
|
||||
);
|
||||
}
|
||||
#else
|
||||
top.p_ahblm__hrdata.set<uint32_t>(rdata);
|
||||
#endif
|
||||
|
||||
#ifdef DUAL_PORT
|
||||
bus_trans = top.p_d__htrans.get<uint8_t>() >> 1;
|
||||
bus_write = top.p_d__hwrite.get<bool>();
|
||||
bus_size = top.p_d__hsize.get<uint8_t>();
|
||||
bus_addr = top.p_d__haddr.get<uint32_t>();
|
||||
bus_trans_i = top.p_i__htrans.get<uint8_t>() >> 1;
|
||||
bus_addr_i = top.p_i__haddr.get<uint32_t>();
|
||||
#else
|
||||
bus_trans = top.p_ahblm__htrans.get<uint8_t>() >> 1;
|
||||
bus_write = top.p_ahblm__hwrite.get<bool>();
|
||||
bus_size = top.p_ahblm__hsize.get<uint8_t>();
|
||||
bus_addr = top.p_ahblm__haddr.get<uint32_t>();
|
||||
#endif
|
||||
|
||||
if (dump_waves) {
|
||||
// The extra step() is just here to get the bus responses to line up nicely
|
||||
// in the VCD (hopefully is a quick update)
|
||||
top.step();
|
||||
vcd.sample(cycle * 2 + 1);
|
||||
waves_fd << vcd.buffer;
|
||||
vcd.buffer.clear();
|
||||
}
|
||||
}
|
||||
|
||||
for (auto r : dump_ranges) {
|
||||
printf("Dumping memory from %08x to %08x:\n", r.first, r.second);
|
||||
for (int i = 0; i < r.second - r.first; ++i)
|
||||
printf("%02x%c", mem[r.first + i], i % 16 == 15 ? '\n' : ' ');
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue