Import from hazard5 9743a1b

This commit is contained in:
Luke Wren 2021-05-21 02:34:16 +01:00
commit 6dad4e20bb
72 changed files with 11152 additions and 0 deletions

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "test/riscv-compliance/riscv-compliance"]
path = test/riscv-compliance/riscv-compliance
url = https://github.com/riscv/riscv-compliance.git

13
License Normal file
View File

@ -0,0 +1,13 @@
DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE
Version 3, April 2008
Copyright (C) 2020 Luke Wren
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document and accompanying software, and
changing either is allowed.
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.
1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK.

4
hdl/Makefile Normal file
View File

@ -0,0 +1,4 @@
# DOTF: file list handed to the included rules (hazard5.f).
DOTF=hazard5.f
# TOP: name of the top-level module for this run (the ALU).
TOP=hazard5_alu
# All rules come from formal.mk. SCRIPTS is not defined in this Makefile, so it
# presumably has to be supplied by the environment or the make command line.
include $(SCRIPTS)/formal.mk

117
hdl/arith/hazard5_alu.v Normal file
View File

@ -0,0 +1,117 @@
/**********************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2018 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*********************************************************************/
// Combinational ALU: add/subtract, less-than comparisons, shifts and bitwise
// operations, selected by aluop (ALUOP_* encodings come from hazard5_ops.vh).
//
// Ports:
//   aluop      - operation select
//   op_a, op_b - operands
//   result     - output of the selected operation
//   result_add - raw adder output, exposed separately for load/stores
//   cmp        - for ALUOP_SUB: high when op_a != op_b;
//                for every other aluop: the less-than flag
module hazard5_alu #(
	parameter W_DATA = 32
) (
	input  wire [3:0]        aluop,
	input  wire [W_DATA-1:0] op_a,
	input  wire [W_DATA-1:0] op_b,
	output reg  [W_DATA-1:0] result,
	output wire [W_DATA-1:0] result_add, // for load/stores
	output wire              cmp
);

`include "hazard5_ops.vh"

// Number of low op_b bits used as the shift amount. Derived from W_DATA
// instead of being hard-coded, so the shifter covers the whole data width if
// W_DATA is changed. Evaluates to 5 at the default W_DATA = 32.
localparam W_ALU_SHAMT = $clog2(W_DATA);

// Most significant bit of a W_DATA-wide value
function msb;
input [W_DATA-1:0] x;
begin
	msb = x[W_DATA-1];
end
endfunction

// One shared adder. For every aluop other than ALUOP_ADD, op_b is inverted and
// a carry-in of 1 is added, so sum = op_a - op_b.
wire sub = aluop != ALUOP_ADD;
wire [W_DATA-1:0] sum = op_a + (op_b ^ {W_DATA{sub}}) + sub;
wire [W_DATA-1:0] op_xor = op_a ^ op_b;

// Less-than, derived from the subtraction. When the operand MSBs are equal the
// MSB of the difference decides. When they differ, the MSBs alone decide:
// op_b's MSB for ALUOP_LTU, op_a's MSB for any other aluop.
wire lt = msb(op_a) == msb(op_b) ? msb(sum)  :
          aluop == ALUOP_LTU     ? msb(op_b) :
                                   msb(op_a) ;

assign cmp = aluop == ALUOP_SUB ? |op_xor : lt;
assign result_add = sum;

// Shift direction/type are driven from the main mux below
wire [W_DATA-1:0] shift_dout;
reg shift_right_nleft;
reg shift_arith;

hazard5_shift_barrel #(
	.W_DATA(W_DATA),
	.W_SHAMT(W_ALU_SHAMT)
) shifter (
	.din(op_a),
	.shamt(op_b[W_ALU_SHAMT-1:0]),
	.right_nleft(shift_right_nleft),
	.arith(shift_arith),
	.dout(shift_dout)
);

// We can implement all bitwise ops with 1 LUT4/bit total, since each result bit
// uses only two operand bits. Much better than feeding each into main mux tree.
// Only the two low bits of aluop are decoded here; anything that is not AND or
// OR in those bits produces XOR.
reg [W_DATA-1:0] bitwise;

always @ (*) begin: bitwise_ops
	case (aluop[1:0])
	ALUOP_AND[1:0]: bitwise = op_a & op_b;
	ALUOP_OR[1:0]:  bitwise = op_a | op_b;
	default:        bitwise = op_a ^ op_b;
	endcase
end

// Main result mux. The shifter controls default to a logical left shift and
// are overridden by the right-shift operations. Any aluop not listed selects
// the bitwise result.
always @ (*) begin
	shift_right_nleft = 1'b0;
	shift_arith = 1'b0;
	case (aluop)
		ALUOP_ADD: begin result = sum; end
		ALUOP_SUB: begin result = sum; end
		ALUOP_LT:  begin result = {{W_DATA-1{1'b0}}, lt}; end
		ALUOP_LTU: begin result = {{W_DATA-1{1'b0}}, lt}; end
		ALUOP_SRL: begin shift_right_nleft = 1'b1; result = shift_dout; end
		ALUOP_SRA: begin shift_right_nleft = 1'b1; shift_arith = 1'b1; result = shift_dout; end
		ALUOP_SLL: begin result = shift_dout; end
		default:   begin result = bitwise; end
	endcase
end

`ifdef FORMAL
`ifndef RISCV_FORMAL
// Really we're just interested in the shifts and comparisons, as these are
// the nontrivial ones. However, easier to test everything!

// clk has no driver in this module; it is only used to sample the assertions.
wire clk;
always @ (posedge clk) begin
	case(aluop)
	default: begin end
	ALUOP_ADD: assert(result == op_a + op_b);
	ALUOP_SUB: assert(result == op_a - op_b);
	ALUOP_LT:  assert(result == $signed(op_a) < $signed(op_b));
	ALUOP_LTU: assert(result == op_a < op_b);
	ALUOP_AND: assert(result == (op_a & op_b));
	ALUOP_OR:  assert(result == (op_a | op_b));
	ALUOP_XOR: assert(result == (op_a ^ op_b));
	ALUOP_SRL: assert(result == op_a >> op_b[W_ALU_SHAMT-1:0]);
	ALUOP_SRA: assert($signed(result) == $signed(op_a) >>> $signed(op_b[W_ALU_SHAMT-1:0]));
	ALUOP_SLL: assert(result == op_a << op_b[W_ALU_SHAMT-1:0]);
	endcase
end
`endif
`endif

endmodule

View File

@ -0,0 +1,75 @@
/**********************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2021 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*********************************************************************/
// Registered-input multiplier. Operands are captured on a clock edge when
// op_vld is high; result is a combinational function of the captured operands,
// and result_vld is op_vld delayed by one clock.
module hazard5_mul_fast #(
	parameter XLEN = 32
) (
	input  wire            clk,
	input  wire            rst_n,

	input  wire [XLEN-1:0] op_a,
	input  wire [XLEN-1:0] op_b,
	input  wire            op_vld,

	output wire [XLEN-1:0] result,
	output reg             result_vld
);

// Valid flag: the only state in this module with a reset.
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n)
		result_vld <= 1'b0;
	else
		result_vld <= op_vld;
end

// Operand registers (no reset). This pipestage is folded into the front of
// the DSP tiles on UP5k. The intention is to register the bypassed core regs
// at the end of X (since bypass is quite slow), then multiply
// combinatorially in stage M, and mux into the MW result register.
reg [XLEN-1:0] mul_a_q;
reg [XLEN-1:0] mul_b_q;

always @ (posedge clk) begin
	if (op_vld) begin
		mul_a_q <= op_a;
		mul_b_q <= op_b;
	end
end

`ifdef RISCV_FORMAL_ALTOPS

// riscv-formal can use a simpler function, since it's just confirming the
// result is correctly hooked up.
assign result = result_vld ? (mul_a_q + mul_b_q) ^ 32'h5876063e : 32'hdeadbeef;

`else

// This should be inferred as 3 DSP tiles on UP5k:
//
// 1. Register then multiply a[15: 0] and b[15: 0]
// 2. Register then multiply a[31:16] and b[15: 0], then directly add output of 1
// 3. Register then multiply a[15: 0] and b[31:16], then directly add output of 2
//
// So there is quite a long path (1x 16-bit multiply, then 2x 16-bit add). On
// other platforms you may just end up with a pile of gates.
assign result = mul_a_q * mul_b_q;

`endif

endmodule

View File

@ -0,0 +1,294 @@
/**********************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2018 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*********************************************************************/
// Combined multiply/divide/modulo circuit.
// All operations performed at 1 bit per clock; aiming for minimal resource usage.
// There are lots of opportunities for off-by-one errors here. See muldiv_model.py
// for a simple reference model of the mul/div/mod iterations.
//
// When op_kill is high, the current calculation halts immediately. op_vld can be
// asserted on the same cycle, and the new calculation begins without delay, regardless
// of op_rdy. This may be used by the processor on e.g. mispredict or trap.
//
// The actual multiply/divide hardware is unsigned. We handle signedness at
// input/output.
// Ports:
//   op         - operation select (M_OP_* encodings from hazard5_ops.vh);
//                bit 2 of the registered op selects divide/remainder
//   op_vld     - start request; accepted when op_rdy is high, or
//                unconditionally when op_kill is high on the same cycle
//   op_rdy     - high when the iteration counter is zero and no sign
//                adjustment step is pending
//   op_kill    - abandon the operation in progress
//   op_a, op_b - operands
//   result_h   - upper half of the accumulator (mulh* or rem*)
//   result_l   - lower half of the accumulator (mul or div*)
//   result_vld - same signal as op_rdy
//
// UNROLL is the number of iteration layers evaluated per clock.
module hazard5_muldiv_seq #(
	parameter XLEN = 32,
	parameter UNROLL = 1,
	parameter W_CTR = $clog2(XLEN + 1) // do not modify
) (
	input  wire              clk,
	input  wire              rst_n,
	input  wire [2:0]        op,
	input  wire              op_vld,
	output wire              op_rdy,
	input  wire              op_kill,
	input  wire [XLEN-1:0]   op_a,
	input  wire [XLEN-1:0]   op_b,

	output wire [XLEN-1:0]   result_h, // mulh* or rem*
	output wire [XLEN-1:0]   result_l, // mul or div*
	output wire              result_vld
);

`include "hazard5_ops.vh"

// Simulation-only parameter check. The first argument of $fatal is the finish
// number; the message string comes after it.
//synthesis translate_off
generate if (UNROLL & (UNROLL - 1) || ~|UNROLL)
	initial $fatal(1, "%m: UNROLL must be a positive power of 2");
endgenerate
//synthesis translate_on

// ----------------------------------------------------------------------------
// Operation decode, operand sign adjustment

// On the first cycle, op_a and op_b go straight through to the accumulator
// and the divisor/multiplicand register. They are then adjusted in-place
// on the next cycle. This allows the same circuits to be reused for sign
// adjustment before output (and helps input timing).

reg [W_MULOP-1:0] op_r;
reg [2*XLEN-1:0]  accum;
reg [XLEN-1:0]    op_b_r;
reg               op_a_neg_r;
reg               op_b_neg_r;

wire op_a_signed =
	op_r == M_OP_MULH ||
	op_r == M_OP_MULHSU ||
	op_r == M_OP_DIV ||
	op_r == M_OP_REM;

wire op_b_signed =
	op_r == M_OP_MULH ||
	op_r == M_OP_DIV ||
	op_r == M_OP_REM;

// Sign of each operand as currently held in accum / op_b_r. These are only
// meaningful before pre-adjustment; the registered copies (op_*_neg_r) are
// what the post-adjustment logic uses.
wire op_a_neg = op_a_signed && accum[XLEN-1];
wire op_b_neg = op_b_signed && op_b_r[XLEN-1];

wire is_div = op_r[2];

// Controls for modifying sign of all/part of accumulator
wire accum_neg_l;
wire accum_inv_h;
wire accum_incr_h;

// ----------------------------------------------------------------------------
// Arithmetic circuit

// Combinatorials:
reg [2*XLEN-1:0] accum_next;
reg [2*XLEN-1:0] addend;
reg [2*XLEN-1:0] shift_tmp;
reg [2*XLEN-1:0] addsub_tmp;
reg              neg_l_borrow;

always @ (*) begin: alu
	integer i;
	// Multiply/divide iteration layers
	accum_next = accum;
	addend = {2*XLEN{1'b0}};
	addsub_tmp = {2*XLEN{1'b0}};
	neg_l_borrow = 1'b0;
	for (i = 0; i < UNROLL; i = i + 1) begin
		// op_b_r placed XLEN-1 bits up. For division the top bit is set
		// whenever op_b_r is nonzero (op_b_r is negated by the state machine
		// before division iterations begin).
		addend = {is_div && |op_b_r, op_b_r, {XLEN-1{1'b0}}};
		// Multiply shifts right before adding; divide shifts left afterwards
		shift_tmp = is_div ? accum_next : accum_next >> 1;
		addsub_tmp = shift_tmp + addend;
		// Divide: keep the sum if its top bit is clear.
		// Multiply: keep the sum if the accumulator LSB was set.
		accum_next = (is_div ? !addsub_tmp[2 * XLEN - 1] : accum_next[0]) ?
			addsub_tmp : shift_tmp;
		if (is_div)
			accum_next = {accum_next[2*XLEN-2:0], !addsub_tmp[2 * XLEN - 1]};
	end
	// Alternative path for negation of all/part of accumulator
	if (accum_neg_l)
		{neg_l_borrow, accum_next[XLEN-1:0]} = {~accum[XLEN-1:0]} + 1'b1;
	if (accum_incr_h || accum_inv_h)
		accum_next[XLEN +: XLEN] = (accum[XLEN +: XLEN] ^ {XLEN{accum_inv_h}})
			+ accum_incr_h;
end

// ----------------------------------------------------------------------------
// Main state machine

reg sign_preadj_done;
reg [W_CTR-1:0] ctr;
reg sign_postadj_done;
reg sign_postadj_carry;

localparam CTR_TOP = XLEN[W_CTR-1:0];

// Branch priority, highest first:
//   1. kill / accept   - load operands; the counter starts at CTR_TOP only if
//                        op_vld is high, otherwise the unit goes idle
//   2. sign pre-adjust - one cycle spent fixing operand signs, or the first
//                        iteration is performed if no fix is needed
//   3. iterate         - while ctr is nonzero
//   4. sign post-adjust
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		ctr <= {W_CTR{1'b0}};
		sign_preadj_done <= 1'b1;
		sign_postadj_done <= 1'b1;
		sign_postadj_carry <= 1'b0;
		op_r <= {W_MULOP{1'b0}};
		op_a_neg_r <= 1'b0;
		op_b_neg_r <= 1'b0;
		op_b_r <= {XLEN{1'b0}};
		accum <= {XLEN*2{1'b0}};
	end else if (op_kill || (op_vld && op_rdy)) begin
		// Initialise circuit with operands + state
		ctr <= op_vld ? CTR_TOP : {W_CTR{1'b0}};
		sign_preadj_done <= !op_vld;
		sign_postadj_done <= !op_vld;
		sign_postadj_carry <= 1'b0;
		op_r <= op;
		op_b_r <= op_b;
		accum <= {{XLEN{1'b0}}, op_a};
	end else if (!sign_preadj_done) begin
		// Pre-adjust sign if necessary, else perform first iteration immediately
		op_a_neg_r <= op_a_neg;
		op_b_neg_r <= op_b_neg;
		sign_preadj_done <= 1'b1;
		if (accum_neg_l || (op_b_neg ^ is_div)) begin
			if (accum_neg_l)
				accum[0 +: XLEN] <= accum_next[0 +: XLEN];
			if (op_b_neg ^ is_div)
				op_b_r <= -op_b_r;
		end else begin
			ctr <= ctr - UNROLL[W_CTR-1:0];
			accum <= accum_next;
		end
	end else if (|ctr) begin
		ctr <= ctr - UNROLL[W_CTR-1:0];
		accum <= accum_next;
	end else if (!sign_postadj_done || sign_postadj_carry) begin
		sign_postadj_done <= 1'b1;
		if (accum_inv_h || accum_incr_h)
			accum[XLEN +: XLEN] <= accum_next[XLEN +: XLEN];
		if (accum_neg_l) begin
			accum[0 +: XLEN] <= accum_next[0 +: XLEN];
			if (!is_div) begin
				// Multiply only: a carry out of the low-half negation is
				// applied to the upper half on a further cycle.
				sign_postadj_carry <= neg_l_borrow;
				sign_postadj_done <= !neg_l_borrow;
			end
		end
	end
end

// ----------------------------------------------------------------------------
// Sign adjustment control

// Pre-adjustment: for any a, b we want |a|, |b|. Note that the magnitude of any
// 32-bit signed integer is representable by a 32-bit unsigned integer.

// Post-adjustment for division:
// We seek q, r to satisfy a = b * q + r, where a and b are given,
// and |r| < |b|. One way to do this is if
// sgn(r) = sgn(a)
// sgn(q) = sgn(a) ^ sgn(b)
// This has additional nice properties like
// -(a / b) = (-a) / b = a / (-b)

// Post-adjustment for multiplication:
// We have calculated the 2*XLEN result of |a| * |b|.
// Negate the entire accumulator if sgn(a) ^ sgn(b).
// This is done in two steps (to share div/mod circuit, and avoid 64-bit carry):
// - Negate lower half of accumulator, and invert upper half
// - Increment upper half if lower half carried

wire do_postadj = ~|{ctr, sign_postadj_done};
wire op_signs_differ = op_a_neg_r ^ op_b_neg_r;

// The low half is negated either to take |a| before iterating, or to fix the
// sign of the low result afterwards. The post-adjust negation is suppressed
// for a division whose op_b_r is zero.
assign accum_neg_l =
	!sign_preadj_done && op_a_neg ||
	do_postadj && !sign_postadj_carry && op_signs_differ && !(is_div && ~|op_b_r);

assign {accum_incr_h, accum_inv_h} =
	do_postadj && is_div && op_a_neg_r                              ? 2'b11 :
	do_postadj && !is_div && op_signs_differ && !sign_postadj_carry ? 2'b01 :
	do_postadj && !is_div && op_signs_differ && sign_postadj_carry  ? 2'b10 :
	                                                                  2'b00 ;

// ----------------------------------------------------------------------------
// Outputs

assign op_rdy = ~|{ctr, accum_neg_l, accum_incr_h, accum_inv_h};
assign result_vld = op_rdy;

`ifndef RISCV_FORMAL_ALTOPS

assign {result_h, result_l} = accum;

`else

// Provide arithmetically simpler alternative operations, to speed up formal checks
always assert(XLEN == 32); // TODO may care about this one day

reg [XLEN-1:0] fml_a_saved;
reg [XLEN-1:0] fml_b_saved;

// Capture operands under exactly the same condition as the datapath load in
// the main state machine. An operation forced in together with op_kill (while
// op_rdy is low) then gets its own operands rather than stale ones.
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		fml_a_saved <= {XLEN{1'b0}};
		fml_b_saved <= {XLEN{1'b0}};
	end else if (op_kill || (op_vld && op_rdy)) begin
		fml_a_saved <= op_a;
		fml_b_saved <= op_b;
	end
end

assign result_h =
	op_r == M_OP_MULH   ? (fml_a_saved + fml_b_saved) ^ 32'hf6583fb7 :
	op_r == M_OP_MULHSU ? (fml_a_saved - fml_b_saved) ^ 32'hecfbe137 :
	op_r == M_OP_MULHU  ? (fml_a_saved + fml_b_saved) ^ 32'h949ce5e8 :
	op_r == M_OP_REM    ? (fml_a_saved - fml_b_saved) ^ 32'h8da68fa5 :
	op_r == M_OP_REMU   ? (fml_a_saved - fml_b_saved) ^ 32'h3138d0e1 : 32'hdeadbeef;

assign result_l =
	op_r == M_OP_MUL    ? (fml_a_saved + fml_b_saved) ^ 32'h5876063e :
	op_r == M_OP_DIV    ? (fml_a_saved - fml_b_saved) ^ 32'h7f8529ec :
	op_r == M_OP_DIVU   ? (fml_a_saved - fml_b_saved) ^ 32'h10e8fd70 : 32'hdeadbeef;

`endif

// ----------------------------------------------------------------------------
// Interface properties

`ifdef FORMAL

always @ (posedge clk) if (rst_n && $past(rst_n)) begin: properties
	integer i;
	reg alive;

	// Once ready, stay ready until an operation is accepted
	if ($past(op_rdy && !op_vld))
		assert(op_rdy);

	// A valid result holds its value unless a kill intervened
	if (result_vld && $past(result_vld) && !$past(op_kill))
		assert($stable({result_h, result_l}));

	// Kill will halt an in-progress operation, but a new operation may be
	// asserted simultaneously with kill.
	if ($past(op_kill))
		assert(op_rdy == !$past(op_vld));

	// We should be periodically ready (liveness property), unless new operations
	// are forced in immediately, simultaneous with a kill, in which case there
	// is no intermediate ready state.
	alive = op_rdy || (op_kill && op_vld);
	for (i = 1; i <= XLEN / UNROLL + 3; i = i + 1)
		alive = alive || $past(op_rdy || (op_kill && op_vld), i);
	assert(alive);
end

`endif

endmodule

View File

@ -0,0 +1,57 @@
/******************************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2019 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*****************************************************************************/
// Really something like this should be in a utility library (or the language!),
// but Hazard5 is supposed to be self-contained
// Priority encoder: gnt is the index of the lowest-numbered set bit of req.
// With no request bits set, gnt is all-zeroes.
module hazard5_priority_encode #(
	parameter W_REQ = 16,
	parameter W_GNT = $clog2(W_REQ) // do not modify
) (
	input  wire [W_REQ-1:0] req,
	output wire [W_GNT-1:0] gnt
);

// Step 1: for each position, note whether any lower-numbered request is set.
// Built as a ripple from bit 0 upwards.
reg [W_REQ-1:0] lower_req_seen;

always @ (*) begin: ripple_mask
	integer n;
	lower_req_seen[0] = 1'b0;
	for (n = 1; n < W_REQ; n = n + 1)
		lower_req_seen[n] = lower_req_seen[n - 1] || req[n - 1];
end

// A request wins only if nothing below it is requesting, so at most one bit
// of winner is set.
wire [W_REQ-1:0] winner = req & ~lower_req_seen;

// Step 2: one-hot to binary. Because at most one bit is set, ORing together
// the index of every set bit yields that bit's index.
reg [W_GNT-1:0] winner_index;

always @ (*) begin: onehot_to_index
	integer n;
	winner_index = {W_GNT{1'b0}};
	for (n = 0; n < W_REQ; n = n + 1) begin
		winner_index = winner_index | (n[W_GNT-1:0] & {W_GNT{winner[n]}});
	end
end

assign gnt = winner_index;

endmodule

View File

@ -0,0 +1,72 @@
/**********************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2018 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*********************************************************************/
// Sequential shifter that moves the data by one bit position per clock.
//
// Asserting din_vld loads din into the accumulator and shamt into a down
// counter. On each following clock the accumulator shifts by one place until
// the counter reaches zero. dout_vld is high whenever the counter is zero.
//
// right_nleft and arith are not registered here: they are read on every
// shifting cycle.
module hazard5_shift_1bit_seq #(
	parameter W_DATA = 32,
	parameter W_SHAMT = 5
) (
	input  wire               clk,
	input  wire               rst_n,

	input  wire [W_DATA-1:0]  din,
	input  wire               din_vld, // can be asserted at any time, we always respond
	input  wire [W_SHAMT-1:0] shamt,
	input  wire               right_nleft,
	input  wire               arith,
	output wire [W_DATA-1:0]  dout,
	output wire               dout_vld // last port: must not be followed by a comma
);

reg [W_DATA-1:0]  accum;
reg [W_DATA-1:0]  accum_next;
reg [W_SHAMT-1:0] shamt_remaining;

// Handle actual shifting

// Fill bit for right shifts: the current MSB when arith is set, else zero
wire sext = arith && accum[W_DATA - 1];

always @ (*) begin: shift_unit
	accum_next = accum;
	if (din_vld) begin
		// A new request takes priority over any shift in progress
		accum_next = din;
	end else if (shamt_remaining) begin
		if (right_nleft)
			accum_next = {sext, accum[W_DATA-1:1]};
		else
			accum_next = {accum << 1};
	end
end

// No reset on datapath
always @ (posedge clk)
	accum <= accum_next;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		shamt_remaining <= {W_SHAMT{1'b0}};
	end else if (din_vld) begin
		shamt_remaining <= shamt;
	end else begin
		// Decrement while nonzero, then hold at zero
		shamt_remaining <= shamt_remaining - |shamt_remaining;
	end
end

assign dout_vld = shamt_remaining == 0;
assign dout = accum;

endmodule

View File

@ -0,0 +1,71 @@
/**********************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2018 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*********************************************************************/
// Implement the three shifts (left logical, right logical, right arithmetic)
// using a single log-type barrel shifter. Around 240 LUTs for 32 bits.
// (7 layers of 32 2-input muxes, some extra LUTs and LUT inputs used for arith)
// Combinational barrel shifter. Only a left shifter is built: for right
// shifts the input is bit-reversed, shifted left, and bit-reversed again.
//
// Ports:
//   din         - data to shift
//   shamt       - shift amount
//   right_nleft - 1 = shift right, 0 = shift left
//   arith       - fill vacated positions with a copy of sext (see below)
//                 instead of zeroes
//   dout        - shifted result
module hazard5_shift_barrel #(
	parameter W_DATA = 32,
	parameter W_SHAMT = 5
) (
	input  wire [W_DATA-1:0]  din,
	input  wire [W_SHAMT-1:0] shamt,
	input  wire               right_nleft,
	input  wire               arith,
	output reg  [W_DATA-1:0]  dout
);

// Each always block below declares its own loop variable inside a named block.
// A single module-level integer shared between several always @(*) blocks is
// written by all of them, which draws lint warnings and can make the blocks
// re-trigger one another in simulation.

reg [W_DATA-1:0] din_rev;
reg [W_DATA-1:0] shift_accum;

// Fill bit: bit 0 of the (possibly reversed) input. For a right shift that is
// the MSB of din.
wire sext = arith && din_rev[0]; // haha

// Input stage: reverse the bit order for right shifts
always @ (*) begin: reverse_in
	integer i;
	for (i = 0; i < W_DATA; i = i + 1)
		din_rev[i] = right_nleft ? din[W_DATA - 1 - i] : din[i];
end

// One layer per shamt bit: layer i shifts left by 2^i when shamt[i] is set,
// and fills the vacated low bits with sext.
always @ (*) begin: shift_layers
	integer i;
	shift_accum = din_rev;
	for (i = 0; i < W_SHAMT; i = i + 1) begin
		if (shamt[i]) begin
			shift_accum = (shift_accum << (1 << i)) |
				({W_DATA{sext}} & ~({W_DATA{1'b1}} << (1 << i)));
		end
	end
end

// Output stage: undo the input reversal
always @ (*) begin: reverse_out
	integer i;
	for (i = 0; i < W_DATA; i = i + 1)
		dout[i] = right_nleft ? shift_accum[W_DATA - 1 - i] : shift_accum[i];
end

`ifdef FORMAL
// Check against the built-in shift operators. The combination of a left
// shift with arith set is not checked.
always @ (*) begin
	if (right_nleft && arith) begin: asr
		assert($signed(dout) == $signed(din) >>> $signed(shamt));
	end else if (right_nleft && !arith) begin
		assert(dout == din >> shamt);
	end else if (!right_nleft && !arith) begin
		assert(dout == din << shamt);
	end
end
`endif

endmodule

View File

@ -0,0 +1,106 @@
/**********************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2018 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*********************************************************************/
// Implement the three shifts using a single log-sequential shifter.
// On each clock, the shifter can left-shift by a power-of-two amount (arith or
// logical), OR it can reverse the accumulator.
//
// The accumulator is wired in reverse to the output. So the sequences are:
// - Right shift: flip, then shift. Output wiring flips again. Internal left-shifts
// are effectively right shifts.
// - Left shift: perform shift ops, then flip, so that reversed output cancels.
//
// An additional cycle is consumed to load the input into the accumulator; this
// simplifies muxing. In total, a shift consumes between 2 and 7 cycles on a
// 32-bit machine, depending on the bit weight of shamt.
// Ports:
//   din, shamt  - data and shift amount, captured when din_vld is high
//   din_vld     - start a new shift; overrides any shift in progress
//   right_nleft - 1 = shift right, 0 = shift left
//   arith       - fill vacated positions with a copy of accumulator bit 0
//                 instead of zeroes
//   dout        - the accumulator, bit-reversed
//   dout_vld    - high once the flip has happened and no shift amount remains
//
// right_nleft and arith are not registered here: they are read on every cycle
// of the operation.
module hazard5_shift_log_seq #(
	parameter W_DATA = 32,
	parameter W_SHAMT = 5
) (
	input  wire               clk,
	input  wire               rst_n,

	input  wire [W_DATA-1:0]  din,
	input  wire               din_vld, // can be asserted at any time, we always respond
	input  wire [W_SHAMT-1:0] shamt,
	input  wire               right_nleft,
	input  wire               arith,
	output reg  [W_DATA-1:0]  dout,
	output reg                dout_vld // last port: must not be followed by a comma
);

reg [W_DATA-1:0]  accum;
reg [W_DATA-1:0]  accum_next;
reg [W_SHAMT-1:0] shamt_remaining;
reg               flipped;

// Handle actual shifting

// Flip exactly once per operation: straight away for a right shift, or after
// all shifting is finished for a left shift.
wire flip = !flipped && (right_nleft || ~|shamt_remaining);
wire sext = arith && accum[0]; // "Left arithmetic" shifting

always @ (*) begin: shift_unit
	integer i;
	accum_next = accum;
	// The following is a priority mux tree (honest) which the synthesis tool should balance
	if (din_vld) begin
		accum_next = din;
	end else if (flip) begin
		for (i = 0; i < W_DATA; i = i + 1)
			accum_next[i] = accum[W_DATA - 1 - i];
	end else if (shamt_remaining) begin
		// Smallest shift first: pick the lowest set bit of shamt_remaining
		// (bit i set, and no set bits below it) and shift by 2^i.
		for (i = 0; i < W_SHAMT; i = i + 1) begin
			if (shamt_remaining[i] && ~|(shamt_remaining & ~({W_SHAMT{1'b1}} << i))) begin
				accum_next = (accum << (1 << i)) |
					({W_DATA{sext}} & ~({W_DATA{1'b1}} << (1 << i)));
			end
		end
	end
end

// No reset on datapath
always @ (posedge clk)
	accum <= accum_next;

// State machine
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		shamt_remaining <= {W_SHAMT{1'b0}};
		flipped <= 1'b0;
	end else if (din_vld) begin
		shamt_remaining <= shamt;
		flipped <= 1'b0;
	end else begin
		if (flip)
			flipped <= 1'b1;
		else
			// Clear the lowest set bit (the shift just performed)
			shamt_remaining <= shamt_remaining & {shamt_remaining - 1'b1};
	end
end

always @ (*) begin: connect_output
	// Declarations must come before any statement in a named block
	integer i;
	dout_vld = flipped && ~|shamt_remaining;
	for (i = 0; i < W_DATA; i = i + 1)
		dout[i] = accum[W_DATA - 1 - i];
end

endmodule

65
hdl/arith/muldiv_model.py Executable file
View File

@ -0,0 +1,65 @@
#!/usr/bin/env python3
# Quick reference model for sequential unsigned multiply/divide/modulo
def div_step(w, accum, divisor):
    """Perform one iteration of the sequential divide.

    Tries to subtract the divisor, aligned ``w - 1`` bits up, from ``accum``.
    The subtraction is kept only if it does not go negative. The result is
    then shifted left by one, with the new low bit set to 1 when the
    subtraction was kept and 0 otherwise.
    """
    trial = accum - (divisor << (w - 1))
    fits = trial >= 0
    kept = trial if fits else accum
    return (kept << 1) | fits
def divmod(w, dividend, divisor, debug=True):
    """Run ``w`` divide iterations and return ``(remainder, quotient)``.

    The accumulator starts out holding the dividend. After ``w`` steps its
    upper part (above bit ``w``) is returned first and its low ``w`` bits
    second; ``divtest`` compares these against ``%`` and ``//`` respectively.
    With ``debug`` set, the accumulator is printed before and after each step.

    Note that this name shadows Python's built-in ``divmod`` inside this module.
    """
    state = dividend
    for step in range(w):
        before = state
        state = div_step(w, before, divisor)
        if debug:
            hex_digits = int(w / 2)
            print("Step {:02d}: accum {:0{}x} -> {:0{}x}".format(
                step, before, hex_digits, state, hex_digits))
    low_mask = (1 << w) - 1
    return (state >> w, state & low_mask)
def mul_step(w, accum, multiplicand):
    """Perform one iteration of the sequential multiply.

    The accumulator is shifted right by one. If the bit shifted out was set,
    the multiplicand, aligned ``w - 1`` bits up, is added to the shifted value.
    """
    shifted = accum >> 1
    if accum & 1:
        return shifted + (multiplicand << (w - 1))
    return shifted
def mul(w, multiplicand, multiplier, debug=True):
    """Run ``w`` multiply iterations and return ``(high, low)`` product halves.

    The accumulator starts out holding the multiplier. After ``w`` steps the
    bits above bit ``w`` are returned first and the low ``w`` bits second.
    With ``debug`` set, the accumulator is printed before and after each step.
    """
    state = multiplier
    for step in range(w):
        before = state
        state = mul_step(w, before, multiplicand)
        if debug:
            hex_digits = int(w / 2)
            print("Step {:02d}: accum {:0{}x} -> {:0{}x}".format(
                step, before, hex_digits, state, hex_digits))
    low_mask = (1 << w) - 1
    return (state >> w, state & low_mask)
def divtest(w=4):
    """Exhaustively check the divide model for every ``w``-bit operand pair.

    Divisor zero is skipped. Each result is printed next to Python's own
    ``%`` and ``//`` results, and the run aborts on the first mismatch.
    """
    limit = 2 ** w
    for dividend in range(limit):
        for divisor in range(1, limit):
            gatemod, gatediv = divmod(w, dividend, divisor, debug=False)
            goldmod = dividend % divisor
            golddiv = dividend // divisor
            print("{:02d} % {:02d} = {:02d} (gold {:02d}); ./. = {:02d} (gold {:02d})"
                .format(dividend, divisor, gatemod, goldmod, gatediv, golddiv))
            assert gatemod == goldmod
            assert gatediv == golddiv
def multest(w=4):
    """Exhaustively check the multiply model for every ``w``-bit operand pair.

    Each result is printed next to the high and low halves of Python's own
    product, and the run aborts on the first mismatch.
    """
    limit = 2 ** w
    low_mask = (1 << w) - 1
    for lhs in range(limit):
        for rhs in range(limit):
            gateh, gatel = mul(w, lhs, rhs, debug=False)
            gold = lhs * rhs
            goldl = gold & low_mask
            goldh = gold >> w
            print("{:02d} * {:02d} = ({:02d} (gold {:02d}), {:02d} (gold {:02d})"
                .format(lhs, rhs, gateh, goldh, gatel, goldl))
            assert gatel == goldl
            assert gateh == goldh
# Script entry point: run the division sweep, then the multiplication sweep,
# printing a banner before each.
if __name__ == "__main__":
    for banner, run_sweep in (
        ("Test division:", divtest),
        ("Test multiplication:", multest),
    ):
        print(banner)
        run_sweep()

14
hdl/hazard5.f Normal file
View File

@ -0,0 +1,14 @@
file arith/hazard5_alu.v
file arith/hazard5_shift_barrel.v
file arith/hazard5_priority_encode.v
file arith/hazard5_muldiv_seq.v
file arith/hazard5_mul_fast.v
file hazard5_frontend.v
file hazard5_instr_decompress.v
file hazard5_decode.v
file hazard5_csr.v
file hazard5_regfile_1w2r.v
file hazard5_core.v
file hazard5_cpu_1port.v
file hazard5_cpu_2port.v
include .

64
hdl/hazard5_config.vh Normal file
View File

@ -0,0 +1,64 @@
// Hazard5 CPU configuration parameters
// To configure Hazard5 you can either edit this file, or set parameters on
// your top-level instantiation, it's up to you. These parameters are all
// plumbed through Hazard5's internal hierarchy to the appropriate places.
//
// This file is a bare parameter list: it is included inside the parameter
// port list of a module header (hazard5_core does this). Every entry except
// the last therefore ends with a comma, and the last entry has none. Keep it
// that way when adding or reordering parameters.
// ----------------------------------------------------------------------------
// Reset state configuration
// RESET_VECTOR: Address of first instruction executed.
parameter RESET_VECTOR = 32'h0,
// MTVEC_INIT: Initial value of trap vector base. Bits clear in MTVEC_WMASK
// will never change from this initial value. Bits set in MTVEC_WMASK can be
// written/set/cleared as normal. Note that, if CSR_M_TRAP is set, MTVEC_INIT
// should probably have a different value from RESET_VECTOR.
// NOTE(review): the defaults here give MTVEC_INIT the same value as
// RESET_VECTOR while CSR_M_TRAP defaults to 1 -- integrators are presumably
// expected to override one of them; confirm.
parameter MTVEC_INIT = 32'h00000000,
// ----------------------------------------------------------------------------
// RISC-V ISA and CSR support
// EXTENSION_C: Support for compressed (variable-width) instructions
parameter EXTENSION_C = 1,
// EXTENSION_M: Support for hardware multiply/divide/modulo instructions
parameter EXTENSION_M = 1,
// CSR_M_MANDATORY: Bare minimum CSR support e.g. misa. Spec says must = 1 if
// CSRs are present, but I won't tell anyone.
parameter CSR_M_MANDATORY = 1,
// CSR_M_TRAP: Include M-mode trap-handling CSRs, and enable trap support.
parameter CSR_M_TRAP = 1,
// CSR_COUNTER: Include performance counters and relevant M-mode CSRs
parameter CSR_COUNTER = 0,
// ----------------------------------------------------------------------------
// Performance/size options
// REDUCED_BYPASS: Remove all forwarding paths except X->X (so back-to-back
// ALU ops can still run at 1 CPI), to save area.
parameter REDUCED_BYPASS = 0,
// MULDIV_UNROLL: Bits per clock for multiply/divide circuit, if present. Must
// be a power of 2.
parameter MULDIV_UNROLL = 1,
// MUL_FAST: Use single-cycle multiply circuit for MUL instructions, retiring
// to stage M. The sequential multiply/divide circuit is still used for
// MULH/MULHU/MULHSU.
parameter MUL_FAST = 0,
// MTVEC_WMASK: Mask of which bits in MTVEC are modifiable. Save gates by
// making trap vector base partly fixed (legal, as it's WARL). Note the entire
// vector table must always be aligned to its size, rounded up to a power of
// two, so careful with the low-order bits.
// With the default mask below, the low 12 bits of MTVEC are fixed at their
// MTVEC_INIT value.
parameter MTVEC_WMASK = 32'hfffff000,
// ----------------------------------------------------------------------------
// Port size parameters (do not modify)
parameter W_ADDR = 32, // Do not modify
parameter W_DATA = 32 // Do not modify

728
hdl/hazard5_core.v Normal file
View File

@ -0,0 +1,728 @@
/**********************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2018 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*********************************************************************/
module hazard5_core #(
`include "hazard5_config.vh"
) (
// Global signals
input wire clk,
input wire rst_n,
`ifdef RISCV_FORMAL
`RVFI_OUTPUTS ,
`endif
// Instruction fetch port
output wire bus_aph_req_i,
output wire bus_aph_panic_i, // e.g. branch mispredict + flush
input wire bus_aph_ready_i,
input wire bus_dph_ready_i,
input wire bus_dph_err_i,
output wire [2:0] bus_hsize_i,
output wire [W_ADDR-1:0] bus_haddr_i,
input wire [W_DATA-1:0] bus_rdata_i,
// Load/store port
output reg bus_aph_req_d,
input wire bus_aph_ready_d,
input wire bus_dph_ready_d,
input wire bus_dph_err_d,
output reg [W_ADDR-1:0] bus_haddr_d,
output reg [2:0] bus_hsize_d,
output reg bus_hwrite_d,
output reg [W_DATA-1:0] bus_wdata_d,
input wire [W_DATA-1:0] bus_rdata_d,
// External level-sensitive interrupt sources (tie 0 if unused)
input wire [15:0] irq
);
`include "hazard5_ops.vh"
`ifdef FORMAL
// Only yosys-smtbmc seems to support immediate assertions
`ifdef RISCV_FORMAL
`define ASSERT(x)
`else
`define ASSERT(x) assert(x)
`endif
`else
`define ASSERT(x)
//synthesis translate_off
`undef ASSERT
`define ASSERT(x) if (!x) begin $display("Assertion failed!"); $finish(1); end
//synthesis translate_on
`endif
localparam N_REGS = 32;
// should be localparam but ISIM can't cope
parameter W_REGADDR = $clog2(N_REGS);
localparam NOP_INSTR = 32'h13; // addi x0, x0, 0
wire flush_d_x;
wire d_stall;
wire x_stall;
wire m_stall;
localparam HSIZE_WORD = 3'd2;
localparam HSIZE_HWORD = 3'd1;
localparam HSIZE_BYTE = 3'd0;
// ============================================================================
// Pipe Stage F
// ============================================================================
wire m_jump_req;
wire [W_ADDR-1:0] m_jump_target;
wire d_jump_req;
wire [W_ADDR-1:0] d_jump_target;
wire f_jump_req = d_jump_req || m_jump_req;
wire [W_ADDR-1:0] f_jump_target = m_jump_req ? m_jump_target : d_jump_target;
wire f_jump_rdy;
wire f_jump_now = f_jump_req && f_jump_rdy;
wire [31:0] fd_cir;
wire [1:0] fd_cir_vld;
wire [1:0] df_cir_use;
wire df_cir_lock;
assign bus_aph_panic_i = m_jump_req;
wire f_mem_size;
assign bus_hsize_i = f_mem_size ? HSIZE_WORD : HSIZE_HWORD;
hazard5_frontend #(
.EXTENSION_C(EXTENSION_C),
.W_ADDR(W_ADDR),
.W_DATA(32),
.FIFO_DEPTH(2),
.RESET_VECTOR(RESET_VECTOR)
) frontend (
.clk (clk),
.rst_n (rst_n),
.mem_size (f_mem_size),
.mem_addr (bus_haddr_i),
.mem_addr_vld (bus_aph_req_i),
.mem_addr_rdy (bus_aph_ready_i),
.mem_data (bus_rdata_i),
.mem_data_vld (bus_dph_ready_i),
.jump_target (f_jump_target),
.jump_target_vld (f_jump_req),
.jump_target_rdy (f_jump_rdy),
.cir (fd_cir),
.cir_vld (fd_cir_vld),
.cir_use (df_cir_use),
.cir_lock (df_cir_lock)
);
assign flush_d_x = m_jump_req && f_jump_rdy;
// ============================================================================
// Pipe Stage D
// ============================================================================
// X-check on pieces of instruction which frontend claims are valid
//synthesis translate_off
always @ (posedge clk) begin
if (rst_n) begin
if (|fd_cir_vld && (^fd_cir[15:0] === 1'bx)) begin
$display("CIR LSBs are X, should be valid!");
$finish;
end
if (fd_cir_vld[1] && (^fd_cir === 1'bX)) begin
$display("CIR contains X, should be fully valid!");
$finish;
end
end
end
//synthesis translate_on
wire [W_ADDR-1:0] d_pc; // FIXME only used for riscv-formal
// To register file
wire [W_REGADDR-1:0] d_rs1;
wire [W_REGADDR-1:0] d_rs2;
// To X
wire [W_DATA-1:0] dx_imm;
wire [W_REGADDR-1:0] dx_rs1;
wire [W_REGADDR-1:0] dx_rs2;
wire [W_REGADDR-1:0] dx_rd;
wire [W_ALUSRC-1:0] dx_alusrc_a;
wire [W_ALUSRC-1:0] dx_alusrc_b;
wire [W_ALUOP-1:0] dx_aluop;
wire [W_MEMOP-1:0] dx_memop;
wire [W_MULOP-1:0] dx_mulop;
wire [W_BCOND-1:0] dx_branchcond;
wire [W_ADDR-1:0] dx_jump_target;
wire dx_jump_is_regoffs;
wire dx_result_is_linkaddr;
wire [W_ADDR-1:0] dx_pc;
wire [W_ADDR-1:0] dx_mispredict_addr;
wire [W_EXCEPT-1:0] dx_except;
wire dx_csr_ren;
wire dx_csr_wen;
wire [1:0] dx_csr_wtype;
wire dx_csr_w_imm;
hazard5_decode #(
.EXTENSION_C (EXTENSION_C),
.EXTENSION_M (EXTENSION_M),
.HAVE_CSR (CSR_M_MANDATORY || CSR_M_TRAP || CSR_COUNTER),
.W_ADDR (W_ADDR),
.W_DATA (W_DATA),
.RESET_VECTOR (RESET_VECTOR),
.W_REGADDR (W_REGADDR)
) inst_hazard5_decode (
.clk (clk),
.rst_n (rst_n),
.fd_cir (fd_cir),
.fd_cir_vld (fd_cir_vld),
.df_cir_use (df_cir_use),
.df_cir_lock (df_cir_lock),
.d_jump_req (d_jump_req),
.d_jump_target (d_jump_target),
.d_pc (d_pc),
.d_stall (d_stall),
.x_stall (x_stall),
.flush_d_x (flush_d_x),
.f_jump_rdy (f_jump_rdy),
.f_jump_now (f_jump_now),
.f_jump_target (f_jump_target),
.d_rs1 (d_rs1),
.d_rs2 (d_rs2),
.dx_imm (dx_imm),
.dx_rs1 (dx_rs1),
.dx_rs2 (dx_rs2),
.dx_rd (dx_rd),
.dx_alusrc_a (dx_alusrc_a),
.dx_alusrc_b (dx_alusrc_b),
.dx_aluop (dx_aluop),
.dx_memop (dx_memop),
.dx_mulop (dx_mulop),
.dx_csr_ren (dx_csr_ren),
.dx_csr_wen (dx_csr_wen),
.dx_csr_wtype (dx_csr_wtype),
.dx_csr_w_imm (dx_csr_w_imm),
.dx_branchcond (dx_branchcond),
.dx_jump_target (dx_jump_target),
.dx_jump_is_regoffs (dx_jump_is_regoffs),
.dx_result_is_linkaddr (dx_result_is_linkaddr),
.dx_pc (dx_pc),
.dx_mispredict_addr (dx_mispredict_addr),
.dx_except (dx_except)
);
// ============================================================================
// Pipe Stage X
// ============================================================================
// Register the write which took place to the regfile on previous cycle, and bypass.
// This is an alternative to a write -> read bypass in the regfile,
// which we can't implement whilst maintaining BRAM inference compatibility (iCE40).
reg [W_REGADDR-1:0] mw_rd;
reg [W_DATA-1:0] mw_result;
// From register file:
wire [W_DATA-1:0] dx_rdata1;
wire [W_DATA-1:0] dx_rdata2;
// Combinational regs for muxing
reg [W_DATA-1:0] x_rs1_bypass;
reg [W_DATA-1:0] x_rs2_bypass;
reg [W_DATA-1:0] x_op_a;
reg [W_DATA-1:0] x_op_b;
wire [W_DATA-1:0] x_alu_result;
wire [W_DATA-1:0] x_alu_add;
wire x_alu_cmp;
wire [W_DATA-1:0] x_trap_addr;
wire [W_DATA-1:0] x_mepc;
wire x_trap_enter;
wire x_trap_exit;
reg [W_REGADDR-1:0] xm_rs1;
reg [W_REGADDR-1:0] xm_rs2;
reg [W_REGADDR-1:0] xm_rd;
reg [W_DATA-1:0] xm_result;
reg [W_ADDR-1:0] xm_jump_target;
reg [W_DATA-1:0] xm_store_data;
reg xm_jump;
reg [W_MEMOP-1:0] xm_memop;
// For JALR, the LSB of the result must be cleared by hardware
wire [W_ADDR-1:0] x_taken_jump_target = dx_jump_is_regoffs ? x_alu_add & ~32'h1 : dx_jump_target;
wire [W_ADDR-1:0] x_jump_target =
x_trap_exit ? x_mepc : // Note precedence -- it's possible to have enter && exit, but in this case enter_rdy is false.
x_trap_enter ? x_trap_addr :
dx_imm[31] && dx_branchcond != BCOND_ALWAYS ? dx_mispredict_addr :
x_taken_jump_target;
reg x_stall_raw;
wire x_stall_muldiv;
assign x_stall = m_stall ||
x_stall_raw || x_stall_muldiv || bus_aph_req_d && !bus_aph_ready_d;
wire m_fast_mul_result_vld;
wire m_generating_result = xm_memop < MEMOP_SW || m_fast_mul_result_vld;
// Load-use hazard detection
always @ (*) begin
x_stall_raw = 1'b0;
if (REDUCED_BYPASS) begin
x_stall_raw =
|xm_rd && (xm_rd == dx_rs1 || xm_rd == dx_rs2) ||
|mw_rd && (mw_rd == dx_rs1 || mw_rd == dx_rs2);
end else if (m_generating_result) begin
// With the full bypass network, load-use (or fast multiply-use) is the only RAW stall
if (|xm_rd && xm_rd == dx_rs1) begin
// Store addresses cannot be bypassed later, so there is no exception here.
x_stall_raw = 1'b1;
end else if (|xm_rd && xm_rd == dx_rs2) begin
// Store data can be bypassed in M. Any other instructions must stall.
x_stall_raw = !(dx_memop == MEMOP_SW || dx_memop == MEMOP_SH || dx_memop == MEMOP_SB);
end
end
end
// AHB transaction request
wire x_memop_vld = !dx_memop[3];
wire x_memop_write = dx_memop == MEMOP_SW || dx_memop == MEMOP_SH || dx_memop == MEMOP_SB;
wire x_unaligned_addr =
bus_hsize_d == HSIZE_WORD && |bus_haddr_d[1:0] ||
bus_hsize_d == HSIZE_HWORD && bus_haddr_d[0];
wire x_except_load_misaligned = x_memop_vld && x_unaligned_addr && !x_memop_write;
wire x_except_store_misaligned = x_memop_vld && x_unaligned_addr && x_memop_write;
always @ (*) begin
// Need to be careful not to use anything hready-sourced to gate htrans!
bus_haddr_d = x_alu_add;
bus_hwrite_d = x_memop_write;
case (dx_memop)
MEMOP_LW: bus_hsize_d = HSIZE_WORD;
MEMOP_SW: bus_hsize_d = HSIZE_WORD;
MEMOP_LH: bus_hsize_d = HSIZE_HWORD;
MEMOP_LHU: bus_hsize_d = HSIZE_HWORD;
MEMOP_SH: bus_hsize_d = HSIZE_HWORD;
default: bus_hsize_d = HSIZE_BYTE;
endcase
// m_jump_req implies flush_d_x is coming. Can't use flush_d_x because it's
// possible for a mispredicted load/store to go through whilst a late jump
// request is stalled, if there are two bus masters.
bus_aph_req_d = x_memop_vld && !(x_stall_raw || m_jump_req || x_trap_enter);
end
// ALU operand muxes and bypass
always @ (*) begin
if (~|dx_rs1) begin
x_rs1_bypass = {W_DATA{1'b0}};
end else if (xm_rd == dx_rs1) begin
x_rs1_bypass = xm_result;
end else if (mw_rd == dx_rs1 && !REDUCED_BYPASS) begin
x_rs1_bypass = mw_result;
end else begin
x_rs1_bypass = dx_rdata1;
end
if (~|dx_rs2) begin
x_rs2_bypass = {W_DATA{1'b0}};
end else if (xm_rd == dx_rs2) begin
x_rs2_bypass = xm_result;
end else if (mw_rd == dx_rs2 && !REDUCED_BYPASS) begin
x_rs2_bypass = mw_result;
end else begin
x_rs2_bypass = dx_rdata2;
end
if (|dx_alusrc_a)
x_op_a = dx_pc;
else
x_op_a = x_rs1_bypass;
if (|dx_alusrc_b)
x_op_b = dx_imm;
else
x_op_b = x_rs2_bypass;
end
// CSRs and Trap Handling
wire x_except_ecall = dx_except == EXCEPT_ECALL;
wire x_except_breakpoint = dx_except == EXCEPT_EBREAK;
wire x_except_invalid_instr = dx_except == EXCEPT_INSTR_ILLEGAL;
assign x_trap_exit = dx_except == EXCEPT_MRET && !(x_stall || m_jump_req);
wire x_trap_enter_rdy = !(x_stall || m_jump_req || x_trap_exit);
wire x_trap_is_exception; // diagnostic
`ifdef FORMAL
always @ (posedge clk) begin
if (flush_d_x)
assert(!x_trap_enter_rdy);
if (x_trap_exit)
assert(!bus_aph_req_d);
end
`endif
wire [W_DATA-1:0] x_csr_wdata = dx_csr_w_imm ?
{{W_DATA-5{1'b0}}, dx_rs1} : x_rs1_bypass;
wire [W_DATA-1:0] x_csr_rdata;
hazard5_csr #(
.XLEN (W_DATA),
.CSR_M_MANDATORY (CSR_M_MANDATORY),
.CSR_M_TRAP (CSR_M_TRAP),
.CSR_COUNTER (CSR_COUNTER),
.EXTENSION_C (EXTENSION_C),
.EXTENSION_M (EXTENSION_M),
.MTVEC_WMASK (MTVEC_WMASK),
.MTVEC_INIT (MTVEC_INIT)
) inst_hazard5_csr (
.clk (clk),
.rst_n (rst_n),
// CSR access port
// *en_soon are early access strobes which are not a function of bus stall.
// Can generate access faults (hence traps), but do not actually perform access.
.addr (dx_imm[11:0]),
.wdata (x_csr_wdata),
.wen_soon (dx_csr_wen),
.wen (dx_csr_wen && !(x_stall || flush_d_x)),
.wtype (dx_csr_wtype),
.rdata (x_csr_rdata),
.ren_soon (dx_csr_ren),
.ren (dx_csr_ren && !(x_stall || flush_d_x)),
// Trap signalling
.trap_addr (x_trap_addr),
.trap_enter_vld (x_trap_enter),
.trap_enter_rdy (x_trap_enter_rdy),
.trap_exit (x_trap_exit),
.trap_is_exception (x_trap_is_exception),
.mepc_in (dx_pc),
.mepc_out (x_mepc),
// IRQ and exception requests
.irq (irq),
.except_instr_misaligned (1'b0), // TODO
.except_instr_fault (1'b0), // TODO
.except_instr_invalid (x_except_invalid_instr),
.except_breakpoint (x_except_breakpoint),
.except_load_misaligned (x_except_load_misaligned),
.except_load_fault (1'b0), // TODO
.except_store_misaligned (x_except_store_misaligned),
.except_store_fault (1'b0), // TODO
.except_ecall (x_except_ecall),
// Other CSR-specific signalling
.instr_ret (1'b0) // TODO
);
// Multiply/divide
wire [W_DATA-1:0] x_muldiv_result;
wire [W_DATA-1:0] m_fast_mul_result;
generate
if (EXTENSION_M) begin: has_muldiv
wire x_muldiv_op_vld;
wire x_muldiv_op_rdy;
wire x_muldiv_result_vld;
wire [W_DATA-1:0] x_muldiv_result_h;
wire [W_DATA-1:0] x_muldiv_result_l;
reg x_muldiv_posted;
always @ (posedge clk or negedge rst_n)
if (!rst_n)
x_muldiv_posted <= 1'b0;
else
x_muldiv_posted <= (x_muldiv_posted || (x_muldiv_op_vld && x_muldiv_op_rdy)) && x_stall;
wire x_muldiv_kill = flush_d_x || x_trap_enter; // TODO this takes an extra cycle to kill muldiv before trap entry
wire x_use_fast_mul = MUL_FAST && dx_aluop == ALUOP_MULDIV && dx_mulop == M_OP_MUL;
assign x_muldiv_op_vld = (dx_aluop == ALUOP_MULDIV && !x_use_fast_mul)
&& !(x_muldiv_posted || x_stall_raw || x_muldiv_kill);
hazard5_muldiv_seq #(
.XLEN (W_DATA),
.UNROLL (MULDIV_UNROLL)
) muldiv (
.clk (clk),
.rst_n (rst_n),
.op (dx_mulop),
.op_vld (x_muldiv_op_vld),
.op_rdy (x_muldiv_op_rdy),
.op_kill (x_muldiv_kill),
.op_a (x_rs1_bypass),
.op_b (x_rs2_bypass),
.result_h (x_muldiv_result_h),
.result_l (x_muldiv_result_l),
.result_vld (x_muldiv_result_vld)
);
// TODO fusion of MULHx->MUL and DIVy->REMy sequences
wire x_muldiv_result_is_high =
dx_mulop == M_OP_MULH ||
dx_mulop == M_OP_MULHSU ||
dx_mulop == M_OP_MULHU ||
dx_mulop == M_OP_REM ||
dx_mulop == M_OP_REMU;
assign x_muldiv_result = x_muldiv_result_is_high ? x_muldiv_result_h : x_muldiv_result_l;
assign x_stall_muldiv = x_muldiv_op_vld || !x_muldiv_result_vld;
if (MUL_FAST) begin: has_fast_mul
wire x_issue_fast_mul = x_use_fast_mul && |dx_rd && !(x_stall || flush_d_x);
hazard5_mul_fast #(
.XLEN(W_DATA)
) inst_hazard5_mul_fast (
.clk (clk),
.rst_n (rst_n),
.op_a (x_rs1_bypass),
.op_b (x_rs2_bypass),
.op_vld (x_issue_fast_mul),
.result (m_fast_mul_result),
.result_vld (m_fast_mul_result_vld)
);
end else begin: no_fast_mul
assign m_fast_mul_result = {W_DATA{1'b0}};
assign m_fast_mul_result_vld = 1'b0;
end
`ifdef FORMAL
always @ (posedge clk) if (dx_aluop != ALUOP_MULDIV) assert(!x_stall_muldiv);
`endif
end else begin: no_muldiv
assign x_muldiv_result = {W_DATA{1'b0}};
assign m_fast_mul_result = {W_DATA{1'b0}};
assign m_fast_mul_result_vld = 1'b0;
assign x_stall_muldiv = 1'b0;
end
endgenerate
// State machine and branch detection
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
xm_jump <= 1'b0;
xm_memop <= MEMOP_NONE;
{xm_rs1, xm_rs2, xm_rd} <= {3 * W_REGADDR{1'b0}};
end else begin
// TODO: this assertion may become untrue depending on how we handle exceptions/IRQs when stalled?
//`ASSERT(!(m_stall && flush_d_x));// bubble insertion logic below is broken otherwise
if (!m_stall) begin
{xm_rs1, xm_rs2, xm_rd} <= {dx_rs1, dx_rs2, dx_rd};
// If the transfer is unaligned, make sure it is completely NOP'd on the bus
xm_memop <= dx_memop | {x_unaligned_addr, 3'h0};
if (x_stall || flush_d_x || x_trap_enter) begin
// Insert bubble
xm_rd <= {W_REGADDR{1'b0}};
xm_jump <= 1'b0;
xm_memop <= MEMOP_NONE;
end
if (!(x_stall || flush_d_x)) begin
case (dx_branchcond)
BCOND_ALWAYS: xm_jump <= 1'b1;
// For branches, we are either taking a branch late, or recovering from
// an incorrectly taken branch, depending on sign of branch offset.
BCOND_ZERO: xm_jump <= !x_alu_cmp ^ dx_imm[31];
BCOND_NZERO: xm_jump <= x_alu_cmp ^ dx_imm[31];
default xm_jump <= 1'b0;
endcase
if (x_trap_enter || x_trap_exit)
xm_jump <= 1'b1;
end
end
end
end
// No reset on datapath flops
always @ (posedge clk)
if (!m_stall) begin
xm_result <=
dx_result_is_linkaddr ? dx_mispredict_addr :
dx_csr_ren ? x_csr_rdata :
EXTENSION_M && dx_aluop == ALUOP_MULDIV ? x_muldiv_result :
x_alu_result;
xm_store_data <= x_rs2_bypass;
xm_jump_target <= x_jump_target;
end
hazard5_alu alu (
.aluop (dx_aluop),
.op_a (x_op_a),
.op_b (x_op_b),
.result (x_alu_result),
.result_add (x_alu_add),
.cmp (x_alu_cmp)
);
// ============================================================================
// Pipe Stage M
// ============================================================================
reg [W_DATA-1:0] m_rdata_shift;
reg [W_DATA-1:0] m_wdata;
reg [W_DATA-1:0] m_result;
assign m_jump_req = xm_jump;
assign m_jump_target = xm_jump_target;
assign m_stall = (!xm_memop[3] && !bus_dph_ready_d) || (m_jump_req && !f_jump_rdy);
wire m_except_bus_fault = bus_dph_err_d; // TODO: handle differently for LSU/ifetch?
always @ (*) begin
// Local forwarding of store data
if (|mw_rd && xm_rs2 == mw_rd && !REDUCED_BYPASS) begin
m_wdata = mw_result;
end else begin
m_wdata = xm_store_data;
end
// Replicate store data to ensure appropriate byte lane is driven
case (xm_memop)
MEMOP_SW: bus_wdata_d = m_wdata;
MEMOP_SH: bus_wdata_d = {2{m_wdata[15:0]}};
MEMOP_SB: bus_wdata_d = {4{m_wdata[7:0]}};
default: bus_wdata_d = 32'h0;
endcase
// Pick out correct data from load access, and sign/unsign extend it.
// This is slightly cheaper than a normal shift:
case (xm_result[1:0])
2'b00: m_rdata_shift = bus_rdata_d;
2'b01: m_rdata_shift = {bus_rdata_d[31:8], bus_rdata_d[15:8]};
2'b10: m_rdata_shift = {bus_rdata_d[31:16], bus_rdata_d[31:16]};
2'b11: m_rdata_shift = {bus_rdata_d[31:8], bus_rdata_d[31:24]};
endcase
case (xm_memop)
MEMOP_LW: m_result = m_rdata_shift;
MEMOP_LH: m_result = {{16{m_rdata_shift[15]}}, m_rdata_shift[15:0]};
MEMOP_LHU: m_result = {16'h0, m_rdata_shift[15:0]};
MEMOP_LB: m_result = {{24{m_rdata_shift[7]}}, m_rdata_shift[7:0]};
MEMOP_LBU: m_result = {24'h0, m_rdata_shift[7:0]};
default: begin
if (MUL_FAST && m_fast_mul_result_vld) begin
m_result = m_fast_mul_result;
end else begin
m_result = xm_result;
end
end
endcase
end
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
mw_rd <= {W_REGADDR{1'b0}};
end else if (!m_stall) begin
//synthesis translate_off
// TODO: proper exception support
if (m_except_bus_fault) begin
$display("Bus fault!");
$finish;
end
if (^bus_wdata_d === 1'bX) begin
$display("Writing Xs to memory!");
$finish;
end
//synthesis translate_on
mw_rd <= xm_rd;
end
end
// No need to reset result register, as reset on mw_rd protects register file from it
always @ (posedge clk)
if (!m_stall)
mw_result <= m_result;
// ============================================================================
// Pipe Stage W
// ============================================================================
// mw_result and mw_rd register the most recent write to the register file,
// so that X can bypass them in.
wire w_reg_wen = |xm_rd && !m_stall;
//synthesis translate_off
always @ (posedge clk) begin
if (rst_n) begin
if (w_reg_wen && (^m_result === 1'bX)) begin
$display("Writing X to register file!");
$finish;
end
end
end
//synthesis translate_on
hazard5_regfile_1w2r #(
.FAKE_DUALPORT(0),
`ifdef SIM
.RESET_REGS(1),
`elsif FORMAL
.RESET_REGS(1),
`else
.RESET_REGS(0),
`endif
.N_REGS(N_REGS),
.W_DATA(W_DATA)
) inst_regfile_1w2r (
.clk (clk),
.rst_n (rst_n),
// On stall, we feed X's addresses back into regfile
// so that output does not change.
.raddr1 (x_stall ? dx_rs1 : d_rs1),
.rdata1 (dx_rdata1),
.raddr2 (x_stall ? dx_rs2 : d_rs2),
.rdata2 (dx_rdata2),
.waddr (xm_rd),
.wdata (m_result),
.wen (w_reg_wen)
);
`ifdef RISCV_FORMAL
`include "hazard5_rvfi_monitor.vh"
`endif
`ifdef HAZARD5_FORMAL_REGRESSION
// Each formal regression provides its own file with the below name:
`include "hazard5_formal_regression.vh"
`endif
endmodule

213
hdl/hazard5_cpu_1port.v Normal file
View File

@ -0,0 +1,213 @@
/**********************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2021 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*********************************************************************/
// Single-ported top level file for Hazard5 CPU. This file instantiates the
// Hazard5 core, and arbitrates its instruction fetch and load/store signals
// down to a single AHB-Lite master port.
// Parameters are pulled in from hazard5_config.vh and forwarded to the core
// instance below. Signal suffix convention used throughout this module:
//   *_i = instruction fetch side of the core
//   *_d = load/store (data) side of the core
module hazard5_cpu_1port #(
`include "hazard5_config.vh"
) (
// Global signals
input wire clk,
input wire rst_n,
`ifdef RISCV_FORMAL
`RVFI_OUTPUTS ,
`endif
// AHB-lite Master port
// The address-phase outputs declared as "reg" are driven combinationally by
// the always @ (*) mux further down; they are not flops.
output reg [W_ADDR-1:0] ahblm_haddr,
output reg ahblm_hwrite,
output reg [1:0] ahblm_htrans,
output reg [2:0] ahblm_hsize,
output wire [2:0] ahblm_hburst,
output reg [3:0] ahblm_hprot,
output wire ahblm_hmastlock,
input wire ahblm_hready,
input wire ahblm_hresp,
output wire [W_DATA-1:0] ahblm_hwdata,
input wire [W_DATA-1:0] ahblm_hrdata,
// External level-sensitive interrupt sources (tie 0 if unused)
input wire [15:0] irq
);
// ----------------------------------------------------------------------------
// Processor core
// Instruction fetch signals
wire core_aph_req_i;
wire core_aph_panic_i;
wire core_aph_ready_i;
wire core_dph_ready_i;
wire core_dph_err_i;
wire [2:0] core_hsize_i;
wire [W_ADDR-1:0] core_haddr_i;
wire [W_DATA-1:0] core_rdata_i;
// Load/store signals
wire core_aph_req_d;
wire core_aph_ready_d;
wire core_dph_ready_d;
wire core_dph_err_d;
wire [W_ADDR-1:0] core_haddr_d;
wire [2:0] core_hsize_d;
wire core_hwrite_d;
wire [W_DATA-1:0] core_wdata_d;
wire [W_DATA-1:0] core_rdata_d;
hazard5_core #(
.RESET_VECTOR (RESET_VECTOR),
.EXTENSION_C (EXTENSION_C),
.EXTENSION_M (EXTENSION_M),
.MULDIV_UNROLL (MULDIV_UNROLL),
.MUL_FAST (MUL_FAST),
.CSR_M_MANDATORY (CSR_M_MANDATORY),
.CSR_M_TRAP (CSR_M_TRAP),
.CSR_COUNTER (CSR_COUNTER),
.MTVEC_WMASK (MTVEC_WMASK),
.MTVEC_INIT (MTVEC_INIT),
.REDUCED_BYPASS (REDUCED_BYPASS)
) core (
.clk (clk),
.rst_n (rst_n),
`ifdef RISCV_FORMAL
`RVFI_CONN ,
`endif
.bus_aph_req_i (core_aph_req_i),
.bus_aph_panic_i (core_aph_panic_i),
.bus_aph_ready_i (core_aph_ready_i),
.bus_dph_ready_i (core_dph_ready_i),
.bus_dph_err_i (core_dph_err_i),
.bus_hsize_i (core_hsize_i),
.bus_haddr_i (core_haddr_i),
.bus_rdata_i (core_rdata_i),
.bus_aph_req_d (core_aph_req_d),
.bus_aph_ready_d (core_aph_ready_d),
.bus_dph_ready_d (core_dph_ready_d),
.bus_dph_err_d (core_dph_err_d),
.bus_haddr_d (core_haddr_d),
.bus_hsize_d (core_hsize_d),
.bus_hwrite_d (core_hwrite_d),
.bus_wdata_d (core_wdata_d),
.bus_rdata_d (core_rdata_d),
.irq (irq)
);
// ----------------------------------------------------------------------------
// Arbitration state machine
// {bus_gnt_i, bus_gnt_d} selects which side of the core drives the AHB address
// phase this cycle. At most one of the two bits is set by the priority chain
// below.
wire bus_gnt_i;
wire bus_gnt_d;
// bus_hold_aph: the previous cycle presented a transfer (htrans[1] set) while
// hready was low. bus_gnt_id_prev: the grant vector of the previous cycle.
reg bus_hold_aph;
reg [1:0] bus_gnt_id_prev;
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
bus_hold_aph <= 1'b0;
bus_gnt_id_prev <= 2'h0;
end else begin
bus_hold_aph <= ahblm_htrans[1] && !ahblm_hready;
bus_gnt_id_prev <= {bus_gnt_i, bus_gnt_d};
end
end
// Grant priority, highest first:
//   1. Hold: an address phase was stalled last cycle, so repeat last cycle's
//      grant and keep the same side on the bus.
//   2. Panic fetch: the grant goes to the instruction side whenever
//      core_aph_panic_i is high; core_aph_req_i is not consulted here.
//   3. Load/store request.
//   4. Ordinary instruction fetch request.
//   5. Nothing requested: no grant (bus goes IDLE in the mux below).
assign {bus_gnt_i, bus_gnt_d} =
bus_hold_aph ? bus_gnt_id_prev :
core_aph_panic_i ? 2'b10 :
core_aph_req_d ? 2'b01 :
core_aph_req_i ? 2'b10 :
2'b00 ;
// Keep track of whether instr/data access is active in AHB dataphase.
// These only update when hready is high, i.e. when the granted address phase
// is accepted and becomes the next data phase; they hold while hready is low.
reg bus_active_dph_i;
reg bus_active_dph_d;
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
bus_active_dph_i <= 1'b0;
bus_active_dph_d <= 1'b0;
end else if (ahblm_hready) begin
bus_active_dph_i <= bus_gnt_i;
bus_active_dph_d <= bus_gnt_d;
end
end
// ----------------------------------------------------------------------------
// Address phase request muxing
localparam HTRANS_IDLE = 2'b00;
localparam HTRANS_NSEQ = 2'b10;
// Noncacheable nonbufferable privileged data/instr:
localparam HPROT_DATA = 4'b0011;
localparam HPROT_INSTR = 4'b0010;
// Only single, unlocked transfers are ever issued.
assign ahblm_hburst = 3'b000; // HBURST_SINGLE
assign ahblm_hmastlock = 1'b0;
// Combinational mux of the address-phase signals from whichever side holds the
// grant. The data side is tested first, but the grant vector never has both
// bits set, so the order of the two branches does not express a priority.
always @ (*) begin
if (bus_gnt_d) begin
ahblm_htrans = HTRANS_NSEQ;
ahblm_haddr = core_haddr_d;
ahblm_hsize = core_hsize_d;
ahblm_hwrite = core_hwrite_d;
ahblm_hprot = HPROT_DATA;
end else if (bus_gnt_i) begin
// Instruction fetches are always reads.
ahblm_htrans = HTRANS_NSEQ;
ahblm_haddr = core_haddr_i;
ahblm_hsize = core_hsize_i;
ahblm_hwrite = 1'b0;
ahblm_hprot = HPROT_INSTR;
end else begin
// No grant: drive an IDLE transfer with all other address-phase signals zeroed.
ahblm_htrans = HTRANS_IDLE;
ahblm_haddr = {W_ADDR{1'b0}};
ahblm_hsize = 3'h0;
ahblm_hwrite = 1'b0;
ahblm_hprot = 4'h0;
end
end
// ----------------------------------------------------------------------------
// Response routing
// Data buses directly connected
// Read data is broadcast to both sides of the core; the *_dph_ready_* strobes
// below tell each side when the data is meant for it.
assign core_rdata_d = ahblm_hrdata;
assign core_rdata_i = ahblm_hrdata;
assign ahblm_hwdata = core_wdata_d;
// Handshake based on grant and bus stall
// aph_ready: this side's address phase is accepted (granted, and hready high).
// dph_ready: this side's data phase completes (active in data phase, hready high).
// dph_err:   as dph_ready, additionally qualified by hresp; it is therefore
//            only reported in a cycle where hready is high.
assign core_aph_ready_i = ahblm_hready && bus_gnt_i;
assign core_dph_ready_i = ahblm_hready && bus_active_dph_i;
assign core_dph_err_i = ahblm_hready && bus_active_dph_i && ahblm_hresp;
assign core_aph_ready_d = ahblm_hready && bus_gnt_d;
assign core_dph_ready_d = ahblm_hready && bus_active_dph_d;
assign core_dph_err_d = ahblm_hready && bus_active_dph_d && ahblm_hresp;
endmodule

188
hdl/hazard5_cpu_2port.v Normal file
View File

@ -0,0 +1,188 @@
/**********************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2021 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*********************************************************************/
// Dual-ported top level file for Hazard5 CPU. This file instantiates the
// Hazard5 core, and interfaces its instruction fetch and load/store signals
// to a pair of AHB-Lite master ports.
// Parameters are pulled in from hazard5_config.vh and forwarded to the core
// instance below. Signal suffix convention used throughout this module:
//   *_i = instruction fetch side of the core
//   *_d = load/store (data) side of the core
// Each side of the core gets its own AHB-Lite master port, so no arbitration
// is required: a core request maps directly onto an NSEQ transfer on the
// corresponding port.
module hazard5_cpu_2port #(
`include "hazard5_config.vh"
) (
	// Global signals
	input wire clk,
	input wire rst_n,
`ifdef RISCV_FORMAL
	`RVFI_OUTPUTS ,
`endif
	// Instruction fetch port
	output wire [W_ADDR-1:0] i_haddr,
	output wire i_hwrite,
	output wire [1:0] i_htrans,
	output wire [2:0] i_hsize,
	output wire [2:0] i_hburst,
	output wire [3:0] i_hprot,
	output wire i_hmastlock,
	input wire i_hready,
	input wire i_hresp,
	output wire [W_DATA-1:0] i_hwdata,
	input wire [W_DATA-1:0] i_hrdata,
	// Load/store port
	output wire [W_ADDR-1:0] d_haddr,
	output wire d_hwrite,
	output wire [1:0] d_htrans,
	output wire [2:0] d_hsize,
	output wire [2:0] d_hburst,
	output wire [3:0] d_hprot,
	output wire d_hmastlock,
	input wire d_hready,
	input wire d_hresp,
	output wire [W_DATA-1:0] d_hwdata,
	input wire [W_DATA-1:0] d_hrdata,
	// External level-sensitive interrupt sources (tie 0 if unused)
	input wire [15:0] irq
);

// ----------------------------------------------------------------------------
// Processor core

// Instruction fetch signals
wire core_aph_req_i;
wire core_aph_panic_i; // unused as there's no arbitration
wire core_aph_ready_i;
wire core_dph_ready_i;
wire core_dph_err_i;
wire [2:0] core_hsize_i;
wire [W_ADDR-1:0] core_haddr_i;
wire [W_DATA-1:0] core_rdata_i;

// Load/store signals
wire core_aph_req_d;
wire core_aph_ready_d;
wire core_dph_ready_d;
wire core_dph_err_d;
wire [W_ADDR-1:0] core_haddr_d;
wire [2:0] core_hsize_d;
wire core_hwrite_d;
wire [W_DATA-1:0] core_wdata_d;
wire [W_DATA-1:0] core_rdata_d;

hazard5_core #(
	.RESET_VECTOR (RESET_VECTOR),
	.EXTENSION_C (EXTENSION_C),
	.EXTENSION_M (EXTENSION_M),
	.MULDIV_UNROLL (MULDIV_UNROLL),
	.MUL_FAST (MUL_FAST),
	.CSR_M_MANDATORY (CSR_M_MANDATORY),
	.CSR_M_TRAP (CSR_M_TRAP),
	.CSR_COUNTER (CSR_COUNTER),
	.MTVEC_WMASK (MTVEC_WMASK),
	.MTVEC_INIT (MTVEC_INIT),
	.REDUCED_BYPASS (REDUCED_BYPASS)
) core (
	.clk (clk),
	.rst_n (rst_n),
`ifdef RISCV_FORMAL
	`RVFI_CONN ,
`endif
	.bus_aph_req_i (core_aph_req_i),
	.bus_aph_panic_i (core_aph_panic_i),
	.bus_aph_ready_i (core_aph_ready_i),
	.bus_dph_ready_i (core_dph_ready_i),
	.bus_dph_err_i (core_dph_err_i),
	.bus_hsize_i (core_hsize_i),
	.bus_haddr_i (core_haddr_i),
	.bus_rdata_i (core_rdata_i),
	.bus_aph_req_d (core_aph_req_d),
	.bus_aph_ready_d (core_aph_ready_d),
	.bus_dph_ready_d (core_dph_ready_d),
	.bus_dph_err_d (core_dph_err_d),
	.bus_haddr_d (core_haddr_d),
	.bus_hsize_d (core_hsize_d),
	.bus_hwrite_d (core_hwrite_d),
	.bus_wdata_d (core_wdata_d),
	.bus_rdata_d (core_rdata_d),
	.irq (irq)
);

// ----------------------------------------------------------------------------
// Shared AHB-Lite encodings

localparam HTRANS_IDLE = 2'b00;
localparam HTRANS_NSEQ = 2'b10;

// Noncacheable nonbufferable privileged data/instr (same values as the
// single-port wrapper). HPROT[0] distinguishes data access (1) from opcode
// fetch (0).
localparam HPROT_DATA  = 4'b0011;
localparam HPROT_INSTR = 4'b0010;

// ----------------------------------------------------------------------------
// Instruction port

// Address phase: every core fetch request becomes a single NSEQ transfer.
assign i_haddr = core_haddr_i;
assign i_htrans = core_aph_req_i ? HTRANS_NSEQ : HTRANS_IDLE;
assign i_hsize = core_hsize_i;

// Tracks whether a fetch is currently in its data phase. Updated only when
// i_hready is high, i.e. when the address phase presented this cycle is
// accepted; holds its value while the bus is stalled.
reg dphase_active_i;
always @ (posedge clk or negedge rst_n)
	if (!rst_n)
		dphase_active_i <= 1'b0;
	else if (i_hready)
		dphase_active_i <= core_aph_req_i;

// Handshake back to the core:
//   aph_ready = the requested address phase is accepted this cycle
//   dph_ready = the outstanding data phase completes this cycle
//   dph_err   = as dph_ready, additionally qualified by hresp
assign core_aph_ready_i = i_hready && core_aph_req_i;
assign core_dph_ready_i = i_hready && dphase_active_i;
assign core_dph_err_i = i_hready && dphase_active_i && i_hresp;
assign core_rdata_i = i_hrdata;

// The fetch port is read-only: single, unlocked transfers, write data tied off.
assign i_hwrite = 1'b0;
assign i_hburst = 3'h0;
assign i_hprot = HPROT_INSTR;
assign i_hmastlock = 1'b0;
assign i_hwdata = {W_DATA{1'b0}};

// ----------------------------------------------------------------------------
// Load/store port

// Address phase: every core load/store request becomes a single NSEQ transfer.
assign d_haddr = core_haddr_d;
assign d_htrans = core_aph_req_d ? HTRANS_NSEQ : HTRANS_IDLE;
assign d_hwrite = core_hwrite_d;
assign d_hsize = core_hsize_d;

// Tracks whether a load/store is currently in its data phase; same scheme as
// dphase_active_i above.
reg dphase_active_d;
always @ (posedge clk or negedge rst_n)
	if (!rst_n)
		dphase_active_d <= 1'b0;
	else if (d_hready)
		dphase_active_d <= core_aph_req_d;

assign core_aph_ready_d = d_hready && core_aph_req_d;
assign core_dph_ready_d = d_hready && dphase_active_d;
assign core_dph_err_d = d_hready && dphase_active_d && d_hresp;
assign core_rdata_d = d_hrdata;
assign d_hwdata = core_wdata_d;

assign d_hburst = 3'h0;
// Load/store transfers must be flagged as data accesses (HPROT[0] = 1). This
// port previously drove the opcode-fetch encoding 4'b0010, which mislabels
// loads and stores to any downstream logic that decodes HPROT.
assign d_hprot = HPROT_DATA;
assign d_hmastlock = 1'b0;

endmodule

778
hdl/hazard5_csr.v Normal file
View File

@ -0,0 +1,778 @@
/******************************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2019 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*****************************************************************************/
// Control and Status Registers (CSRs)
// Also includes CSR-related logic like interrupt enable/masking,
// trap vector calculation.
module hazard5_csr #(
parameter XLEN = 32, // Must be 32
parameter CSR_M_MANDATORY = 1, // Include mandatory M-mode CSRs e.g. misa, marchid
parameter CSR_M_TRAP = 1, // Include M-mode trap setup/handling CSRs
parameter CSR_COUNTER = 1, // Include counter/timer CSRs
parameter EXTENSION_C = 0, // For misa
parameter EXTENSION_M = 0, // For misa
parameter MTVEC_WMASK = 32'hfffff000, // Save gates by making trap vector base partially fixed (legal, as it's WARL)
parameter MTVEC_INIT = 32'h0,// Initial value of trap vector base
parameter W_COUNTER = 64 // This *should* be 64, but can be reduced to save gates.
// The full 64 bits is writeable, so high-word increment can
// be implemented in software, and a narrower hw counter used
) (
input wire clk,
input wire rst_n,
// Read port is combinatorial.
// Write port is synchronous, and write effects will be observed on the next clock cycle.
// The *_soon strobes are versions which the core does not gate with its stall signal.
// These are needed because:
// - Core stall is a function of bus stall
// - Illegal CSR accesses produce trap entry
// - Trap entry (not necessarily caused by CSR access) gates outgoing bus accesses
// - Through-paths from e.g. hready to htrans are problematic for timing/implementation
input wire [11:0] addr,
input wire [XLEN-1:0] wdata,
input wire wen,
input wire wen_soon, // wen will be asserted once some stall condition clears
input wire [1:0] wtype,
output reg [XLEN-1:0] rdata,
input wire ren,
input wire ren_soon, // ren will be asserted once some stall condition clears
// Trap signalling
// *We* tell the core that we are taking a trap, and where to, based on:
// - Synchronous exception inputs from the core
// - External IRQ signals
// - Masking etc based on the state of CSRs like mie
//
// We do this by raising trap_enter_vld, and keeping it raised until trap_enter_rdy
// goes high. trap_addr has the absolute value of trap target address.
// Once trap_enter_vld && _rdy, mepc_in is copied to mepc, and other trap state is set.
//
// Note that an exception input can go away, e.g. if the pipe gets flushed. In this
// case we lower trap_enter_vld.
//
// The core tells *us* that we are leaving the trap, by putting a 1-clock pulse on
// trap_exit. The core will simultaneously produce a jump (specifically a mispredict)
// to mepc_out.
output wire [XLEN-1:0] trap_addr,
output wire trap_enter_vld,
input wire trap_enter_rdy,
input wire trap_exit,
output wire trap_is_exception, // diagnostic
input wire [XLEN-1:0] mepc_in,
output wire [XLEN-1:0] mepc_out,
// Exceptions must *not* be a function of bus stall.
input wire [15:0] irq,
input wire except_instr_misaligned,
input wire except_instr_fault,
input wire except_instr_invalid,
input wire except_breakpoint,
input wire except_load_misaligned,
input wire except_load_fault,
input wire except_store_misaligned,
input wire except_store_fault,
input wire except_ecall,
// Other CSR-specific signalling
input wire instr_ret
);
// TODO block CSR access when entering trap?
`include "hazard5_ops.vh"
// All-zeroes XLEN-wide constant, used below as the reset value of the
// full-width CSR registers.
localparam X0 = {XLEN{1'b0}};
// ----------------------------------------------------------------------------
// List of M-mode CSRs (we implement a configurable subset of M-mode).
// ----------------------------------------------------------------------------
// The CSR block is the only piece of hardware which needs to know this mapping.
// Each constant is a 12-bit CSR address that is compared against `addr`.
// Declaring an address here does not implement the CSR: only addresses that
// are matched by the read-port case statement in this module set decode_match,
// and every other access is reported as a CSR access error.
// Machine Information Registers (RO)
localparam MVENDORID = 12'hf11; // Vendor ID.
localparam MARCHID = 12'hf12; // Architecture ID.
localparam MIMPID = 12'hf13; // Implementation ID.
localparam MHARTID = 12'hf14; // Hardware thread ID.
// Machine Trap Setup (RW)
// MEDELEG, MIDELEG and MCOUNTEREN are listed for reference only; the read
// port does not decode them.
localparam MSTATUS = 12'h300; // Machine status register.
localparam MISA = 12'h301; // ISA and extensions
localparam MEDELEG = 12'h302; // Machine exception delegation register.
localparam MIDELEG = 12'h303; // Machine interrupt delegation register.
localparam MIE = 12'h304; // Machine interrupt-enable register.
localparam MTVEC = 12'h305; // Machine trap-handler base address.
localparam MCOUNTEREN = 12'h306; // Machine counter enable.
// Machine Trap Handling (RW)
localparam MSCRATCH = 12'h340; // Scratch register for machine trap handlers.
localparam MEPC = 12'h341; // Machine exception program counter.
localparam MCAUSE = 12'h342; // Machine trap cause.
localparam MTVAL = 12'h343; // Machine bad address or instruction.
localparam MIP = 12'h344; // Machine interrupt pending.
// Machine Memory Protection (RW)
// None of the PMP addresses are decoded by the read port in this module.
localparam PMPCFG0 = 12'h3a0; // Physical memory protection configuration.
localparam PMPCFG1 = 12'h3a1; // Physical memory protection configuration, RV32 only.
localparam PMPCFG2 = 12'h3a2; // Physical memory protection configuration.
localparam PMPCFG3 = 12'h3a3; // Physical memory protection configuration, RV32 only.
localparam PMPADDR0 = 12'h3b0; // Physical memory protection address register.
localparam PMPADDR1 = 12'h3b1; // Physical memory protection address register.
// Performance counters (RW)
// The counter write mux relies on this encoding: address bit 1 separates
// MINSTRET from MCYCLE (and bit 7, set in the *H addresses, separates the
// high halves from the low halves).
localparam MCYCLE = 12'hb00; // Raw cycles since start of day
// NOTE(review): 0xb01 does not look like a standard machine-mode CSR address
// (the privileged spec exposes mtime as a memory-mapped register) -- confirm
// that decoding this alias is intentional.
localparam MTIME = 12'hb01; // "Wall clock", can be aliased to MCYCLE
localparam MINSTRET = 12'hb02; // Instruction retire count since start of day
// Hardware performance-monitor counters 3..31, low halves at 0xb03..0xb1f.
// No storage is implemented for these: when they are decoded at all (see
// DECODE_HPM) they read as zero and ignore writes.
localparam MHPMCOUNTER3 = 12'hb03; // WARL (we tie to 0)
localparam MHPMCOUNTER4 = 12'hb04; // WARL (we tie to 0)
localparam MHPMCOUNTER5 = 12'hb05; // WARL (we tie to 0)
localparam MHPMCOUNTER6 = 12'hb06; // WARL (we tie to 0)
localparam MHPMCOUNTER7 = 12'hb07; // WARL (we tie to 0)
localparam MHPMCOUNTER8 = 12'hb08; // WARL (we tie to 0)
localparam MHPMCOUNTER9 = 12'hb09; // WARL (we tie to 0)
localparam MHPMCOUNTER10 = 12'hb0a; // WARL (we tie to 0)
localparam MHPMCOUNTER11 = 12'hb0b; // WARL (we tie to 0)
localparam MHPMCOUNTER12 = 12'hb0c; // WARL (we tie to 0)
localparam MHPMCOUNTER13 = 12'hb0d; // WARL (we tie to 0)
localparam MHPMCOUNTER14 = 12'hb0e; // WARL (we tie to 0)
localparam MHPMCOUNTER15 = 12'hb0f; // WARL (we tie to 0)
localparam MHPMCOUNTER16 = 12'hb10; // WARL (we tie to 0)
localparam MHPMCOUNTER17 = 12'hb11; // WARL (we tie to 0)
localparam MHPMCOUNTER18 = 12'hb12; // WARL (we tie to 0)
localparam MHPMCOUNTER19 = 12'hb13; // WARL (we tie to 0)
localparam MHPMCOUNTER20 = 12'hb14; // WARL (we tie to 0)
localparam MHPMCOUNTER21 = 12'hb15; // WARL (we tie to 0)
localparam MHPMCOUNTER22 = 12'hb16; // WARL (we tie to 0)
localparam MHPMCOUNTER23 = 12'hb17; // WARL (we tie to 0)
localparam MHPMCOUNTER24 = 12'hb18; // WARL (we tie to 0)
localparam MHPMCOUNTER25 = 12'hb19; // WARL (we tie to 0)
localparam MHPMCOUNTER26 = 12'hb1a; // WARL (we tie to 0)
localparam MHPMCOUNTER27 = 12'hb1b; // WARL (we tie to 0)
localparam MHPMCOUNTER28 = 12'hb1c; // WARL (we tie to 0)
localparam MHPMCOUNTER29 = 12'hb1d; // WARL (we tie to 0)
localparam MHPMCOUNTER30 = 12'hb1e; // WARL (we tie to 0)
localparam MHPMCOUNTER31 = 12'hb1f; // WARL (we tie to 0)
// High halves: each is the matching low-half address with bit 7 set
// (0xb80 + n). Only MCYCLEH and MINSTRETH are backed by real registers.
localparam MCYCLEH = 12'hb80; // High halves of each counter
localparam MTIMEH = 12'hb81;
localparam MINSTRETH = 12'hb82;
localparam MHPMCOUNTER3H = 12'hb83;
localparam MHPMCOUNTER4H = 12'hb84;
localparam MHPMCOUNTER5H = 12'hb85;
localparam MHPMCOUNTER6H = 12'hb86;
localparam MHPMCOUNTER7H = 12'hb87;
localparam MHPMCOUNTER8H = 12'hb88;
localparam MHPMCOUNTER9H = 12'hb89;
localparam MHPMCOUNTER10H = 12'hb8a;
localparam MHPMCOUNTER11H = 12'hb8b;
localparam MHPMCOUNTER12H = 12'hb8c;
localparam MHPMCOUNTER13H = 12'hb8d;
localparam MHPMCOUNTER14H = 12'hb8e;
localparam MHPMCOUNTER15H = 12'hb8f;
localparam MHPMCOUNTER16H = 12'hb90;
localparam MHPMCOUNTER17H = 12'hb91;
localparam MHPMCOUNTER18H = 12'hb92;
localparam MHPMCOUNTER19H = 12'hb93;
localparam MHPMCOUNTER20H = 12'hb94;
localparam MHPMCOUNTER21H = 12'hb95;
localparam MHPMCOUNTER22H = 12'hb96;
localparam MHPMCOUNTER23H = 12'hb97;
localparam MHPMCOUNTER24H = 12'hb98;
localparam MHPMCOUNTER25H = 12'hb99;
localparam MHPMCOUNTER26H = 12'hb9a;
localparam MHPMCOUNTER27H = 12'hb9b;
localparam MHPMCOUNTER28H = 12'hb9c;
localparam MHPMCOUNTER29H = 12'hb9d;
localparam MHPMCOUNTER30H = 12'hb9e;
localparam MHPMCOUNTER31H = 12'hb9f;
// Machine Counter Setup
// mcountinhibit is CSR 0x320 in the RISC-V privileged CSR map. It was
// previously declared as 0x302, which is medeleg: that made medeleg decode as
// a legal CSR whenever CSR_COUNTER was set (it must not exist on an M-only
// core) while the real mcountinhibit address raised an illegal instruction.
localparam MCOUNTINHIBIT = 12'h320; // WARL (we must tie 0 as CYCLE and TIME are aliased)
// Event selectors for the hardware performance monitors, 0x323..0x33f
// (0x320 + n for counter n).
localparam MHPMEVENT3    = 12'h323; // WARL (we tie to 0)
localparam MHPMEVENT4    = 12'h324; // WARL (we tie to 0)
localparam MHPMEVENT5    = 12'h325; // WARL (we tie to 0)
localparam MHPMEVENT6    = 12'h326; // WARL (we tie to 0)
localparam MHPMEVENT7    = 12'h327; // WARL (we tie to 0)
localparam MHPMEVENT8    = 12'h328; // WARL (we tie to 0)
localparam MHPMEVENT9    = 12'h329; // WARL (we tie to 0)
localparam MHPMEVENT10   = 12'h32a; // WARL (we tie to 0)
localparam MHPMEVENT11   = 12'h32b; // WARL (we tie to 0)
localparam MHPMEVENT12   = 12'h32c; // WARL (we tie to 0)
localparam MHPMEVENT13   = 12'h32d; // WARL (we tie to 0)
localparam MHPMEVENT14   = 12'h32e; // WARL (we tie to 0)
localparam MHPMEVENT15   = 12'h32f; // WARL (we tie to 0)
localparam MHPMEVENT16   = 12'h330; // WARL (we tie to 0)
localparam MHPMEVENT17   = 12'h331; // WARL (we tie to 0)
localparam MHPMEVENT18   = 12'h332; // WARL (we tie to 0)
localparam MHPMEVENT19   = 12'h333; // WARL (we tie to 0)
localparam MHPMEVENT20   = 12'h334; // WARL (we tie to 0)
localparam MHPMEVENT21   = 12'h335; // WARL (we tie to 0)
localparam MHPMEVENT22   = 12'h336; // WARL (we tie to 0)
localparam MHPMEVENT23   = 12'h337; // WARL (we tie to 0)
localparam MHPMEVENT24   = 12'h338; // WARL (we tie to 0)
localparam MHPMEVENT25   = 12'h339; // WARL (we tie to 0)
localparam MHPMEVENT26   = 12'h33a; // WARL (we tie to 0)
localparam MHPMEVENT27   = 12'h33b; // WARL (we tie to 0)
localparam MHPMEVENT28   = 12'h33c; // WARL (we tie to 0)
localparam MHPMEVENT29   = 12'h33d; // WARL (we tie to 0)
localparam MHPMEVENT30   = 12'h33e; // WARL (we tie to 0)
localparam MHPMEVENT31   = 12'h33f; // WARL (we tie to 0)
// TODO
// Decoding all these damn HPMs bloats the logic. If we don't decode them, we
// can still trap the illegal opcode and emulate them. This is ugly and
// contravenes the standard, but why on earth would they mandate 100 useless
// registers with no defined operation?
// If you really want them, set this to 1:
// (When 0, every MHPMCOUNTERn, MHPMCOUNTERnH and MHPMEVENTn access is
// reported as a CSR access error instead of reading as zero.)
localparam DECODE_HPM = 0;
// ----------------------------------------------------------------------------
// CSR state + update logic
// ----------------------------------------------------------------------------
// Names are (reg)_(field)
// Read-modify-write helper shared by the full-width CSRs.
// Takes the current value of a register and returns the value it should hold
// after the pending CSR write, according to the write type:
//   CSR_WTYPE_C : clear every bit that is set in wdata
//   CSR_WTYPE_S : set every bit that is set in wdata
//   otherwise   : overwrite with wdata
// wtype and wdata are not arguments; they are read from the module scope.
function [XLEN-1:0] update;
	input [XLEN-1:0] prev;
	reg [XLEN-1:0] after_clear;
	reg [XLEN-1:0] after_set;
	begin
		after_clear = prev & ~wdata;
		after_set   = prev | wdata;
		update = wtype == CSR_WTYPE_C ? after_clear :
		         wtype == CSR_WTYPE_S ? after_set   :
		                                wdata;
	end
endfunction
// ----------------------------------------------------------------------------
// Trap-handling

// mstatus.MPIE / mstatus.MIE behave as a two-entry interrupt-enable stack.
// In priority order:
//   1. trap entry accepted : MPIE <= MIE,  MIE <= 0
//   2. trap exit           : MIE  <= MPIE, MPIE <= 1
//   3. CSR write to MSTATUS: write/set/clear both bits from wdata[7] (MPIE)
//      and wdata[3] (MIE)
// Nothing is updated at all unless CSR_M_TRAP is set.
reg mstatus_mpie;
reg mstatus_mie;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		{mstatus_mpie, mstatus_mie} <= 2'b00;
	end else if (CSR_M_TRAP && trap_enter_vld && trap_enter_rdy) begin
		{mstatus_mpie, mstatus_mie} <= {mstatus_mie, 1'b0};
	end else if (CSR_M_TRAP && trap_exit) begin
		{mstatus_mpie, mstatus_mie} <= {1'b1, mstatus_mpie};
	end else if (CSR_M_TRAP && wen && addr == MSTATUS) begin
		{mstatus_mpie, mstatus_mie} <=
			wtype == CSR_WTYPE_C ? {mstatus_mpie, mstatus_mie} & ~{wdata[7], wdata[3]} :
			wtype == CSR_WTYPE_S ? {mstatus_mpie, mstatus_mie} |  {wdata[7], wdata[3]} :
			                       {wdata[7], wdata[3]};
	end
end
// mscratch: a plain XLEN-wide read/write register with no hardware side
// effects. It resets to zero and only changes on a CSR write to MSCRATCH,
// and only when CSR_M_TRAP is set.
reg [XLEN-1:0] mscratch;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n)
		mscratch <= X0;
	else if (CSR_M_TRAP && wen && addr == MSCRATCH)
		mscratch <= update(mscratch);
end
// Trap vector base.
// mtvec_reg stores whatever software wrote. The value used by the rest of
// the block, mtvec, takes each bit from mtvec_reg where MTVEC_WMASK is 1 and
// from MTVEC_INIT where MTVEC_WMASK is 0, so masked-off bits stay at their
// initial value regardless of writes.
reg [XLEN-1:0] mtvec_reg;
wire [XLEN-1:0] mtvec = (MTVEC_INIT & ~MTVEC_WMASK) | (mtvec_reg & MTVEC_WMASK);

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n)
		mtvec_reg <= MTVEC_INIT;
	else if (CSR_M_TRAP && wen && addr == MTVEC)
		mtvec_reg <= update(mtvec_reg);
end
// Exception program counter.
// Captures mepc_in when a trap entry is accepted; otherwise it can be
// written/set/cleared by software. Trap entry takes priority over a CSR
// write. Bit 0 is forced to zero on every update via MEPC_MASK.
reg [XLEN-1:0] mepc;
localparam MEPC_MASK = {{XLEN-1{1'b1}}, 1'b0};

assign mepc_out = mepc;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n)
		mepc <= X0;
	else if (CSR_M_TRAP && trap_enter_vld && trap_enter_rdy)
		mepc <= mepc_in & MEPC_MASK;
	else if (CSR_M_TRAP && wen && addr == MEPC)
		mepc <= update(mepc) & MEPC_MASK;
end
// Interrupt enable register.
// MIE_CONST_MASK marks the bits that are hardwired to zero; every write is
// ANDed with its complement, leaving bits 31:16, 11, 7 and 3 writable.
localparam MIE_CONST_MASK = 32'h0000f777;
reg [XLEN-1:0] mie;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n)
		mie <= X0;
	else if (CSR_M_TRAP && wen && addr == MIE)
		mie <= update(mie) & ~MIE_CONST_MASK;
end

// Named views of the writable fields:
wire        mie_msie = mie[3];     // Software interrupt enable
wire        mie_mtie = mie[7];     // Timer interrupt enable
wire        mie_meie = mie[11];    // Global external IRQ enable. This is ANDed over our per-IRQ mask
wire [15:0] mie_irq  = mie[31:16]; // Per-IRQ mask. Nonstandard, but legal.

// Interrupt status ("pending") register. Only declared here; it is driven
// by a continuous assignment later in the module.
// None of the bits we implement are directly writeable.
// MSIP is only writeable by a "platform-defined" mechanism, and we don't
// implement one!
wire [XLEN-1:0] mip;
// Trap cause.
// Only the interrupt flag (read back as bit 31) and a 5-bit cause code
// (bits 4:0) are stored; the remaining bits are WLRL and tied off.
// An accepted trap entry loads the *_next values and takes priority over a
// software write/set/clear of the same fields through MCAUSE.
reg        mcause_irq;
reg  [4:0] mcause_code;
wire       mcause_irq_next;
wire [4:0] mcause_code_next;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		{mcause_irq, mcause_code} <= {1'b0, 5'h0};
	end else if (CSR_M_TRAP && trap_enter_vld && trap_enter_rdy) begin
		{mcause_irq, mcause_code} <= {mcause_irq_next, mcause_code_next};
	end else if (CSR_M_TRAP && wen && addr == MCAUSE) begin
		{mcause_irq, mcause_code} <=
			wtype == CSR_WTYPE_C ? {mcause_irq, mcause_code} & ~{wdata[31], wdata[4:0]} :
			wtype == CSR_WTYPE_S ? {mcause_irq, mcause_code} |  {wdata[31], wdata[4:0]} :
			                       {wdata[31], wdata[4:0]};
	end
end
// ----------------------------------------------------------------------------
// Counters
// MCYCLE and MTIME are aliased (fine as long as MCOUNTINHIBIT[0] is tied low)
// Each 64-bit counter is held as two XLEN-wide halves so that software can
// access the halves as separate CSRs.
reg [XLEN-1:0] mcycleh;
reg [XLEN-1:0] mcycle;
reg [XLEN-1:0] minstreth;
reg [XLEN-1:0] minstret;
// Single shared write/set/clear datapath for all four halves. The register
// fed into update() is picked from two address bits only: addr[7] selects the
// high half (0xb8x vs 0xb0x) and addr[1] selects instret (0x..2) vs cycle
// (0x..0). The full address compare happens at the write enables below.
wire [XLEN-1:0] ctr_update = update(
{addr[7], addr[1]} == 2'b00 ? mcycle :
{addr[7], addr[1]} == 2'b01 ? minstret :
{addr[7], addr[1]} == 2'b10 ? mcycleh :
minstreth
);
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
mcycleh <= X0;
mcycle <= X0;
minstreth <= X0;
minstret <= X0;
end else if (CSR_COUNTER) begin
// Hold the top (2 * XLEN - W_COUNTER) bits constant to save gates:
// (the incremented value supplies the low W_COUNTER bits, the old value
// supplies everything above them)
{mcycleh, mcycle} <= (({mcycleh, mcycle} + 1'b1) & ~({2*XLEN{1'b1}} << W_COUNTER))
| ({mcycleh, mcycle} & ({2*XLEN{1'b1}} << W_COUNTER));
// The retire counter only advances on cycles where instr_ret is high.
if (instr_ret)
{minstreth, minstret} <= (({minstreth, minstret} + 1'b1) & ~({2*XLEN{1'b1}} << W_COUNTER))
| ({minstreth, minstret} & ({2*XLEN{1'b1}} << W_COUNTER));
// Software writes. These must stay AFTER the increments above: the last
// non-blocking assignment to a register wins, so a write replaces the
// increment for the addressed half while the other half still counts.
// There is no write enable for MTIME/MTIMEH, so writes to those addresses
// change nothing here.
if (wen) begin
if (addr == MCYCLEH)
mcycleh <= ctr_update;
if (addr == MCYCLE)
mcycle <= ctr_update;
if (addr == MINSTRETH)
minstreth <= ctr_update;
if (addr == MINSTRET)
minstret <= ctr_update;
end
end
end
// ----------------------------------------------------------------------------
// Read port + detect addressing of unmapped CSRs
// ----------------------------------------------------------------------------
// Combinational CSR read port.
// For the addressed CSR this produces the read data (rdata) and a flag
// (decode_match) saying whether the access hits an implemented register.
// Both default to zero, so any address that is not listed, or whose feature
// parameter is disabled, reads as zero and is reported as unmapped.
// Case items that behave identically share one comma-separated label list.
reg decode_match;

always @ (*) begin
	decode_match = 1'b0;
	rdata = {XLEN{1'b0}};

	case (addr)

	// ------------------------------------------------------------------------
	// Mandatory CSRs

	MISA: if (CSR_M_MANDATORY) begin
		// WARL, so it is legal to be tied constant
		decode_match = 1'b1;
		rdata = {
			2'h1,            // MXL: 32-bit
			{XLEN-28{1'b0}}, // WLRL
			13'd0,           // Z...N, no
			|EXTENSION_M,
			3'd0,            // L...J, no
			1'b1,            // Integer ISA
			5'd0,            // H...D, no
			|EXTENSION_C,
			2'b0
		};
	end

	// Machine information registers. All read-only (an upcoming write makes
	// the access unmapped) and all tied to zero:
	//  - MVENDORID: no JEDEC ID. It is legal to tie this to 0 if non-commercial.
	//  - MARCHID:   no RV foundation ID. It is legal to tie this to 0.
	//  - MIMPID:    TODO put git SHA or something here
	//  - MHARTID:   there is only one hart, and spec says this must be numbered 0.
	MVENDORID, MARCHID, MIMPID, MHARTID: if (CSR_M_MANDATORY) begin
		decode_match = !wen_soon; // MRO
		rdata = {XLEN{1'b0}};
	end

	MSTATUS: if (CSR_M_MANDATORY || CSR_M_TRAP) begin
		decode_match = 1'b1;
		rdata = {
			1'b0,         // Never any dirty state besides GPRs
			8'd0,         // (WPRI)
			1'b0,         // TSR (Trap SRET), tied 0 if no S mode.
			1'b0,         // TW (Timeout Wait), tied 0 if only M mode.
			1'b0,         // TVM (trap virtual memory), tied 0 if no S mode.
			1'b0,         // MXR (Make eXecutable Readable), tied 0 if not S mode.
			1'b0,         // SUM, tied 0, we have no S or U mode
			1'b0,         // MPRV (modify privilege), tied 0 if no U mode
			4'd0,         // XS, FS always "off" (no extension state to clear!)
			2'b11,        // MPP (M-mode previous privilege), we are always M-mode
			2'd0,         // (WPRI)
			1'b0,         // SPP, tied 0 if S mode not supported
			mstatus_mpie,
			3'd0,         // No S, U
			mstatus_mie,
			3'd0          // No S, U
		};
	end

	// MEDELEG, MIDELEG should not exist for M-only implementations. Will raise
	// illegal instruction exception if accessed.

	// ------------------------------------------------------------------------
	// Trap-handling CSRs

	// TODO bit of a hack but this is a 32 bit synthesised register with
	// set/clear/write/read, don't turn it on unless we really have to
	MSCRATCH: if (CSR_M_TRAP && CSR_M_MANDATORY) begin
		decode_match = 1'b1;
		rdata = mscratch;
	end

	MEPC: if (CSR_M_TRAP) begin
		decode_match = 1'b1;
		rdata = mepc;
	end

	MCAUSE: if (CSR_M_TRAP) begin
		decode_match = 1'b1;
		rdata = {
			mcause_irq,      // Sign bit is 1 for IRQ, 0 for exception
			{26{1'b0}},      // Padding
			mcause_code[4:0] // Enough for 16 external IRQs, which is all we have room for in mip/mie
		};
	end

	MTVAL: if (CSR_M_TRAP) begin
		// Hardwired to 0: rdata keeps its default
		decode_match = 1'b1;
	end

	MIE: if (CSR_M_TRAP) begin
		decode_match = 1'b1;
		rdata = mie;
	end

	MIP: if (CSR_M_TRAP) begin
		decode_match = 1'b1;
		rdata = mip;
	end

	MTVEC: if (CSR_M_TRAP) begin
		decode_match = 1'b1;
		rdata = {
			mtvec[XLEN-1:2], // BASE
			2'h1             // MODE = Vectored (Direct is useless, and we don't have CLIC)
		};
	end

	// ------------------------------------------------------------------------
	// Counter CSRs

	// Get the tied WARLs out the way first. These only decode when both
	// DECODE_HPM and CSR_COUNTER are set, and always read as zero.
	MHPMCOUNTER3,   MHPMCOUNTER4,   MHPMCOUNTER5,   MHPMCOUNTER6,
	MHPMCOUNTER7,   MHPMCOUNTER8,   MHPMCOUNTER9,   MHPMCOUNTER10,
	MHPMCOUNTER11,  MHPMCOUNTER12,  MHPMCOUNTER13,  MHPMCOUNTER14,
	MHPMCOUNTER15,  MHPMCOUNTER16,  MHPMCOUNTER17,  MHPMCOUNTER18,
	MHPMCOUNTER19,  MHPMCOUNTER20,  MHPMCOUNTER21,  MHPMCOUNTER22,
	MHPMCOUNTER23,  MHPMCOUNTER24,  MHPMCOUNTER25,  MHPMCOUNTER26,
	MHPMCOUNTER27,  MHPMCOUNTER28,  MHPMCOUNTER29,  MHPMCOUNTER30,
	MHPMCOUNTER31,
	MHPMCOUNTER3H,  MHPMCOUNTER4H,  MHPMCOUNTER5H,  MHPMCOUNTER6H,
	MHPMCOUNTER7H,  MHPMCOUNTER8H,  MHPMCOUNTER9H,  MHPMCOUNTER10H,
	MHPMCOUNTER11H, MHPMCOUNTER12H, MHPMCOUNTER13H, MHPMCOUNTER14H,
	MHPMCOUNTER15H, MHPMCOUNTER16H, MHPMCOUNTER17H, MHPMCOUNTER18H,
	MHPMCOUNTER19H, MHPMCOUNTER20H, MHPMCOUNTER21H, MHPMCOUNTER22H,
	MHPMCOUNTER23H, MHPMCOUNTER24H, MHPMCOUNTER25H, MHPMCOUNTER26H,
	MHPMCOUNTER27H, MHPMCOUNTER28H, MHPMCOUNTER29H, MHPMCOUNTER30H,
	MHPMCOUNTER31H,
	MHPMEVENT3,     MHPMEVENT4,     MHPMEVENT5,     MHPMEVENT6,
	MHPMEVENT7,     MHPMEVENT8,     MHPMEVENT9,     MHPMEVENT10,
	MHPMEVENT11,    MHPMEVENT12,    MHPMEVENT13,    MHPMEVENT14,
	MHPMEVENT15,    MHPMEVENT16,    MHPMEVENT17,    MHPMEVENT18,
	MHPMEVENT19,    MHPMEVENT20,    MHPMEVENT21,    MHPMEVENT22,
	MHPMEVENT23,    MHPMEVENT24,    MHPMEVENT25,    MHPMEVENT26,
	MHPMEVENT27,    MHPMEVENT28,    MHPMEVENT29,    MHPMEVENT30,
	MHPMEVENT31: if (DECODE_HPM && CSR_COUNTER) begin
		decode_match = 1'b1;
	end

	MCOUNTINHIBIT: if (CSR_COUNTER) begin
		decode_match = 1'b1;
	end

	// Phew...

	// MTIME/MTIMEH read the cycle counter. Can be aliased as long as we tie
	// MCOUNTINHIBIT[0] to 0
	MCYCLE, MTIME: if (CSR_COUNTER) begin
		decode_match = 1'b1;
		rdata = mcycle;
	end

	MINSTRET: if (CSR_COUNTER) begin
		decode_match = 1'b1;
		rdata = minstret;
	end

	MCYCLEH, MTIMEH: if (CSR_COUNTER) begin
		decode_match = 1'b1;
		rdata = mcycleh;
	end

	MINSTRETH: if (CSR_COUNTER) begin
		decode_match = 1'b1;
		rdata = minstreth;
	end

	default: begin end

	endcase
end
// An access is in error when a CSR read or write is about to happen but the
// read port did not recognise the address (or rejected a write to a
// read-only register).
wire csr_access_error = (wen_soon || ren_soon) && !decode_match;

// ----------------------------------------------------------------------------
// Trap request generation
// ----------------------------------------------------------------------------

// Keep track of whether we are in a trap; we do not permit exception nesting.
// The flag is set when a trap entry is accepted and cleared by trap_exit;
// trap_exit wins if both occur in the same cycle.
// TODO lockup condition?
reg in_trap;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		in_trap <= 1'b0;
	end else begin
		in_trap <= !trap_exit && (in_trap || (trap_enter_vld && trap_enter_rdy));
	end
end
// Exception selection
// Most-significant is lowest priority
// FIXME: this is different from the priority order given in the spec, but will get us off the ground
// Request vector, one bit per exception source. The concatenation is written
// MSB first, so except_instr_misaligned is bit 0 and except_ecall is bit 11.
// The granted bit index is used directly as the mcause code and as the trap
// vector table index, so the order of the entries must not be changed.
wire [15:0] exception_req = {
4'h0, // reserved by spec
except_ecall,
3'h0, // nonimplemented privileges
except_store_fault,
except_store_misaligned,
except_load_fault,
except_load_misaligned,
except_breakpoint,
// A CSR access that fails decode is reported as an invalid instruction
except_instr_invalid || csr_access_error,
except_instr_fault,
except_instr_misaligned
};
// Exceptions are suppressed entirely while already inside a trap (no nesting).
wire exception_req_any = |exception_req && !in_trap;
// Index of the winning request. NOTE(review): the arbitration order is
// defined inside hazard5_priority_encode, which is not visible here; the
// comment above implies the lowest set bit wins -- confirm.
wire [3:0] exception_req_num;
hazard5_priority_encode #(
.W_REQ(16)
) except_priority (
.req (exception_req),
.gnt (exception_req_num)
);
// Interrupt masking and selection

// The external IRQ inputs are registered once before being used anywhere in
// this block.
reg [15:0] irq_r;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		irq_r <= 16'h0;
	end else begin
		irq_r <= irq;
	end
end

// Pending register: the registered IRQ lines in bits 31:16, their OR in
// bit 11, and everything else tied to zero.
assign mip = {
	irq_r,  // Our nonstandard bits for per-IRQ status
	4'h0,   // Reserved
	|irq_r, // Global pending bit for external IRQs
	3'h0,   // Reserved
	1'b0,   // Timer (FIXME)
	3'h0,   // Reserved
	1'b0,   // Software interrupt
	3'h0    // Reserved
};
// Interrupt arbitration and trap outputs.

// We don't actually trap the aggregate IRQ, just provide it for software info
wire [31:0] mip_no_global = mip & 32'hffff_f7ff;

// An interrupt may only cause a trap when its enable bit in mie is set. The
// external IRQs (bits 31:16) are additionally gated by the global mie.MEIE
// bit. Previously the pending bits were never ANDed with mie, so the per-IRQ
// enables had no effect and a disabled IRQ could both raise a trap and win
// arbitration.
wire [31:0] irq_pending_enabled = mip_no_global & mie & {{16{mie_meie}}, {16{1'b1}}};

// mstatus.MIE is the global gate over all interrupts.
wire irq_any = |irq_pending_enabled && mstatus_mie;

// Arbitrate over the enabled interrupts only, so that the reported cause and
// the vector taken always belong to an interrupt that is allowed to fire.
wire [4:0] irq_num;

hazard5_priority_encode #(
	.W_REQ(32)
) irq_priority (
	.req (irq_pending_enabled),
	.gnt (irq_num)
);

// Vector table byte offset: exceptions use entries 0..15 (indexed by cause
// code), interrupts use entry 16 + their bit position in mip. Each entry is
// 4 bytes, hence the shift.
wire [11:0] mtvec_offs = (exception_req_any ?
	{8'h0, exception_req_num} :
	12'h10 + {7'h0, irq_num}
) << 2;

assign trap_addr = mtvec | {20'h0, mtvec_offs};

// Exceptions take priority over interrupts when both are requesting.
assign trap_enter_vld = CSR_M_TRAP && (exception_req_any || irq_any);
assign trap_is_exception = exception_req_any;

assign mcause_irq_next = !exception_req_any;
// Both arms are 5 bits wide: the 4-bit exception number is zero-extended.
assign mcause_code_next = exception_req_any ? {1'b0, exception_req_num} : irq_num;
// ----------------------------------------------------------------------------
// Formal-only properties; compiled in only when RISCV_FORMAL is defined.
// The single assume() constrains the environment, the assert()s are proof
// obligations on the trap handshake.
`ifdef RISCV_FORMAL
always @ (posedge clk) begin
// We disallow double exceptions -- this causes riscv-formal to complain that
// loads/stores don't trap inside of traps. Therefore assume this doesn't happen
if (in_trap)
assume(!(except_load_misaligned || except_store_misaligned));
// Something is screwed up if this happens
// (a CSR write in the cycle right after a trap entry was accepted)
if ($past(trap_enter_vld && trap_enter_rdy))
assert(!wen);
// Don't do this
// (a trap entry being accepted in the same cycle as a trap exit)
assert(!(trap_enter_vld && trap_enter_rdy && trap_exit));
// Should be impossible to get into the trap and exit it so quickly:
if (in_trap && !$past(in_trap))
assert(!trap_exit);
// Should be impossible to get to another mret so soon after exiting:
assert(!(trap_exit && $past(trap_exit)));
end
`endif
endmodule

376
hdl/hazard5_decode.v Normal file
View File

@ -0,0 +1,376 @@
/******************************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2019 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*****************************************************************************/
module hazard5_decode #(
parameter EXTENSION_C = 1, // compressed instruction extension
parameter EXTENSION_M = 1, // mul/div/mod instruction extension
parameter HAVE_CSR = 0,
parameter W_ADDR = 32,
parameter W_DATA = 32,
parameter RESET_VECTOR = 32'h0,
parameter W_REGADDR = 5
) (
input wire clk,
input wire rst_n,
input wire [31:0] fd_cir,
input wire [1:0] fd_cir_vld,
output wire [1:0] df_cir_use,
output wire df_cir_lock,
output reg d_jump_req,
output reg [W_ADDR-1:0] d_jump_target,
output wire [W_ADDR-1:0] d_pc, // FIXME only added for riscv-formal
output wire d_stall,
input wire x_stall,
input wire flush_d_x,
input wire f_jump_rdy,
input wire f_jump_now,
input wire [W_ADDR-1:0] f_jump_target,
output reg [W_REGADDR-1:0] d_rs1, // combinatorial
output reg [W_REGADDR-1:0] d_rs2, // combinatorial
output reg [W_DATA-1:0] dx_imm,
output reg [W_REGADDR-1:0] dx_rs1,
output reg [W_REGADDR-1:0] dx_rs2,
output reg [W_REGADDR-1:0] dx_rd,
output reg [W_ALUSRC-1:0] dx_alusrc_a,
output reg [W_ALUSRC-1:0] dx_alusrc_b,
output reg [W_ALUOP-1:0] dx_aluop,
output reg [W_MEMOP-1:0] dx_memop,
output reg [W_MULOP-1:0] dx_mulop,
output reg dx_csr_ren,
output reg dx_csr_wen,
output reg [1:0] dx_csr_wtype,
output reg dx_csr_w_imm,
output reg [W_BCOND-1:0] dx_branchcond,
output reg [W_ADDR-1:0] dx_jump_target,
output reg dx_jump_is_regoffs,
output reg dx_result_is_linkaddr,
output reg [W_ADDR-1:0] dx_pc,
output reg [W_ADDR-1:0] dx_mispredict_addr,
output reg [2:0] dx_except
);
// TODO TODO factor this out in a cleaner way, e.g. separate out registers and stall logic.
`include "rv_opcodes.vh"
`include "hazard5_ops.vh"
// ============================================================================
// PC/CIR control
// ============================================================================
// Decode is starved when fetch has not delivered a complete instruction:
// either fd_cir_vld is zero, or its bit 0 is set while the current
// instruction is a 32-bit one.
// NOTE(review): this reads as fd_cir_vld being a count of valid halfwords
// (0, 1 or 2) -- confirm against the fetch unit.
wire d_starved = (~|fd_cir_vld) || (fd_cir_vld[0] && d_instr_is_32bit);

// Decode stalls when execute stalls, when it is starved, or while it is
// requesting a jump that fetch is not yet ready to accept.
assign d_stall = x_stall || d_starved || (d_jump_req && !f_jump_rdy);

// What decode consumes from the CIR this cycle: nothing while stalled,
// otherwise 2 for a 32-bit instruction and 1 for a 16-bit one.
assign df_cir_use =
	(d_starved || d_stall) ? 2'h0 :
	d_instr_is_32bit       ? 2'h2 :
	                         2'h1;
// CIR locking.
// When decode successfully asserts a jump request but is itself stalled, the
// CIR has to be locked. (Only an X stall can cause this, never fetch
// starvation.) Without the lock, incoming fetch data could overwrite the CIR
// before the stall ahead of us clears, and the instruction would lose its
// other side effects besides jumping:
//  - Linking for JAL
//  - Mispredict recovery for branches
// Simply gating the jump request with the X stall is not an option: X stall
// is a function of hready, and the jump request feeds haddr, htrans etc.
//
// d_jump_req and m_jump_req can be asserted simultaneously, so a jump only
// counts as decode's own when the D/X flush is not active:
wire jump_caused_by_d = d_jump_req && f_jump_rdy && !flush_d_x;

// The lock is taken when decode's jump is accepted during a stall, and is
// dropped in the first cycle in which decode is no longer stalled.
wire assert_cir_lock   = jump_caused_by_d && d_stall;
wire deassert_cir_lock = !d_stall;

// Lock state carried over from the previous cycle.
reg cir_lock_prev;

assign df_cir_lock = assert_cir_lock || (cir_lock_prev && !deassert_cir_lock);

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		cir_lock_prev <= 1'b0;
	end else begin
		cir_lock_prev <= df_cir_lock;
	end
end
// Program counter of the instruction currently in decode.
reg [W_ADDR-1:0] pc;
// Sequential successor: advance by the size of the current instruction
// (4 bytes for a 32-bit encoding, 2 bytes for a 16-bit one).
wire [W_ADDR-1:0] pc_next = pc + (d_instr_is_32bit ? 32'h4 : 32'h2);
assign d_pc = pc;
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
pc <= RESET_VECTOR;
end else begin
// Load the jump target either when fetch performs a jump now (unless that
// jump is the one which takes the CIR lock this cycle), or in the cycle
// where a previously taken lock is released.
if ((f_jump_now && !assert_cir_lock) || (cir_lock_prev && deassert_cir_lock)) begin
pc <= f_jump_target;
`ifdef FORMAL
// Being cheeky above to save a 32 bit mux. Check that we never get an M target by mistake.
// (On lock release f_jump_target is loaded in place of decode's own target,
// so the two must be equal at that point.)
if (cir_lock_prev && deassert_cir_lock)
assert(f_jump_target == d_jump_target);
`endif
// Otherwise step to the next sequential instruction, but only when decode
// is not stalled and the CIR is not locked.
end else if (!d_stall && !df_cir_lock) begin
pc <= pc_next;
end
end
end
// Decode-stage jump request and target.

// If the current CIR is there due to locking, it is a jump which has already
// had primary effect, so it must not request again. Jumps are also
// suppressed while starved or when the instruction is invalid.
wire d_invalid;
wire jump_enable = !(d_starved || cir_lock_prev || d_invalid);

reg [W_ADDR-1:0] d_jump_offs;

always @ (*) begin
	// Pick the immediate format from instruction bit 3:
	// JAL is major opcode 1101111,
	// branches are 1100011.
	case (d_instr[3])
		1'b1:    d_jump_offs = d_imm_j;
		default: d_jump_offs = d_imm_b;
	endcase

	d_jump_target = pc + d_jump_offs;

	// The instruction is prefixed with its own bit 31 (the sign of the
	// offset). Conditional branches only request a jump from decode when that
	// bit is set, i.e. when the offset is negative; JAL requests one for
	// either sign.
	casez ({d_instr[31], d_instr})
		{1'b1, RV_BEQ },
		{1'b1, RV_BNE },
		{1'b1, RV_BLT },
		{1'b1, RV_BGE },
		{1'b1, RV_BLTU},
		{1'b1, RV_BGEU}: d_jump_req = jump_enable;
		{1'bz, RV_JAL }: d_jump_req = jump_enable;
		default:         d_jump_req = 1'b0;
	endcase
end
// ============================================================================
// Expand compressed instructions
// ============================================================================
// Outputs of the decompressor: the instruction as a 32-bit encoding, a flag
// for whether the CIR holds a 32-bit instruction, and an invalid flag for
// 16-bit encodings.
wire [31:0] d_instr;
wire        d_instr_is_32bit;
wire        d_invalid_16bit;

// Set by the main 32-bit decode always block.
reg         d_invalid_32bit;

// An instruction is invalid if either decoder rejects it.
assign d_invalid = d_invalid_32bit || d_invalid_16bit;

// The decompressor is put in passthrough mode when EXTENSION_C is 0.
hazard5_instr_decompress #(
	.PASSTHROUGH (!EXTENSION_C)
) decomp (
	.instr_in       (fd_cir),
	.instr_out      (d_instr),
	.instr_is_32bit (d_instr_is_32bit),
	.invalid        (d_invalid_16bit)
);
// ============================================================================
// Decode X controls
// ============================================================================
// Decode various immediate formats
// Each is a full 32-bit value assembled from d_instr. I, S, B and J are
// sign-extended from instruction bit 31; B and J have bit 0 forced to zero;
// U places instruction bits 31:12 in the upper bits with the low 12 bits zero.
wire [31:0] d_imm_i = {{21{d_instr[31]}}, d_instr[30:20]};
wire [31:0] d_imm_s = {{21{d_instr[31]}}, d_instr[30:25], d_instr[11:7]};
wire [31:0] d_imm_b = {{20{d_instr[31]}}, d_instr[7], d_instr[30:25], d_instr[11:8], 1'b0};
wire [31:0] d_imm_u = {d_instr[31:12], {12{1'b0}}};
wire [31:0] d_imm_j = {{12{d_instr[31]}}, d_instr[19:12], d_instr[20], d_instr[30:21], 1'b0};
// Combinatorials:
// These are declared as reg only because they are assigned inside the
// always @ (*) decode block below; the d_ prefix marks the decode-stage
// value of the correspondingly named dx_ output.
reg [W_REGADDR-1:0] d_rd;
reg [W_DATA-1:0] d_imm;
reg [W_DATA-1:0] d_branchoffs;
reg [W_ALUSRC-1:0] d_alusrc_a;
reg [W_ALUSRC-1:0] d_alusrc_b;
reg [W_ALUOP-1:0] d_aluop;
reg [W_MEMOP-1:0] d_memop;
reg [W_MULOP-1:0] d_mulop;
reg [W_BCOND-1:0] d_branchcond;
reg d_jump_is_regoffs;
reg d_result_is_linkaddr;
reg d_csr_ren;
reg d_csr_wen;
reg [1:0] d_csr_wtype;
reg d_csr_w_imm;
reg [W_EXCEPT-1:0] d_except;
// Register index zero, assigned to register fields that an instruction does
// not use.
localparam X0 = {W_REGADDR{1'b0}};
// Main 32-bit decode table.
// Every control output is first given a default, then the casez overrides
// only the fields that differ for the matched instruction. Register fields
// that an instruction does not use are forced to X0. Anything that does not
// match a known pattern (or belongs to an extension that is configured out)
// sets d_invalid_32bit.
always @ (*) begin
// Assign some defaults
d_rs1 = d_instr[19:15];
d_rs2 = d_instr[24:20];
d_rd = d_instr[11: 7];
d_imm = d_imm_i;
d_branchoffs = d_imm_i;
d_alusrc_a = ALUSRCA_RS1;
d_alusrc_b = ALUSRCB_RS2;
d_aluop = ALUOP_ADD;
d_memop = MEMOP_NONE;
d_mulop = M_OP_MUL;
d_csr_ren = 1'b0;
d_csr_wen = 1'b0;
d_csr_wtype = CSR_WTYPE_W;
d_csr_w_imm = 1'b0;
d_branchcond = BCOND_NEVER;
d_jump_is_regoffs = 1'b0;
d_result_is_linkaddr = 1'b0;
d_invalid_32bit = 1'b0;
d_except = EXCEPT_NONE;
casez (d_instr)
// Conditional branches: no destination register; the comparison is expressed
// as an ALU operation (SUB/LT/LTU) plus a ZERO/NZERO branch condition.
RV_BEQ: begin d_rd = X0; d_aluop = ALUOP_SUB; d_branchcond = BCOND_ZERO; end
RV_BNE: begin d_rd = X0; d_aluop = ALUOP_SUB; d_branchcond = BCOND_NZERO; end
RV_BLT: begin d_rd = X0; d_aluop = ALUOP_LT; d_branchcond = BCOND_NZERO; end
RV_BGE: begin d_rd = X0; d_aluop = ALUOP_LT; d_branchcond = BCOND_ZERO; end
RV_BLTU: begin d_rd = X0; d_aluop = ALUOP_LTU; d_branchcond = BCOND_NZERO; end
RV_BGEU: begin d_rd = X0; d_aluop = ALUOP_LTU; d_branchcond = BCOND_ZERO; end
// Jumps: both flag the result as a link address. JALR additionally adds
// rs1 + imm in the ALU and is marked as an always-taken, register-offset jump.
RV_JALR: begin d_result_is_linkaddr = 1'b1; d_jump_is_regoffs = 1'b1; d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_branchcond = BCOND_ALWAYS; end
RV_JAL: begin d_result_is_linkaddr = 1'b1; d_rs2 = X0; d_rs1 = X0; end
// Upper-immediate instructions: ADD of the U immediate to rs1 = X0 (LUI) or
// to the PC (AUIPC).
RV_LUI: begin d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_rs1 = X0; end
RV_AUIPC: begin d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_alusrc_a = ALUSRCA_PC; d_rs1 = X0; end
// Register-immediate ALU operations: operand B is the I immediate, rs2 unused
RV_ADDI: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_SLLI: begin d_aluop = ALUOP_SLL; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_SLTI: begin d_aluop = ALUOP_LT; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_SLTIU: begin d_aluop = ALUOP_LTU; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_XORI: begin d_aluop = ALUOP_XOR; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_SRLI: begin d_aluop = ALUOP_SRL; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_SRAI: begin d_aluop = ALUOP_SRA; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_ORI: begin d_aluop = ALUOP_OR; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_ANDI: begin d_aluop = ALUOP_AND; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
// Register-register ALU operations: defaults already select rs1/rs2
RV_ADD: begin d_aluop = ALUOP_ADD; end
RV_SUB: begin d_aluop = ALUOP_SUB; end
RV_SLL: begin d_aluop = ALUOP_SLL; end
RV_SLT: begin d_aluop = ALUOP_LT; end
RV_SLTU: begin d_aluop = ALUOP_LTU; end
RV_XOR: begin d_aluop = ALUOP_XOR; end
RV_SRL: begin d_aluop = ALUOP_SRL; end
RV_SRA: begin d_aluop = ALUOP_SRA; end
RV_OR: begin d_aluop = ALUOP_OR; end
RV_AND: begin d_aluop = ALUOP_AND; end
// Loads: ALU computes rs1 + I immediate; rs2 unused
RV_LB: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LB; end
RV_LH: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LH; end
RV_LW: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LW; end
RV_LBU: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LBU; end
RV_LHU: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LHU; end
// Stores: ALU computes rs1 + S immediate; rs2 is kept (store data), no rd
RV_SB: begin d_aluop = ALUOP_ADD; d_imm = d_imm_s; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SB; d_rd = X0; end
RV_SH: begin d_aluop = ALUOP_ADD; d_imm = d_imm_s; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SH; d_rd = X0; end
RV_SW: begin d_aluop = ALUOP_ADD; d_imm = d_imm_s; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SW; d_rd = X0; end
// M extension: illegal unless EXTENSION_M is set
RV_MUL: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MUL; end else begin d_invalid_32bit = 1'b1; end
RV_MULH: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULH; end else begin d_invalid_32bit = 1'b1; end
RV_MULHSU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULHSU; end else begin d_invalid_32bit = 1'b1; end
RV_MULHU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULHU; end else begin d_invalid_32bit = 1'b1; end
RV_DIV: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_DIV; end else begin d_invalid_32bit = 1'b1; end
RV_DIVU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_DIVU; end else begin d_invalid_32bit = 1'b1; end
RV_REM: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REM; end else begin d_invalid_32bit = 1'b1; end
RV_REMU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REMU; end else begin d_invalid_32bit = 1'b1; end
RV_FENCE: begin d_rd = X0; end // NOP
RV_FENCE_I: begin d_rd = X0; d_rs1 = X0; d_rs2 = X0; d_branchcond = BCOND_NZERO; d_imm[31] = 1'b1; end // Pretend we are recovering from a mispredicted-taken backward branch. Mispredict recovery flushes frontend.
// CSR accesses: illegal unless HAVE_CSR is set. CSRRW/CSRRWI suppress the
// read when rd is x0; the set/clear forms suppress the write when the rs1
// field (which holds the immediate for the *I forms) is zero.
RV_CSRRW: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = 1'b1 ; d_csr_ren = |d_rd; d_csr_wtype = CSR_WTYPE_W; end else begin d_invalid_32bit = 1'b1; end
RV_CSRRS: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_S; end else begin d_invalid_32bit = 1'b1; end
RV_CSRRC: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_C; end else begin d_invalid_32bit = 1'b1; end
RV_CSRRWI: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = 1'b1 ; d_csr_ren = |d_rd; d_csr_wtype = CSR_WTYPE_W; d_csr_w_imm = 1'b1; end else begin d_invalid_32bit = 1'b1; end
RV_CSRRSI: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_S; d_csr_w_imm = 1'b1; end else begin d_invalid_32bit = 1'b1; end
RV_CSRRCI: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_C; d_csr_w_imm = 1'b1; end else begin d_invalid_32bit = 1'b1; end
// Trap entry/return requests: no register operands; illegal without HAVE_CSR
RV_ECALL: if (HAVE_CSR) begin d_except = EXCEPT_ECALL; d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end
RV_EBREAK: if (HAVE_CSR) begin d_except = EXCEPT_EBREAK; d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end
RV_MRET: if (HAVE_CSR) begin d_except = EXCEPT_MRET; d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end
default: begin d_invalid_32bit = 1'b1; end
endcase
end
// D/X pipeline control registers (resettable).
// Priority per clock: asynchronous reset, then bubble insertion (flush, or D
// stalled while X is free to advance), then a normal advance when X is not
// stalled. Otherwise the registers hold.
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		// Register numbers
		dx_rs1 <= {W_REGADDR{1'b0}};
		dx_rs2 <= {W_REGADDR{1'b0}};
		dx_rd  <= {W_REGADDR{1'b0}};
		// Execution controls
		dx_aluop    <= ALUOP_ADD;
		dx_alusrc_a <= ALUSRCA_RS1;
		dx_alusrc_b <= ALUSRCB_RS2;
		dx_mulop    <= M_OP_MUL;
		dx_memop    <= MEMOP_NONE;
		// Control flow and exceptions
		dx_branchcond         <= BCOND_NEVER;
		dx_jump_is_regoffs    <= 1'b0;
		dx_result_is_linkaddr <= 1'b0;
		dx_except             <= EXCEPT_NONE;
		// CSR access
		dx_csr_wen   <= 1'b0;
		dx_csr_ren   <= 1'b0;
		dx_csr_wtype <= CSR_WTYPE_W;
		dx_csr_w_imm <= 1'b0;
	end else if (flush_d_x || (!x_stall && d_stall)) begin
		// Bubble: neutralise everything that could have a side effect in X.
		dx_rd         <= {W_REGADDR{1'b0}};
		dx_memop      <= MEMOP_NONE;
		dx_branchcond <= BCOND_NEVER;
		dx_except     <= EXCEPT_NONE;
		dx_csr_wen    <= 1'b0;
		dx_csr_ren    <= 1'b0;
		// A bubble must not kick off a multiply/divide
		if (EXTENSION_M)
			dx_aluop <= ALUOP_ADD;
		// The source register numbers have to be cleared as well. Consider a
		// load followed by a dependent branch which is predicted taken:
		// - the branch stalls in D until the AHB master is free
		// - next cycle the prediction causes a jump, and X holds a bubble
		// - if that bubble carried the branch's rs1/rs2, X would see a
		//   spurious RAW stall
		// - the cycle after, the RAW stall keeps the branch out of X, yet the
		//   jump still replaces it in D
		// - the mispredict can then never be corrected
		dx_rs1 <= {W_REGADDR{1'b0}};
		dx_rs2 <= {W_REGADDR{1'b0}};
	end else if (!x_stall) begin
		// Fields with side effects are squashed for an invalid instruction,
		// which instead carries an illegal-instruction exception into X.
		dx_except     <= d_invalid ? EXCEPT_INSTR_ILLEGAL : d_except;
		dx_rd         <= d_invalid ? {W_REGADDR{1'b0}} : d_rd;
		dx_rs1        <= d_invalid ? {W_REGADDR{1'b0}} : d_rs1;
		dx_rs2        <= d_invalid ? {W_REGADDR{1'b0}} : d_rs2;
		dx_branchcond <= d_invalid ? BCOND_NEVER : d_branchcond;
		dx_memop      <= d_invalid ? MEMOP_NONE : d_memop;
		dx_csr_wen    <= d_invalid ? 1'b0 : d_csr_wen;
		dx_csr_ren    <= d_invalid ? 1'b0 : d_csr_ren;
		dx_aluop      <= (d_invalid && EXTENSION_M) ? ALUOP_ADD : d_aluop;
		// The remaining fields are harmless on their own, so pass through
		dx_mulop              <= d_mulop;
		dx_alusrc_a           <= d_alusrc_a;
		dx_alusrc_b           <= d_alusrc_b;
		dx_csr_wtype          <= d_csr_wtype;
		dx_csr_w_imm          <= d_csr_w_imm;
		dx_result_is_linkaddr <= d_result_is_linkaddr;
		dx_jump_is_regoffs    <= d_jump_is_regoffs;
	end
end
// D/X datapath registers. These carry no reset: until they hold valid data
// they are masked by the resettable pipeline controls.
always @ (posedge clk) begin
	if (!x_stall) begin
		dx_mispredict_addr <= pc_next;
		dx_jump_target     <= d_jump_target;
		dx_imm             <= d_imm;
	end
	// dx_pc: a flush takes priority over the normal advance, and applies even
	// while X is stalled.
	if (flush_d_x) begin
		// A late jump's target has to land in X PC *immediately*: an IRQ may
		// cause mepc to sample X PC at any time, and it must never capture
		// the mispredicted path. The stalled-X case arises e.g. when a muldiv
		// enters X while a 1 cycle bus stall holds off the jump request in M.
		dx_pc <= f_jump_target;
`ifdef FORMAL
		// Only a late jump should ever cause this flush
		assert(f_jump_now);
`endif
	end else if (!x_stall) begin
		dx_pc <= pc;
	end
end
endmodule

300
hdl/hazard5_frontend.v Normal file
View File

@ -0,0 +1,300 @@
// Instruction fetch front end.
// Issues fetch addresses on the mem_* interface (sequentially from fetch_addr,
// or redirected by jump_target), buffers returned data in a small FIFO, and
// assembles the current instruction register (cir) plus a spare halfword
// buffer for the decode stage to consume in units of halfwords.
module hazard5_frontend #(
parameter EXTENSION_C = 1,
parameter W_ADDR = 32, // other sizes currently unsupported
parameter W_DATA = 32, // other sizes currently unsupported
parameter FIFO_DEPTH = 2, // power of 2, >= 1
parameter RESET_VECTOR = 0
) (
input wire clk,
input wire rst_n,
// Fetch interface
// addr_vld may be asserted at any time, but after assertion,
// neither addr nor addr_vld may change until the cycle after addr_rdy.
// There is no backpressure on the data interface; the front end
// must ensure it does not request data it cannot receive.
// addr_rdy and dat_vld may be functions of hready, and
// may not be used to compute combinational outputs.
output wire mem_size, // 1'b1 -> 32 bit access
output wire [W_ADDR-1:0] mem_addr,
output wire mem_addr_vld,
input wire mem_addr_rdy,
input wire [W_DATA-1:0] mem_data,
input wire mem_data_vld,
// Jump/flush interface
// Processor may assert vld at any time. The request will not go through
// unless rdy is high. Processor *may* alter request during this time.
// Inputs must not be a function of hready.
input wire [W_ADDR-1:0] jump_target,
input wire jump_target_vld,
output wire jump_target_rdy,
// Interface to Decode
// Note reg/wire distinction
// => decode is providing live feedback on the CIR it is decoding,
// which we fetched previously
// This works OK because size is decoded from 2 LSBs of instruction, so cheap.
output reg [31:0] cir,
output reg [1:0] cir_vld, // number of valid halfwords in CIR
input wire [1:0] cir_use, // number of halfwords D intends to consume
// *may* be a function of hready
input wire cir_lock // Lock-in current contents and level of CIR.
// Assert simultaneously with a jump request,
// if decode is going to stall. This stops the CIR
// from being trashed by incoming fetch data;
// jump instructions have other side effects besides jumping!
);
// ASSERT(x) expands to assert(x) only when HAZARD5_FRONTEND_ASSERTIONS is
// defined; otherwise it expands to nothing.
`undef ASSERT
`ifdef HAZARD5_FRONTEND_ASSERTIONS
`define ASSERT(x) assert(x)
`else
`define ASSERT(x)
`endif
// ISIM doesn't support some of this:
// //synthesis translate_off
// initial if (W_DATA != 32) begin $error("Frontend requires 32-bit databus"); end
// initial if ((1 << $clog2(FIFO_DEPTH)) != FIFO_DEPTH) begin $error("Frontend FIFO depth must be power of 2"); end
// initial if (~|FIFO_DEPTH) begin $error("Frontend FIFO depth must be > 0"); end
// //synthesis translate_on
// A "bundle" is one halfword of the fetch data bus
localparam W_BUNDLE = W_DATA / 2;
// NOTE(review): W_FIFO_LEVEL is not referenced anywhere else in this module.
parameter W_FIFO_LEVEL = $clog2(FIFO_DEPTH + 1);
// ============================================================================
// Fetch Queue (FIFO)
// ============================================================================
// This is a little different from either a normal sync fifo or sync fwft fifo
// so it's worth implementing from scratch
// A jump is accepted in any cycle where the request is valid and we are ready
wire jump_now = jump_target_vld && jump_target_rdy;
// fifo_mem has one entry more than FIFO_DEPTH: the extra top entry is driven
// combinationally with the incoming write data (see the always @ (*) below),
// so the shift logic can treat "entry above" uniformly.
reg [W_DATA-1:0] fifo_mem [0:FIFO_DEPTH];
// One valid bit per real entry; valid entries are packed from bit 0 upward
reg [FIFO_DEPTH-1:0] fifo_valid;
wire fifo_push;
wire fifo_pop;
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
fifo_valid <= {FIFO_DEPTH{1'b0}};
end else if (jump_now) begin
// A jump discards everything queued
fifo_valid <= {FIFO_DEPTH{1'b0}};
end else if (fifo_push || fifo_pop) begin
// Push shifts a 1 in at the LSB (via the double inversion); pop then shifts
// a 0 in at the MSB.
fifo_valid <= ~(~fifo_valid << fifo_push) >> fifo_pop;
end
end
always @ (posedge clk) begin: fifo_data_shift
integer i;
for (i = 0; i < FIFO_DEPTH; i = i + 1) begin
// On pop every entry shifts down; on push only invalid entries are written
if (fifo_pop || (fifo_push && !fifo_valid[i])) begin
// NOTE(review): for i == FIFO_DEPTH - 1 this indexes fifo_valid[FIFO_DEPTH],
// which is outside the declared range. Both mux inputs are the write data in
// that case (fifo_mem[FIFO_DEPTH] mirrors fifo_wdata), so the selected value
// is the same either way, but some tools may warn.
fifo_mem[i] <= fifo_valid[i + 1] ? fifo_mem[i + 1] : fifo_wdata;
end
end
end
wire [W_DATA-1:0] fifo_wdata = mem_data;
wire [W_DATA-1:0] fifo_rdata = fifo_mem[0];
always @ (*) fifo_mem[FIFO_DEPTH] = fifo_wdata;
wire fifo_full = fifo_valid[FIFO_DEPTH - 1];
wire fifo_empty = !fifo_valid[0];
wire fifo_almost_full = FIFO_DEPTH == 1 || (!fifo_valid[FIFO_DEPTH - 1] && fifo_valid[FIFO_DEPTH - 2]);
// ============================================================================
// Fetch Request + State Logic
// ============================================================================
// Keep track of some useful state of the memory interface
// mem_addr_hold: an address was presented last cycle but not accepted
reg mem_addr_hold;
// pending_fetches: address phases issued whose data has not yet come back
reg [1:0] pending_fetches;
// ctr_flush_pending: how many of the upcoming data responses belong to
// fetches issued before a jump, and so must be discarded
reg [1:0] ctr_flush_pending;
wire [1:0] pending_fetches_next = pending_fetches + (mem_addr_vld && !mem_addr_hold) - mem_data_vld;
wire cir_must_refill;
// If fetch data is forwarded past the FIFO, ensure it is not also written to it.
assign fifo_push = mem_data_vld && ~|ctr_flush_pending && !(cir_must_refill && fifo_empty);
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
mem_addr_hold <= 1'b0;
pending_fetches <= 2'h0;
ctr_flush_pending <= 2'h0;
end else begin
`ASSERT(ctr_flush_pending <= pending_fetches);
`ASSERT(pending_fetches < 2'd3);
`ASSERT(!(mem_data_vld && !pending_fetches));
// `ASSERT(!($past(mem_addr_hold) && $past(mem_addr_vld) && !$stable(mem_addr)));
mem_addr_hold <= mem_addr_vld && !mem_addr_rdy;
pending_fetches <= pending_fetches_next;
if (jump_now) begin
// Everything still in flight (less any response arriving this cycle) is stale
ctr_flush_pending <= pending_fetches - mem_data_vld;
end else if (|ctr_flush_pending && mem_data_vld) begin
ctr_flush_pending <= ctr_flush_pending - 1'b1;
end
end
end
// Fetch addr runs ahead of the PC, in word increments.
reg [W_ADDR-1:0] fetch_addr;
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
fetch_addr <= RESET_VECTOR;
end else begin
if (jump_now) begin
// Post-increment if jump request is going straight through
fetch_addr <= {jump_target[W_ADDR-1:2] + (mem_addr_rdy && !mem_addr_hold), 2'b00};
end else if (mem_addr_vld && mem_addr_rdy) begin
fetch_addr <= fetch_addr + 32'h4;
end
end
end
// Using the non-registered version of pending_fetches would improve FIFO
// utilisation, but create a combinatorial path from hready to address phase!
wire fetch_stall = fifo_full
|| fifo_almost_full && |pending_fetches // TODO causes issue with depth 1: only one in flight, so bus rate halved.
|| pending_fetches > 2'h1;
// unaligned jump is handled in two different places:
// - during address phase, offset may be applied to fetch_addr if hready was low when jump_target_vld was high
// - during data phase, need to assemble CIR differently.
wire unaligned_jump_now = EXTENSION_C && jump_now && jump_target[1];
reg unaligned_jump_aph;
reg unaligned_jump_dph;
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
unaligned_jump_aph <= 1'b0;
unaligned_jump_dph <= 1'b0;
end else if (EXTENSION_C) begin
`ASSERT(!(unaligned_jump_aph && !unaligned_jump_dph));
`ASSERT(!($past(jump_now && !jump_target[1]) && unaligned_jump_aph));
`ASSERT(!($past(jump_now && !jump_target[1]) && unaligned_jump_dph));
// The statements below are ordered deliberately: later non-blocking
// assignments override earlier ones, so a new unaligned jump wins over
// the clear conditions.
if (mem_addr_rdy || (jump_now && !unaligned_jump_now)) begin
unaligned_jump_aph <= 1'b0;
end
if ((mem_data_vld && ~|ctr_flush_pending && !cir_lock)
|| (jump_now && !unaligned_jump_now)) begin
unaligned_jump_dph <= 1'b0;
end
if (fifo_pop) begin
// Following a lock/unlock of the CIR, we may have an unaligned fetch in
// the FIFO, rather than consuming straight from the bus.
unaligned_jump_dph <= 1'b0;
end
if (unaligned_jump_now) begin
unaligned_jump_dph <= 1'b1;
unaligned_jump_aph <= !mem_addr_rdy;
end
end
end
// Combinatorially generate the address-phase request
// reset_holdoff is high only for the first cycle out of reset, and suppresses
// mem_addr_vld during that cycle.
reg reset_holdoff;
always @ (posedge clk or negedge rst_n)
if (!rst_n)
reset_holdoff <= 1'b1;
else
reset_holdoff <= 1'b0;
reg [W_ADDR-1:0] mem_addr_r;
reg mem_addr_vld_r;
reg mem_size_r;
assign mem_addr = mem_addr_r;
assign mem_addr_vld = mem_addr_vld_r && !reset_holdoff;
assign mem_size = mem_size_r;
always @ (*) begin
mem_addr_r = {W_ADDR{1'b0}};
mem_addr_vld_r = 1'b1;
mem_size_r = 1'b1; // almost all accesses are 32 bit
// Priority encoder (first matching item wins): a held-over request must be
// repeated unchanged, then a new jump target, then the next sequential fetch.
case (1'b1)
mem_addr_hold : begin mem_addr_r = {fetch_addr[W_ADDR-1:2], unaligned_jump_aph, 1'b0}; mem_size_r = !unaligned_jump_aph; end
jump_target_vld : begin mem_addr_r = jump_target; mem_size_r = !unaligned_jump_now; end
!fetch_stall : begin mem_addr_r = fetch_addr; end
default : begin mem_addr_vld_r = 1'b0; end
endcase
end
// A jump cannot be accepted while a previous address is being held on the bus
assign jump_target_rdy = !mem_addr_hold;
// ============================================================================
// Instruction assembly yard
// ============================================================================
// buf_level is the number of valid halfwords in {hwbuf, cir}.
// cir_vld and hwbuf_vld are functions of this.
reg [1:0] buf_level;
reg [W_BUNDLE-1:0] hwbuf;
reg hwbuf_vld;
// Fetch data comes from the FIFO if it holds anything, else straight from
// the bus (bypassing the FIFO)
wire [W_DATA-1:0] fetch_data = fifo_empty ? mem_data : fifo_rdata;
wire fetch_data_vld = !fifo_empty || (mem_data_vld && ~|ctr_flush_pending);
// Shift any recycled instruction data down to backfill D's consumption
// We don't care about anything which is invalid or will be overlaid with fresh data,
// so choose these values in a way that minimises muxes
wire [3*W_BUNDLE-1:0] instr_data_shifted =
cir_use[1] ? {hwbuf, cir[W_BUNDLE +: W_BUNDLE], hwbuf} :
cir_use[0] && EXTENSION_C ? {hwbuf, hwbuf, cir[W_BUNDLE +: W_BUNDLE]} :
{hwbuf, cir};
// Saturating subtraction: on cir_lock deassertion,
// buf_level will be 0 but cir_use will be positive!
wire [1:0] cir_use_clipped = |buf_level ? cir_use : 2'h0;
wire [1:0] level_next_no_fetch = buf_level - cir_use_clipped;
// Overlay fresh fetch data onto the shifted/recycled instruction data
// Again, if something won't be looked at, generate cheapest possible garbage.
// Don't care if fetch data is valid or not, as will just retry next cycle (as long as flags set correctly)
wire [3*W_BUNDLE-1:0] instr_data_plus_fetch =
cir_lock || (level_next_no_fetch[1] && !unaligned_jump_dph) ? instr_data_shifted :
unaligned_jump_dph && EXTENSION_C ? {instr_data_shifted[W_BUNDLE +: 2*W_BUNDLE], fetch_data[W_BUNDLE +: W_BUNDLE]} :
level_next_no_fetch[0] && EXTENSION_C ? {fetch_data, instr_data_shifted[0 +: W_BUNDLE]} :
{instr_data_shifted[2*W_BUNDLE +: W_BUNDLE], fetch_data};
// Refill whenever fewer than two halfwords would remain and the CIR is not locked
assign cir_must_refill = !cir_lock && !level_next_no_fetch[1];
assign fifo_pop = cir_must_refill && !fifo_empty;
wire [1:0] buf_level_next =
jump_now || |ctr_flush_pending || cir_lock ? 2'h0 :
fetch_data_vld && unaligned_jump_dph ? 2'h1 :
buf_level + {cir_must_refill && fetch_data_vld, 1'b0} - cir_use_clipped;
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
buf_level <= 2'h0;
hwbuf_vld <= 1'b0;
cir_vld <= 2'h0;
end else begin
`ASSERT(cir_vld <= 2);
`ASSERT(cir_use <= 2);
`ASSERT(cir_use <= cir_vld);
`ASSERT(cir_vld <= buf_level || $past(cir_lock));
// Update CIR flags
buf_level <= buf_level_next;
// hwbuf holds a valid halfword only when all three halfwords are valid
hwbuf_vld <= &buf_level_next;
// cir_vld is buf_level_next clamped to 2 (a level of 3 maps to 2). It is
// frozen while the CIR is locked.
if (!cir_lock)
cir_vld <= buf_level_next & ~(buf_level_next >> 1'b1);
// Update CIR contents
end
end
// No need to reset these as they will be written before first use
always @ (posedge clk)
{hwbuf, cir} <= instr_data_plus_fetch;
endmodule

View File

@ -0,0 +1,114 @@
// Expand a 16-bit compressed (RVC) instruction into its 32-bit equivalent.
//
// instr_in       : raw instruction bits. Bits [1:0] select 16 vs 32 bit; only
//                  [15:0] are examined for a compressed instruction.
// instr_is_32bit : 1 when instr_in[1:0] == 2'b11 (always 1 if PASSTHROUGH).
// instr_out      : instruction for the 32-bit decoder. Equal to instr_in for
//                  32-bit instructions; 32'h0 for unrecognised 16-bit ones.
// invalid        : 1 for unrecognised or reserved 16-bit encodings. Never set
//                  for 32-bit instructions (those are checked downstream).
//
// PASSTHROUGH = 1 removes the expansion logic and forwards instr_in unchanged.
module hazard5_instr_decompress #(
	parameter PASSTHROUGH = 0
) (
	input wire [31:0] instr_in,
	output reg instr_is_32bit,
	output reg [31:0] instr_out,
	output reg invalid
);

`include "rv_opcodes.vh"

localparam W_REGADDR = 5;

// Long-register formats: cr, ci, css
// Short-register formats: ciw, cl, cs, cb, cj
// Short (3-bit) register fields address x8..x15, hence the 2'b01 prefix.
wire [W_REGADDR-1:0] rd_l = instr_in[11:7];
wire [W_REGADDR-1:0] rs1_l = instr_in[11:7];
wire [W_REGADDR-1:0] rs2_l = instr_in[6:2];
wire [W_REGADDR-1:0] rd_s = {2'b01, instr_in[4:2]};
wire [W_REGADDR-1:0] rs1_s = {2'b01, instr_in[9:7]};
wire [W_REGADDR-1:0] rs2_s = {2'b01, instr_in[4:2]};

// Compressed immediates, already unscrambled and placed in the bit positions
// they occupy in the expanded 32-bit instruction, so they can simply be ORed in:
// - imm_ci: sign-extended 6-bit immediate in the I-format field [31:20]
// - imm_cj: jump offset in the J-format field [31:12]
// - imm_cb: branch offset split across the B-format fields [31:25] and [11:7]
wire [31:0] imm_ci = {{7{instr_in[12]}}, instr_in[6:2], {20{1'b0}}};

wire [31:0] imm_cj = {instr_in[12], instr_in[8], instr_in[10:9], instr_in[6], instr_in[7],
	instr_in[2], instr_in[11], instr_in[5:3], {9{instr_in[12]}}, {12{1'b0}}};

wire [31:0] imm_cb =
	{{20{1'b0}}, instr_in[11:10], instr_in[4:3], instr_in[12], {7{1'b0}}} |
	{{4{instr_in[12]}}, instr_in[6:5], instr_in[2], {25{1'b0}}};

generate
if (PASSTHROUGH) begin
	always @ (*) begin
		instr_is_32bit = 1'b1;
		instr_out = instr_in;
		invalid = 1'b0;
	end
end else begin
	always @ (*) begin
		if (instr_in[1:0] == 2'b11) begin
			// Already a 32-bit instruction: forward untouched
			instr_is_32bit = 1'b1;
			instr_out = instr_in;
			invalid = 1'b0;
		end else begin
			// Defaults; each case item builds instr_out by ORing register and
			// immediate fields into a base opcode with those fields zeroed.
			instr_is_32bit = 1'b0;
			instr_out = 32'h0;
			invalid = 1'b0;
			casez (instr_in[15:0])
			// The all-zeroes halfword is defined to be illegal
			16'h0: invalid = 1'b1;
			RV_C_ADDI4SPN: begin
				instr_out = RV_NOZ_ADDI | ({27'h0, rd_s} << RV_RD_LSB) | (5'h2 << RV_RS1_LSB)
					| ({instr_in[10:7], instr_in[12:11], instr_in[5], instr_in[6], 2'b00} << 20);
				invalid = ~|instr_in[12:5]; // RESERVED if nzuimm == 0
			end
			RV_C_LW: instr_out = RV_NOZ_LW | ({27'h0, rd_s} << RV_RD_LSB) | (rs1_s << RV_RS1_LSB)
				| ({instr_in[5], instr_in[12:10], instr_in[6], 2'b00} << 20);
			RV_C_SW: instr_out = RV_NOZ_SW | (rs2_s << RV_RS2_LSB) | (rs1_s << RV_RS1_LSB)
				| ({instr_in[11:10], instr_in[6], 2'b00} << 7) | ({instr_in[5], instr_in[12]} << 25);
			RV_C_ADDI: instr_out = RV_NOZ_ADDI | (rd_l << RV_RD_LSB) | (rs1_l << RV_RS1_LSB) | imm_ci;
			RV_C_JAL: instr_out = RV_NOZ_JAL | (5'h1 << RV_RD_LSB) | imm_cj;
			RV_C_J: instr_out = RV_NOZ_JAL | (5'h0 << RV_RD_LSB) | imm_cj;
			RV_C_LI: instr_out = RV_NOZ_ADDI | (rd_l << RV_RD_LSB) | imm_ci;
			RV_C_LUI: begin
				if (rd_l == 5'h2) begin
					// addi16sp (shares its encoding with c.lui, selected by rd == x2)
					instr_out = RV_NOZ_ADDI | (5'h2 << RV_RD_LSB) | (5'h2 << RV_RS1_LSB) |
						({{3{instr_in[12]}}, instr_in[4:3], instr_in[5], instr_in[2], instr_in[6]} << 24);
				end else begin
					instr_out = RV_NOZ_LUI | (rd_l << RV_RD_LSB) | ({{15{instr_in[12]}}, instr_in[6:2]} << 12);
				end
				invalid = !{instr_in[12], instr_in[6:2]}; // RESERVED if imm == 0
			end
			RV_C_SLLI: instr_out = RV_NOZ_SLLI | (rs1_l << RV_RD_LSB) | (rs1_l << RV_RS1_LSB) | imm_ci;
			RV_C_SRAI: instr_out = RV_NOZ_SRAI | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | imm_ci;
			RV_C_SRLI: instr_out = RV_NOZ_SRLI | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | imm_ci;
			RV_C_ANDI: instr_out = RV_NOZ_ANDI | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | imm_ci;
			RV_C_AND: instr_out = RV_NOZ_AND | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | (rs2_s << RV_RS2_LSB);
			RV_C_OR: instr_out = RV_NOZ_OR | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | (rs2_s << RV_RS2_LSB);
			RV_C_XOR: instr_out = RV_NOZ_XOR | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | (rs2_s << RV_RS2_LSB);
			RV_C_SUB: instr_out = RV_NOZ_SUB | (rs1_s << RV_RD_LSB) | (rs1_s << RV_RS1_LSB) | (rs2_s << RV_RS2_LSB);
			RV_C_ADD: begin
				if (rs2_l) begin
					instr_out = RV_NOZ_ADD | (rd_l << RV_RD_LSB) | (rs1_l << RV_RS1_LSB) | (rs2_l << RV_RS2_LSB);
				end else begin // jalr
					instr_out = RV_NOZ_JALR | (5'h1 << RV_RD_LSB) | (rs1_l << RV_RS1_LSB);
					invalid = !rs1_l; // EBREAK; not supported!
				end
			end
			RV_C_MV: begin
				if (rs2_l) begin // mv
					instr_out = RV_NOZ_ADD | (rd_l << RV_RD_LSB) | (rs2_l << RV_RS2_LSB);
				end else begin // jr
					instr_out = RV_NOZ_JALR | (rs1_l << RV_RS1_LSB);
					invalid = !rs1_l; // RESERVED
				end
			end
			RV_C_LWSP: begin
				instr_out = RV_NOZ_LW | (rd_l << RV_RD_LSB) | (5'h2 << RV_RS1_LSB)
					| ({instr_in[3:2], instr_in[12], instr_in[6:4], 2'b00} << 20);
				invalid = !rd_l; // RESERVED
			end
			RV_C_SWSP: instr_out = RV_NOZ_SW | (rs2_l << RV_RS2_LSB) | (5'h2 << RV_RS1_LSB)
				| ({instr_in[11:9], 2'b00} << 7) | ({instr_in[8:7], instr_in[12]} << 25);
			RV_C_BEQZ: instr_out = RV_NOZ_BEQ | (rs1_s << RV_RS1_LSB) | imm_cb;
			RV_C_BNEZ: instr_out = RV_NOZ_BNE | (rs1_s << RV_RS1_LSB) | imm_cb;
			default: invalid = 1'b1;
			endcase
		end
	end
end
endgenerate

endmodule

74
hdl/hazard5_ops.vh Normal file
View File

@ -0,0 +1,74 @@
// Shared control-field widths and encodings for the Hazard5 pipeline.
// This file contains only localparams and is meant to be `included inside a
// module body.

// Field widths
localparam W_ALUOP = 4;
localparam W_ALUSRC = 2;
localparam W_MEMOP = 4;
localparam W_BCOND = 2;
// ALU operation selectors
// Note the encoding is not contiguous: 4'h3 and 4'h5 are not assigned here.
localparam ALUOP_ADD = 4'h0;
localparam ALUOP_SUB = 4'h1;
localparam ALUOP_LT = 4'h2;
localparam ALUOP_LTU = 4'h4;
localparam ALUOP_AND = 4'h6;
localparam ALUOP_OR = 4'h7;
localparam ALUOP_XOR = 4'h8;
localparam ALUOP_SRL = 4'h9;
localparam ALUOP_SRA = 4'ha;
localparam ALUOP_SLL = 4'hb;
localparam ALUOP_MULDIV = 4'hc;
// Parameters to control ALU input muxes. Bypass mux paths are
// controlled by X, so D has no parameters to choose these.
localparam ALUSRCA_RS1 = 2'h0;
localparam ALUSRCA_PC = 2'h1;
localparam ALUSRCB_RS2 = 2'h0;
localparam ALUSRCB_IMM = 2'h1;
// Memory operations: loads are 0-4, stores are 5-7, and MEMOP_NONE marks an
// instruction that makes no memory access.
localparam MEMOP_LW = 4'h0;
localparam MEMOP_LH = 4'h1;
localparam MEMOP_LB = 4'h2;
localparam MEMOP_LHU = 4'h3;
localparam MEMOP_LBU = 4'h4;
localparam MEMOP_SW = 4'h5;
localparam MEMOP_SH = 4'h6;
localparam MEMOP_SB = 4'h7;
localparam MEMOP_NONE = 4'h8;
// Branch conditions
// NOTE(review): ZERO/NZERO are presumably tested against the ALU result (the
// decoder pairs them with SUB/LT/LTU) -- confirm against the X stage.
localparam BCOND_NEVER = 2'h0;
localparam BCOND_ALWAYS = 2'h1;
localparam BCOND_ZERO = 2'h2;
localparam BCOND_NZERO = 2'h3;
// CSR access types
localparam CSR_WTYPE_W = 2'h0;
localparam CSR_WTYPE_S = 2'h1;
localparam CSR_WTYPE_C = 2'h2;
// Exceptional condition signals which travel alongside (or instead of)
// instructions in the pipeline. These are speculative and can be flushed
// on e.g. branch mispredict
localparam W_EXCEPT = 3;
localparam EXCEPT_NONE = 3'h0;
localparam EXCEPT_ECALL = 3'h1;
localparam EXCEPT_EBREAK = 3'h2;
localparam EXCEPT_MRET = 3'h3; // separate, but handled similarly
localparam EXCEPT_INSTR_ILLEGAL = 3'h4;
localparam EXCEPT_INSTR_MISALIGN = 3'h5;
localparam EXCEPT_INSTR_FAULT = 3'h6;
// Operations for M extension (these are just instr[14:12])
localparam W_MULOP = 3;
localparam M_OP_MUL = 3'h0;
localparam M_OP_MULH = 3'h1;
localparam M_OP_MULHSU = 3'h2;
localparam M_OP_MULHU = 3'h3;
localparam M_OP_DIV = 3'h4;
localparam M_OP_DIVU = 3'h5;
localparam M_OP_REM = 3'h6;
localparam M_OP_REMU = 3'h7;

View File

@ -0,0 +1,95 @@
/**********************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2018 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*********************************************************************/
// Register file
// Single write port, dual read port
//
// Reads are synchronous: rdata1/rdata2 are registered, and present the
// contents addressed by raddr1/raddr2 on the previous clock edge. A read of
// the location being written in the same cycle returns the old contents.
//
// Parameters:
// FAKE_DUALPORT: if 1, implement regfile with pair of memories.
//     Write ports are ganged together, read ports operate independently.
//     This allows BRAM inference on FPGAs with single-read-port BRAMs.
//     (Looking at you iCE40)
// RESET_REGS: if 1 (and FAKE_DUALPORT is 0), clear every register on reset.
// N_REGS: number of registers.
// W_DATA: width of each register.
// W_ADDR: address width. Derived from N_REGS; not intended to be overridden.

module hazard5_regfile_1w2r #(
	parameter FAKE_DUALPORT = 0,
	parameter RESET_REGS = 0,	// Unsupported for FAKE_DUALPORT
	parameter N_REGS = 16,
	parameter W_DATA = 32,
	// Address width follows the number of registers, not the data width
	// (the two only coincide for a 32 x 32-bit file).
	parameter W_ADDR = $clog2(N_REGS)	// should be localparam. ISIM...
) (
	input wire clk,
	input wire rst_n,

	input wire [W_ADDR-1:0] raddr1,
	output reg [W_DATA-1:0] rdata1,

	input wire [W_ADDR-1:0] raddr2,
	output reg [W_DATA-1:0] rdata2,

	input wire [W_ADDR-1:0] waddr,
	input wire [W_DATA-1:0] wdata,
	input wire wen
);

generate
if (FAKE_DUALPORT) begin: fake_dualport
	// Two identical copies, written together, each serving one read port
	reg [W_DATA-1:0] mem1 [0:N_REGS-1];
	reg [W_DATA-1:0] mem2 [0:N_REGS-1];

	always @ (posedge clk) begin
		if (wen) begin
			mem1[waddr] <= wdata;
			mem2[waddr] <= wdata;
		end
		rdata1 <= mem1[raddr1];
		rdata2 <= mem2[raddr2];
	end
end else if (RESET_REGS) begin: real_dualport_reset
	// This will presumably always be implemented with flops
	reg [W_DATA-1:0] mem [0:N_REGS-1];
	integer i;

	// Note rdata1/rdata2 are not assigned in the reset branch, so they hold
	// their value while reset is asserted.
	always @ (posedge clk or negedge rst_n) begin
		if (!rst_n) begin
			// It's best to ask nicely:
			// synthesis please_on
			for (i = 0; i < N_REGS; i = i + 1) begin
				mem[i] <= {W_DATA{1'b0}};
			end
			// synthesis please_off
		end else begin
			if (wen) begin
				mem[waddr] <= wdata;
			end
			rdata1 <= mem[raddr1];
			rdata2 <= mem[raddr2];
		end
	end
end else begin: real_dualport_noreset
	// This should be inference-compatible on FPGAs with dual-port BRAMs
	reg [W_DATA-1:0] mem [0:N_REGS-1];

	always @ (posedge clk) begin
		if (wen) begin
			mem[waddr] <= wdata;
		end
		rdata1 <= mem[raddr1];
		rdata2 <= mem[raddr2];
	end
end
endgenerate

endmodule

235
hdl/hazard5_rvfi_monitor.vh Normal file
View File

@ -0,0 +1,235 @@
// ----------------------------------------------------------------------------
// RVFI Instrumentation
// ----------------------------------------------------------------------------
// To be included into hazard5_cpu.v for use with riscv-formal.
// Contains some state modelling to diagnose exactly what the core is doing,
// and report this in a way RVFI understands.
// We consider instructions to "retire" as they cross the M/W pipe register.
//
// All modelling signals prefixed with rvfm (riscv-formal monitor)
// ----------------------------------------------------------------------------
// Instruction monitor
// Diagnose whether X, M contain valid in-flight instructions, to produce
// rvfi_valid signal.
// TODO fix all the redundant RVFI registers in a nice way
// rvfm_x_* / rvfm_m_* shadow the instruction currently in the X / M stage:
// a valid flag, the instruction bits, and (for M) whether it trapped.
reg rvfm_x_valid, rvfm_m_valid;
reg [31:0] rvfm_x_instr;
reg [31:0] rvfm_m_instr;
wire rvfm_x_trap = x_trap_is_exception && x_trap_enter;
reg rvfm_m_trap;
// Set when an M-stage jump is taken while M holds no valid instruction;
// reported as rvfi_intr on the next retired instruction.
reg rvfm_entered_intr;
reg rvfi_valid_r;
reg [31:0] rvfi_insn_r;
reg rvfi_trap_r;
assign rvfi_valid = rvfi_valid_r;
assign rvfi_insn = rvfi_insn_r;
assign rvfi_trap = rvfi_trap_r;
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
// Note rvfm_x_instr and rvfm_m_instr are not reset; they are qualified by
// the valid flags.
rvfm_x_valid <= 1'b0;
rvfm_m_valid <= 1'b0;
rvfm_m_trap <= 1'b0;
rvfm_entered_intr <= 1'b0;
rvfi_valid_r <= 1'b0;
rvfi_trap_r <= 1'b0;
rvfi_insn_r <= 32'h0;
end else begin
// Statement order matters below: later non-blocking assignments to
// rvfm_x_valid / rvfm_m_valid override earlier ones in the same cycle.
if (!x_stall) begin
// Squash X instrs on IRQ entry -- these instructions will be reexecuted on return.
rvfm_m_valid <= rvfm_x_valid && !(x_trap_enter && x_trap_enter_rdy && !rvfm_x_trap);
rvfm_m_instr <= rvfm_x_instr;
// X is vacated by default; re-validated below if D issues an instruction
rvfm_x_valid <= 1'b0;
rvfm_m_trap <= rvfm_x_trap;
end else if (!m_stall) begin
// X stalled but M advancing: M receives a bubble
rvfm_m_valid <= 1'b0;
end
if (flush_d_x) begin
rvfm_x_valid <= 1'b0;
rvfm_m_valid <= rvfm_m_valid && m_stall;
end else if (df_cir_use) begin
// D consumed an instruction this cycle. For a 16-bit instruction
// (df_cir_use[1] clear) the upper halfword is masked to zero.
rvfm_x_valid <= 1'b1;
rvfm_x_instr <= {
fd_cir[31:16] & {16{df_cir_use[1]}},
fd_cir[15:0]
};
end
rvfi_valid_r <= rvfm_m_valid && !m_stall;
rvfi_insn_r <= rvfm_m_instr;
rvfi_trap_r <= rvfm_m_trap;
// Take note of M-jump in pipe bubble in between instruction retires:
rvfm_entered_intr <= (rvfm_entered_intr && !rvfi_valid)
|| (m_jump_req && f_jump_now && !rvfm_m_valid);
// Sanity checks
// A nonzero destination register implies the stage holds a real instruction
if (dx_rd != 5'h0)
assert(rvfm_x_valid);
if (xm_rd != 5'h0)
assert(rvfm_m_valid);
end
end
// Retirement order: Hazard5 retires in program order, so a running count of
// retired instructions serves directly as the RVFI order index.
reg [63:0] rvfm_retire_ctr;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		rvfm_retire_ctr <= 64'h0;
	end else if (rvfi_valid) begin
		rvfm_retire_ctr <= rvfm_retire_ctr + 64'h1;
	end
end

assign rvfi_order = rvfm_retire_ctr;
assign rvfi_halt  = 1'b0; // TODO
assign rvfi_mode  = 2'h3; // M-mode only
// Flag the first instruction retired after an interrupt-style entry
assign rvfi_intr  = rvfm_entered_intr && rvfi_valid;
// ----------------------------------------------------------------------------
// PC and jump monitor

reg        rvfm_dx_have_jumped;
reg [31:0] rvfm_xm_pc;
reg [31:0] rvfm_xm_pc_next;

// One-cycle-delayed copy of df_cir_lock. Flopped explicitly because $past()
// on this signal produces a strange error from Yosys (possibly due to comb
// terms).
reg rvfm_past_df_cir_lock;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		rvfm_past_df_cir_lock <= 1'b0;
	end else begin
		rvfm_past_df_cir_lock <= df_cir_lock;
	end
end

// Track the PC of the instruction moving from X to M, together with the PC
// it is expected to be followed by.
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		rvfm_xm_pc_next     <= 32'h0;
		rvfm_xm_pc          <= 32'h0;
		rvfm_dx_have_jumped <= 1'b0;
	end else begin
		if (!x_stall) begin
			rvfm_xm_pc_next <= rvfm_dx_have_jumped ? dx_jump_target : dx_mispredict_addr;
			rvfm_xm_pc      <= dx_pc;
		end
		if (!d_stall) begin
			rvfm_dx_have_jumped <= rvfm_past_df_cir_lock || (d_jump_req && f_jump_now);
		end
	end
end

reg [31:0] rvfi_pc_rdata_r;
reg [31:0] rvfi_pc_wdata_r;

// A jump taken in M overrides the next-PC recorded on the way in
always @ (posedge clk) begin
	if (!m_stall) begin
		rvfi_pc_wdata_r <= m_jump_req ? m_jump_target : rvfm_xm_pc_next;
		rvfi_pc_rdata_r <= rvfm_xm_pc;
	end
end

assign rvfi_pc_rdata = rvfi_pc_rdata_r;
assign rvfi_pc_wdata = rvfi_pc_wdata_r;
// ----------------------------------------------------------------------------
// Register file monitor:

// Destination register: the address is mw_rd, and the reported write data is
// forced to zero whenever mw_rd is zero.
assign rvfi_rd_addr = mw_rd;
assign rvfi_rd_wdata = mw_rd ? mw_result : 32'h0;

// Do not reimplement internal bypassing logic. Danger of implementing
// it correctly here but incorrectly in core. Instead, carry the core's own
// bypassed rs1 value from X into M alongside the instruction.
reg [31:0] rvfm_xm_rdata1;

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		rvfm_xm_rdata1 <= 32'h0;
	end else if (!x_stall) begin
		rvfm_xm_rdata1 <= x_rs1_bypass;
	end
end

reg [4:0]  rvfi_rs1_addr_r;
reg [31:0] rvfi_rs1_rdata_r;
reg [4:0]  rvfi_rs2_addr_r;
reg [31:0] rvfi_rs2_rdata_r;

// rs1: address is zeroed while M is stalled; data is the value captured above
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		rvfi_rs1_addr_r <= 5'h0;
		rvfi_rs1_rdata_r <= 32'h0;
	end else begin
		rvfi_rs1_addr_r <= m_stall ? 5'h0 : xm_rs1;
		rvfi_rs1_rdata_r <= rvfm_xm_rdata1;
	end
end

// rs2: address is zeroed while M is stalled; data is sampled from m_wdata
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		rvfi_rs2_addr_r <= 5'h0;
		rvfi_rs2_rdata_r <= 32'h0;
	end else begin
		rvfi_rs2_addr_r <= m_stall ? 5'h0 : xm_rs2;
		rvfi_rs2_rdata_r <= m_wdata;
	end
end

assign rvfi_rs1_addr = rvfi_rs1_addr_r;
assign rvfi_rs1_rdata = rvfi_rs1_rdata_r;
assign rvfi_rs2_addr = rvfi_rs2_addr_r;
assign rvfi_rs2_rdata = rvfi_rs2_rdata_r;
// ----------------------------------------------------------------------------
// Load/store monitor: based on bus signals, NOT processor internals.
// Marshal up a description of the current data phase, and then register this
// into the RVFI signals.
// The byte-mask logic below only describes naturally aligned accesses, so
// refuse to run unless the checker is configured for aligned memory.
`ifndef RISCV_FORMAL_ALIGNED_MEM
initial $fatal;
`endif
// Address-phase attributes, captured on each hready cycle so that they
// describe the transfer which is in its data phase on the following cycles.
reg [31:0] rvfm_haddr_dph;
reg rvfm_hwrite_dph;
reg [1:0] rvfm_htrans_dph;
reg [2:0] rvfm_hsize_dph;
always @ (posedge clk) begin
if (ahblm_hready) begin
// htrans is masked with ahb_gnt_d, so transfers issued without that grant
// are recorded as IDLE here.
rvfm_htrans_dph <= ahblm_htrans & {2{ahb_gnt_d}}; // Load/store only!
rvfm_haddr_dph <= ahblm_haddr;
rvfm_hwrite_dph <= ahblm_hwrite;
rvfm_hsize_dph <= ahblm_hsize;
end
end
// Byte-lane mask for the data-phase transfer: 1 lane for hsize 0, 2 lanes for
// hsize 1, otherwise all 4 lanes, shifted up by the low two address bits.
wire [3:0] rvfm_mem_bytemask_dph = (
rvfm_hsize_dph == 3'h0 ? 4'h1 :
rvfm_hsize_dph == 3'h1 ? 4'h3 :
4'hf
) << rvfm_haddr_dph[1:0];
reg [31:0] rvfi_mem_addr_r;
reg [3:0] rvfi_mem_rmask_r;
reg [31:0] rvfi_mem_rdata_r;
reg [3:0] rvfi_mem_wmask_r;
reg [31:0] rvfi_mem_wdata_r;
assign rvfi_mem_addr = rvfi_mem_addr_r;
assign rvfi_mem_rmask = rvfi_mem_rmask_r;
assign rvfi_mem_rdata = rvfi_mem_rdata_r;
assign rvfi_mem_wmask = rvfi_mem_wmask_r;
assign rvfi_mem_wdata = rvfi_mem_wdata_r;
always @ (posedge clk) begin
if (ahblm_hready) begin
// RVFI has an AXI-like concept of byte strobes, rather than AHB-like
// (hence the word-aligned address plus per-byte masks).
rvfi_mem_addr_r <= rvfm_haddr_dph & 32'hffff_fffc;
// Default both masks to zero. The nonblocking assignments in the branches
// below come later in the block and therefore override this default, so
// the statement order here is significant.
{rvfi_mem_rmask_r, rvfi_mem_wmask_r} <= 0;
if (rvfm_htrans_dph[1] && rvfm_hwrite_dph) begin
// Active write data phase completing: report the store
rvfi_mem_wmask_r <= rvfm_mem_bytemask_dph;
rvfi_mem_wdata_r <= ahblm_hwdata;
end else if (rvfm_htrans_dph[1] && !rvfm_hwrite_dph) begin
// Active read data phase completing: report the load
rvfi_mem_rmask_r <= rvfm_mem_bytemask_dph;
rvfi_mem_rdata_r <= ahblm_hrdata;
end
end else begin
// As far as RVFI is concerned nothing happens except final cycle of dphase
{rvfi_mem_rmask_r, rvfi_mem_wmask_r} <= 0;
end
end

View File

@ -0,0 +1,74 @@
// Formal verification wrapper: instantiates hazard5_cpu as the device under
// test, exposes its RVFI outputs through the `RVFI_OUTPUTS port macro, and
// drives the bus response inputs hready/hrdata from `rvformal_rand_reg
// declarations, constrained only by the assumptions below.
module rvfi_wrapper (
input wire clock,
input wire reset,
`RVFI_OUTPUTS
);
// ----------------------------------------------------------------------------
// Memory Interface
// ----------------------------------------------------------------------------
// Bus signals between the DUT and the (abstract) memory. (* keep *) asks the
// tools not to optimise these nets away.
(* keep *) wire [31:0] haddr;
(* keep *) wire hwrite;
(* keep *) wire [1:0] htrans;
(* keep *) wire [2:0] hsize;
(* keep *) wire [2:0] hburst;
(* keep *) wire [3:0] hprot;
(* keep *) wire hmastlock;
(* keep *) `rvformal_rand_reg hready;
// NOTE(review): hresp has no driver in this module and the assumption that
// would tie it low is commented out below — confirm how the flow treats it.
(* keep *) wire hresp;
(* keep *) wire [31:0] hwdata;
(* keep *) `rvformal_rand_reg [31:0] hrdata;
// AHB-lite requires: data phase of IDLE has no wait states
// (if the previous cycle was an accepted IDLE address phase, hready must be
// high this cycle)
always @ (posedge clock)
if ($past(htrans) == 2'b00 && $past(hready))
assume(hready);
// Handling of bus faults is not tested
// always assume(!hresp);
`ifdef RISCV_FORMAL_FAIRNESS
// Bound the length of bus stalls: count consecutive cycles with hready low
// and assume the count never exceeds MAX_STALL_LENGTH.
reg [7:0] bus_fairness_ctr;
localparam MAX_STALL_LENGTH = 8;
always @ (posedge clock) begin
if (reset)
bus_fairness_ctr <= 8'h0;
else if (hready)
bus_fairness_ctr <= 8'h0;
else
// ~& is reduction NAND: the increment is 1 until the counter is all-ones,
// then 0, so the counter saturates rather than wrapping.
bus_fairness_ctr <= bus_fairness_ctr + ~&bus_fairness_ctr;
assume(bus_fairness_ctr <= MAX_STALL_LENGTH);
end
`endif
// ----------------------------------------------------------------------------
// Device Under Test
// ----------------------------------------------------------------------------
// Configured with reset vector 0 and both the C and M extensions enabled.
hazard5_cpu #(
.RESET_VECTOR (0),
.EXTENSION_C (1),
.EXTENSION_M (1)
) dut (
.clk (clock),
// The wrapper's reset is active-high; the core's rst_n is active-low
.rst_n (!reset),
.ahblm_haddr (haddr),
.ahblm_hwrite (hwrite),
.ahblm_htrans (htrans),
.ahblm_hsize (hsize),
.ahblm_hburst (hburst),
.ahblm_hprot (hprot),
.ahblm_hmastlock (hmastlock),
.ahblm_hready (hready),
.ahblm_hresp (hresp),
.ahblm_hwdata (hwdata),
.ahblm_hrdata (hrdata),
`RVFI_CONN
);
endmodule

147
hdl/rv_opcodes.vh Normal file
View File

@ -0,0 +1,147 @@
// Position (least-significant bit index) and width of the register specifier
// fields within a 32-bit instruction word:
// rs1 = instr[19:15], rs2 = instr[24:20], rd = instr[11:7]
localparam RV_RS1_LSB = 15;
localparam RV_RS1_BITS = 5;
localparam RV_RS2_LSB = 20;
localparam RV_RS2_BITS = 5;
localparam RV_RD_LSB = 7;
localparam RV_RD_BITS = 5;
// Base ISA (some of these are Z now)
// Match patterns for 32-bit instructions. Bit positions left as '?' (an
// alternative spelling of z in Verilog literals) are not fixed by the encoding:
// register specifiers and immediates. The fixed bits are the opcode
// (instr[6:0]) and, where present, funct3 (instr[14:12]) and funct7
// (instr[31:25]).
// Conditional branches
localparam RV_BEQ = 32'b?????????????????000?????1100011;
localparam RV_BNE = 32'b?????????????????001?????1100011;
localparam RV_BLT = 32'b?????????????????100?????1100011;
localparam RV_BGE = 32'b?????????????????101?????1100011;
localparam RV_BLTU = 32'b?????????????????110?????1100011;
localparam RV_BGEU = 32'b?????????????????111?????1100011;
// Jumps and upper-immediate instructions
localparam RV_JALR = 32'b?????????????????000?????1100111;
localparam RV_JAL = 32'b?????????????????????????1101111;
localparam RV_LUI = 32'b?????????????????????????0110111;
localparam RV_AUIPC = 32'b?????????????????????????0010111;
// Register-immediate ALU operations (shifts additionally fix instr[31:25])
localparam RV_ADDI = 32'b?????????????????000?????0010011;
localparam RV_SLLI = 32'b0000000??????????001?????0010011;
localparam RV_SLTI = 32'b?????????????????010?????0010011;
localparam RV_SLTIU = 32'b?????????????????011?????0010011;
localparam RV_XORI = 32'b?????????????????100?????0010011;
localparam RV_SRLI = 32'b0000000??????????101?????0010011;
localparam RV_SRAI = 32'b0100000??????????101?????0010011;
localparam RV_ORI = 32'b?????????????????110?????0010011;
localparam RV_ANDI = 32'b?????????????????111?????0010011;
// Register-register ALU operations
localparam RV_ADD = 32'b0000000??????????000?????0110011;
localparam RV_SUB = 32'b0100000??????????000?????0110011;
localparam RV_SLL = 32'b0000000??????????001?????0110011;
localparam RV_SLT = 32'b0000000??????????010?????0110011;
localparam RV_SLTU = 32'b0000000??????????011?????0110011;
localparam RV_XOR = 32'b0000000??????????100?????0110011;
localparam RV_SRL = 32'b0000000??????????101?????0110011;
localparam RV_SRA = 32'b0100000??????????101?????0110011;
localparam RV_OR = 32'b0000000??????????110?????0110011;
localparam RV_AND = 32'b0000000??????????111?????0110011;
// Loads
localparam RV_LB = 32'b?????????????????000?????0000011;
localparam RV_LH = 32'b?????????????????001?????0000011;
localparam RV_LW = 32'b?????????????????010?????0000011;
localparam RV_LBU = 32'b?????????????????100?????0000011;
localparam RV_LHU = 32'b?????????????????101?????0000011;
// Stores
localparam RV_SB = 32'b?????????????????000?????0100011;
localparam RV_SH = 32'b?????????????????001?????0100011;
localparam RV_SW = 32'b?????????????????010?????0100011;
// Fences
localparam RV_FENCE = 32'b?????????????????000?????0001111;
localparam RV_FENCE_I = 32'b?????????????????001?????0001111;
// System instructions. ECALL, EBREAK and MRET are fully specified 32-bit
// values; RV_SYSTEM matches the whole SYSTEM major opcode.
localparam RV_ECALL = 32'b00000000000000000000000001110011;
localparam RV_EBREAK = 32'b00000000000100000000000001110011;
localparam RV_CSRRW = 32'b?????????????????001?????1110011;
localparam RV_CSRRS = 32'b?????????????????010?????1110011;
localparam RV_CSRRC = 32'b?????????????????011?????1110011;
localparam RV_CSRRWI = 32'b?????????????????101?????1110011;
localparam RV_CSRRSI = 32'b?????????????????110?????1110011;
localparam RV_CSRRCI = 32'b?????????????????111?????1110011;
localparam RV_MRET = 32'b00110000001000000000000001110011;
localparam RV_SYSTEM = 32'b?????????????????????????1110011;
// M extension
// Multiply/divide match patterns. All share the register-register opcode
// 0110011 with instr[31:25] = 0000001; instr[14:12] selects the operation.
localparam RV_MUL = 32'b0000001??????????000?????0110011;
localparam RV_MULH = 32'b0000001??????????001?????0110011;
localparam RV_MULHSU = 32'b0000001??????????010?????0110011;
localparam RV_MULHU = 32'b0000001??????????011?????0110011;
localparam RV_DIV = 32'b0000001??????????100?????0110011;
localparam RV_DIVU = 32'b0000001??????????101?????0110011;
localparam RV_REM = 32'b0000001??????????110?????0110011;
localparam RV_REMU = 32'b0000001??????????111?????0110011;
// C Extension
// Match patterns for 16-bit compressed instructions. instr[1:0] selects the
// quadrant (00, 01 or 10) and instr[15:13] is the main function field; some
// patterns fix further bits. Several patterns are shared by two instructions
// which differ only in a register field, and some encodings within a pattern
// are illegal or reserved: these cases are marked *** and must be resolved by
// whatever logic uses the patterns.
// Quadrant 0
localparam RV_C_ADDI4SPN = 16'b000???????????00; // *** illegal if imm 0
localparam RV_C_LW = 16'b010???????????00;
localparam RV_C_SW = 16'b110???????????00;
// Quadrant 1
localparam RV_C_ADDI = 16'b000???????????01;
localparam RV_C_JAL = 16'b001???????????01;
localparam RV_C_J = 16'b101???????????01;
localparam RV_C_LI = 16'b010???????????01;
// addi16sp when rd=2:
localparam RV_C_LUI = 16'b011???????????01; // *** reserved if imm 0 (for both LUI and ADDI16SP)
localparam RV_C_SRLI = 16'b100000????????01; // On RV32 imm[5] (instr[12]) must be 0, else reserved NSE.
localparam RV_C_SRAI = 16'b100001????????01; // On RV32 imm[5] (instr[12]) must be 0, else reserved NSE.
localparam RV_C_ANDI = 16'b100?10????????01;
localparam RV_C_SUB = 16'b100011???00???01;
localparam RV_C_XOR = 16'b100011???01???01;
localparam RV_C_OR = 16'b100011???10???01;
localparam RV_C_AND = 16'b100011???11???01;
localparam RV_C_BEQZ = 16'b110???????????01;
localparam RV_C_BNEZ = 16'b111???????????01;
// Quadrant 2
localparam RV_C_SLLI = 16'b0000??????????10; // On RV32 imm[5] (instr[12]) must be 0, else reserved NSE.
// jr if !rs2:
localparam RV_C_MV = 16'b1000??????????10; // *** reserved if JR and !rs1 (instr[11:7])
// jalr if !rs2:
localparam RV_C_ADD = 16'b1001??????????10; // *** EBREAK if !instr[11:2]
localparam RV_C_LWSP = 16'b010???????????10;
localparam RV_C_SWSP = 16'b110???????????10;
// Copies provided here with 0 instead of ? so that these can be used to build 32-bit instructions in the decompressor
// Each RV_NOZ_x carries the same fixed bits as the corresponding RV_x pattern,
// with every don't-care bit set to 0, so register and immediate fields can be
// ORed in.
// NOTE(review): RV_MRET has no RV_NOZ_ counterpart in this list.
localparam RV_NOZ_BEQ = 32'b00000000000000000000000001100011;
localparam RV_NOZ_BNE = 32'b00000000000000000001000001100011;
localparam RV_NOZ_BLT = 32'b00000000000000000100000001100011;
localparam RV_NOZ_BGE = 32'b00000000000000000101000001100011;
localparam RV_NOZ_BLTU = 32'b00000000000000000110000001100011;
localparam RV_NOZ_BGEU = 32'b00000000000000000111000001100011;
localparam RV_NOZ_JALR = 32'b00000000000000000000000001100111;
localparam RV_NOZ_JAL = 32'b00000000000000000000000001101111;
localparam RV_NOZ_LUI = 32'b00000000000000000000000000110111;
localparam RV_NOZ_AUIPC = 32'b00000000000000000000000000010111;
localparam RV_NOZ_ADDI = 32'b00000000000000000000000000010011;
localparam RV_NOZ_SLLI = 32'b00000000000000000001000000010011;
localparam RV_NOZ_SLTI = 32'b00000000000000000010000000010011;
localparam RV_NOZ_SLTIU = 32'b00000000000000000011000000010011;
localparam RV_NOZ_XORI = 32'b00000000000000000100000000010011;
localparam RV_NOZ_SRLI = 32'b00000000000000000101000000010011;
localparam RV_NOZ_SRAI = 32'b01000000000000000101000000010011;
localparam RV_NOZ_ORI = 32'b00000000000000000110000000010011;
localparam RV_NOZ_ANDI = 32'b00000000000000000111000000010011;
localparam RV_NOZ_ADD = 32'b00000000000000000000000000110011;
localparam RV_NOZ_SUB = 32'b01000000000000000000000000110011;
localparam RV_NOZ_SLL = 32'b00000000000000000001000000110011;
localparam RV_NOZ_SLT = 32'b00000000000000000010000000110011;
localparam RV_NOZ_SLTU = 32'b00000000000000000011000000110011;
localparam RV_NOZ_XOR = 32'b00000000000000000100000000110011;
localparam RV_NOZ_SRL = 32'b00000000000000000101000000110011;
localparam RV_NOZ_SRA = 32'b01000000000000000101000000110011;
localparam RV_NOZ_OR = 32'b00000000000000000110000000110011;
localparam RV_NOZ_AND = 32'b00000000000000000111000000110011;
localparam RV_NOZ_LB = 32'b00000000000000000000000000000011;
localparam RV_NOZ_LH = 32'b00000000000000000001000000000011;
localparam RV_NOZ_LW = 32'b00000000000000000010000000000011;
localparam RV_NOZ_LBU = 32'b00000000000000000100000000000011;
localparam RV_NOZ_LHU = 32'b00000000000000000101000000000011;
localparam RV_NOZ_SB = 32'b00000000000000000000000000100011;
localparam RV_NOZ_SH = 32'b00000000000000000001000000100011;
localparam RV_NOZ_SW = 32'b00000000000000000010000000100011;
localparam RV_NOZ_FENCE = 32'b00000000000000000000000000001111;
localparam RV_NOZ_FENCE_I = 32'b00000000000000000001000000001111;
localparam RV_NOZ_ECALL = 32'b00000000000000000000000001110011;
localparam RV_NOZ_EBREAK = 32'b00000000000100000000000001110011;
localparam RV_NOZ_CSRRW = 32'b00000000000000000001000001110011;
localparam RV_NOZ_CSRRS = 32'b00000000000000000010000001110011;
localparam RV_NOZ_CSRRC = 32'b00000000000000000011000001110011;
localparam RV_NOZ_CSRRWI = 32'b00000000000000000101000001110011;
localparam RV_NOZ_CSRRSI = 32'b00000000000000000110000001110011;
localparam RV_NOZ_CSRRCI = 32'b00000000000000000111000001110011;
localparam RV_NOZ_SYSTEM = 32'b00000000000000000000000001110011;

7
test/.gitignore vendored Normal file
View File

@ -0,0 +1,7 @@
*.o
*.out
*.bin
*.vcd
*.elf
*.dis
*.log

241
test/common/init.S Normal file
View File

@ -0,0 +1,241 @@
// NOTE(review): COLLAPSE_WEAK_HANDLERS is not referenced in the visible part
// of this file — confirm whether anything still tests it.
#define COLLAPSE_WEAK_HANDLERS
// Testbench IO registers: three consecutive 32-bit words starting at IO_BASE
#define IO_BASE 0x80000000
#define IO_PRINT_CHAR (IO_BASE + 0x0)
#define IO_PRINT_U32 (IO_BASE + 0x4)
#define IO_EXIT (IO_BASE + 0x8)
// Provide trap vector table, reset handler and weak default trap handlers for
// Hazard5. This is not a crt0: the reset handler calls an external _start
// Save the assembler options (restored by the .option pop at the end of the
// file), then disable linker relaxation and compressed instructions for
// everything in between.
.option push
.option norelax
.option norvc
.section .vectors
// One vector table entry: a word-aligned jump to the named handler
.macro VEC name:req
.p2align 2
j \name
.endm
// ----------------------------------------------------------------------------
// Vector table
// Hazard5 requires 4k alignment of mtvec
// The table has 48 entries in three groups of 16: exceptions, standard
// interrupts, external interrupts. Entries with no handler jump to .halt.
.p2align 12
.vector_table:
// Exceptions
// Entries 0-7 and 11 have named handlers; the rest halt
VEC handle_instr_misalign
VEC handle_instr_fault
VEC handle_instr_illegal
VEC handle_breakpoint
VEC handle_load_misalign
VEC handle_load_fault
VEC handle_store_misalign
VEC handle_store_fault
VEC .halt
VEC .halt
VEC .halt
VEC handle_ecall
VEC .halt
VEC .halt
VEC .halt
VEC .halt
// Standard interrupts
// Note: global EIRQ does not fire. Instead we have 16 separate vectors
// Only entry 3 (machine software interrupt) and entry 7 (machine timer
// interrupt) of this group have handlers.
VEC .halt
VEC .halt
VEC .halt
VEC isr_machine_softirq
VEC .halt
VEC .halt
VEC .halt
VEC isr_machine_timer
VEC .halt
VEC .halt
VEC .halt
VEC .halt
VEC .halt
VEC .halt
VEC .halt
VEC .halt
// External interrupts
// One vector per external interrupt line, irq0 through irq15
VEC isr_irq0
VEC isr_irq1
VEC isr_irq2
VEC isr_irq3
VEC isr_irq4
VEC isr_irq5
VEC isr_irq6
VEC isr_irq7
VEC isr_irq8
VEC isr_irq9
VEC isr_irq10
VEC isr_irq11
VEC isr_irq12
VEC isr_irq13
VEC isr_irq14
VEC isr_irq15
// ----------------------------------------------------------------------------
// Reset handler
// Placed immediately after the 48-entry vector table. Sets up the stack
// pointer and mtvec, builds a minimal argc/argv frame, then calls the
// externally provided _start. If _start returns, spin in .halt.
.reset_handler:
// __stack_top is not defined in this file; it must come from the link
la sp, __stack_top
la t0, .vector_table
csrw mtvec, t0
// newlib _start expects argc, argv on the stack. Leave stack 16-byte aligned.
addi sp, sp, -16
// argc = 1
li a0, 1
sw a0, (sp)
// argv[0] = pointer to the program name string.
// NOTE(review): the words at 8(sp) and 12(sp) are not written, so there is
// no explicit NULL terminator after argv[0] — confirm _start does not need one.
la a0, progname
sw a0, 4(sp)
jal _start
j .halt
// _exit: report the exit code in a0 to the testbench by writing it to the
// IO_EXIT register. Clobbers a1. Does not return.
.global _exit
_exit:
	li a1, IO_EXIT
	sw a0, (a1)
	// The store above is presumably what ends the simulation. If execution
	// does continue, spin here instead of falling through into _sbrk and
	// returning to a caller that expects _exit never to return.
	j .halt
// _sbrk: grow the heap by a0 bytes and return the previous heap end in a0.
// Clobbers a1 and a2. There is no upper bound check: the heap pointer is
// advanced unconditionally.
.global _sbrk
_sbrk:
// a2 = current heap end
la a1, heap_ptr
lw a2, (a1)
// New heap end = old end + increment; store it back
add a0, a0, a2
sw a0, (a1)
// Return the old heap end
mv a0, a2
ret
// Current end of heap, initialised to the _end symbol (not defined in this
// file). This word lives in the same section as the code and is written at
// run time.
heap_ptr:
.word _end
// .halt: infinite loop. Used as the target of unused vector table entries
// and as the fallback after code paths which should not continue.
.global .halt
.halt:
j .halt
// Program name string, passed to _start as argv[0] by the reset handler
progname:
.asciz "hazard5-testbench"
// ----------------------------------------------------------------------------
// Weak handler/ISR symbols
// Routine to print out trap name, trap address, and some core registers
// (x8..x15, ra, sp). The default handlers are all patched into this routine,
// so the CPU will print some basic diagnostics on any unhandled trap
// (assuming the processor is not internally completely broken)
// _tb_puts: write a null-terminated string to IO_PRINT_CHAR, one character
// per store. Uses only x27-x30 and no stack, so it does not disturb the
// registers which the diagnostic routine goes on to print.
// argument in x28, return in x27, trashes x28...x30
_tb_puts:
li x29, IO_PRINT_CHAR
1:
// Load the next character and advance; stop at the terminating zero
lbu x30, (x28)
addi x28, x28, 1
beqz x30, 2f
sw x30, (x29)
j 1b
2:
jr x27
// print_reg: print the label string \str, then write register \reg to the
// address held in x31. The caller must already have loaded x31 (the routine
// below sets it to IO_PRINT_U32 first).
.macro print_reg str reg
la x28, \str
jal x27, _tb_puts
sw \reg, (x31)
.endm
// Common body of all default trap handlers. On entry x31 points at the
// null-terminated name of the trap. Prints a banner, the trap name, mepc and
// a selection of registers, then writes -1 to IO_EXIT.
_weak_handler_name_in_x31:
la x28, _str_unhandled_trap
jal x27, _tb_puts
// Print the trap name passed in x31
mv x28, x31
jal x27, _tb_puts
la x28, _str_at_mepc
jal x27, _tb_puts
// From here on x31 holds the IO_PRINT_U32 address used by print_reg
li x31, IO_PRINT_U32
csrr x28, mepc
sw x28, (x31)
print_reg _str_s0 s0
print_reg _str_s1 s1
print_reg _str_a0 a0
print_reg _str_a1 a1
print_reg _str_a2 a2
print_reg _str_a3 a3
print_reg _str_a4 a4
print_reg _str_a5 a5
print_reg _str_ra ra
print_reg _str_sp sp
// Request exit with code -1
li x31, IO_EXIT
li x30, -1
sw x30, (x31)
// Should be unreachable:
j .halt
// Strings used by the routine above
_str_unhandled_trap: .asciz "*** Unhandled trap ***\n"
_str_at_mepc: .asciz " @ mepc = "
_str_s0: .asciz "s0: "
_str_s1: .asciz "s1: "
_str_a0: .asciz "a0: "
_str_a1: .asciz "a1: "
_str_a2: .asciz "a2: "
_str_a3: .asciz "a3: "
_str_a4: .asciz "a4: "
_str_a5: .asciz "a5: "
_str_ra: .asciz "ra: "
_str_sp: .asciz "sp: "
// Provide a default weak handler for each trap, which calls into the above
// diagnostic routine with the trap name (a null-terminated string) in x31
// Each expansion defines \name as a weak global function symbol, so a strong
// definition of the same name elsewhere in the link replaces this default.
// The name string is emitted directly after the two instructions; the
// .p2align 2 at the start of the macro realigns the next handler after it.
.macro weak_handler name:req
.p2align 2
.global \name
.type \name,%function
.weak \name
\name:
la x31, _str_\name
j _weak_handler_name_in_x31
_str_\name:
.asciz "\name"
.endm
// One default handler for every named entry in the vector table
weak_handler handle_instr_misalign
weak_handler handle_instr_fault
weak_handler handle_instr_illegal
weak_handler handle_breakpoint
weak_handler handle_load_misalign
weak_handler handle_load_fault
weak_handler handle_store_misalign
weak_handler handle_store_fault
weak_handler handle_ecall
weak_handler isr_machine_softirq
weak_handler isr_machine_timer
weak_handler isr_irq0
weak_handler isr_irq1
weak_handler isr_irq2
weak_handler isr_irq3
weak_handler isr_irq4
weak_handler isr_irq5
weak_handler isr_irq6
weak_handler isr_irq7
weak_handler isr_irq8
weak_handler isr_irq9
weak_handler isr_irq10
weak_handler isr_irq11
weak_handler isr_irq12
weak_handler isr_irq13
weak_handler isr_irq14
weak_handler isr_irq15
// You can relax now
// (restores the assembler options saved by .option push at the top of the file)
.option pop

253
test/common/memmap.ld Normal file
View File

@ -0,0 +1,253 @@
/* Script for -z combreloc: combine and sort reloc sections */
/* Copyright (C) 2014-2017 Free Software Foundation, Inc.
Copying and distribution of this script, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. */
/* Modified from GCC default for hazard5 testbench */
/* A single 16 MiB region starting at address 0, marked writable and
   executable, holds everything placed by this script. */
MEMORY
{
RAM (wx) : ORIGIN = 0x0, LENGTH = 16M
}
/* Output is always 32-bit little-endian RISC-V ELF: the same format is given
   for the default, big-endian and little-endian cases. */
OUTPUT_FORMAT("elf32-littleriscv", "elf32-littleriscv",
"elf32-littleriscv")
OUTPUT_ARCH(riscv)
/* ELF entry symbol. test/common/init.S does not define _start; its reset
   handler calls it. */
ENTRY(_start)
/* NOTE(review): hard-coded toolchain library path -- confirm it matches the
   local installation. */
SEARCH_DIR("/opt/riscv/riscv32-unknown-elf/lib");
SECTIONS
{
/* Read-only sections, merged into text segment: */
PROVIDE (__executable_start = ORIGIN(RAM)); . = ORIGIN(RAM);
.interp : { *(.interp) }
.note.gnu.build-id : { *(.note.gnu.build-id) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.rela.dyn :
{
*(.rela.init)
*(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
*(.rela.fini)
*(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
*(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
*(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
*(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
*(.rela.ctors)
*(.rela.dtors)
*(.rela.got)
*(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*)
*(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*)
*(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*)
*(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*)
*(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
PROVIDE_HIDDEN (__rela_iplt_start = .);
*(.rela.iplt)
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.rela.plt :
{
*(.rela.plt)
}
.init :
{
KEEP (*(SORT_NONE(.init)))
}
.plt : { *(.plt) }
.iplt : { *(.iplt) }
/* Code. Input sections named .vectors come first within .text; this is the
   section test/common/init.S uses for its vector table and reset handler. */
.text :
{
*(.vectors)
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
*(.text.exit .text.exit.*)
*(.text.startup .text.startup.*)
*(.text.hot .text.hot.*)
*(.text .stub .text.* .gnu.linkonce.t.*)
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
}
.fini :
{
KEEP (*(SORT_NONE(.fini)))
}
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
.sdata2 :
{
*(.sdata2 .sdata2.* .gnu.linkonce.s2.*)
}
.sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
.gcc_except_table.*) }
.gnu_extab : ONLY_IF_RO { *(.gnu_extab*) }
/* These sections are generated by the Sun/Oracle C++ compiler. */
.exception_ranges : ONLY_IF_RO { *(.exception_ranges
.exception_ranges*) }
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
. = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
/* Exception handling */
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gnu_extab : ONLY_IF_RW { *(.gnu_extab) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
/* Thread Local Storage sections */
.tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);
KEEP (*(.preinit_array))
PROVIDE_HIDDEN (__preinit_array_end = .);
}
.init_array :
{
PROVIDE_HIDDEN (__init_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
PROVIDE_HIDDEN (__init_array_end = .);
}
.fini_array :
{
PROVIDE_HIDDEN (__fini_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
PROVIDE_HIDDEN (__fini_array_end = .);
}
.ctors :
{
/* gcc uses crtbegin.o to find the start of
the constructors, so we make sure it is
first. Because this is a wildcard, it
doesn't matter if the user does not
actually link against crtbegin.o; the
linker won't look for a file to match a
wildcard. The wildcard also means that it
doesn't matter which directory crtbegin.o
is in. */
KEEP (*crtbegin.o(.ctors))
KEEP (*crtbegin?.o(.ctors))
/* We don't want to include the .ctor section from
the crtend.o file until after the sorted ctors.
The .ctor section from the crtend file contains the
end of ctors marker and it must be last */
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
KEEP (*(SORT(.ctors.*)))
KEEP (*(.ctors))
}
.dtors :
{
KEEP (*crtbegin.o(.dtors))
KEEP (*crtbegin?.o(.dtors))
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
KEEP (*(SORT(.dtors.*)))
KEEP (*(.dtors))
}
.jcr : { KEEP (*(.jcr)) }
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
.dynamic : { *(.dynamic) }
. = DATA_SEGMENT_RELRO_END (0, .);
.data :
{
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
}
.data1 : { *(.data1) }
.got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
/* We want the small data sections together, so single-instruction offsets
can access them all, and initialized data all before uninitialized, so
we can shorten the on-disk segment size. */
.sdata :
{
__global_pointer$ = . + 0x800;
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*)
*(.sdata .sdata.* .gnu.linkonce.s.*)
}
/* End of initialised data */
_edata = .; PROVIDE (edata = .);
. = .;
/* Start of zero-initialised data (small bss first, then regular bss) */
__bss_start = .;
.sbss :
{
*(.dynsbss)
*(.sbss .sbss.* .gnu.linkonce.sb.*)
*(.scommon)
}
.bss :
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(COMMON)
/* Align here to ensure that the .bss section occupies space up to
_end. Align after .bss to ensure correct alignment even if the
.bss section disappears because there are no input sections.
FIXME: Why do we need it? When there is no .bss section, we don't
pad the .data section. */
. = ALIGN(. != 0 ? 32 / 8 : 1);
}
. = ALIGN(32 / 8);
. = SEGMENT_START("ldata-segment", .);
. = ALIGN(32 / 8);
/* End of bss and of the statically allocated image, both word-aligned.
   test/common/init.S initialises its heap pointer to _end. */
__bss_end = .;
_end = .; PROVIDE (end = .);
/* Sections that occupy address space but are not loaded.
   NOTE(review): this section is placed at _end, which is also where the
   _sbrk heap in init.S begins, so a non-empty .noload would overlap the
   heap -- confirm this is intended. */
.noload (NOLOAD):
{
*(.noload .noload.*)
}
. = DATA_SEGMENT_END (.);
/* Initial stack pointer: the very top of RAM (loaded into sp by the reset
   handler in init.S). */
PROVIDE(__stack_top = ORIGIN(RAM) + LENGTH(RAM));
/* Stabs debugging sections. */
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
.stab.excl 0 : { *(.stab.excl) }
.stab.exclstr 0 : { *(.stab.exclstr) }
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0. */
/* DWARF 1 */
.debug 0 : { *(.debug) }
.line 0 : { *(.line) }
/* GNU DWARF 1 extensions */
.debug_srcinfo 0 : { *(.debug_srcinfo) }
.debug_sfnames 0 : { *(.debug_sfnames) }
/* DWARF 1.1 and DWARF 2 */
.debug_aranges 0 : { *(.debug_aranges) }
.debug_pubnames 0 : { *(.debug_pubnames) }
/* DWARF 2 */
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
.debug_abbrev 0 : { *(.debug_abbrev) }
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
.debug_frame 0 : { *(.debug_frame) }
.debug_str 0 : { *(.debug_str) }
.debug_loc 0 : { *(.debug_loc) }
.debug_macinfo 0 : { *(.debug_macinfo) }
/* SGI/MIPS DWARF 2 extensions */
.debug_weaknames 0 : { *(.debug_weaknames) }
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) }
/* DWARF 3 */
.debug_pubtypes 0 : { *(.debug_pubtypes) }
.debug_ranges 0 : { *(.debug_ranges) }
/* DWARF Extension. */
.debug_macro 0 : { *(.debug_macro) }
.debug_addr 0 : { *(.debug_addr) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
}

View File

@ -0,0 +1,48 @@
# Shared rules for building a bare-metal test program and running it on the
# CXXRTL testbench. The including Makefile must define:
#   SRCS  list of source files to compile
#   APP   base name of the outputs ($(APP).elf, $(APP).bin, $(APP).dis)
# Everything assigned with ?= below can be overridden by the includer or on
# the command line.

ifndef SRCS
$(error Must define list of test sources as SRCS)
endif

ifndef APP
$(error Must define application name as APP)
endif

CCFLAGS      ?=
LDSCRIPT     ?= ../common/memmap.ld
CROSS_PREFIX ?= riscv32-unknown-elf-
TBDIR        ?= ../tb_cxxrtl
INCDIR       ?= ../common
MAX_CYCLES   ?= 100000

###############################################################################

# Disable the built-in suffix rules; only the explicit rules below apply
.SUFFIXES:
.PHONY: all run view bin tb clean clean_tb

all: run

# Run the program on the testbench, dumping waves to $(APP)_run.vcd
run: $(APP).bin
	$(TBDIR)/tb $(APP).bin $(APP)_run.vcd --cycles $(MAX_CYCLES)

# Run, then open the wave dump
view: run
	gtkwave $(APP)_run.vcd

bin: $(APP).bin

# Build the testbench executable itself
tb:
	$(MAKE) -C $(TBDIR) tb

clean:
	rm -f $(APP).elf $(APP).bin $(APP).dis $(APP)_run.vcd

clean_tb: clean
	$(MAKE) -C $(TBDIR) clean

###############################################################################

# Flat binary for the testbench, plus a section listing and disassembly
$(APP).bin: $(APP).elf
	$(CROSS_PREFIX)objcopy -O binary $^ $@
	$(CROSS_PREFIX)objdump -h $(APP).elf > $(APP).dis
	$(CROSS_PREFIX)objdump -d $(APP).elf >> $(APP).dis

# Relink when any source or any header in this directory changes. The
# wildcard function takes a shell-style glob (*.h); '%' is only special in
# pattern rules and would match nothing useful here.
$(APP).elf: $(SRCS) $(wildcard *.h)
	$(CROSS_PREFIX)gcc $(CCFLAGS) $(SRCS) -T $(LDSCRIPT) $(addprefix -I,$(INCDIR)) -o $(APP).elf

View File

@ -0,0 +1,33 @@
#ifndef _TB_CXXRTL_IO_H
#define _TB_CXXRTL_IO_H

#include <stdint.h>

// Base address of the testbench IO registers
#define IO_BASE 0x80000000

// Register block located at IO_BASE: three consecutive 32-bit words
struct io_hw {
	volatile uint32_t print_char;
	volatile uint32_t print_u32;
	volatile uint32_t exit;
};

// The register block, viewed through a constant pointer
#define mm_io ((struct io_hw *const)IO_BASE)

// Write one character to the print_char register
static inline void tb_putc(char c) {
	mm_io->print_char = (uint32_t)c;
}

// Write each character of a null-terminated string via tb_putc
static inline void tb_puts(const char *s) {
	for (; *s; ++s)
		tb_putc(*s);
}

// Write a 32-bit value to the print_u32 register
static inline void tb_put_u32(uint32_t x) {
	mm_io->print_u32 = x;
}

// Write an exit code to the exit register
static inline void tb_exit(uint32_t ret) {
	mm_io->exit = ret;
}

#endif

45
test/coremark/Makefile Normal file
View File

@ -0,0 +1,45 @@
# Build CoreMark (via the Makefile in dist/) and run it on the CXXRTL testbench

APP          := coremark
MAX_CYCLES   := 100000000

CROSS_PREFIX ?= riscv32-unknown-elf-
TBDIR        ?= ../tb_cxxrtl

###############################################################################

# Disable the built-in suffix rules; only the explicit rules below apply
.SUFFIXES:
.PHONY: all run waves view bin tb clean clean_tb

all: run

# Plain run: no wave dump
run: $(APP).bin
	$(TBDIR)/tb $(APP).bin --cycles $(MAX_CYCLES)

# Run with a wave dump written to $(APP)_run.vcd
waves: $(APP).bin
	$(TBDIR)/tb $(APP).bin $(APP)_run.vcd --cycles $(MAX_CYCLES)

# Viewing needs the wave dump, which only the waves target produces
view: waves
	gtkwave $(APP)_run.vcd

bin: $(APP).bin

# Build the testbench executable itself
tb:
	$(MAKE) -C $(TBDIR) tb

clean:
	rm -f $(APP).elf $(APP).bin $(APP).dis $(APP)_run.vcd
	rm -rf dist/build/

clean_tb: clean
	$(MAKE) -C $(TBDIR) clean

###############################################################################

# Flat binary for the testbench, plus a section listing and disassembly
$(APP).bin: $(APP).elf
	$(CROSS_PREFIX)objcopy -O binary $^ $@
	$(CROSS_PREFIX)objdump -h $(APP).elf > $(APP).dis
	$(CROSS_PREFIX)objdump -d $(APP).elf >> $(APP).dis

# Use $(MAKE) so that flags and the jobserver are passed on to the sub-make
$(APP).elf:
	$(MAKE) -C dist
	cp dist/build/coremark.elf $(APP).elf

1
test/coremark/dist/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
build

100
test/coremark/dist/LICENSE.md vendored Normal file
View File

@ -0,0 +1,100 @@
# COREMARK® ACCEPTABLE USE AGREEMENT
This ACCEPTABLE USE AGREEMENT (this “Agreement”) is offered by Embedded Microprocessor Benchmark Consortium, a California nonprofit corporation (“Licensor”), to users of its CoreMark® software (“Licensee”) exclusively on the following terms.
Licensor offers benchmarking software (“Software”) pursuant to an open source license, but carefully controls use of its benchmarks and their associated goodwill. Licensor has registered its trademark in one of the benchmarks available through the Software, COREMARK, Ser. No. 85/487,290; Reg. No. 4,179,307 (the “Trademark”), and promotes the use of a standard metric as a benchmark for assessing the performance of embedded systems. Solely on the terms described herein, Licensee may use and display the Trademark in connection with the generation of data regarding measurement and analysis of computer and embedded system benchmarking via the Software (the “Licensed Use”).
## Article 1 License Grant.
1.1. License. Subject to the terms and conditions of this Agreement, Licensor hereby grants to Licensee, and Licensee hereby accepts from Licensor, a personal, non-exclusive, royalty-free, revocable right and license to use and display the Trademark during the term of this Agreement (the “Term”), solely and exclusively in connection with the Licensed Use. During the Term, Licensee (i) shall not modify or otherwise create derivative works of the Trademark, and (ii) may use the Trademark only to the extent permitted under this License. Neither Licensee nor any affiliate or agent thereof shall otherwise use the Trademark without the prior express written consent of Licensor, which may be withheld in its sole and absolute discretion. All rights not expressly granted to Licensee hereunder shall remain the exclusive property of Licensor.
1.2. Modifications to the Software. Licensee shall not use the Trademark in connection with any use of a modified, derivative, or otherwise altered copy of the Software.
1.3. Licensor’s Use. Nothing in this Agreement shall preclude Licensor or any of its successors or assigns from using or permitting other entities to use the Trademark, whether or not such entity directly or indirectly competes or conflicts with Licensee’s Licensed Use in any manner.
1.4. Term and Termination. This Agreement is perpetual unless terminated by either of the parties. Licensee may terminate this Agreement for convenience, without cause or liability, for any reason or for no reason whatsoever, upon ten (10) business days written notice. Licensor may terminate this Agreement effective immediately upon notice of breach. Upon termination, Licensee shall immediately remove all implementations of the Trademark from the Licensed Use, and delete all digitals files and records of all materials related to the Trademark.
## Article 2 Ownership.
2.1. Ownership. Licensee acknowledges and agrees that Licensor is the owner of all right, title, and interest in and to the Trademark, and all such right, title, and interest shall remain with Licensor. Licensee shall not contest, dispute, challenge, oppose, or seek to cancel Licensors right, title, and interest in and to the Trademark. Licensee shall not prosecute any application for registration of the Trademark. Licensee shall display appropriate notices regarding ownership of the Trademark in connection with the Licensed Use.
2.2. Goodwill. Licensee acknowledges that Licensee shall not acquire any right, title, or interest in the Trademark by virtue of this Agreement other than the license granted hereunder, and disclaims any such right, title, interest, or ownership. All goodwill and reputation generated by Licensees use of the Trademark shall inure to the exclusive benefit of Licensor. Licensee shall not by any act or omission use the Trademark in any manner that disparages or reflects adversely on Licensor or its Licensed Use or reputation. Licensee shall not take any action that would interfere with or prejudice Licensors ownership or registration of the Trademark, the validity of the Trademark or the validity of the license granted by this Agreement. If Licensor determines and notifies Licensee that any act taken in connection with the Licensed Use (i) is inaccurate, unlawful or offensive to good taste; (ii) fails to provide for proper trademark notices, or (iii) otherwise violates Licensees obligations under this Agreement, the license granted under this Agreement shall terminate.
## Article 3 Indemnification.
3.1. Indemnification Generally. Licensee agrees to indemnify, defend, and hold harmless (collectively “indemnify” or “indemnification”) Licensor, including Licensors members, managers, officers, and employees (collectively “Related Persons”), from and against, and pay or reimburse Licensor and such Related Persons for, any and all third-party actions, claims, demands, proceedings, investigations, inquiries (collectively, “Claims”), and any and all liabilities, obligations, fines, deficiencies, costs, expenses, royalties, losses, and damages (including reasonable outside counsel fees and expenses) associated with such Claims, to the extent that such Claim arises out of (i) Licensees material breach of this Agreement, or (ii) any allegation(s) that Licensees actions infringe or violate any third-party intellectual property right, including without limitation, any U.S. copyright, patent, or trademark, or are otherwise found to be tortious or criminal (whether or not such indemnified person is a named party in a legal proceeding).
3.2. Notice and Defense of Claims. Licensor shall promptly notify Licensee of any Claim for which indemnification is sought, following actual knowledge of such Claim, provided however that the failure to give such notice shall not relieve Licensee of its obligations hereunder except to the extent that Licensee is materially prejudiced by such failure. In the event that any third-party Claim is brought, Licensee shall have the right and option to undertake and control the defense of such action with counsel of its choice, provided however that (i) Licensor at its own expense may participate and appear on an equal footing with Licensee in the defense of any such Claim, (ii) Licensor may undertake and control such defense in the event of the material failure of Licensee to undertake and control the same; and (iii) the defense of any Claim relating to the intellectual property rights of Licensor or its licensors and any related counterclaims shall be solely controlled by Licensor with counsel of its choice. Licensee shall not consent to judgment or concede or settle or compromise any Claim without the prior written approval of Licensor (whose approval shall not be unreasonably withheld), unless such concession or settlement or compromise includes a full and unconditional release of Licensor and any applicable Related Persons from all liabilities in respect of such Claim.
## Article 4 Miscellaneous.
4.1. Relationship of the Parties. This Agreement does not create a partnership, franchise, joint venture, agency, fiduciary, or employment relationship between the parties.
4.2. No Third-Party Beneficiaries. Except for the rights of Related Persons under Article 3 (Indemnification), there are no third-party beneficiaries to this Agreement.
4.3. Assignment. Licensee’s rights hereunder are non-assignable, and may not be sublicensed.
4.4. Equitable Relief. Licensee acknowledges that the remedies available at law for any breach of this Agreement will, by their nature, be inadequate. Accordingly, Licensor may obtain injunctive relief or other equitable relief to restrain a breach or threatened breach of this Agreement or to specifically enforce this Agreement, without proving that any monetary damages have been sustained, and without the requirement of posting of a bond prior to obtaining such equitable relief.
4.5. Governing Law. This Agreement will be interpreted, construed, and enforced in all respects in accordance with the laws of the State of California, without reference to its conflict of law principles.
4.6. Attorneys’ Fees. If any legal action, arbitration or other proceeding is brought for the enforcement of this Agreement, or because of an alleged dispute, breach, default, or misrepresentation in connection with any of the provisions of this Agreement, the successful or prevailing party shall be entitled to recover its reasonable attorneys’ fees and other reasonable costs incurred in that action or proceeding, in addition to any other relief to which it may be entitled.
4.7. Amendment; Waiver. This Agreement may not be amended, nor may any rights under it be waived, except in writing by Licensor.
4.8. Severability. If any provision of this Agreement is held by a court of competent jurisdiction to be contrary to law, the provision shall be modified by the court and interpreted so as best to accomplish the objectives of the original provision to the fullest extent
permitted by law, and the remaining provisions of this Agreement shall remain in effect.
4.9. Entire Agreement. This Agreement constitutes the entire agreement between the parties and supersedes all prior and contemporaneous agreements, proposals or representations, written or oral, concerning its subject matter.
# Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
## TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
You must give any other recipients of the Work or Derivative Works a copy of this License; and
You must cause any modified files to carry prominent notices stating that You changed the files; and
You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS

144
test/coremark/dist/Makefile vendored Normal file
View File

@ -0,0 +1,144 @@
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Original Author: Shay Gal-on
# Edits here:
# This copy is pinned to the barebones port. Because PORT_DIR is set here, the
# host auto-detection further down (guarded by "ifndef PORT_DIR") is skipped.
PORT_DIR := barebones
# Prefix for build outputs (executable and run logs). Keep the trailing slash:
# it is concatenated directly with file names.
OPATH := build/
# Make sure the default target is to simply build and run the benchmark.
RSTAMP = v1.0
.PHONY: run score
# NOTE(review): OUTFILE is only assigned further down this file, and make
# expands prerequisite lists while reading the rule, so $(OUTFILE) presumably
# expands to nothing here and the build is reached via rerun -> load -- confirm.
run: $(OUTFILE) rerun score
# Only prints a reminder of where the results are; it does not parse the logs.
score:
@echo "Check run1.log and run2.log for results."
@echo "See README.md for run and reporting rules."
# Fallback port selection: when PORT_DIR has not been set, derive it from the
# output of uname. Later matches overwrite earlier ones.
ifndef PORT_DIR
# Ports for a couple of common self hosted platforms
UNAME=$(shell if command -v uname 2> /dev/null; then uname ; fi)
ifneq (,$(findstring CYGWIN,$(UNAME)))
PORT_DIR=cygwin
endif
ifneq (,$(findstring Darwin,$(UNAME)))
PORT_DIR=macos
endif
ifneq (,$(findstring FreeBSD,$(UNAME)))
PORT_DIR=freebsd
endif
ifneq (,$(findstring Linux,$(UNAME)))
PORT_DIR=linux
endif
endif
# Still nothing selected: stop with an explicit message instead of failing
# later on the include of $(PORT_DIR)/core_portme.mak.
ifndef PORT_DIR
$(error PLEASE define PORT_DIR! (e.g. make PORT_DIR=simple))
endif
# Let make also search the port directory for sources, headers and .mak files.
vpath %.c $(PORT_DIR)
vpath %.h $(PORT_DIR)
vpath %.mak $(PORT_DIR)
# Port-specific tool and flag definitions (CC, LD, CFLAGS, LFLAGS, ...).
include $(PORT_DIR)/core_portme.mak
# Default iteration count, passed to the compiler as -DITERATIONS below.
# NOTE(review): presumably 0 lets the benchmark choose its own count -- confirm.
ifndef ITERATIONS
ITERATIONS=0
endif
# Setting REBUILD adds the phony force_rebuild target to $(OUTFILE)'s
# prerequisites, which makes that rule run every time.
ifdef REBUILD
FORCE_REBUILD=force_rebuild
endif
CFLAGS += -DITERATIONS=$(ITERATIONS)
# Benchmark core, listed without extension; sources and objects derive from it.
CORE_FILES = core_list_join core_main core_matrix core_state core_util
ORIG_SRCS = $(addsuffix .c,$(CORE_FILES))
SRCS = $(ORIG_SRCS) $(PORT_SRCS)
# OEXT, EXE, PORT_SRCS and PORT_OBJS are not assigned in this file.
# NOTE(review): presumably they come from core_portme.mak -- confirm.
OBJS = $(addprefix $(OPATH),$(addsuffix $(OEXT),$(CORE_FILES)) $(PORT_OBJS))
OUTNAME = coremark$(EXE)
OUTFILE = $(OPATH)$(OUTNAME)
# Output arguments: LOUTCMD for the separate link step, OUTCMD for the
# single-step compile-and-link.
LOUTCMD = $(OFLAG) $(OUTFILE) $(LFLAGS_END)
OUTCMD = $(OUTFLAG) $(OUTFILE) $(LFLAGS_END)
HEADERS = coremark.h
CHECK_FILES = $(ORIG_SRCS) $(HEADERS)
# Create the output directory.
$(OPATH):
$(MKDIR) $(OPATH)
.PHONY: compile link
# Two build flavours, selected by whether SEPARATE_COMPILE is defined:
#  - defined: build the object files individually, then link them with $(LD)
#  - not defined: hand all sources to $(CC) in one compile-and-link command
ifdef SEPARATE_COMPILE
$(OPATH)$(PORT_DIR):
$(MKDIR) $(OPATH)$(PORT_DIR)
compile: $(OPATH) $(OPATH)$(PORT_DIR) $(OBJS) $(HEADERS)
link: compile
$(LD) $(LFLAGS) $(XLFLAGS) $(OBJS) $(LOUTCMD)
else
compile: $(OPATH) $(SRCS) $(HEADERS)
$(CC) $(CFLAGS) $(XCFLAGS) $(SRCS) $(OUTCMD)
link: compile
@echo "Link performed along with compile"
endif
# The executable is built through recursive makes so that the port's
# port_prebuild / port_postbuild hooks run around the link step.
# NOTE(review): those hook targets are not defined in this file; presumably
# core_portme.mak provides them -- confirm.
$(OUTFILE): $(SRCS) $(HEADERS) Makefile core_portme.mak $(EXTRA_DEPENDS) $(FORCE_REBUILD)
$(MAKE) port_prebuild
$(MAKE) link
$(MAKE) port_postbuild
# Two passes: a performance run producing run1.log and a validation run
# producing run2.log, each through a recursive make with the matching -D flag.
.PHONY: rerun
rerun:
$(MAKE) XCFLAGS="$(XCFLAGS) -DPERFORMANCE_RUN=1" load run1.log
$(MAKE) XCFLAGS="$(XCFLAGS) -DVALIDATION_RUN=1" load run2.log
# Command-line arguments for each run type. The first three values of each set
# match the seed1/seed2/seed3 initialisers in the barebones core_portme.c.
PARAM1=$(PORT_PARAMS) 0x0 0x0 0x66 $(ITERATIONS)
PARAM2=$(PORT_PARAMS) 0x3415 0x3415 0x66 $(ITERATIONS)
PARAM3=$(PORT_PARAMS) 8 8 8 $(ITERATIONS)
# Per-log argument lists, looked up in the recipe below as $($(@)-PARAM).
run1.log-PARAM=$(PARAM1) 7 1 2000
run2.log-PARAM=$(PARAM2) 7 1 2000
run3.log-PARAM=$(PARAM3) 7 1 1200
# Execute the benchmark via $(RUN), capturing stdout into $(OPATH)<log name>.
# NOTE(review): RUN, LOAD and the port_pre*/port_post* hooks are not defined
# in this file; presumably core_portme.mak provides them -- confirm.
run1.log run2.log run3.log: load
$(MAKE) port_prerun
$(RUN) $(OUTFILE) $($(@)-PARAM) > $(OPATH)$@
$(MAKE) port_postrun
.PHONY: gen_pgo_data
gen_pgo_data: run3.log
# Build the executable if needed, then hand it to the port's $(LOAD) command.
.PHONY: load
load: $(OUTFILE)
$(MAKE) port_preload
$(LOAD) $(OUTFILE)
$(MAKE) port_postload
# Remove the executable, run logs and report files, plus whatever the port
# lists in PORT_CLEAN. The $(OPATH) directory itself is left in place.
.PHONY: clean
clean:
rm -f $(OUTFILE) $(OPATH)*.log *.info $(OPATH)index.html $(PORT_CLEAN)
# Phony target; listing it as a prerequisite (see FORCE_REBUILD) forces the
# dependent rule to run.
.PHONY: force_rebuild
force_rebuild:
echo "Forcing Rebuild"
# Verify files against the checksums recorded in coremark.md5.
.PHONY: check
check:
md5sum -c coremark.md5
ifdef ETC
# Targets related to testing and releasing CoreMark. Not part of the general release!
include Makefile.internal
endif

View File

@ -0,0 +1,161 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
#include "core_portme.h"
/*
 * Seed variables for the three run types. Which set is compiled in depends on
 * which of VALIDATION_RUN / PERFORMANCE_RUN / PROFILE_RUN is non-zero; the
 * values match PARAM2 / PARAM1 / PARAM3 in the dist Makefile respectively.
 * If more than one of these macros is non-zero, the definitions below are
 * duplicated and the file does not compile.
 * NOTE(review): presumably read by the benchmark core because this port
 * selects SEED_METHOD SEED_VOLATILE in core_portme.h -- confirm.
 */
#if VALIDATION_RUN
volatile ee_s32 seed1_volatile = 0x3415;
volatile ee_s32 seed2_volatile = 0x3415;
volatile ee_s32 seed3_volatile = 0x66;
#endif
#if PERFORMANCE_RUN
volatile ee_s32 seed1_volatile = 0x0;
volatile ee_s32 seed2_volatile = 0x0;
volatile ee_s32 seed3_volatile = 0x66;
#endif
#if PROFILE_RUN
volatile ee_s32 seed1_volatile = 0x8;
volatile ee_s32 seed2_volatile = 0x8;
volatile ee_s32 seed3_volatile = 0x8;
#endif
/* ITERATIONS is supplied on the compiler command line (-DITERATIONS=...). */
volatile ee_s32 seed4_volatile = ITERATIONS;
volatile ee_s32 seed5_volatile = 0;
/* Porting : Timing functions
How to capture time and convert to seconds must be ported to whatever is
supported by the platform. e.g. Read value from on board RTC, read value from
cpu clock cycles performance counter etc. Sample implementation for standard
time.h and windows.h definitions included.
*/
/*
 * Read a RISC-V CSR and yield its value as a uint32_t.
 * csrname is pasted into the instruction text with the # operator, so it must
 * be a CSR name the assembler recognises (e.g. mcycle), not a C expression.
 * Relies on the GNU statement-expression extension "({ ... })" so that the
 * macro can be used as a value.
 */
#define read_csr(csrname) ({ \
uint32_t __csr_tmp_u32; \
__asm__ volatile ("csrr %0, " #csrname : "=r" (__csr_tmp_u32)); \
__csr_tmp_u32; \
})
/* Function : barebones_clock
        Time source for this port: returns the current value of the mcycle CSR,
        converted to CORETIMETYPE.
*/
CORETIMETYPE
barebones_clock()
{
    CORETIMETYPE now = read_csr(mcycle);
    return now;
}
/* Define : TIMER_RES_DIVIDER
Divider to trade off timer resolution and total time that can be
measured.
Use lower values to increase resolution, but make sure that overflow
does not occur. If there are issues with the return value overflowing,
increase this value.
*/
/* Store the current barebones_clock() reading through the pointer _t. */
#define GETMYTIME(_t) (*_t = barebones_clock())
/* Difference between two readings, evaluated in the operands' own type. */
#define MYTIMEDIFF(fin, ini) ((fin) - (ini))
/* Not referenced by EE_TICKS_PER_SEC in this port (see the commented-out line). */
#define TIMER_RES_DIVIDER 1
#define SAMPLE_TIME_IMPLEMENTATION 1
// #define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER)
/*
 * Fixed conversion factor used by time_in_secs(): one reported second is
 * 1,000,000 barebones_clock() counts, i.e. 1,000,000 mcycle increments.
 */
#define EE_TICKS_PER_SEC (1000 * 1000)
/** Define Host specific (POSIX), or target specific global time variables. */
static CORETIMETYPE start_time_val, stop_time_val;
/* Function : start_time
        Called right before the timed portion of the benchmark begins.

        Records the current barebones_clock() reading in start_time_val.
*/
void
start_time(void)
{
    start_time_val = barebones_clock();
}
/* Function : stop_time
        Called right after the timed portion of the benchmark ends.

        Records the current barebones_clock() reading in stop_time_val.
*/
void
stop_time(void)
{
    stop_time_val = barebones_clock();
}
/* Function : get_time
        Return the number of ticks between the readings stored by start_time()
        and stop_time().

        In this port a tick is one barebones_clock() count, i.e. one mcycle
        increment; use <time_in_secs> to convert the result to seconds. The
        readings are subtracted as stop minus start in their own type
        (CORETIMETYPE) and the result is converted to CORE_TICKS.
*/
CORE_TICKS
get_time(void)
{
    return (CORE_TICKS)(stop_time_val - start_time_val);
}
/* Function : time_in_secs
        Convert a tick count, as returned by <get_time>, to seconds.

        Both operands are converted to <secs_ret> before dividing, so the
        precision of the result is whatever that type provides (it exists to
        accommodate systems with no floating point support). The scale factor
        is EE_TICKS_PER_SEC.
*/
secs_ret
time_in_secs(CORE_TICKS ticks)
{
    const secs_ret ticks_per_sec = (secs_ret)EE_TICKS_PER_SEC;
    return (secs_ret)ticks / ticks_per_sec;
}
/* Definition of the variable declared extern in core_portme.h; this port sets it to 1. */
ee_u32 default_num_contexts = 1;
/* Function : portable_init
        Target specific initialization code.

        Prints a start-up message, then checks two porting assumptions and
        reports each violation through ee_printf: ee_ptr_int must be the size
        of a pointer, and ee_u32 must be 4 bytes wide. A failed check only
        produces a message; initialization continues regardless. Finally marks
        the port as initialized by setting portable_id to 1.

        argc and argv are not used by this port.
*/
void
portable_init(core_portable *p, int *argc, char *argv[])
{
    const int ptr_int_fits = (sizeof(ee_u8 *) == sizeof(ee_ptr_int));
    const int u32_is_4b    = (4 == sizeof(ee_u32));

    ee_printf("IO setup.\n");

    if (!ptr_int_fits)
    {
        ee_printf(
            "ERROR! Please define ee_ptr_int to a type that holds a "
            "pointer!\n");
    }

    if (!u32_is_4b)
    {
        ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
    }

    p->portable_id = 1;
}
/* Function : portable_fini
        Target specific final code.

        Clears portable_id, undoing the marker set by <portable_init>.
*/
void
portable_fini(core_portable *p)
{
    const ee_u8 not_initialized = 0;
    p->portable_id = not_initialized;
}

View File

@ -0,0 +1,212 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
/* Topic : Description
This file contains configuration constants required to execute on
different platforms
*/
#ifndef CORE_PORTME_H
#define CORE_PORTME_H
/************************/
/* Data types and settings */
/************************/
/* Configuration : HAS_FLOAT
Define to 1 if the platform supports floating point.
*/
#ifndef HAS_FLOAT
#define HAS_FLOAT 1
#endif
/* Configuration : HAS_TIME_H
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
*/
#ifndef HAS_TIME_H
#define HAS_TIME_H 1
#endif
/* Configuration : USE_CLOCK
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
*/
#ifndef USE_CLOCK
#define USE_CLOCK 1
#endif
/* Configuration : HAS_STDIO
Define to 1 if the platform has stdio.h.
*/
#ifndef HAS_STDIO
#define HAS_STDIO 0
#endif
/* Configuration : HAS_PRINTF
Define to 1 if the platform has stdio.h and implements the printf
function.
*/
#ifndef HAS_PRINTF
#define HAS_PRINTF 0
#endif
/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
Initialize these strings per platform
*/
#ifndef COMPILER_VERSION
#ifdef __GNUC__
#define COMPILER_VERSION "GCC"__VERSION__
#else
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
#endif
#endif
#ifndef COMPILER_FLAGS
#define COMPILER_FLAGS \
FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
#endif
#ifndef MEM_LOCATION
#define MEM_LOCATION "STACK"
#endif
/* Data Types :
        To avoid compiler issues, define the data types that need to be used for
        8b, 16b and 32b in <core_portme.h>.

        *Important* :
        ee_ptr_int needs to be the data type used to hold pointers, otherwise
        coremark may fail!!! It is a 32-bit type here, which suits a target
        with 32-bit pointers; <portable_init> reports a mismatch at run time.

        Note that ee_f32 is a double in this port.
*/
#include <stdint.h>
#include <stddef.h>
typedef signed short   ee_s16;
typedef unsigned short ee_u16;
typedef signed int     ee_s32;
typedef double         ee_f32;
typedef unsigned char  ee_u8;
typedef unsigned int   ee_u32;
typedef ee_u32         ee_ptr_int;
typedef size_t         ee_size_t;
/* <stddef.h> already provides NULL; only define it if that is somehow not the
   case, rather than redefining a macro owned by the standard library. */
#ifndef NULL
#define NULL ((void *)0)
#endif
/* align_mem :
        This macro is used to align an offset to point to a 32b value. It is
        used in the Matrix algorithm to initialize the input memory blocks.

        Rounds x up to the next multiple of 4; a value that is already a
        multiple of 4 is returned unchanged.
*/
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3))
/* Configuration : CORE_TICKS
Define type of return from the timing functions.
Both the raw clock reading (CORETIMETYPE) and the elapsed tick count
(CORE_TICKS) are ee_u32 in this port.
*/
#define CORETIMETYPE ee_u32
typedef ee_u32 CORE_TICKS;
/* Configuration : SEED_METHOD
Defines method to get seed values that cannot be computed at compile
time.
Valid values :
SEED_ARG - from command line.
SEED_FUNC - from a system function.
SEED_VOLATILE - from volatile variables.
*/
#ifndef SEED_METHOD
#define SEED_METHOD SEED_VOLATILE
#endif
/* Configuration : MEM_METHOD
Defines method to get a block of memory.
Valid values :
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
MEM_STATIC - to use a static memory array.
MEM_STACK - to allocate the data block on the stack (NYI).
*/
#ifndef MEM_METHOD
#define MEM_METHOD MEM_STACK
#endif
/* Configuration : MULTITHREAD
Define for parallel execution
Valid values :
1 - only one context (default).
N>1 - will execute N copies in parallel.
Note :
If this flag is defined to more than 1, an implementation for launching
parallel contexts must be defined.
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK>
to enable them.
It is valid to have a different implementation of <core_start_parallel>
and <core_end_parallel> in <core_portme.c>, to fit a particular architecture.
*/
/* The three USE_* defaults below are only set when MULTITHREAD was not
   already defined by the build. */
#ifndef MULTITHREAD
#define MULTITHREAD 1
#define USE_PTHREAD 0
#define USE_FORK 0
#define USE_SOCKET 0
#endif
/* Configuration : MAIN_HAS_NOARGC
Needed if platform does not support getting arguments to main.
Valid values :
0 - argc/argv to main is supported
1 - argc/argv to main is not supported
Note :
This flag only matters if MULTITHREAD has been defined to a value
greater than 1.
*/
#ifndef MAIN_HAS_NOARGC
#define MAIN_HAS_NOARGC 0
#endif
/* Configuration : MAIN_HAS_NORETURN
Needed if platform does not support returning a value from main.
Valid values :
0 - main returns an int, and return value will be 0.
1 - platform does not support returning a value from main
*/
#ifndef MAIN_HAS_NORETURN
#define MAIN_HAS_NORETURN 0
#endif
/* Variable : default_num_contexts
Not used for this simple port, must contain the value 1.
*/
extern ee_u32 default_num_contexts;
/* Per-port state handed to portable_init() / portable_fini(). */
typedef struct CORE_PORTABLE_S
{
ee_u8 portable_id; /* set to 1 by portable_init(), back to 0 by portable_fini() */
} core_portable;
/* target specific init/fini */
void portable_init(core_portable *p, int *argc, char *argv[]);
void portable_fini(core_portable *p);
/* If the build did not choose a run type explicitly, derive one from
   TOTAL_DATA_SIZE: 1200 selects PROFILE_RUN, 2000 selects PERFORMANCE_RUN,
   and any other size selects VALIDATION_RUN. */
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) \
&& !defined(VALIDATION_RUN)
#if (TOTAL_DATA_SIZE == 1200)
#define PROFILE_RUN 1
#elif (TOTAL_DATA_SIZE == 2000)
#define PERFORMANCE_RUN 1
#else
#define VALIDATION_RUN 1
#endif
#endif
/* printf-style formatted output routine provided by the port. */
int ee_printf(const char *fmt, ...);
#endif /* CORE_PORTME_H */

83
test/coremark/dist/barebones/core_portme.mak vendored Executable file
View File

@ -0,0 +1,83 @@
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Original Author: Shay Gal-on
#File : core_portme.mak
# Use this flag to define how to get an executable (e.g -o)
OUTFLAG= -o
CC = riscv32-unknown-elf-gcc
LD = riscv32-unknown-elf-gcc
AS = riscv32-unknown-elf-gcc
# Flag : CFLAGS
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
PORT_CFLAGS = -O3 -g
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
#Flag : LFLAGS_END
# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
# Note : On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
SEPARATE_COMPILE=1
# Flag : SEPARATE_COMPILE
# You must also define below how to create an object file, and how to link.
OBJOUT = -o
LFLAGS = -T ../../common/memmap.ld
ASFLAGS = -c
OFLAG = -o
COUT = -c
LFLAGS_END =
# Flag : PORT_SRCS
# Port specific source files can be added here
# You may also need cvt.c if the fcvt functions are not provided as intrinsics by your compiler!
PORT_SRCS = $(PORT_DIR)/core_portme.c $(PORT_DIR)/ee_printf.c $(PORT_DIR)/init.S
# Object list: every port source with its .c / .S suffix stripped and $(OEXT) appended.
PORT_OBJS = $(addsuffix $(OEXT),$(patsubst %.c,%,$(patsubst %.S,%,$(PORT_SRCS))))
# Search $(PORT_DIR) for C and assembly prerequisites.
vpath %.c $(PORT_DIR)
vpath %.s $(PORT_DIR)
vpath %.S $(PORT_DIR)
# Flag : LOAD
# For a simple port, we assume self hosted compile and run, no load needed.
# Flag : RUN
# For a simple port, we assume self hosted compile and run, simple invocation of the executable
# Placeholder commands: this port has no automatic load/run step, so each
# variable only prints a reminder naming the variable the user must override.
LOAD = echo "Please set LOAD to the process of loading the executable to the flash"
RUN = echo "Please set RUN to the process of running the executable (e.g. via jtag, or board reset)"
OEXT = .o
EXE = .elf
$(OPATH)$(PORT_DIR)/%$(OEXT) : %.c
$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
$(OPATH)%$(OEXT) : %.c
$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
$(OPATH)$(PORT_DIR)/%$(OEXT) : %.s
$(AS) $(ASFLAGS) $< $(OBJOUT) $@
$(OPATH)$(PORT_DIR)/%$(OEXT) : %.S
$(AS) $(ASFLAGS) $< $(OBJOUT) $@
# Target : port_pre% and port_post%
# For the purpose of this simple port, no pre or post steps needed.
.PHONY : port_prebuild port_postbuild port_prerun port_postrun port_preload port_postload
port_pre% port_post% :
MKDIR = mkdir -p

127
test/coremark/dist/barebones/cvt.c vendored Normal file
View File

@ -0,0 +1,127 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <math.h>
/* Size in bytes that cvt() assumes for every conversion buffer it is given. */
#define CVTBUFSIZE 80
/* Shared static result buffer used by ecvt() and fcvt(); each call overwrites
   the previous result. */
static char CVTBUF[CVTBUFSIZE];
/* Function: cvt
Convert a double into a string of decimal digits (no sign character and no
decimal point are stored in the string).
Parameters:
arg - value to convert; the magnitude is converted and the sign is
reported through *sign.
ndigits - number of digits requested; clamped to 0 .. CVTBUFSIZE-2.
decpt - out: position of the decimal point relative to the first digit
(negative when the value is below 0.1 and leading zeros were skipped).
sign - out: 1 if arg was negative, 0 otherwise.
buf - output buffer; it is addressed up to buf[CVTBUFSIZE], so it must
provide at least CVTBUFSIZE bytes.
eflag - nonzero: ndigits counts digits from the first digit (ecvt style);
zero: ndigits counts digits after the decimal point (fcvt style).
Returns:
buf, holding a NUL-terminated digit string rounded at the last digit.
NOTE(review): the integer-part loop stores digits with *--p1 and has no
lower bound, so an integer part with more than CVTBUFSIZE digits would write
in front of buf - confirm callers never pass such values.
*/
static char *
cvt(double arg, int ndigits, int *decpt, int *sign, char *buf, int eflag)
{
int r2;
double fi, fj;
char * p, *p1;
/* Clamp the requested digit count so it fits inside the buffer. */
if (ndigits < 0)
ndigits = 0;
if (ndigits >= CVTBUFSIZE - 1)
ndigits = CVTBUFSIZE - 2;
r2 = 0;
*sign = 0;
p = &buf[0];
/* Convert the magnitude only; the sign is reported separately. */
if (arg < 0)
{
*sign = 1;
arg = -arg;
}
/* Split into integer part (fi) and fractional part (arg). */
arg = modf(arg, &fi);
p1 = &buf[CVTBUFSIZE];
if (fi != 0)
{
/* Produce the integer digits least-significant first at the end of the
   buffer, counting them in r2, then move them to the front. */
p1 = &buf[CVTBUFSIZE];
while (fi != 0)
{
fj = modf(fi / 10, &fi);
/* the .03 bias presumably guards against the remainder landing just
   below a whole digit - TODO confirm */
*--p1 = (int)((fj + .03) * 10) + '0';
r2++;
}
while (p1 < &buf[CVTBUFSIZE])
*p++ = *p1++;
}
else if (arg > 0)
{
/* No integer part: scale the fraction up past its leading zeros, making
   r2 negative by the number of zeros skipped. */
while ((fj = arg * 10) < 1)
{
arg = fj;
r2--;
}
}
/* p1 marks the position of the digit at which rounding is applied. */
p1 = &buf[ndigits];
if (eflag == 0)
p1 += r2;
*decpt = r2;
/* Rounding position lies in front of the buffer: no digits to report. */
if (p1 < &buf[0])
{
buf[0] = '\0';
return buf;
}
/* Generate fractional digits up to and including the rounding position. */
while (p <= p1 && p < &buf[CVTBUFSIZE])
{
arg *= 10;
arg = modf(arg, &fj);
*p++ = (int)fj + '0';
}
/* Rounding position lies beyond the buffer: truncate without rounding. */
if (p1 >= &buf[CVTBUFSIZE])
{
buf[CVTBUFSIZE - 1] = '\0';
return buf;
}
/* Round by adding 5 to the digit at p1 and propagating carries to the left;
   the string is then cut at p (the rounding position). */
p = p1;
*p1 += 5;
while (*p1 > '9')
{
*p1 = '0';
if (p1 > buf)
++*--p1;
else
{
/* Carry out of the first digit: it becomes '1' and the decimal point
   moves one place to the right. */
*p1 = '1';
(*decpt)++;
if (eflag == 0)
{
if (p > buf)
*p = '0';
p++;
}
}
}
*p = '\0';
return buf;
}
/* Function: ecvt
   Convert arg to ndigits decimal digits in the shared static buffer
   (ndigits counts from the first digit). The result is overwritten by the
   next ecvt()/fcvt() call.
*/
char *
ecvt(double arg, int ndigits, int *decpt, int *sign)
{
    char *const text = cvt(arg, ndigits, decpt, sign, CVTBUF, 1);

    return text;
}
/* Function: ecvtbuf
   Same conversion as ecvt(), but writes into the caller-supplied buf, which
   must provide at least CVTBUFSIZE bytes.
*/
char *
ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
{
    char *const text = cvt(arg, ndigits, decpt, sign, buf, 1);

    return text;
}
/* Function: fcvt
   Convert arg to decimal digits in the shared static buffer, keeping
   ndigits digits after the decimal point. The result is overwritten by the
   next ecvt()/fcvt() call.
*/
char *
fcvt(double arg, int ndigits, int *decpt, int *sign)
{
    char *const text = cvt(arg, ndigits, decpt, sign, CVTBUF, 0);

    return text;
}
/* Function: fcvtbuf
   Same conversion as fcvt(), but writes into the caller-supplied buf, which
   must provide at least CVTBUFSIZE bytes.
*/
char *
fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
{
    char *const text = cvt(arg, ndigits, decpt, sign, buf, 0);

    return text;
}

703
test/coremark/dist/barebones/ee_printf.c vendored Normal file
View File

@ -0,0 +1,703 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <coremark.h>
#include <stdarg.h>
/* Bit flags describing how a conversion is to be formatted. */
#define ZEROPAD (1 << 0) /* Pad with zero */
#define SIGN (1 << 1) /* Unsigned/signed long */
#define PLUS (1 << 2) /* Show plus */
#define SPACE (1 << 3) /* Spacer */
#define LEFT (1 << 4) /* Left justified */
#define HEX_PREP (1 << 5) /* 0x */
#define UPPERCASE (1 << 6) /* 'ABCDEF' */
/* True when c is an ASCII decimal digit; note that c is evaluated twice. */
#define is_digit(c) ((c) >= '0' && (c) <= '9')
/* Digit characters indexed by digit value (36 entries each), in lower and
   upper case. */
static char * digits = "0123456789abcdefghijklmnopqrstuvwxyz";
static char * upper_digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
static ee_size_t strnlen(const char *s, ee_size_t count);
/* Function: strnlen
   Length of the string s, looking at no more than count characters.
   Parameters:
     s     - string to measure.
     count - maximum number of characters to examine.
   Returns:
     The number of characters before the terminating NUL, or count if no NUL
     was found within the first count characters.
   Note:
     The limit is tested before each character is read, so s[count] is never
     accessed and a count of 0 reads nothing.
*/
static ee_size_t
strnlen(const char *s, ee_size_t count)
{
    const char *sc = s;

    while (count-- && *sc != '\0')
        ++sc;
    return sc - s;
}
/* Function: skip_atoi
   Parse the run of decimal digits at *s and advance *s past it.
   Returns:
     The parsed value, or 0 when *s does not start with a digit.
*/
static int
skip_atoi(const char **s)
{
    int value = 0;

    for (; is_digit(**s); ++*s)
        value = value * 10 + **s - '0';
    return value;
}
/* Function: number
   Format an integer into str according to printf-style options.
   Parameters:
     str       - output position; characters are appended here (no bounds
                 checking, no terminating NUL is written).
     num       - value to format; treated as signed only when SIGN is set in
                 type, otherwise its bit pattern is formatted as unsigned long.
     base      - numeric base, 2..36.
     size      - minimum field width (values <= 0 mean no padding).
     precision - minimum number of digits; shorter numbers get leading '0's.
     type      - combination of ZEROPAD, SIGN, PLUS, SPACE, LEFT, HEX_PREP and
                 UPPERCASE.
   Returns:
     Pointer just past the last character written, or 0 if base is out of
     range.
   Note:
     The magnitude is kept in an unsigned long so that negating the most
     negative long is well defined.
*/
static char *
number(char *str, long num, int base, int size, int precision, int type)
{
    char          c, sign, tmp[66];
    char *        dig = digits;
    unsigned long mag;
    int           i;

    if (type & UPPERCASE)
        dig = upper_digits;
    /* left justification overrides zero padding */
    if (type & LEFT)
        type &= ~ZEROPAD;
    if (base < 2 || base > 36)
        return 0;

    c    = (type & ZEROPAD) ? '0' : ' ';
    sign = 0;
    mag  = (unsigned long)num;
    if (type & SIGN)
    {
        if (num < 0)
        {
            sign = '-';
            mag  = 0UL - mag; /* unsigned negate: defined even for LONG_MIN */
            size--;
        }
        else if (type & PLUS)
        {
            sign = '+';
            size--;
        }
        else if (type & SPACE)
        {
            sign = ' ';
            size--;
        }
    }
    /* reserve room for the "0x" / "0" prefix */
    if (type & HEX_PREP)
    {
        if (base == 16)
            size -= 2;
        else if (base == 8)
            size--;
    }

    /* generate the digits least-significant first into tmp */
    i = 0;
    if (mag == 0)
        tmp[i++] = '0';
    else
    {
        while (mag != 0)
        {
            tmp[i++] = dig[mag % (unsigned)base];
            mag      = mag / (unsigned)base;
        }
    }

    if (i > precision)
        precision = i;
    size -= precision;

    /* leading spaces when right justified without zero padding */
    if (!(type & (ZEROPAD | LEFT)))
        while (size-- > 0)
            *str++ = ' ';
    if (sign)
        *str++ = sign;
    if (type & HEX_PREP)
    {
        if (base == 8)
            *str++ = '0';
        else if (base == 16)
        {
            *str++ = '0';
            *str++ = digits[33]; /* always a lower case 'x' */
        }
    }
    /* zero (or space) padding between prefix and digits */
    if (!(type & LEFT))
        while (size-- > 0)
            *str++ = c;
    /* precision padding, then the digits most-significant first */
    while (i < precision--)
        *str++ = '0';
    while (i-- > 0)
        *str++ = tmp[i];
    /* trailing spaces when left justified */
    while (size-- > 0)
        *str++ = ' ';
    return str;
}
/* Function: eaddr
   Format the six bytes at addr as colon-separated two-digit hex pairs
   ("xx:xx:xx:xx:xx:xx") and append them to str.
   Parameters:
     str       - output position.
     addr      - six bytes to print.
     size      - minimum field width; padded with spaces.
     precision - not used.
     type      - UPPERCASE selects upper case hex digits, LEFT selects left
                 justification.
   Returns:
     Pointer just past the last character written.
*/
static char *
eaddr(char *str, unsigned char *addr, int size, int precision, int type)
{
    char        text[24];
    const char *table = (type & UPPERCASE) ? upper_digits : digits;
    int         used  = 0;
    int         octet, pad, k;

    for (octet = 0; octet < 6; octet++)
    {
        if (octet > 0)
            text[used++] = ':';
        text[used++] = table[addr[octet] >> 4];
        text[used++] = table[addr[octet] & 0x0F];
    }

    /* spaces needed to reach the field width (none if <= 0) */
    pad = size - used;
    if ((type & LEFT) == 0)
        for (; pad > 0; pad--)
            *str++ = ' ';
    for (k = 0; k < used; k++)
        *str++ = text[k];
    for (; pad > 0; pad--)
        *str++ = ' ';
    return str;
}
/* Function: iaddr
   Format the four bytes at addr as dot-separated decimal numbers
   ("d.d.d.d") and append them to str.
   Parameters:
     str       - output position.
     addr      - four bytes to print.
     size      - minimum field width; padded with spaces.
     precision - not used.
     type      - LEFT selects left justification.
   Returns:
     Pointer just past the last character written.
*/
static char *
iaddr(char *str, unsigned char *addr, int size, int precision, int type)
{
    char text[24];
    int  used = 0;
    int  octet, value, pad, k;

    for (octet = 0; octet < 4; octet++)
    {
        if (octet > 0)
            text[used++] = '.';
        value = addr[octet];
        /* hundreds and tens are only printed when significant */
        if (value >= 100)
        {
            text[used++] = digits[value / 100];
            value %= 100;
            text[used++] = digits[value / 10];
            value %= 10;
        }
        else if (value >= 10)
        {
            text[used++] = digits[value / 10];
            value %= 10;
        }
        text[used++] = digits[value];
    }

    /* spaces needed to reach the field width (none if <= 0) */
    pad = size - used;
    if ((type & LEFT) == 0)
        for (; pad > 0; pad--)
            *str++ = ' ';
    for (k = 0; k < used; k++)
        *str++ = text[k];
    for (; pad > 0; pad--)
        *str++ = ' ';
    return str;
}
#if HAS_FLOAT
/* Digit-string conversion helpers that write into a caller-supplied buffer;
   both report the decimal point position and sign through decpt and sign. */
char * ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf);
char * fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf);
static void ee_bufcpy(char *d, char *s, int count);
/* Function: ee_bufcpy
   Copy count characters from ps to pd (no terminating NUL is added).
   Parameters:
     pd    - destination.
     ps    - source.
     count - number of characters to copy; zero or negative copies nothing.
   Note:
     The loop is driven by the count rather than by comparing against an end
     pointer, so a negative count cannot run away through memory.
*/
static void
ee_bufcpy(char *pd, char *ps, int count)
{
    while (count-- > 0)
        *pd++ = *ps++;
}
/* Function: parse_float
Render value as text into buffer and NUL-terminate it.
Parameters:
value - number to render.
buffer - output buffer (no bounds checking is performed).
fmt - 'e', 'E', 'f', 'g' or 'G'; any other character produces an
empty string.
precision - number of digits after the decimal point for 'e'/'f'; for
'g' it is passed to ecvtbuf as the digit count and then adjusted.
Note:
The local variable digits hides the file-scope digit table of the same
name inside this function.
NOTE(review): on the 'g' path precision can become -1 (precision 0 with
exponent style selected) and is then used as a copy length and buffer
offset in the 'e' branch - confirm whether callers can reach this.
*/
static void
parse_float(double value, char *buffer, char fmt, int precision)
{
int decpt, sign, exp, pos;
char *digits = NULL;
char cvtbuf[80];
int capexp = 0;
int magnitude;
/* Upper case variants: remember to print 'E' and continue in lower case. */
if (fmt == 'G' || fmt == 'E')
{
capexp = 1;
fmt += 'a' - 'A';
}
/* 'g': run a trial conversion to learn the decimal exponent, then pick
   exponent or fixed notation and adjust the precision accordingly. */
if (fmt == 'g')
{
digits = ecvtbuf(value, precision, &decpt, &sign, cvtbuf);
magnitude = decpt - 1;
if (magnitude < -4 || magnitude > precision - 1)
{
fmt = 'e';
precision -= 1;
}
else
{
fmt = 'f';
precision -= decpt;
}
}
if (fmt == 'e')
{
/* Exponent notation: one leading digit, optional '.', precision more
   digits, then e/E, a sign and a three-digit exponent. */
digits = ecvtbuf(value, precision + 1, &decpt, &sign, cvtbuf);
if (sign)
*buffer++ = '-';
*buffer++ = *digits;
if (precision > 0)
*buffer++ = '.';
ee_bufcpy(buffer, digits + 1, precision);
buffer += precision;
*buffer++ = capexp ? 'E' : 'e';
if (decpt == 0)
{
if (value == 0.0)
exp = 0;
else
exp = -1;
}
else
exp = decpt - 1;
if (exp < 0)
{
*buffer++ = '-';
exp = -exp;
}
else
*buffer++ = '+';
/* Exponent digits are written least-significant first into fixed slots. */
buffer[2] = (exp % 10) + '0';
exp = exp / 10;
buffer[1] = (exp % 10) + '0';
exp = exp / 10;
buffer[0] = (exp % 10) + '0';
buffer += 3;
}
else if (fmt == 'f')
{
/* Fixed notation built from the digit string and decimal point position. */
digits = fcvtbuf(value, precision, &decpt, &sign, cvtbuf);
if (sign)
*buffer++ = '-';
if (*digits)
{
if (decpt <= 0)
{
/* Value below 1: emit "0." followed by -decpt zeros, then the digits. */
*buffer++ = '0';
*buffer++ = '.';
for (pos = 0; pos < -decpt; pos++)
*buffer++ = '0';
while (*digits)
*buffer++ = *digits++;
}
else
{
/* Insert the '.' after decpt digits while copying. */
pos = 0;
while (*digits)
{
if (pos++ == decpt)
*buffer++ = '.';
*buffer++ = *digits++;
}
}
}
else
{
/* Empty digit string: print zero with the requested number of decimals. */
*buffer++ = '0';
if (precision > 0)
{
*buffer++ = '.';
for (pos = 0; pos < precision; pos++)
*buffer++ = '0';
}
}
}
*buffer = '\0';
}
/* Function: decimal_point
   Make sure the formatted number in buffer contains a decimal point.
   Operation:
     If a '.' is already present before any exponent, nothing is changed.
     Otherwise a '.' is inserted in front of the exponent marker ('e' or
     'E'), or appended when there is no exponent.
   Parameters:
     buffer - NUL-terminated number text; must have room for one more
              character.
*/
static void
decimal_point(char *buffer)
{
    while (*buffer)
    {
        if (*buffer == '.')
            return;
        if (*buffer == 'e' || *buffer == 'E')
            break;
        buffer++;
    }
    if (*buffer)
    {
        /* Shift the exponent suffix (marker included, NUL included) one
           place to the right, then drop the '.' into the gap. */
        int n = strnlen(buffer, 256);
        while (n >= 0)
        {
            buffer[n + 1] = buffer[n];
            n--;
        }
        *buffer = '.';
    }
    else
    {
        *buffer++ = '.';
        *buffer   = '\0';
    }
}
/* Function: cropzeros
   Remove trailing zeros from the fractional part of the number in buffer.
   Operation:
     Zeros at the end of the fraction are dropped; if the whole fraction is
     dropped the '.' goes as well. An exponent suffix ("e+003"), when
     present, is kept and moved up behind the remaining digits.
   Parameters:
     buffer - NUL-terminated number text, modified in place. Text without a
              '.' is left unchanged.
*/
static void
cropzeros(char *buffer)
{
    char *stop;

    while (*buffer && *buffer != '.')
        buffer++;
    if (*buffer++)
    {
        /* stop: start of the exponent suffix, or the terminating NUL */
        while (*buffer && *buffer != 'e' && *buffer != 'E')
            buffer++;
        stop = buffer--;
        while (*buffer == '0')
            buffer--;
        if (*buffer == '.')
            buffer--;
        /* close the gap by copying the suffix, NUL included */
        while ((*++buffer = *stop++) != '\0')
            ;
    }
}
/* Function: flt
   Format a floating point number into str with sign, alignment and padding.
   Parameters:
     str       - output position (no bounds checking, no NUL is written).
     num       - value to format.
     size      - minimum field width.
     precision - digits after the decimal point; negative selects the
                 default of 6.
     fmt       - conversion character handed to parse_float.
     flags     - combination of ZEROPAD, SIGN, PLUS, SPACE, LEFT, HEX_PREP.
   Returns:
     Pointer just past the last character written.
*/
static char *
flt(char *str, double num, int size, int precision, char fmt, int flags)
{
    char text[80];
    char fill;
    char sign_ch = 0;
    int  len, k;

    /* left justification rules out zero padding */
    if (flags & LEFT)
        flags &= ~ZEROPAD;
    fill = ' ';
    if (flags & ZEROPAD)
        fill = '0';

    /* pick the sign character; it takes one column of the field */
    if (flags & SIGN)
    {
        if (num < 0.0)
        {
            num     = -num;
            sign_ch = '-';
        }
        else if (flags & PLUS)
            sign_ch = '+';
        else if (flags & SPACE)
            sign_ch = ' ';
        if (sign_ch)
            size--;
    }

    if (precision < 0)
        precision = 6; /* default precision */

    /* render the digits, then apply the '#' and 'g' post-processing */
    parse_float(num, text, fmt, precision);
    if ((flags & HEX_PREP) && precision == 0)
        decimal_point(text);
    if (fmt == 'g' && !(flags & HEX_PREP))
        cropzeros(text);
    len = strnlen(text, 256);

    /* whatever width remains is emitted by exactly one of the pad loops */
    size -= len;
    if ((flags & (ZEROPAD | LEFT)) == 0)
        for (; size > 0; size--)
            *str++ = ' ';
    if (sign_ch)
        *str++ = sign_ch;
    if ((flags & LEFT) == 0)
        for (; size > 0; size--)
            *str++ = fill;
    for (k = 0; k < len; k++)
        *str++ = text[k];
    for (; size > 0; size--)
        *str++ = ' ';
    return str;
}
#endif
/* Function: ee_vsprintf
Minimal vsprintf: expand fmt with args into buf and NUL-terminate it.
Supported syntax:
flags - any of '-', '+', ' ', '#', '0'
width - decimal number or '*' (negative '*' width means left justify)
precision - '.' followed by a decimal number or '*'
qualifier - 'l' or 'L' ('h' is not parsed)
conversion - c s p a A o x X d i u, plus f when HAS_FLOAT is set;
"%%" produces a single '%', any other character is copied through
behind a '%'.
Parameters:
buf - output buffer; no bounds checking is performed.
fmt - format string.
args - arguments consumed by the conversions.
Returns:
Number of characters stored in buf, not counting the terminating NUL.
*/
static int
ee_vsprintf(char *buf, const char *fmt, va_list args)
{
int len;
unsigned long num;
int i, base;
char * str;
char * s;
int flags; // Flags to number()
int field_width; // Width of output field
int precision; // Min. # of digits for integers; max number of chars for
// from string
int qualifier; // 'h', 'l', or 'L' for integer fields
for (str = buf; *fmt; fmt++)
{
// Ordinary characters are copied straight to the output
if (*fmt != '%')
{
*str++ = *fmt;
continue;
}
// Process flags
flags = 0;
repeat:
fmt++; // This also skips first '%'
switch (*fmt)
{
case '-':
flags |= LEFT;
goto repeat;
case '+':
flags |= PLUS;
goto repeat;
case ' ':
flags |= SPACE;
goto repeat;
case '#':
flags |= HEX_PREP;
goto repeat;
case '0':
flags |= ZEROPAD;
goto repeat;
}
// Get field width
field_width = -1;
if (is_digit(*fmt))
field_width = skip_atoi(&fmt);
else if (*fmt == '*')
{
fmt++;
field_width = va_arg(args, int);
// A negative '*' width requests left justification
if (field_width < 0)
{
field_width = -field_width;
flags |= LEFT;
}
}
// Get the precision
precision = -1;
if (*fmt == '.')
{
++fmt;
if (is_digit(*fmt))
precision = skip_atoi(&fmt);
else if (*fmt == '*')
{
++fmt;
precision = va_arg(args, int);
}
// A '.' without a usable value means precision 0
if (precision < 0)
precision = 0;
}
// Get the conversion qualifier
qualifier = -1;
if (*fmt == 'l' || *fmt == 'L')
{
qualifier = *fmt;
fmt++;
}
// Default base
base = 10;
switch (*fmt)
{
case 'c':
// Single character, padded with spaces to the field width
if (!(flags & LEFT))
while (--field_width > 0)
*str++ = ' ';
*str++ = (unsigned char)va_arg(args, int);
while (--field_width > 0)
*str++ = ' ';
continue;
case 's':
// String; precision limits the number of characters copied
// (the default of -1 becomes a very large limit)
s = va_arg(args, char *);
if (!s)
s = "<NULL>";
len = strnlen(s, precision);
if (!(flags & LEFT))
while (len < field_width--)
*str++ = ' ';
for (i = 0; i < len; ++i)
*str++ = *s++;
while (len < field_width--)
*str++ = ' ';
continue;
case 'p':
// Pointer: zero padded hex, two digits per byte unless a width was given
if (field_width == -1)
{
field_width = 2 * sizeof(void *);
flags |= ZEROPAD;
}
str = number(str,
(unsigned long)va_arg(args, void *),
16,
field_width,
precision,
flags);
continue;
case 'A':
flags |= UPPERCASE;
// intentional fall through into 'a'
case 'a':
// Address bytes: "%la" prints six hex bytes, "%a" four decimal bytes
if (qualifier == 'l')
str = eaddr(str,
va_arg(args, unsigned char *),
field_width,
precision,
flags);
else
str = iaddr(str,
va_arg(args, unsigned char *),
field_width,
precision,
flags);
continue;
// Integer number formats - set up the flags and "break"
case 'o':
base = 8;
break;
case 'X':
flags |= UPPERCASE;
// intentional fall through into 'x'
case 'x':
base = 16;
break;
case 'd':
case 'i':
flags |= SIGN;
// intentional fall through into 'u'
case 'u':
break;
#if HAS_FLOAT
case 'f':
str = flt(str,
va_arg(args, double),
field_width,
precision,
*fmt,
flags | SIGN);
continue;
#endif
default:
// Unknown conversion: echo it behind a '%'; "%%" yields one '%'.
// At the end of the string step back so the outer loop sees the NUL.
if (*fmt != '%')
*str++ = '%';
if (*fmt)
*str++ = *fmt;
else
--fmt;
continue;
}
// Fetch the integer argument with the width selected by the qualifier
if (qualifier == 'l')
num = va_arg(args, unsigned long);
else if (flags & SIGN)
num = va_arg(args, int);
else
num = va_arg(args, unsigned int);
str = number(str, num, base, field_width, precision, flags);
}
*str = '\0';
return str - buf;
}
#include "tb_cxxrtl_io.h"
/* Function: uart_send_char
   Emit one character of benchmark output.
   In this port the character is handed to tb_putc(). On a platform with a
   real UART the usual sequence would be:
     while (*UART_CONTROL_ADDRESS != UART_READY);
     *UART_DATA_ADDRESS = c;
     while (*UART_CONTROL_ADDRESS != UART_READY);
   i.e. wait until the UART is ready, write the character, wait until it is
   done. Check the UART sample code or the board documentation of your
   platform for the details.
*/
void
uart_send_char(char c)
{
    tb_putc(c);
}
/* Function: ee_printf
   printf replacement: formats into a local 1024 byte buffer with
   ee_vsprintf and sends the result character by character through
   uart_send_char.
   Returns:
     The number of characters sent.
*/
int
ee_printf(const char *fmt, ...)
{
    char    text[1024];
    va_list ap;
    int     sent;

    va_start(ap, fmt);
    ee_vsprintf(text, fmt, ap);
    va_end(ap);

    for (sent = 0; text[sent]; sent++)
        uart_send_char(text[sent]);
    return sent;
}

1
test/coremark/dist/barebones/init.S vendored Symbolic link
View File

@ -0,0 +1 @@
../../../common/init.S

View File

@ -0,0 +1 @@
../../../common/tb_cxxrtl_io.h

595
test/coremark/dist/core_list_join.c vendored Normal file
View File

@ -0,0 +1,595 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
/*
Topic: Description
Benchmark using a linked list.
Linked list is a common data structure used in many applications.
For our purposes, this will exercise the memory units of the processor.
In particular, usage of the list pointers to find and alter data.
We are not using Malloc since some platforms do not support this
library.
Instead, the memory block being passed in is used to create a list,
and the benchmark takes care not to add more items than can be
accommodated by the memory block. The porting layer will make sure
that we have a valid memory block.
All operations are done in place, without using any extra memory.
The list itself contains list pointers and pointers to data items.
Data items contain the following:
idx - An index that captures the initial order of the list.
data - Variable data initialized based on the input parameters. The 16b
are divided as follows:
o Upper 8b are backup of original data.
o Bit 7 indicates if the lower 7 bits are to be used as is or calculated.
o Bits 0-2 indicate type of operation to perform to get a 7b value.
o Bits 3-6 provide input for the operation.
*/
/* local functions */
/* Search, reversal and remove/undo-remove primitives on the singly linked
   list. */
list_head *core_list_find(list_head *list, list_data *info);
list_head *core_list_reverse(list_head *list);
list_head *core_list_remove(list_head *item);
list_head *core_list_undo_remove(list_head *item_removed,
list_head *item_modified);
/* Takes the next free header and data cell from the two regions (advancing
   both cursors) and links the new item behind insert_point. */
list_head *core_list_insert_new(list_head * insert_point,
list_data * info,
list_head **memblock,
list_data **datablock,
list_head * memblock_end,
list_data * datablock_end);
/* Comparison callback used by the sort: a result <= 0 keeps a in front of b.
   res is passed through to the callback unchanged. */
typedef ee_s32 (*list_cmp)(list_data *a, list_data *b, core_results *res);
list_head *core_list_mergesort(list_head * list,
list_cmp cmp,
core_results *res);
/* Function: calc_func
   Produce the 7b value of a data item, computing and caching it on first
   use.
   Operation:
     If bit 7 of the item is set, the low 7 bits already hold the value.
     Otherwise bits 0-2 choose the algorithm (0 - state machine, 1 - matrix,
     anything else - the raw item), bits 3-6 supply its input, the result is
     folded into res->crc and its low 7 bits are stored back into the item
     together with the "cached" bit.
   Parameters:
     pdata - data item, updated in place when the value is computed.
     res   - benchmark context.
   Returns:
     The 7b value.
*/
ee_s16
calc_func(ee_s16 *pdata, core_results *res)
{
    ee_s16 word = *pdata;
    ee_s16 result;
    ee_s16 op;
    ee_s16 arg;

    /* bit 7 set: the result has been cached in the low 7 bits */
    if ((word >> 7) & 1)
        return (word & 0x007f);

    op  = word & 0x7;          /* bits 0-2: which function to run */
    arg = ((word >> 3) & 0xf); /* bits 3-6: data for the function */
    arg |= arg << 4;           /* replicate the nibble into an 8b value */

    if (op == 0)
    {
        if (arg < 0x22) /* set min period for bit corruption */
            arg = 0x22;
        result = core_bench_state(res->size,
                                  res->memblock[3],
                                  res->seed1,
                                  res->seed2,
                                  arg,
                                  res->crc);
        if (res->crcstate == 0)
            res->crcstate = result;
    }
    else if (op == 1)
    {
        result = core_bench_matrix(&(res->mat), arg, res->crc);
        if (res->crcmatrix == 0)
            res->crcmatrix = result;
    }
    else
    {
        result = word;
    }

    res->crc = crcu16(result, res->crc);
    result &= 0x007f;
    /* keep the upper byte, mark as cached, store the value */
    *pdata = (word & 0xff00) | 0x0080 | result;
    return result;
}
/* Function: cmp_complex
   Order two list cells by the calculated value of their data items
   (see calc_func; a is evaluated before b). Usable as the mergesort
   comparison callback.
   Returns:
     Value of a minus value of b.
*/
ee_s32
cmp_complex(list_data *a, list_data *b, core_results *res)
{
    ee_s16 lhs  = calc_func(&(a->data16), res);
    ee_s16 rhs  = calc_func(&(b->data16), res);
    ee_s32 diff = lhs - rhs;

    return diff;
}
/* Function: cmp_idx
   Order two list cells by their idx field. When res is NULL the data of
   both cells is regenerated first: the upper byte of data16 is copied over
   the lower byte. Usable as the mergesort comparison callback.
   Returns:
     a->idx minus b->idx.
*/
ee_s32
cmp_idx(list_data *a, list_data *b, core_results *res)
{
    if (!res)
    {
        /* restore the low byte from the backup kept in the high byte */
        ee_s16 backup;

        backup    = 0x00ff & (a->data16 >> 8);
        a->data16 = (a->data16 & 0xff00) | backup;
        backup    = 0x00ff & (b->data16 >> 8);
        b->data16 = (b->data16 & 0xff00) | backup;
    }
    return a->idx - b->idx;
}
/* Function: copy_info
   Copy the idx and data16 fields of one list data cell to another.
*/
void
copy_info(list_data *to, list_data *from)
{
    to->idx    = from->idx;
    to->data16 = from->data16;
}
/* Benchmark for linked list:
- Try to find multiple data items.
- List sort
- Operate on data from list (crc)
- Single remove/reinsert
* At the end of this function, the list is back to original state
Parameters:
res - benchmark context; res->list is the list and res->seed3 the number
of find operations to perform.
finder_idx - first idx to search for; a negative value makes the search
go by data value instead, and a value > 0 additionally enables the sort by
data content.
Returns:
16b value accumulated from the find results and the CRC passes.
*/
ee_u16
core_bench_list(core_results *res, ee_s16 finder_idx)
{
ee_u16 retval = 0;
ee_u16 found = 0, missed = 0;
list_head *list = res->list;
ee_s16 find_num = res->seed3;
list_head *this_find;
list_head *finder, *remover;
list_data info;
ee_s16 i;
info.idx = finder_idx;
/* find <find_num> values in the list, and change the list each time
* (reverse and cache if value found) */
for (i = 0; i < find_num; i++)
{
info.data16 = (i & 0xff);
this_find = core_list_find(list, &info);
list = core_list_reverse(list);
if (this_find == NULL)
{
missed++;
retval += (list->next->info->data16 >> 8) & 1;
}
else
{
found++;
if (this_find->info->data16 & 0x1) /* use found value */
retval += (this_find->info->data16 >> 9) & 1;
/* and cache next item at the head of the list (if any) */
if (this_find->next != NULL)
{
/* unlink the successor of the found item and relink it directly
   behind the list head */
finder = this_find->next;
this_find->next = finder->next;
finder->next = list->next;
list->next = finder;
}
}
/* when searching by idx, look for the next idx on the next pass */
if (info.idx >= 0)
info.idx++;
#if CORE_DEBUG
ee_printf("List find %d: [%d,%d,%d]\n", i, retval, missed, found);
#endif
}
retval += found * 4 - missed;
/* sort the list by data content and remove one item*/
if (finder_idx > 0)
list = core_list_mergesort(list, cmp_complex, res);
remover = core_list_remove(list->next);
/* CRC data content of list from location of index N forward, and then undo
* remove */
finder = core_list_find(list, &info);
if (!finder)
finder = list->next;
/* NOTE(review): each pass hashes list->info (the head cell), not
   finder->info; finder only determines the number of passes. The expected
   CRC values presumably depend on this - confirm before changing. */
while (finder)
{
retval = crc16(list->info->data16, retval);
finder = finder->next;
}
#if CORE_DEBUG
ee_printf("List sort 1: %04x\n", retval);
#endif
remover = core_list_undo_remove(remover, list->next);
/* sort the list by index, in effect returning the list to original state */
list = core_list_mergesort(list, cmp_idx, NULL);
/* CRC data content of list */
/* (same pattern as above: the head cell is hashed once per remaining item) */
finder = list->next;
while (finder)
{
retval = crc16(list->info->data16, retval);
finder = finder->next;
}
#if CORE_DEBUG
ee_printf("List sort 2: %04x\n", retval);
#endif
return retval;
}
/* Function: core_list_init
Initialize list with data.
Operation:
The memory block is split into a region of list headers followed by a
region of data cells. A head item and a tail item are created first, the
remaining items are inserted behind the head, every item is given an idx,
and the list is finally sorted by idx.
Parameters:
blksize - Size of memory to be initialized.
memblock - Pointer to memory block.
seed - Actual values chosen depend on the seed parameter.
The seed parameter MUST be supplied from a source that cannot be
determined at compile time
Returns:
Pointer to the head of the list.
*/
list_head *
core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed)
{
/* calculated pointers for the list */
/* each item is budgeted as 16 bytes of header plus one data cell */
ee_u32 per_item = 16 + sizeof(struct list_data_s);
ee_u32 size = (blksize / per_item)
- 2; /* to accommodate systems with 64b pointers, and make sure
same code is executed, set max list elements */
/* headers occupy [memblock, memblock_end); data cells start right after */
list_head *memblock_end = memblock + size;
list_data *datablock = (list_data *)(memblock_end);
list_data *datablock_end = datablock + size;
/* some useful variables */
ee_u32 i;
list_head *finder, *list = memblock;
list_data info;
/* create a fake items for the list head and tail */
list->next = NULL;
list->info = datablock;
list->info->idx = 0x0000;
list->info->data16 = (ee_s16)0x8080;
memblock++;
datablock++;
info.idx = 0x7fff;
info.data16 = (ee_s16)0xffff;
core_list_insert_new(
list, &info, &memblock, &datablock, memblock_end, datablock_end);
/* then insert size items */
/* (core_list_insert_new returns NULL without inserting once either region
   is exhausted; the return value is not checked here) */
for (i = 0; i < size; i++)
{
ee_u16 datpat = ((ee_u16)(seed ^ i) & 0xf);
ee_u16 dat
= (datpat << 3) | (i & 0x7); /* alternate between algorithms */
info.data16 = (dat << 8) | dat; /* fill the data with actual data and
upper bits with rebuild value */
core_list_insert_new(
list, &info, &memblock, &datablock, memblock_end, datablock_end);
}
/* and now index the list so we know initial seed order of the list */
/* (the loop stops at the last item, so the tail keeps idx 0x7fff) */
finder = list->next;
i = 1;
while (finder->next != NULL)
{
if (i < size / 5) /* first 20% of the list in order */
finder->info->idx = i++;
else
{
ee_u16 pat = (ee_u16)(i++ ^ seed); /* get a pseudo random number */
finder->info->idx = 0x3fff
& (((i & 0x07) << 8)
| pat); /* make sure the mixed items end up
after the ones in sequence */
}
finder = finder->next;
}
list = core_list_mergesort(list, cmp_idx, NULL);
#if CORE_DEBUG
ee_printf("Initialized list:\n");
finder = list;
while (finder)
{
ee_printf(
"[%04x,%04x]", finder->info->idx, (ee_u16)finder->info->data16);
finder = finder->next;
}
ee_printf("\n");
#endif
return list;
}
/* Function: core_list_insert_new
   Insert a new item into the list directly behind insert_point.
   Operation:
     The next free list header and data cell are taken from the two memory
     regions, both region cursors are advanced, and info is copied into the
     new data cell.
   Parameters:
     insert_point  - item after which the new item is linked.
     info          - data for the new cell.
     memblock      - cursor into the list header region (advanced).
     datablock     - cursor into the list data region (advanced).
     memblock_end  - end of region for list headers.
     datablock_end - end of region for list data.
   Returns:
     Pointer to the new item, or NULL (with nothing changed) when either
     region has no room left.
*/
list_head *
core_list_insert_new(list_head * insert_point,
                     list_data * info,
                     list_head **memblock,
                     list_data **datablock,
                     list_head * memblock_end,
                     list_data * datablock_end)
{
    list_head *fresh;

    /* refuse when taking one more header or data cell would hit the end */
    if ((*memblock + 1) >= memblock_end || (*datablock + 1) >= datablock_end)
        return NULL;

    /* claim a header and a data cell */
    fresh       = (*memblock)++;
    fresh->info = (*datablock)++;

    /* link the new item behind the insertion point */
    fresh->next        = insert_point->next;
    insert_point->next = fresh;

    copy_info(fresh->info, info);
    return fresh;
}
/* Function: core_list_remove
   Remove an item from the list.
   Operation:
     In a singly linked list the predecessor is not known, so the item's
     data pointer is exchanged with that of its successor and the successor
     is unlinked instead.
   Note:
     A fake item always terminates the list, so the successor is not
     checked for NULL.
   Returns:
     The unlinked cell (it now carries the data of item), with its next
     pointer cleared.
*/
list_head *
core_list_remove(list_head *item)
{
    list_head *victim = item->next;
    list_data *saved  = item->info;

    /* exchange the data pointers */
    item->info   = victim->info;
    victim->info = saved;

    /* unlink the successor */
    item->next   = victim->next;
    victim->next = NULL;
    return victim;
}
/* Function: core_list_undo_remove
   Undo a remove operation.
   Operation:
     Every iteration of the benchmark has to do exactly the same work, so a
     removal must be reversible: the data pointers are exchanged back and
     the removed cell is linked in again behind the modified item.
   Parameters:
     item_removed  - return value of <core_list_remove>.
     item_modified - list item that was modified during <core_list_remove>.
   Returns:
     The item that was linked back into the list.
*/
list_head *
core_list_undo_remove(list_head *item_removed, list_head *item_modified)
{
    list_data *saved = item_modified->info;

    /* exchange the data pointers back */
    item_modified->info = item_removed->info;
    item_removed->info  = saved;

    /* relink the removed cell behind the modified one */
    item_removed->next  = item_modified->next;
    item_modified->next = item_removed;
    return item_removed;
}
/* Function: core_list_find
   Find an item in the list.
   Operation:
     When info->idx is not negative the list is searched for an item with
     that idx; otherwise it is searched for an item whose low data byte
     equals info->data16.
   Parameters:
     list - list head.
     info - idx or data to find.
   Returns:
     Found item, or NULL if not found.
*/
list_head *
core_list_find(list_head *list, list_data *info)
{
    if (info->idx < 0)
    {
        /* search by data value */
        for (; list; list = list->next)
            if ((list->info->data16 & 0xff) == info->data16)
                break;
    }
    else
    {
        /* search by index */
        for (; list; list = list->next)
            if (list->info->idx == info->idx)
                break;
    }
    return list;
}
/* Function: core_list_reverse
   Reverse a list.
   Operation:
     Walk the list once and point every item at its predecessor.
   Parameters:
     list - list head.
   Returns:
     Head of the reversed list (the former last item), or NULL for an empty
     list.
*/
list_head *
core_list_reverse(list_head *list)
{
    list_head *reversed = NULL;
    list_head *following;

    for (; list; list = following)
    {
        following  = list->next;
        list->next = reversed;
        reversed   = list;
    }
    return reversed;
}
/* Function: core_list_mergesort
Sort the list in place without recursion.
Description:
Use mergesort, as for linked list this is a realistic solution.
Also, since this is aimed at embedded, care was taken to use iterative
rather than recursive algorithm. The sort can either return the list to
original order (by idx), or use the data item to invoke other
algorithms and change the order of the list.
Operation:
Bottom-up merge: each pass merges neighbouring runs of insize items and
insize doubles after every pass, until a pass needs only one merge.
Parameters:
list - list to be sorted.
cmp - cmp function to use
res - passed through unchanged to cmp
Returns:
New head of the list.
Note:
We have a special header for the list that will always be first,
but the algorithm could theoretically modify where the list starts.
NOTE(review): for a NULL list no merge takes place and tail is still NULL
when tail->next is written - confirm that callers never pass an empty list.
*/
list_head *
core_list_mergesort(list_head *list, list_cmp cmp, core_results *res)
{
list_head *p, *q, *e, *tail;
ee_s32 insize, nmerges, psize, qsize, i;
insize = 1;
while (1)
{
/* start a new pass over the whole list, rebuilding it from scratch */
p = list;
list = NULL;
tail = NULL;
nmerges = 0; /* count number of merges we do in this pass */
while (p)
{
nmerges++; /* there exists a merge to be done */
/* step `insize' places along from p */
q = p;
psize = 0;
for (i = 0; i < insize; i++)
{
psize++;
q = q->next;
if (!q)
break;
}
/* if q hasn't fallen off end, we have two lists to merge */
qsize = insize;
/* now we have two lists; merge them */
while (psize > 0 || (qsize > 0 && q))
{
/* decide whether next element of merge comes from p or q */
if (psize == 0)
{
/* p is empty; e must come from q. */
e = q;
q = q->next;
qsize--;
}
else if (qsize == 0 || !q)
{
/* q is empty; e must come from p. */
e = p;
p = p->next;
psize--;
}
else if (cmp(p->info, q->info, res) <= 0)
{
/* First element of p is lower (or same); e must come from
* p. */
e = p;
p = p->next;
psize--;
}
else
{
/* First element of q is lower; e must come from q. */
e = q;
q = q->next;
qsize--;
}
/* add the next element to the merged list */
if (tail)
{
tail->next = e;
}
else
{
list = e;
}
tail = e;
}
/* now p has stepped `insize' places along, and q has too */
p = q;
}
/* terminate the list built during this pass */
tail->next = NULL;
/* If we have done only one merge, we're finished. */
if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
return list;
/* Otherwise repeat, merging lists twice the size */
insize *= 2;
}
/* not reachable: the loop above only exits through its return; this is
   compiled in only when COMPILER_REQUIRES_SORT_RETURN is set */
#if COMPILER_REQUIRES_SORT_RETURN
return list;
#endif
}

442
test/coremark/dist/core_main.c vendored Normal file
View File

@ -0,0 +1,442 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
/* File: core_main.c
This file contains the framework to acquire a block of memory, seed
initial parameters, run the benchmark and report the results.
*/
#include "coremark.h"
/* Expected CRC values, five entries per table.
   NOTE(review): presumably one entry per known seed configuration and
   checked when results are reported - the code using these tables is not
   part of this block, confirm there.
*/
static ee_u16 list_known_crc[] = {
    (ee_u16)0xd4b0, (ee_u16)0x3340, (ee_u16)0x6a79, (ee_u16)0xe714, (ee_u16)0xe3c1
};
static ee_u16 matrix_known_crc[] = {
    (ee_u16)0xbe52, (ee_u16)0x1199, (ee_u16)0x5608, (ee_u16)0x1fd7, (ee_u16)0x0747
};
static ee_u16 state_known_crc[] = {
    (ee_u16)0x5e47, (ee_u16)0x39bf, (ee_u16)0xe5a4, (ee_u16)0x8e3a, (ee_u16)0x8d84
};
/* Function: iterate
   Run the benchmark for a specified number of iterations.
   Operation:
     Clears the CRC fields of the context, then for each of
     res->iterations rounds runs the list benchmark twice (with finder
     index 1 and -1) and folds both results into res->crc. The CRC reached
     after the first round is recorded in res->crclist.
   Parameters:
     pres - pointer to the core_results context of this run.
   Returns:
     NULL.
*/
void *
iterate(void *pres)
{
    core_results *res    = (core_results *)pres;
    ee_u32        rounds = res->iterations;
    ee_u32        round  = 0;
    ee_u16        crc;

    res->crcstate  = 0;
    res->crcmatrix = 0;
    res->crclist   = 0;
    res->crc       = 0;

    while (round < rounds)
    {
        /* the benchmark call must complete before res->crc is read */
        crc      = core_bench_list(res, 1);
        res->crc = crcu16(crc, res->crc);
        crc      = core_bench_list(res, -1);
        res->crc = crcu16(crc, res->crc);
        if (round == 0)
            res->crclist = res->crc;
        round++;
    }
    return NULL;
}
/* Seed access helpers. get_seed(x) yields seed number x truncated to ee_s16;
   get_seed_32(x) yields the full ee_s32 value. */
#if (SEED_METHOD == SEED_ARG)
ee_s32 get_seed_args(int i, int argc, char *argv[]);
/* Both macros expand to an expression that names argc and argv, so they can
   only be used where those identifiers are in scope (i.e. inside main). */
#define get_seed(x) (ee_s16) get_seed_args(x, argc, argv)
#define get_seed_32(x) get_seed_args(x, argc, argv)
#else /* via function or volatile */
ee_s32 get_seed_32(int i);
#define get_seed(x) (ee_s16) get_seed_32(x)
#endif
#if (MEM_METHOD == MEM_STATIC)
/* Statically allocated data block used by main() when MEM_METHOD is
   MEM_STATIC. */
ee_u8 static_memblk[TOTAL_DATA_SIZE];
#endif
/* Printable names for the memory methods; main() indexes this array with
   MEM_METHOD (MEM_STATIC = 0, MEM_MALLOC = 1, MEM_STACK = 2). */
char *mem_name[3] = { "Static", "Heap", "Stack" };
/* Function: main
Main entry routine for the benchmark.
This function is responsible for the following steps:
1 - Initialize input seeds from a source that cannot be determined at
compile time.
2 - Initialize memory block for use.
3 - Run and time the benchmark.
4 - Report results, testing the validity of the output if the seeds are
known.
Arguments:
1 - first seed : Any value
2 - second seed : Must be identical to first for iterations to be
identical
3 - third seed : Any value, should be at least an order of magnitude
less than the input size, but bigger than 32.
4 - Iterations : Special, if set to 0, iterations will be automatically
determined such that the benchmark will run between 10 to 100 secs
*/
/* Benchmark entry point: reads the seeds, carves up the data block, runs and
   times iterate(), then validates and reports the CRCs. */
#if MAIN_HAS_NOARGC
MAIN_RETURN_TYPE
main(void)
{
/* No command line on this target: provide stand-ins so that portable_init()
   and the argc/argv-based get_seed macros (SEED_ARG builds) still compile. */
int argc = 0;
char *argv[1];
#else
MAIN_RETURN_TYPE
main(int argc, char *argv[])
{
#endif
/* j counts the enabled algorithms that have already been given a slice of
   the data block; num_algorithms is the number of enabled algorithms. */
ee_u16 i, j = 0, num_algorithms = 0;
/* known_id indexes the *_known_crc tables (-1 = seeds not recognised).
   total_errors < 0 means the run cannot be validated. */
ee_s16 known_id = -1, total_errors = 0;
ee_u16 seedcrc = 0;
CORE_TICKS total_time;
core_results results[MULTITHREAD];
#if (MEM_METHOD == MEM_STACK)
ee_u8 stack_memblock[TOTAL_DATA_SIZE * MULTITHREAD];
#endif
/* first call any initializations needed */
portable_init(&(results[0].port), &argc, argv);
/* First some checks to make sure benchmark will run ok */
if (sizeof(struct list_head_s) > 128)
{
ee_printf("list_head structure too big for comparable data!\n");
return MAIN_RETURN_VAL;
}
/* Seeds 1-3 are truncated to 16 bits by get_seed(); seed 4 is the iteration
   count and seed 5 the bitmask of algorithms to execute. */
results[0].seed1 = get_seed(1);
results[0].seed2 = get_seed(2);
results[0].seed3 = get_seed(3);
results[0].iterations = get_seed_32(4);
#if CORE_DEBUG
results[0].iterations = 1;
#endif
results[0].execs = get_seed_32(5);
if (results[0].execs == 0)
{ /* if not supplied, execute all algorithms */
results[0].execs = ALL_ALGORITHMS_MASK;
}
/* put in some default values based on one seed only for easy testing */
if ((results[0].seed1 == 0) && (results[0].seed2 == 0)
&& (results[0].seed3 == 0))
{ /* performance run */
results[0].seed1 = 0;
results[0].seed2 = 0;
results[0].seed3 = 0x66;
}
if ((results[0].seed1 == 1) && (results[0].seed2 == 0)
&& (results[0].seed3 == 0))
{ /* validation run */
results[0].seed1 = 0x3415;
results[0].seed2 = 0x3415;
results[0].seed3 = 0x66;
}
/* Attach a data block to every context; memblock[0] is the whole block. */
#if (MEM_METHOD == MEM_STATIC)
results[0].memblock[0] = (void *)static_memblk;
results[0].size = TOTAL_DATA_SIZE;
results[0].err = 0;
#if (MULTITHREAD > 1)
#error "Cannot use a static data area with multiple contexts!"
#endif
#elif (MEM_METHOD == MEM_MALLOC)
for (i = 0; i < MULTITHREAD; i++)
{
/* A non-zero seed 7 overrides the default allocation size. */
ee_s32 malloc_override = get_seed(7);
if (malloc_override != 0)
results[i].size = malloc_override;
else
results[i].size = TOTAL_DATA_SIZE;
/* NOTE(review): the portable_malloc() result is not checked here. */
results[i].memblock[0] = portable_malloc(results[i].size);
results[i].seed1 = results[0].seed1;
results[i].seed2 = results[0].seed2;
results[i].seed3 = results[0].seed3;
results[i].err = 0;
results[i].execs = results[0].execs;
}
#elif (MEM_METHOD == MEM_STACK)
for (i = 0; i < MULTITHREAD; i++)
{
results[i].memblock[0] = stack_memblock + i * TOTAL_DATA_SIZE;
results[i].size = TOTAL_DATA_SIZE;
results[i].seed1 = results[0].seed1;
results[i].seed2 = results[0].seed2;
results[i].seed3 = results[0].seed3;
results[i].err = 0;
results[i].execs = results[0].execs;
}
#else
#error "Please define a way to initialize a memory block."
#endif
/* Data init */
/* Find out how much space we have based on number of algorithms */
for (i = 0; i < NUM_ALGORITHMS; i++)
{
if ((1 << (ee_u32)i) & results[0].execs)
num_algorithms++;
}
/* NOTE(review): if execs is non-zero but has none of the low NUM_ALGORITHMS
   bits set, num_algorithms is still 0 here and the division below divides
   by zero. */
for (i = 0; i < MULTITHREAD; i++)
results[i].size = results[i].size / num_algorithms;
/* Assign pointers */
/* Algorithm bit i gets slot memblock[i + 1]; enabled algorithms are packed
   back to back, each results[0].size bytes long. */
for (i = 0; i < NUM_ALGORITHMS; i++)
{
ee_u32 ctx;
if ((1 << (ee_u32)i) & results[0].execs)
{
for (ctx = 0; ctx < MULTITHREAD; ctx++)
results[ctx].memblock[i + 1]
= (char *)(results[ctx].memblock[0]) + results[0].size * j;
j++;
}
}
/* call inits */
for (i = 0; i < MULTITHREAD; i++)
{
if (results[i].execs & ID_LIST)
{
results[i].list = core_list_init(
results[0].size, results[i].memblock[1], results[i].seed1);
}
if (results[i].execs & ID_MATRIX)
{
/* The matrix seed is seed1 OR-ed with seed2 shifted left by 16 bits. */
core_init_matrix(results[0].size,
results[i].memblock[2],
(ee_s32)results[i].seed1
| (((ee_s32)results[i].seed2) << 16),
&(results[i].mat));
}
if (results[i].execs & ID_STATE)
{
core_init_state(
results[0].size, results[i].seed1, results[i].memblock[3]);
}
}
/* automatically determine number of iterations if not set */
if (results[0].iterations == 0)
{
secs_ret secs_passed = 0;
ee_u32 divisor;
results[0].iterations = 1;
/* Multiply the iteration count by 10 until one timed run takes at least
   one second. */
while (secs_passed < (secs_ret)1)
{
results[0].iterations *= 10;
start_time();
iterate(&results[0]);
stop_time();
secs_passed = time_in_secs(get_time());
}
/* now we know it executes for at least 1 sec, set actual run time at
 * about 10 secs */
divisor = (ee_u32)secs_passed;
if (divisor == 0) /* some machines cast float to int as 0 since this
conversion is not defined by ANSI, but we know at
least one second passed */
divisor = 1;
results[0].iterations *= 1 + 10 / divisor;
}
/* perform actual benchmark */
start_time();
#if (MULTITHREAD > 1)
/* default_num_contexts is not declared in this file -- presumably provided
   by the port layer; confirm in core_portme. */
if (default_num_contexts > MULTITHREAD)
{
default_num_contexts = MULTITHREAD;
}
for (i = 0; i < default_num_contexts; i++)
{
results[i].iterations = results[0].iterations;
results[i].execs = results[0].execs;
core_start_parallel(&results[i]);
}
for (i = 0; i < default_num_contexts; i++)
{
core_stop_parallel(&results[i]);
}
#else
iterate(&results[0]);
#endif
stop_time();
total_time = get_time();
/* get a function of the input to report */
seedcrc = crc16(results[0].seed1, seedcrc);
seedcrc = crc16(results[0].seed2, seedcrc);
seedcrc = crc16(results[0].seed3, seedcrc);
seedcrc = crc16(results[0].size, seedcrc);
switch (seedcrc)
{ /* test known output for common seeds */
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
known_id = 0;
ee_printf("6k performance run parameters for coremark.\n");
break;
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per
algorithm */
known_id = 1;
ee_printf("6k validation run parameters for coremark.\n");
break;
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm
*/
known_id = 2;
ee_printf("Profile generation run parameters for coremark.\n");
break;
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
known_id = 3;
ee_printf("2K performance run parameters for coremark.\n");
break;
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per
algorithm */
known_id = 4;
ee_printf("2K validation run parameters for coremark.\n");
break;
default:
/* Unknown configuration: mark the run as not validatable. */
total_errors = -1;
break;
}
/* For a recognised configuration, compare every context's CRCs with the
   expected table entries and count the mismatches. */
if (known_id >= 0)
{
for (i = 0; i < default_num_contexts; i++)
{
results[i].err = 0;
if ((results[i].execs & ID_LIST)
&& (results[i].crclist != list_known_crc[known_id]))
{
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",
i,
results[i].crclist,
list_known_crc[known_id]);
results[i].err++;
}
if ((results[i].execs & ID_MATRIX)
&& (results[i].crcmatrix != matrix_known_crc[known_id]))
{
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",
i,
results[i].crcmatrix,
matrix_known_crc[known_id]);
results[i].err++;
}
if ((results[i].execs & ID_STATE)
&& (results[i].crcstate != state_known_crc[known_id]))
{
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",
i,
results[i].crcstate,
state_known_crc[known_id]);
results[i].err++;
}
total_errors += results[i].err;
}
}
/* NOTE(review): when total_errors is -1 (unknown seeds) and
   check_data_types() reports exactly one error, the sum is 0 and the run is
   reported as validated below. */
total_errors += check_data_types();
/* and report results */
ee_printf("CoreMark Size : %lu\n", (long unsigned)results[0].size);
ee_printf("Total ticks : %lu\n", (long unsigned)total_time);
#if HAS_FLOAT
ee_printf("Total time (secs): %f\n", time_in_secs(total_time));
if (time_in_secs(total_time) > 0)
ee_printf("Iterations/Sec : %f\n",
default_num_contexts * results[0].iterations
/ time_in_secs(total_time));
#else
ee_printf("Total time (secs): %d\n", time_in_secs(total_time));
if (time_in_secs(total_time) > 0)
ee_printf("Iterations/Sec : %d\n",
default_num_contexts * results[0].iterations
/ time_in_secs(total_time));
#endif
/* A run shorter than 10 seconds is counted as an error. */
if (time_in_secs(total_time) < 10)
{
ee_printf(
"ERROR! Must execute for at least 10 secs for a valid result!\n");
total_errors++;
}
ee_printf("Iterations : %lu\n",
(long unsigned)default_num_contexts * results[0].iterations);
ee_printf("Compiler version : %s\n", COMPILER_VERSION);
ee_printf("Compiler flags : %s\n", COMPILER_FLAGS);
#if (MULTITHREAD > 1)
ee_printf("Parallel %s : %d\n", PARALLEL_METHOD, default_num_contexts);
#endif
ee_printf("Memory location : %s\n", MEM_LOCATION);
/* output for verification */
ee_printf("seedcrc : 0x%04x\n", seedcrc);
if (results[0].execs & ID_LIST)
for (i = 0; i < default_num_contexts; i++)
ee_printf("[%d]crclist : 0x%04x\n", i, results[i].crclist);
if (results[0].execs & ID_MATRIX)
for (i = 0; i < default_num_contexts; i++)
ee_printf("[%d]crcmatrix : 0x%04x\n", i, results[i].crcmatrix);
if (results[0].execs & ID_STATE)
for (i = 0; i < default_num_contexts; i++)
ee_printf("[%d]crcstate : 0x%04x\n", i, results[i].crcstate);
for (i = 0; i < default_num_contexts; i++)
ee_printf("[%d]crcfinal : 0x%04x\n", i, results[i].crc);
if (total_errors == 0)
{
ee_printf(
"Correct operation validated. See README.md for run and reporting "
"rules.\n");
#if HAS_FLOAT
/* The score line is printed only for known_id 3 (2K performance run). */
if (known_id == 3)
{
ee_printf("CoreMark 1.0 : %f / %s %s",
default_num_contexts * results[0].iterations
/ time_in_secs(total_time),
COMPILER_VERSION,
COMPILER_FLAGS);
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
ee_printf(" / %s", MEM_LOCATION);
#else
ee_printf(" / %s", mem_name[MEM_METHOD]);
#endif
#if (MULTITHREAD > 1)
ee_printf(" / %d:%s", default_num_contexts, PARALLEL_METHOD);
#endif
ee_printf("\n");
}
#endif
}
if (total_errors > 0)
ee_printf("Errors detected\n");
if (total_errors < 0)
ee_printf(
"Cannot validate operation for these seed values, please compare "
"with results on a known platform.\n");
#if (MEM_METHOD == MEM_MALLOC)
for (i = 0; i < MULTITHREAD; i++)
portable_free(results[i].memblock[0]);
#endif
/* And last call any target specific code for finalizing */
portable_fini(&(results[0].port));
return MAIN_RETURN_VAL;
}

359
test/coremark/dist/core_matrix.c vendored Normal file
View File

@ -0,0 +1,359 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
/*
Topic: Description
Matrix manipulation benchmark
This very simple algorithm forms the basis of many more complex
algorithms.
The tight inner loop is the focus of many optimizations (compiler as
well as hardware based) and is thus relevant for embedded processing.
The total available data space will be divided into 3 parts:
NxN Matrix A - initialized with small values (upper 3/4 of the bits all
zero).
NxN Matrix B - initialized with medium values (upper half of the bits all
zero).
NxN Matrix C - used for the result.
The actual values for A and B must be derived based on input that is not
available at compile time.
*/
/* Forward declarations of the matrix kernels defined later in this file. */
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val);
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval);
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val);
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val);
/* x + 1; not referenced anywhere in the visible part of this file. */
#define matrix_test_next(x) (x + 1)
/* Keep the low 8 bits of x when y is non-zero, otherwise the low 16 bits. */
#define matrix_clip(x, y) ((y) ? (x)&0x0ff : (x)&0x0ffff)
/* Force the bits of mask 0xf000 on in x. */
#define matrix_big(x) (0xf000 | (x))
/* Shift x right by 'from' bits and keep the low 'to' bits of the result
   (the third argument is a bit count, not an end position). */
#define bit_extract(x, from, to) (((x) >> (from)) & (~(0xffffffff << (to))))
#if CORE_DEBUG
/* Debug helper: print the N x N MATDAT matrix A under the label name,
   one row per output line with comma-separated elements. */
void
printmat(MATDAT *A, ee_u32 N, char *name)
{
    ee_u32 row, col;

    ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
    for (row = 0; row != N; ++row)
    {
        for (col = 0; col != N; ++col)
        {
            if (col > 0)
            {
                ee_printf(",");
            }
            ee_printf("%d", A[row * N + col]);
        }
        ee_printf("\n");
    }
}
/* Debug helper: print the N x N MATRES result matrix C under the label name,
   one row per output line with comma-separated elements. */
void
printmatC(MATRES *C, ee_u32 N, char *name)
{
    ee_u32 row, col;

    ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
    for (row = 0; row != N; ++row)
    {
        for (col = 0; col != N; ++col)
        {
            if (col > 0)
            {
                ee_printf(",");
            }
            ee_printf("%d", C[row * N + col]);
        }
        ee_printf("\n");
    }
}
#endif
/* Function: core_bench_matrix
        Benchmark entry point for the matrix algorithm.

        Runs <matrix_test> once on the matrices described by p, using seed
        (converted to MATDAT) as the constant operand, and folds the value
        it returns into crc.

        Returns:
        The updated CRC.
*/
ee_u16
core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc)
{
    ee_s16 digest = matrix_test(p->N, p->C, p->A, p->B, (MATDAT)seed);

    return crc16(digest, crc);
}
/* Function: matrix_test
        Perform the matrix manipulation sequence.

        Parameters:
        N - Dimensions of the matrix.
        C - memory for result matrix.
        A - input matrix (modified during the run, restored at the end).
        B - operator matrix.
        val - constant operand.

        Returns:
        A CRC over the <matrix_sum> value of the result matrix after each
        multiplication step.

        Operation:
        1 - Add val to all elements of A.
        2 - C = A * val (element-wise constant multiply).
        3 - C = A * vector taken from B.
        4 - C = A * B.
        5 - C = A * B with bit extraction on each product.
        6 - Add -val to all elements of A.
*/
ee_s16
matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val)
{
    MATDAT limit  = matrix_big(val);
    ee_u16 digest = 0;
    ee_s16 sum;

    matrix_add_const(N, A, val); /* make sure data changes */
#if CORE_DEBUG
    printmat(A, N, "matrix_add_const");
#endif

    matrix_mul_const(N, C, A, val);
    sum    = matrix_sum(N, C, limit);
    digest = crc16(sum, digest);
#if CORE_DEBUG
    printmatC(C, N, "matrix_mul_const");
#endif

    matrix_mul_vect(N, C, A, B);
    sum    = matrix_sum(N, C, limit);
    digest = crc16(sum, digest);
#if CORE_DEBUG
    printmatC(C, N, "matrix_mul_vect");
#endif

    matrix_mul_matrix(N, C, A, B);
    sum    = matrix_sum(N, C, limit);
    digest = crc16(sum, digest);
#if CORE_DEBUG
    printmatC(C, N, "matrix_mul_matrix");
#endif

    matrix_mul_matrix_bitextract(N, C, A, B);
    sum    = matrix_sum(N, C, limit);
    digest = crc16(sum, digest);
#if CORE_DEBUG
    printmatC(C, N, "matrix_mul_matrix_bitextract");
#endif

    matrix_add_const(N, A, -val); /* return matrix to initial value */
    return digest;
}
/* Function: core_init_matrix
        Initialize the memory block for matrix benchmarking.

        Parameters:
        blksize - Size of memory to be initialized.
        memblk - Pointer to memory block.
        seed - Actual values chosen depend on the seed parameter
               (a seed of 0 is replaced by 1).
        p - receives the A, B and C pointers and the dimension N.

        Returns:
        Matrix dimensions.

        Note:
        The seed parameter MUST be supplied from a source that cannot be
        determined at compile time
*/
ee_u32
core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p)
{
    ee_u32  dim   = 0;
    ee_u32  bytes = 0;
    ee_u32  N;
    ee_u32  row, col;
    ee_s32  order = 1;
    MATDAT *A;
    MATDAT *B;
    MATDAT  val;

    if (seed == 0)
    {
        seed = 1;
    }

    /* Find the first dim for which dim * dim * 2 * 4 reaches blksize, then
       step back by one to get the dimension actually used. */
    while (bytes < blksize)
    {
        dim++;
        bytes = dim * dim * 2 * 4;
    }
    N = dim - 1;

    A = (MATDAT *)align_mem(memblk);
    B = A + N * N;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            ee_u32 cell = row * N + col;

            seed    = ((order * seed) % 65536);
            val     = (seed + order);
            val     = matrix_clip(val, 0);
            B[cell] = val;
            val     = (val + order);
            val     = matrix_clip(val, 1);
            A[cell] = val;
            order++;
        }
    }

    p->N = N;
    p->A = A;
    p->B = B;
    p->C = (MATRES *)align_mem(B + N * N);

#if CORE_DEBUG
    printmat(A, N, "A");
    printmat(B, N, "B");
#endif
    return N;
}
/* Function: matrix_sum
        Calculate a value that depends on the elements of the matrix.

        Each element is added to a running accumulator. When the
        accumulator exceeds clipval, 10 is added to the result and the
        accumulator is reset to 0. Otherwise 1 is added to the result if
        the element is bigger than the previous element.
*/
ee_s16
matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval)
{
    MATRES acc = 0, last = 0, elem = 0;
    ee_s16 score = 0;
    ee_u32 row, col;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            elem = C[row * N + col];
            acc += elem;
            if (acc > clipval)
            {
                score += 10;
                acc = 0;
            }
            else if (elem > last)
            {
                score++;
            }
            last = elem;
        }
    }
    return score;
}
/* Function: matrix_mul_const
        Store in every element of C the matching element of A multiplied
        by val, with both operands widened to MATRES first.
*/
void
matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val)
{
    ee_u32 row, col;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            ee_u32 cell = row * N + col;

            C[cell] = (MATRES)A[cell] * (MATRES)val;
        }
    }
}
/* Function: matrix_add_const
        Add val, in place, to each of the N x N elements of A.
*/
void
matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val)
{
    ee_u32 row, col;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            ee_u32 cell = row * N + col;

            A[cell] += val;
        }
    }
}
/* Function: matrix_mul_vect
        Multiply matrix A by the vector held in the first N elements of B.
        Only the first N elements of C are written: C[i] receives the dot
        product of row i of A with that vector.
*/
void
matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
{
    ee_u32 row, col;

    for (row = 0; row < N; row++)
    {
        MATRES *out = &C[row];

        *out = 0;
        for (col = 0; col < N; col++)
        {
            *out += (MATRES)A[row * N + col] * (MATRES)B[col];
        }
    }
}
/* Function: matrix_mul_matrix
        Plain N x N matrix product: C = A * B, with each product computed
        in MATRES and accumulated directly into the destination element.
*/
void
matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
{
    ee_u32 row, col, inner;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            MATRES *out = &C[row * N + col];

            *out = 0;
            for (inner = 0; inner < N; inner++)
            {
                *out += (MATRES)A[row * N + inner] * (MATRES)B[inner * N + col];
            }
        }
    }
}
/* Function: matrix_mul_matrix_bitextract
        Matrix product variant: for every A[i][k] * B[k][j] product, two
        bit fields are extracted from it with bit_extract(.., 2, 4) and
        bit_extract(.., 5, 7), multiplied together and accumulated into
        C[i][j].
*/
void
matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
{
    ee_u32 row, col, inner;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            MATRES *out = &C[row * N + col];

            *out = 0;
            for (inner = 0; inner < N; inner++)
            {
                MATRES prod
                    = (MATRES)A[row * N + inner] * (MATRES)B[inner * N + col];

                *out += bit_extract(prod, 2, 4) * bit_extract(prod, 5, 7);
            }
        }
    }
}

330
test/coremark/dist/core_state.c vendored Normal file
View File

@ -0,0 +1,330 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
/* local functions */
enum CORE_STATE core_state_transition(ee_u8 **instr, ee_u32 *transition_count);
/*
Topic: Description
Simple state machines like this one are used in many embedded products.
For more complex state machines, sometimes a state transition table
implementation is used instead, trading speed of direct coding for ease of
maintenance.
Since the main goal of using a state machine in CoreMark is to exercise
the switch/if behaviour, we are using a small Moore machine.
In particular, this machine tests the type of a string input,
trying to determine whether the input is a number or something else.
(see core_state.png).
*/
/* Function: core_bench_state
        Benchmark function for the state machine.

        Scans the NUL-terminated input in memblock twice with
        <core_state_transition>: once as is, and once after XOR-ing every
        step-th byte that is not a ',' with seed1. Afterwards the same
        bytes are XOR-ed with seed2, which undoes the corruption when
        seed1 and seed2 are equal.

        Returns:
        crc updated with the final-state and transition counters of every
        state.
*/
ee_u16
core_bench_state(ee_u32 blksize,
                 ee_u8 *memblock,
                 ee_s16 seed1,
                 ee_s16 seed2,
                 ee_s16 step,
                 ee_u16 crc)
{
    ee_u32 final_counts[NUM_CORE_STATES];
    ee_u32 track_counts[NUM_CORE_STATES];
    ee_u8 *limit = memblock + blksize;
    ee_u8 *cursor;
    ee_u32 s;

#if CORE_DEBUG
    ee_printf("State Bench: %d,%d,%d,%04x\n", seed1, seed2, step, crc);
#endif
    for (s = 0; s < NUM_CORE_STATES; s++)
    {
        track_counts[s] = 0;
        final_counts[s] = 0;
    }

    /* first pass over the unmodified input */
    cursor = memblock;
    while (*cursor != 0)
    {
        enum CORE_STATE fstate = core_state_transition(&cursor, track_counts);
        final_counts[fstate]++;
#if CORE_DEBUG
        ee_printf("%d,", fstate);
#endif
    }
#if CORE_DEBUG
    ee_printf("\n");
#endif

    /* insert some corruption */
    for (cursor = memblock; cursor < limit; cursor += step)
    {
        if (*cursor != ',')
        {
            *cursor ^= (ee_u8)seed1;
        }
    }

    /* second pass, over the corrupted input */
    cursor = memblock;
    while (*cursor != 0)
    {
        enum CORE_STATE fstate = core_state_transition(&cursor, track_counts);
        final_counts[fstate]++;
#if CORE_DEBUG
        ee_printf("%d,", fstate);
#endif
    }
#if CORE_DEBUG
    ee_printf("\n");
#endif

    /* undo corruption if seed1 and seed2 are equal */
    for (cursor = memblock; cursor < limit; cursor += step)
    {
        if (*cursor != ',')
        {
            *cursor ^= (ee_u8)seed2;
        }
    }

    for (s = 0; s < NUM_CORE_STATES; s++)
    {
        crc = crcu32(final_counts[s], crc);
        crc = crcu32(track_counts[s], crc);
    }
    return crc;
}
/* Default initialization patterns.
   core_init_state() picks one entry per token using index (seed >> 3) & 0x3.
   It copies 4 bytes from an intpat entry and 8 bytes from an entry of any
   other table, so the string lengths below must not be shortened. */
/* Integer-like tokens (4 characters each). */
static ee_u8 *intpat[4]
= { (ee_u8 *)"5012", (ee_u8 *)"1234", (ee_u8 *)"-874", (ee_u8 *)"+122" };
/* Floating-point-like tokens (8 characters each). */
static ee_u8 *floatpat[4] = { (ee_u8 *)"35.54400",
(ee_u8 *)".1234500",
(ee_u8 *)"-110.700",
(ee_u8 *)"+0.64400" };
/* Scientific-notation-like tokens (8 characters each). */
static ee_u8 *scipat[4] = { (ee_u8 *)"5.500e+3",
(ee_u8 *)"-.123e-2",
(ee_u8 *)"-87e+832",
(ee_u8 *)"+0.6e-12" };
/* Deliberately malformed tokens (8 characters each). */
static ee_u8 *errpat[4] = { (ee_u8 *)"T0.3e-1F",
(ee_u8 *)"-T.T++Tq",
(ee_u8 *)"1T3.4e4z",
(ee_u8 *)"34.0e-T^" };
/* Function: core_init_state
        Initialize the input data for the state machine.

        Fills p with predetermined pattern strings, each followed by a
        ',', for as long as the next pattern plus its separator still fits
        in size - 1 bytes; the remainder of the size bytes is zero filled.
        Which pattern comes next depends on the seed, which is incremented
        for every token: the low three bits choose the pattern family and
        the next two bits the entry within that family.

        Note:
        The seed parameter MUST be supplied from a source that cannot be
        determined at compile time
*/
void
core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p)
{
    ee_u32 filled  = 0;
    ee_u32 len     = 0;
    ee_u32 k;
    ee_u8 *pattern = 0;
#if CORE_DEBUG
    ee_u8 *start = p;
    ee_printf("State: %d,%d\n", size, seed);
#endif

    size--;
    while ((filled + len + 1) < size)
    {
        int kind;
        int slot;

        /* emit the pattern selected on the previous pass, if any */
        if (len > 0)
        {
            ee_u8 *dst = p + filled;

            for (k = 0; k < len; k++)
            {
                dst[k] = pattern[k];
            }
            dst[len] = ',';
            filled += len + 1;
        }

        seed++;
        kind = seed & 0x7;
        slot = (seed >> 3) & 0x3;
        if (kind <= 2) /* int */
        {
            pattern = intpat[slot];
            len     = 4;
        }
        else if (kind <= 4) /* float */
        {
            pattern = floatpat[slot];
            len     = 8;
        }
        else if (kind <= 6) /* scientific */
        {
            pattern = scipat[slot];
            len     = 8;
        }
        else /* invalid */
        {
            pattern = errpat[slot];
            len     = 8;
        }
    }
    size++;

    /* fill the rest with 0 */
    for (; filled < size; filled++)
    {
        *(p + filled) = 0;
    }
#if CORE_DEBUG
    ee_printf("State Input: %s\n", start);
#endif
}
/* Return 1 when c is an ASCII decimal digit ('0'..'9'), otherwise 0. */
static ee_u8
ee_isdigit(ee_u8 c)
{
    /* bitwise & rather than &&: both comparisons are always evaluated */
    return (ee_u8)((c >= '0') & (c <= '9'));
}
/* Function: core_state_transition
Actual state machine.
The state machine will continue scanning until either:
1 - an invalid input is detected.
2 - a valid number has been detected.
The input pointer is updated to point to the end of the token, and the
end state is returned (either specific format determined or invalid).

Parameters:
instr - in/out pointer to the current position in the input string.
transition_count - per-state counters, indexed by CORE_STATE, that are
incremented as transitions are taken.

Note:
The counters feed the benchmark CRC, so which counter each branch
increments must not be changed.
*/
enum CORE_STATE
core_state_transition(ee_u8 **instr, ee_u32 *transition_count)
{
ee_u8 * str = *instr;
ee_u8 NEXT_SYMBOL;
enum CORE_STATE state = CORE_START;
/* Scan until the NUL terminator or until the state becomes CORE_INVALID.
   The loop increment still runs after the iteration that set CORE_INVALID,
   so str ends up one past the offending character. */
for (; *str && state != CORE_INVALID; str++)
{
NEXT_SYMBOL = *str;
if (NEXT_SYMBOL == ',') /* end of this input */
{
/* consume the separator so the next call starts on the next token */
str++;
break;
}
switch (state)
{
/* First character of a token: digit, sign or '.' are accepted. */
case CORE_START:
if (ee_isdigit(NEXT_SYMBOL))
{
state = CORE_INT;
}
else if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
{
state = CORE_S1;
}
else if (NEXT_SYMBOL == '.')
{
state = CORE_FLOAT;
}
else
{
state = CORE_INVALID;
transition_count[CORE_INVALID]++;
}
/* counted for every character processed in CORE_START */
transition_count[CORE_START]++;
break;
/* After a leading sign: a digit or '.' must follow. */
case CORE_S1:
if (ee_isdigit(NEXT_SYMBOL))
{
state = CORE_INT;
transition_count[CORE_S1]++;
}
else if (NEXT_SYMBOL == '.')
{
state = CORE_FLOAT;
transition_count[CORE_S1]++;
}
else
{
state = CORE_INVALID;
transition_count[CORE_S1]++;
}
break;
/* Integer digits: further digits stay in this state without counting. */
case CORE_INT:
if (NEXT_SYMBOL == '.')
{
state = CORE_FLOAT;
transition_count[CORE_INT]++;
}
else if (!ee_isdigit(NEXT_SYMBOL))
{
state = CORE_INVALID;
transition_count[CORE_INT]++;
}
break;
/* After the '.': digits stay here, 'E'/'e' starts the exponent. */
case CORE_FLOAT:
if (NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e')
{
state = CORE_S2;
transition_count[CORE_FLOAT]++;
}
else if (!ee_isdigit(NEXT_SYMBOL))
{
state = CORE_INVALID;
transition_count[CORE_FLOAT]++;
}
break;
/* After 'E'/'e': an explicit sign is required. */
case CORE_S2:
if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
{
state = CORE_EXPONENT;
transition_count[CORE_S2]++;
}
else
{
state = CORE_INVALID;
transition_count[CORE_S2]++;
}
break;
/* After the exponent sign: a digit is required. */
case CORE_EXPONENT:
if (ee_isdigit(NEXT_SYMBOL))
{
state = CORE_SCIENTIFIC;
transition_count[CORE_EXPONENT]++;
}
else
{
state = CORE_INVALID;
transition_count[CORE_EXPONENT]++;
}
break;
/* Exponent digits. Unlike the other states, leaving this one increments
   the CORE_INVALID counter rather than the counter of the current state. */
case CORE_SCIENTIFIC:
if (!ee_isdigit(NEXT_SYMBOL))
{
state = CORE_INVALID;
transition_count[CORE_INVALID]++;
}
break;
default:
break;
}
}
/* report how far the scan advanced */
*instr = str;
return state;
}

249
test/coremark/dist/core_util.c vendored Normal file
View File

@ -0,0 +1,249 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
/* Function: get_seed
Get a value that cannot be determined at compile time.
Since different embedded systems and compilers are used, 3 different
methods are provided:
1 - Using a volatile variable. This method is only valid if the compiler
is forced to generate code that reads the value of a volatile variable
from memory at run time. Please note, if using this method, you would need
to modify core_portme.c to generate training profile.
2 - Command line arguments. This is the preferred method if command line
arguments are supported.
3 - System function. If none of the first 2 methods is available on the
platform, a system function which is not a stub can be used.
e.g. read the value on GPIO pins connected to switches, or invoke
special simulator functions.
*/
#if (SEED_METHOD == SEED_VOLATILE)
extern volatile ee_s32 seed1_volatile;
extern volatile ee_s32 seed2_volatile;
extern volatile ee_s32 seed3_volatile;
extern volatile ee_s32 seed4_volatile;
extern volatile ee_s32 seed5_volatile;
/* Return seed number i (1..5) by reading the matching seedN_volatile
   variable; any other index yields 0. */
ee_s32
get_seed_32(int i)
{
    switch (i)
    {
        case 1:
            return seed1_volatile;
        case 2:
            return seed2_volatile;
        case 3:
            return seed3_volatile;
        case 4:
            return seed4_volatile;
        case 5:
            return seed5_volatile;
        default:
            return 0;
    }
}
#elif (SEED_METHOD == SEED_ARG)
/* Parse a numeric command line value.
   Accepted form: an optional leading '-', an optional lowercase "0x" prefix
   selecting hexadecimal (digits 0-9 and a-f only), the digits, and an
   optional 'K' (x1024) or 'M' (x1024*1024) suffix. Parsing stops at the
   first character that is not a valid digit; with no digits the value is 0. */
ee_s32
parseval(char *valstring)
{
    ee_s32 value = 0;
    ee_s32 sign  = 1;
    ee_s32 radix = 10;

    if (*valstring == '-')
    {
        sign = -1;
        valstring++;
    }
    if ((valstring[0] == '0') && (valstring[1] == 'x'))
    {
        radix = 16;
        valstring += 2;
    }

    /* accumulate digits */
    for (;; valstring++)
    {
        char   c = *valstring;
        ee_s32 digit;

        if ((c >= '0') && (c <= '9'))
        {
            digit = c - '0';
        }
        else if ((radix == 16) && (c >= 'a') && (c <= 'f'))
        {
            digit = 10 + c - 'a';
        }
        else
        {
            break;
        }
        value *= radix;
        value += digit;
    }

    /* now add qualifiers */
    if (*valstring == 'K')
    {
        value *= 1024;
    }
    if (*valstring == 'M')
    {
        value *= 1024 * 1024;
    }
    value *= sign;
    return value;
}
/* Return command line argument number i parsed with parseval(), or 0 when
   fewer than i + 1 arguments are present. */
ee_s32
get_seed_args(int i, int argc, char *argv[])
{
    return (i < argc) ? parseval(argv[i]) : 0;
}
#elif (SEED_METHOD == SEED_FUNC)
/* If using OS based function, you must define and implement the functions
 * below in core_portme.h and core_portme.c !
 *
 * Returns seed number i (1..5) by calling the matching portme_sysN()
 * function; any other index yields 0. */
ee_s32
get_seed_32(int i)
{
    switch (i)
    {
        case 1:
            return portme_sys1();
        case 2:
            return portme_sys2();
        case 3:
            return portme_sys3();
        case 4:
            return portme_sys4();
        case 5:
            return portme_sys5();
        default:
            return 0;
    }
}
#endif
/* Function: crc*
        Service functions to calculate 16b CRC code.

        crcu8 folds the 8 bits of data into crc, least significant bit
        first, and returns the updated CRC.
*/
ee_u16
crcu8(ee_u8 data, ee_u16 crc)
{
    ee_u8 bit;

    for (bit = 0; bit < 8; bit++)
    {
        /* feedback is the XOR of the low data bit and the low CRC bit */
        ee_u8 feedback = (ee_u8)((data ^ (ee_u8)crc) & 1);

        data >>= 1;
        if (feedback)
        {
            crc ^= 0x4002;
        }
        crc >>= 1;
        if (feedback)
        {
            crc |= 0x8000;
        }
        else
        {
            crc &= 0x7fff;
        }
    }
    return crc;
}
/* Fold a 16-bit value into crc, low byte first, then high byte. */
ee_u16
crcu16(ee_u16 newval, ee_u16 crc)
{
    ee_u8 low  = (ee_u8)(newval);
    ee_u8 high = (ee_u8)((newval) >> 8);

    return crcu8(high, crcu8(low, crc));
}
/* Fold a 32-bit value into crc, low 16 bits first, then the high 16 bits. */
ee_u16
crcu32(ee_u32 newval, ee_u16 crc)
{
    ee_s16 low  = (ee_s16)newval;
    ee_s16 high = (ee_s16)(newval >> 16);

    return crc16(high, crc16(low, crc));
}
/* Signed-input wrapper: converts newval to ee_u16 and forwards to crcu16. */
ee_u16
crc16(ee_s16 newval, ee_u16 crc)
{
    ee_u16 bits = (ee_u16)newval;

    return crcu16(bits, crc);
}
/* Verify that the port's integer typedefs have the sizes the benchmark
   relies on: ee_u8 is 1 byte, ee_u16 and ee_s16 are 2 bytes, ee_s32 and
   ee_u32 are 4 bytes, and ee_ptr_int is as large as an int pointer.
   Prints one message per failed check, plus a closing hint when anything
   failed.

   Returns:
   The number of failed checks (0 when every type has the expected size).

   The parameter list is spelled (void) so that this definition is a proper
   prototype matching the declaration in coremark.h, rather than an
   old-style definition with an empty identifier list. */
ee_u8
check_data_types(void)
{
    ee_u8 retval = 0;

    if (sizeof(ee_u8) != 1)
    {
        ee_printf("ERROR: ee_u8 is not an 8b datatype!\n");
        retval++;
    }
    if (sizeof(ee_u16) != 2)
    {
        ee_printf("ERROR: ee_u16 is not a 16b datatype!\n");
        retval++;
    }
    if (sizeof(ee_s16) != 2)
    {
        ee_printf("ERROR: ee_s16 is not a 16b datatype!\n");
        retval++;
    }
    if (sizeof(ee_s32) != 4)
    {
        ee_printf("ERROR: ee_s32 is not a 32b datatype!\n");
        retval++;
    }
    if (sizeof(ee_u32) != 4)
    {
        ee_printf("ERROR: ee_u32 is not a 32b datatype!\n");
        retval++;
    }
    if (sizeof(ee_ptr_int) != sizeof(int *))
    {
        ee_printf(
            "ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n");
        retval++;
    }
    if (retval > 0)
    {
        ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n");
    }
    return retval;
}

183
test/coremark/dist/coremark.h vendored Normal file
View File

@ -0,0 +1,183 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
/* Topic: Description
This file contains declarations of the various benchmark functions.
*/
/* Configuration: TOTAL_DATA_SIZE
        Define total size for data algorithms will operate on.
        May be predefined by the build; this is only the default.
        The value is parenthesized so that it expands as a single operand
        in any expression (e.g. x / TOTAL_DATA_SIZE).
*/
#ifndef TOTAL_DATA_SIZE
#define TOTAL_DATA_SIZE (2 * 1000)
#endif
/* Possible values of SEED_METHOD: how the run-time seeds are obtained
   (command line arguments, a port-supplied function, or volatile
   variables). */
#define SEED_ARG 0
#define SEED_FUNC 1
#define SEED_VOLATILE 2
/* Possible values of MEM_METHOD: where the benchmark data block lives.
   The values are also used as indexes into mem_name[] in core_main.c. */
#define MEM_STATIC 0
#define MEM_MALLOC 1
#define MEM_STACK 2
#include "core_portme.h"
#if HAS_STDIO
#include <stdio.h>
#endif
#if HAS_PRINTF
#define ee_printf printf
#endif
/* Actual benchmark execution in iterate */
void *iterate(void *pres);
/* Typedef: secs_ret
For machines that have floating point support, get number of seconds as
a double. Otherwise an unsigned int.
*/
#if HAS_FLOAT
typedef double secs_ret;
#else
typedef ee_u32 secs_ret;
#endif
#if MAIN_HAS_NORETURN
#define MAIN_RETURN_VAL
#define MAIN_RETURN_TYPE void
#else
#define MAIN_RETURN_VAL 0
#define MAIN_RETURN_TYPE int
#endif
void start_time(void);
void stop_time(void);
CORE_TICKS get_time(void);
secs_ret time_in_secs(CORE_TICKS ticks);
/* Misc useful functions */
ee_u16 crcu8(ee_u8 data, ee_u16 crc);
ee_u16 crc16(ee_s16 newval, ee_u16 crc);
ee_u16 crcu16(ee_u16 newval, ee_u16 crc);
ee_u16 crcu32(ee_u32 newval, ee_u16 crc);
ee_u8 check_data_types(void);
void * portable_malloc(ee_size_t size);
void portable_free(void *p);
ee_s32 parseval(char *valstring);
/* Algorithm IDS */
/* One bit per benchmark algorithm (list, matrix, state machine). */
#define ID_LIST (1 << 0)
#define ID_MATRIX (1 << 1)
#define ID_STATE (1 << 2)
/* OR of all algorithm bits defined above. */
#define ALL_ALGORITHMS_MASK (ID_LIST | ID_MATRIX | ID_STATE)
/* Number of ID_* bits defined above; keep in sync when adding one. */
#define NUM_ALGORITHMS 3
/* list data structures */
/* Payload of one list element: two signed 16-bit fields. */
typedef struct list_data_s
{
ee_s16 data16;
ee_s16 idx;
} list_data;
/* Singly linked list node: pointer to the next node plus a pointer to the
   node's payload (the payload is stored separately from the node). */
typedef struct list_head_s
{
struct list_head_s *next;
struct list_data_s *info;
} list_head;
/*matrix benchmark related stuff */
/* MATDAT_INT selects the matrix element types: with it set (the default
   here) elements are 16-bit signed integers and results 32-bit signed
   integers; otherwise the ee_f16 / ee_f32 types are used. */
#define MATDAT_INT 1
#if MATDAT_INT
typedef ee_s16 MATDAT;
typedef ee_s32 MATRES;
#else
typedef ee_f16 MATDAT;
typedef ee_f32 MATRES;
#endif
/* Parameters of the matrix benchmark: two MATDAT arrays (A, B), one MATRES
   array (C) and an integer N. */
/* NOTE(review): N is presumably the matrix dimension -- confirm against the
   matrix benchmark implementation. */
typedef struct MAT_PARAMS_S
{
int N;
MATDAT *A;
MATDAT *B;
MATRES *C;
} mat_params;
/* state machine related stuff */
/* List of all the possible states for the FSM */
/* States of the state-machine benchmark. CORE_START is explicitly 0 and
   the rest follow consecutively, so NUM_CORE_STATES (the last enumerator)
   equals the number of real states, 8. */
typedef enum CORE_STATE
{
CORE_START = 0,
CORE_INVALID,
CORE_S1,
CORE_S2,
CORE_INT,
CORE_FLOAT,
CORE_EXPONENT,
CORE_SCIENTIFIC,
NUM_CORE_STATES /* count of states above; not a state itself */
} core_state_e;
/* Helper structure to hold results */
/* Per-run benchmark context: the inputs that configure a run, followed by
   the CRC/error outputs it produces and a port-specific member. */
typedef struct RESULTS_S
{
/* inputs */
ee_s16 seed1; /* Initializing seed */
ee_s16 seed2; /* Initializing seed */
ee_s16 seed3; /* Initializing seed */
void * memblock[4]; /* Pointer to safe memory location */
ee_u32 size; /* Size of the data */
ee_u32 iterations; /* Number of iterations to execute */
ee_u32 execs; /* Bitmask of operations to execute */
struct list_head_s *list;
mat_params mat;
/* outputs */
ee_u16 crc;
ee_u16 crclist;
ee_u16 crcmatrix;
ee_u16 crcstate;
ee_s16 err;
/* Multithread specific */
core_portable port;
} core_results;
/* Multicore execution handling */
#if (MULTITHREAD > 1)
ee_u8 core_start_parallel(core_results *res);
ee_u8 core_stop_parallel(core_results *res);
#endif
/* list benchmark functions */
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed);
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx);
/* state benchmark functions */
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p);
ee_u16 core_bench_state(ee_u32 blksize,
ee_u8 *memblock,
ee_s16 seed1,
ee_s16 seed2,
ee_s16 step,
ee_u16 crc);
/* matrix benchmark functions */
ee_u32 core_init_matrix(ee_u32 blksize,
void * memblk,
ee_s32 seed,
mat_params *p);
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc);

6
test/dhrystone/Makefile Normal file
View File

@ -0,0 +1,6 @@
# Build description for the Dhrystone test application; the actual rules
# come from the shared ../common/src_only_app.mk included at the end.
# Sources: shared startup code plus the Dhrystone sources and local stubs.
SRCS := ../common/init.S src/dhrystone_main.c src/dhrystone.c src/util.c
APP := dhrystone
# -fno-inline keeps the procedures separate, in line with the
# "no procedure merging" ground rule stated in dhrystone.h.
CCFLAGS := -O3 -fno-inline
# NOTE(review): presumably a simulation cycle limit consumed by
# src_only_app.mk / the testbench -- confirm there.
MAX_CYCLES := 1000000
include ../common/src_only_app.mk

View File

@ -0,0 +1,29 @@
Copyright (c) 2012-2015, The Regents of the University of California (Regents).
All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the Regents nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
dhrystone.c, dhrystone_main.c and dhrystone.h were found at
https://github.com/riscv/riscv-tests. Note: the util files here are not the
ones found there. They're stub files I wrote for running against the Hazard5
CXXRTL testbench.

View File

@ -0,0 +1,181 @@
// See LICENSE for license details.
#pragma GCC optimize ("no-inline")
#include "dhrystone.h"
#ifndef REG
#define REG
/* REG becomes defined as empty */
/* i.e. no register variables */
#else
#undef REG
#define REG register
#endif
extern int Int_Glob;
extern char Ch_1_Glob;
/* Proc_6: stores an enumeration value derived from Enum_Val_Par through
 * Enum_Ref_Par.
 * The output is first set to Enum_Val_Par (or Ident_4 when Func_3 rejects
 * it), then overridden by the switch:
 *   Ident_1 -> Ident_1
 *   Ident_2 -> Ident_1 if Int_Glob > 100, else Ident_4
 *   Ident_3 -> Ident_2
 *   Ident_4 -> left as set before the switch
 *   Ident_5 -> Ident_3
 * K&R-style definition with no declared return type (implicit int); no
 * value is returned. */
Proc_6 (Enum_Val_Par, Enum_Ref_Par)
/*********************************/
/* executed once */
/* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
Enumeration Enum_Val_Par;
Enumeration *Enum_Ref_Par;
{
*Enum_Ref_Par = Enum_Val_Par;
if (! Func_3 (Enum_Val_Par))
/* then, not executed */
*Enum_Ref_Par = Ident_4;
switch (Enum_Val_Par)
{
case Ident_1:
*Enum_Ref_Par = Ident_1;
break;
case Ident_2:
if (Int_Glob > 100)
/* then */
*Enum_Ref_Par = Ident_1;
else *Enum_Ref_Par = Ident_4;
break;
case Ident_3: /* executed */
*Enum_Ref_Par = Ident_2;
break;
case Ident_4: break;
case Ident_5:
*Enum_Ref_Par = Ident_3;
break;
} /* switch */
} /* Proc_6 */
/* Proc_7: writes Int_1_Par_Val + 2 + Int_2_Par_Val through Int_Par_Ref.
 * K&R-style definition with no declared return type (implicit int); no
 * value is returned. */
Proc_7 (Int_1_Par_Val, Int_2_Par_Val, Int_Par_Ref)
/**********************************************/
/* executed three times */
/* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */
/* Int_Par_Ref becomes 7 */
/* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
/* Int_Par_Ref becomes 17 */
/* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
/* Int_Par_Ref becomes 18 */
One_Fifty Int_1_Par_Val;
One_Fifty Int_2_Par_Val;
One_Fifty *Int_Par_Ref;
{
One_Fifty Int_Loc;
Int_Loc = Int_1_Par_Val + 2;
*Int_Par_Ref = Int_2_Par_Val + Int_Loc;
} /* Proc_7 */
/* Proc_8: array-access workload. With Int_Loc = Int_1_Par_Val + 5 it
 * writes Arr_1_Par_Ref at indices Int_Loc, Int_Loc+1 and Int_Loc+30,
 * writes row Int_Loc of Arr_2_Par_Ref at columns Int_Loc and Int_Loc+1,
 * increments Arr_2_Par_Ref[Int_Loc][Int_Loc-1], writes
 * Arr_2_Par_Ref[Int_Loc+20][Int_Loc], and finally sets Int_Glob to 5.
 * No bounds checking is done; Arr_1_Dim / Arr_2_Dim have 50 entries per
 * dimension, so Int_1_Par_Val must keep Int_Loc+30 inside that range.
 * K&R-style definition with no declared return type (implicit int); no
 * value is returned. */
Proc_8 (Arr_1_Par_Ref, Arr_2_Par_Ref, Int_1_Par_Val, Int_2_Par_Val)
/*********************************************************************/
/* executed once */
/* Int_Par_Val_1 == 3 */
/* Int_Par_Val_2 == 7 */
Arr_1_Dim Arr_1_Par_Ref;
Arr_2_Dim Arr_2_Par_Ref;
int Int_1_Par_Val;
int Int_2_Par_Val;
{
REG One_Fifty Int_Index;
REG One_Fifty Int_Loc;
Int_Loc = Int_1_Par_Val + 5;
Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val;
Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc];
Arr_1_Par_Ref [Int_Loc+30] = Int_Loc;
for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index)
Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc;
/* accumulates across calls: this element grows by one per invocation */
Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1;
Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc];
Int_Glob = 5;
} /* Proc_8 */
/* Func_1: compares two characters.
 * Returns Ident_1 when they differ. When they are equal it stores the
 * character in the global Ch_1_Glob and returns Ident_2. */
Enumeration Func_1 (Ch_1_Par_Val, Ch_2_Par_Val)
/*************************************************/
/* executed three times */
/* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */
/* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */
/* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */
Capital_Letter Ch_1_Par_Val;
Capital_Letter Ch_2_Par_Val;
{
Capital_Letter Ch_1_Loc;
Capital_Letter Ch_2_Loc;
Ch_1_Loc = Ch_1_Par_Val;
Ch_2_Loc = Ch_1_Loc;
if (Ch_2_Loc != Ch_2_Par_Val)
/* then, executed */
return (Ident_1);
else /* not executed */
{
Ch_1_Glob = Ch_1_Loc;
return (Ident_2);
}
} /* Func_1 */
/* Func_2: string-comparison workload.
 * First loops, comparing Str_1_Par_Ref[Int_Loc] with
 * Str_2_Par_Ref[Int_Loc+1] via Func_1, until Func_1 reports a difference
 * (Ident_1); that is the only path that advances Int_Loc and assigns
 * Ch_Loc, so the loop does not terminate if the two characters are equal.
 * Afterwards returns true if Ch_Loc is 'R', or if strcmp says
 * Str_1_Par_Ref sorts after Str_2_Par_Ref (in which case Int_Glob is also
 * updated); otherwise returns false. */
Boolean Func_2 (Str_1_Par_Ref, Str_2_Par_Ref)
/*************************************************/
/* executed once */
/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
Str_30 Str_1_Par_Ref;
Str_30 Str_2_Par_Ref;
{
REG One_Thirty Int_Loc;
Capital_Letter Ch_Loc;
Int_Loc = 2;
while (Int_Loc <= 2) /* loop body executed once */
if (Func_1 (Str_1_Par_Ref[Int_Loc],
Str_2_Par_Ref[Int_Loc+1]) == Ident_1)
/* then, executed */
{
Ch_Loc = 'A';
Int_Loc += 1;
} /* if, while */
if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
/* then, not executed */
Int_Loc = 7;
if (Ch_Loc == 'R')
/* then, not executed */
return (true);
else /* executed */
{
if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
/* then, not executed */
{
Int_Loc += 7;
Int_Glob = Int_Loc;
return (true);
}
else /* executed */
return (false);
} /* if Ch_Loc */
} /* Func_2 */
/* Func_3: returns true when Enum_Par_Val is Ident_3, false otherwise. */
Boolean Func_3 (Enum_Par_Val)
/***************************/
/* executed once */
/* Enum_Par_Val == Ident_3 */
Enumeration Enum_Par_Val;
{
Enumeration Enum_Loc;
Enum_Loc = Enum_Par_Val;
if (Enum_Loc == Ident_3)
/* then, executed */
return (true);
else /* not executed */
return (false);
} /* Func_3 */

View File

@ -0,0 +1,477 @@
// See LICENSE for license details.
#ifndef _DHRYSTONE_H
#define _DHRYSTONE_H
/****************** "DHRYSTONE" Benchmark Program ***************************/
#define Version "C, Version 2.2"
/* File: dhry_1.c (part 2 of 3)
* Author: Reinhold P. Weicker
* Siemens Nixdorf, Paderborn/Germany
* weicker@specbench.org
* Date: May 25, 1988
* Modified: Steven Pemberton, CWI, Amsterdam; Steven.Pemberton@cwi.nl
* Date: October, 1993; March 1995
* Included both files into one source, that gets compiled
* in two passes. Made program auto-compiling, and auto-running,
* and generally made it much easier to use.
*
* Original Version (in Ada) published in
* "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
* pp. 1013 - 1030, together with the statistics
* on which the distribution of statements etc. is based.
*
* In this C version, the following C library functions are used:
* - strcpy, strcmp (inside the measurement loop)
* - printf, scanf (outside the measurement loop)
* In addition, Berkeley UNIX system calls "times ()" or "time ()"
* are used for execution time measurement. For measurements
* on other systems, these calls have to be changed.
*
* Collection of Results:
* Reinhold Weicker (address see above) and
*
* Rick Richardson
* PC Research. Inc.
* 94 Apple Orchard Drive
* Tinton Falls, NJ 07724
* Phone: (201) 389-8963 (9-17 EST)
* Usenet: ...!uunet!pcrat!rick
*
* Please send results to Rick Richardson and/or Reinhold Weicker.
* Complete information should be given on hardware and software used.
* Hardware information includes: Machine type, CPU, type and size
* of caches; for microprocessors: clock frequency, memory speed
* (number of wait states).
* Software information includes: Compiler (and runtime library)
* manufacturer and version, compilation switches, OS version.
* The Operating System version may give an indication about the compiler;
* Dhrystone itself performs no OS calls in the measurement loop.
*
* The complete output generated by the program should be mailed
* such that at least some checks for correctness can be made.
*
***************************************************************************
*
* Defines: The following "Defines" are possible:
* -DREG (default: Not defined)
* As an approximation to what an average C programmer
* might do, causes the "register" storage class to be applied
* - for local variables, if they are used (dynamically)
* five or more times
* - for parameters if they are used (dynamically)
* six or more times
* Note that an optimal "register" strategy is
* compiler-dependent, and that "register" declarations
* do not necessarily lead to faster execution.
* -DNOSTRUCTASSIGN (default: Not defined)
* Define if the C compiler does not support
* assignment of structures.
* -DNOENUMS (default: Not defined)
* Define if the C compiler does not support
* enumeration types.
* -DTIMES (default)
* -DTIME
* The "times" function of UNIX (returning process times)
* or the "time" function (returning wallclock time)
* is used for measurement.
* For single user machines, "time ()" is adequate. For
* multi-user machines where you cannot get single-user
* access, use the "times ()" function. If you have
* neither, use a stopwatch in the dead of night.
* "printf"s are provided marking the points "Start Timer"
* and "Stop Timer". DO NOT use the UNIX "time(1)"
* command, as this will measure the total time to
* run this program, which will (erroneously) include
* the time to allocate storage (malloc) and to perform
* the initialization.
* -DHZ=nnn
* In Berkeley UNIX, the function "times" returns process
* time in 1/HZ seconds, with HZ = 60 for most systems.
* CHECK YOUR SYSTEM DESCRIPTION BEFORE YOU JUST APPLY
* A VALUE.
*
***************************************************************************
*
* History: Version C/2.1 was made for two reasons:
*
* 1) There was an obvious need for a common C version of
* Dhrystone, since C is at present the most popular system
* programming language for the class of processors
* (microcomputers, minicomputers) where Dhrystone is used most.
* There should be, as far as possible, only one C version of
* Dhrystone such that results can be compared without
* restrictions. In the past, the C versions distributed
* by Rick Richardson (Version 1.1) and by Reinhold Weicker
* had small (though not significant) differences.
*
* 2) As far as it is possible without changes to the Dhrystone
* statistics, optimizing compilers should be prevented from
* removing significant statements.
*
* This C version has been developed in cooperation with
* Rick Richardson (Tinton Falls, NJ), it incorporates many
* ideas from the "Version 1.1" distributed previously by
* him over the UNIX network Usenet.
* I also thank Chaim Benedelac (National Semiconductor),
* David Ditzel (SUN), Earl Killian and John Mashey (MIPS),
* Alan Smith and Rafael Saavedra-Barrera (UC at Berkeley)
* for their help with comments on earlier versions of the
* benchmark.
*
* Changes: In the initialization part, this version follows mostly
* Rick Richardson's version distributed via Usenet, not the
* version distributed earlier via floppy disk by Reinhold Weicker.
* As a concession to older compilers, names have been made
* unique within the first 8 characters.
* Inside the measurement loop, this version follows the
* version previously distributed by Reinhold Weicker.
*
* At several places in the benchmark, code has been added,
* but within the measurement loop only in branches that
* are not executed. The intention is that optimizing compilers
* should be prevented from moving code out of the measurement
* loop, or from removing code altogether. Since the statements
* that are executed within the measurement loop have NOT been
* changed, the numbers defining the "Dhrystone distribution"
* (distribution of statements, operand types and locality)
* still hold. Except for sophisticated optimizing compilers,
* execution times for this version should be the same as
* for previous versions.
*
* Since it has proven difficult to subtract the time for the
* measurement loop overhead in a correct way, the loop check
* has been made a part of the benchmark. This does have
* an impact - though a very minor one - on the distribution
* statistics which have been updated for this version.
*
* All changes within the measurement loop are described
* and discussed in the companion paper "Rationale for
* Dhrystone version 2".
*
* Because of the self-imposed limitation that the order and
* distribution of the executed statements should not be
* changed, there are still cases where optimizing compilers
* may not generate code for some statements. To a certain
* degree, this is unavoidable for small synthetic benchmarks.
* Users of the benchmark are advised to check code listings
* whether code is generated for all statements of Dhrystone.
*
* Version 2.1 is identical to version 2.0 distributed via
* the UNIX network Usenet in March 1988 except that it corrects
* some minor deficiencies that were found by users of version 2.0.
* The only change within the measurement loop is that a
* non-executed "else" part was added to the "if" statement in
* Func_3, and a non-executed "else" part removed from Proc_3.
*
* Version C/2.2, Steven Pemberton, October 1993
* Functionally, identical to version 2.2; the changes are in
* how you compile and use it:
* - Everything is in one file now, but compiled in 2 passes
* - Compile (and run) by running the file through the shell: 'sh dhry.c'
* - Uses the system definition of HZ if one can be found
* - HZ must be defined, otherwise it won't compile (no defaults here)
* - The (uninteresting) output is printed to stderr (dhry2 > /dev/null)
* - The number of loops is passed as a parameter, rather than read
* (dhry2 500000)
* - If the number of loops is insufficient to get a good result,
* it repeats it with loops*10 until it is enough (rather than just
* stopping)
* - Output says which sort of clock it is using, and the HZ value
* - You can use -DREG instead of the -DREG=register of previous versions
* - Some stylistic cleanups.
*
***************************************************************************
*
* Compilation model and measurement (IMPORTANT):
*
* The following "ground rules" apply for measurements:
* - Separate compilation
* - No procedure merging
* - Otherwise, compiler optimizations are allowed but should be indicated
* - Default results are those without register declarations
* See the companion paper "Rationale for Dhrystone Version 2" for a more
* detailed discussion of these ground rules.
*
* For 16-Bit processors (e.g. 80186, 80286), times for all compilation
* models ("small", "medium", "large" etc.) should be given if possible,
* together with a definition of these models for the compiler system used.
*
**************************************************************************
*
* Dhrystone (C version) statistics:
*
* [Comment from the first distribution, updated for version 2.
* Note that because of language differences, the numbers are slightly
* different from the Ada version.]
*
* The following program contains statements of a high level programming
* language (here: C) in a distribution considered representative:
*
* assignments 52 (51.0 %)
* control statements 33 (32.4 %)
* procedure, function calls 17 (16.7 %)
*
* 103 statements are dynamically executed. The program is balanced with
* respect to the three aspects:
*
* - statement type
* - operand type
* - operand locality
* operand global, local, parameter, or constant.
*
* The combination of these three aspects is balanced only approximately.
*
* 1. Statement Type:
* ----------------- number
*
* V1 = V2 9
* (incl. V1 = F(..)
* V = Constant 12
* Assignment, 7
* with array element
* Assignment, 6
* with record component
* --
* 34 34
*
* X = Y +|-|"&&"|"|" Z 5
* X = Y +|-|"==" Constant 6
* X = X +|- 1 3
* X = Y *|/ Z 2
* X = Expression, 1
* two operators
* X = Expression, 1
* three operators
* --
* 18 18
*
* if .... 14
* with "else" 7
* without "else" 7
* executed 3
* not executed 4
* for ... 7 | counted every time
* while ... 4 | the loop condition
* do ... while 1 | is evaluated
* switch ... 1
* break 1
* declaration with 1
* initialization
* --
* 34 34
*
* P (...) procedure call 11
* user procedure 10
* library procedure 1
* X = F (...)
* function call 6
* user function 5
* library function 1
* --
* 17 17
* ---
* 103
*
* The average number of parameters in procedure or function calls
* is 1.82 (not counting the function values as implicit parameters).
*
* 2. Operators
* ------------
* number approximate
* percentage
*
* Arithmetic 32 50.8
*
* + 21 33.3
* - 7 11.1
* * 3 4.8
* / (int div) 1 1.6
*
* Comparison 27 42.8
*
* == 9 14.3
* /= 4 6.3
* > 1 1.6
* < 3 4.8
* >= 1 1.6
* <= 9 14.3
*
* Logic 4 6.3
*
* && (AND-THEN) 1 1.6
* | (OR) 1 1.6
* ! (NOT) 2 3.2
*
* -- -----
* 63 100.1
*
*
* 3. Operand Type (counted once per operand reference):
* ---------------
* number approximate
* percentage
*
* Integer 175 72.3 %
* Character 45 18.6 %
* Pointer 12 5.0 %
* String30 6 2.5 %
* Array 2 0.8 %
* Record 2 0.8 %
* --- -------
* 242 100.0 %
*
* When there is an access path leading to the final operand (e.g. a record
* component), only the final data type on the access path is counted.
*
*
* 4. Operand Locality:
* -------------------
* number approximate
* percentage
*
* local variable 114 47.1 %
* global variable 22 9.1 %
* parameter 45 18.6 %
* value 23 9.5 %
* reference 22 9.1 %
* function result 6 2.5 %
* constant 55 22.7 %
* --- -------
* 242 100.0 %
*
* The program does not compute anything meaningful, but it is syntactically
* and semantically correct. All variables have a value assigned to them
* before they are used as a source operand.
*
* There has been no explicit effort to account for the effects of a
* cache, or to balance the use of long or short displacements for code or
* data.
*
***************************************************************************
*/
/* Compiler and system dependent definitions: */
/* variables for time measurement: */
#ifdef TIME
#define CLOCK_TYPE "time()"
#undef HZ
#define HZ (1) /* time() returns time in seconds */
extern long time(); /* see library function "time" */
#define Too_Small_Time 2 /* Measurements should last at least 2 seconds */
#define Start_Timer() Begin_Time = time ( (long *) 0)
#define Stop_Timer() End_Time = time ( (long *) 0)
#else
#ifdef MSC_CLOCK /* Use Microsoft C hi-res clock */
#undef HZ
#undef TIMES
#include <time.h>
#define HZ CLK_TCK
#define CLOCK_TYPE "MSC clock()"
extern clock_t clock();
#define Too_Small_Time (2*HZ)
#define Start_Timer() Begin_Time = clock()
#define Stop_Timer() End_Time = clock()
#elif defined(__riscv)
/* RISC-V timing: Begin_Time / End_Time are raw reads of the mcycle CSR
   through read_csr() (defined in util.h), so measured times are counted
   in clock cycles and converted using HZ. */
/* NOTE(review): HZ is fixed at 1000000, i.e. the reported per-second
   figures assume a 1 MHz clock -- confirm this is the intended scaling. */
#define HZ 1000000
#define Too_Small_Time 1
/* NOTE(review): the label says rdcycle() but the macros below read mcycle. */
#define CLOCK_TYPE "rdcycle()"
#define Start_Timer() Begin_Time = read_csr(mcycle)
#define Stop_Timer() End_Time = read_csr(mcycle)
#else
/* Use times(2) time function unless */
/* explicitly defined otherwise */
#define CLOCK_TYPE "times()"
#include <sys/types.h>
#include <sys/times.h>
/* HZ is the tick rate of times(). If the build did not supply -DHZ=nnn,
   try to pick up the system definition from <sys/param.h>; if HZ is still
   undefined after that, stop with a clear diagnostic. A user-supplied
   -DHZ is accepted as-is (previously it was rejected by this check). */
#ifndef HZ /* Added by SP 900619 */
#include <sys/param.h> /* If your system doesn't have this, use -DHZ=xxx */
#endif /* HZ */
#ifndef HZ
#error "You must define HZ (e.g. -DHZ=60)"
#endif /* HZ */
#ifndef PASS2
struct tms time_info;
#endif
/*extern int times ();*/
/* see library function "times" */
#define Too_Small_Time (2*HZ)
/* Measurements should last at least about 2 seconds */
#define Start_Timer() times(&time_info); Begin_Time=(long)time_info.tms_utime
#define Stop_Timer() times(&time_info); End_Time = (long)time_info.tms_utime
#endif /* MSC_CLOCK */
#endif /* TIME */
#define Mic_secs_Per_Second 1000000
#define NUMBER_OF_RUNS 500 /* Default number of runs */
#ifdef NOSTRUCTASSIGN
#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
#else
#define structassign(d, s) d = s
#endif
/* Enumeration: the five-valued enumeration used throughout the benchmark.
   For compilers without enum support it degrades to int plus #define'd
   constants with the same values (Ident_1 == 0 ... Ident_5 == 4).
   The header comment documents the switch as -DNOENUMS while this test
   historically checked NOENUM, so both spellings are accepted. */
#if defined(NOENUM) || defined(NOENUMS)
#define Ident_1 0
#define Ident_2 1
#define Ident_3 2
#define Ident_4 3
#define Ident_5 4
typedef int Enumeration;
#else
typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
Enumeration;
#endif
/* for boolean and enumeration types in Ada, Pascal */
/* General definitions: */
#include <stdio.h>
#include <string.h>
/* for strcpy, strcmp */
#define Null 0
/* Value of a Null pointer */
#define true 1
#define false 0
/* Scalar aliases. The names mirror subrange/character types of the Ada
   original; in this C version they are plain int / char with no range
   enforcement. */
typedef int One_Thirty;
typedef int One_Fifty;
typedef char Capital_Letter;
typedef int Boolean;
/* 31-byte character buffer, used for the benchmark's string operands. */
typedef char Str_30 [31];
/* Integer arrays of 50 and 50x50 elements. */
typedef int Arr_1_Dim [50];
typedef int Arr_2_Dim [50] [50];
/* Variant record: a link to another record, a discriminant, and a union of
   three alternative layouts. The code visible in this benchmark accesses
   only var_1. */
typedef struct record
{
struct record *Ptr_Comp;
Enumeration Discr;
union {
struct {
Enumeration Enum_Comp;
int Int_Comp;
char Str_Comp [31];
} var_1;
struct {
Enumeration E_Comp_2;
char Str_2_Comp [31];
} var_2;
struct {
char Ch_1_Comp;
char Ch_2_Comp;
} var_3;
} variant;
} Rec_Type, *Rec_Pointer;
#endif

View File

@ -0,0 +1,332 @@
// See LICENSE for license details.
//**************************************************************************
// Dhrystone benchmark
//--------------------------------------------------------------------------
//
// This is the classic Dhrystone synthetic integer benchmark.
//
#pragma GCC optimize ("no-inline")
#include "dhrystone.h"
void debug_printf(const char* str, ...);
#include "util.h"
#include <alloca.h>
/* Global Variables: */
Rec_Pointer Ptr_Glob,
Next_Ptr_Glob;
int Int_Glob;
Boolean Bool_Glob;
char Ch_1_Glob,
Ch_2_Glob;
int Arr_1_Glob [50];
int Arr_2_Glob [50] [50];
Enumeration Func_1 ();
/* forward declaration necessary since Enumeration may not simply be int */
#ifndef REG
Boolean Reg = false;
#define REG
/* REG becomes defined as empty */
/* i.e. no register variables */
#else
Boolean Reg = true;
#undef REG
#define REG register
#endif
Boolean Done;
long Begin_Time,
End_Time,
User_Time;
long Microseconds,
Dhrystones_Per_Second;
/* end of variables for time measurement */
/* main: Dhrystone driver.
 *
 * Allocates the two global records on the stack with alloca, initializes
 * the benchmark state, then repeatedly runs the measurement loop
 * (Number_Of_Runs iterations between Start_Timer and Stop_Timer),
 * multiplying the run count by 10 until the measured time reaches
 * Too_Small_Time. Finally prints the end state next to the expected
 * values, followed by the time per run and the Dhrystones-per-second
 * figure. All output goes through debug_printf.
 *
 * argc and argv are not used; the run count always starts at
 * NUMBER_OF_RUNS. Returns 0.
 *
 * The two Ptr_Comp values are cast to long and therefore printed with
 * %ld, so that the conversion specifier matches the argument type.
 *
 * NOTE(review): the Microseconds and Dhrystones_Per_Second expressions are
 * evaluated in int/long arithmetic and can overflow for large run counts
 * or cycle counts if long is 32 bits on the target -- confirm the ranges.
 */
int main (int argc, char** argv)
/*****/
/* main program, corresponds to procedures */
/* Main and Proc_0 in the Ada version */
{
  One_Fifty Int_1_Loc;
  REG One_Fifty Int_2_Loc;
  One_Fifty Int_3_Loc;
  REG char Ch_Index;
  Enumeration Enum_Loc;
  Str_30 Str_1_Loc;
  Str_30 Str_2_Loc;
  REG int Run_Index;
  REG int Number_Of_Runs;
  /* Arguments */
  Number_Of_Runs = NUMBER_OF_RUNS;
  /* Initializations */
  /* Both records live in main's stack frame for the whole run. */
  Next_Ptr_Glob = (Rec_Pointer) alloca (sizeof (Rec_Type));
  Ptr_Glob = (Rec_Pointer) alloca (sizeof (Rec_Type));
  Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
  Ptr_Glob->Discr = Ident_1;
  Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
  Ptr_Glob->variant.var_1.Int_Comp = 40;
  strcpy (Ptr_Glob->variant.var_1.Str_Comp,
          "DHRYSTONE PROGRAM, SOME STRING");
  strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
  Arr_2_Glob [8][7] = 10;
  /* Was missing in published program. Without this statement, */
  /* Arr_2_Glob [8][7] would have an undefined value. */
  /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
  /* overflow may occur for this array element. */
  debug_printf("\n");
  debug_printf("Dhrystone Benchmark, Version %s\n", Version);
  if (Reg)
  {
    debug_printf("Program compiled with 'register' attribute\n");
  }
  else
  {
    debug_printf("Program compiled without 'register' attribute\n");
  }
  debug_printf("Using %s, HZ=%d\n", CLOCK_TYPE, HZ);
  debug_printf("\n");
  Done = false;
  while (!Done) {
    debug_printf("Trying %d runs through Dhrystone:\n", Number_Of_Runs);
    /***************/
    /* Start timer */
    /***************/
    setStats(1);
    Start_Timer();
    for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
    {
      Proc_5();
      Proc_4();
      /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
      Int_1_Loc = 2;
      Int_2_Loc = 3;
      strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
      Enum_Loc = Ident_2;
      Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
      /* Bool_Glob == 1 */
      while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
      {
        Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
        /* Int_3_Loc == 7 */
        Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
        /* Int_3_Loc == 7 */
        Int_1_Loc += 1;
      } /* while */
      /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
      Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
      /* Int_Glob == 5 */
      Proc_1 (Ptr_Glob);
      for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
      /* loop body executed twice */
      {
        if (Enum_Loc == Func_1 (Ch_Index, 'C'))
        /* then, not executed */
        {
          Proc_6 (Ident_1, &Enum_Loc);
          strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
          Int_2_Loc = Run_Index;
          Int_Glob = Run_Index;
        }
      }
      /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
      Int_2_Loc = Int_2_Loc * Int_1_Loc;
      Int_1_Loc = Int_2_Loc / Int_3_Loc;
      Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
      /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
      Proc_2 (&Int_1_Loc);
      /* Int_1_Loc == 5 */
    } /* loop "for Run_Index" */
    /**************/
    /* Stop timer */
    /**************/
    Stop_Timer();
    setStats(0);
    User_Time = End_Time - Begin_Time;
    if (User_Time < Too_Small_Time)
    {
      debug_printf("Measured time too small to obtain meaningful results\n");
      Number_Of_Runs = Number_Of_Runs * 10;
      debug_printf("\n");
    } else Done = true;
  }
  debug_printf("Final values of the variables used in the benchmark:\n");
  debug_printf("\n");
  debug_printf("Int_Glob: %d\n", Int_Glob);
  debug_printf(" should be: %d\n", 5);
  debug_printf("Bool_Glob: %d\n", Bool_Glob);
  debug_printf(" should be: %d\n", 1);
  debug_printf("Ch_1_Glob: %c\n", Ch_1_Glob);
  debug_printf(" should be: %c\n", 'A');
  debug_printf("Ch_2_Glob: %c\n", Ch_2_Glob);
  debug_printf(" should be: %c\n", 'B');
  debug_printf("Arr_1_Glob[8]: %d\n", Arr_1_Glob[8]);
  debug_printf(" should be: %d\n", 7);
  debug_printf("Arr_2_Glob[8][7]: %d\n", Arr_2_Glob[8][7]);
  debug_printf(" should be: Number_Of_Runs + 10\n");
  debug_printf("Ptr_Glob->\n");
  /* argument is cast to long, so the conversion must be %ld */
  debug_printf(" Ptr_Comp: %ld\n", (long) Ptr_Glob->Ptr_Comp);
  debug_printf(" should be: (implementation-dependent)\n");
  debug_printf(" Discr: %d\n", Ptr_Glob->Discr);
  debug_printf(" should be: %d\n", 0);
  debug_printf(" Enum_Comp: %d\n", Ptr_Glob->variant.var_1.Enum_Comp);
  debug_printf(" should be: %d\n", 2);
  debug_printf(" Int_Comp: %d\n", Ptr_Glob->variant.var_1.Int_Comp);
  debug_printf(" should be: %d\n", 17);
  debug_printf(" Str_Comp: %s\n", Ptr_Glob->variant.var_1.Str_Comp);
  debug_printf(" should be: DHRYSTONE PROGRAM, SOME STRING\n");
  debug_printf("Next_Ptr_Glob->\n");
  /* argument is cast to long, so the conversion must be %ld */
  debug_printf(" Ptr_Comp: %ld\n", (long) Next_Ptr_Glob->Ptr_Comp);
  debug_printf(" should be: (implementation-dependent), same as above\n");
  debug_printf(" Discr: %d\n", Next_Ptr_Glob->Discr);
  debug_printf(" should be: %d\n", 0);
  debug_printf(" Enum_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
  debug_printf(" should be: %d\n", 1);
  debug_printf(" Int_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
  debug_printf(" should be: %d\n", 18);
  debug_printf(" Str_Comp: %s\n",
               Next_Ptr_Glob->variant.var_1.Str_Comp);
  debug_printf(" should be: DHRYSTONE PROGRAM, SOME STRING\n");
  debug_printf("Int_1_Loc: %d\n", Int_1_Loc);
  debug_printf(" should be: %d\n", 5);
  debug_printf("Int_2_Loc: %d\n", Int_2_Loc);
  debug_printf(" should be: %d\n", 13);
  debug_printf("Int_3_Loc: %d\n", Int_3_Loc);
  debug_printf(" should be: %d\n", 7);
  debug_printf("Enum_Loc: %d\n", Enum_Loc);
  debug_printf(" should be: %d\n", 1);
  debug_printf("Str_1_Loc: %s\n", Str_1_Loc);
  debug_printf(" should be: DHRYSTONE PROGRAM, 1'ST STRING\n");
  debug_printf("Str_2_Loc: %s\n", Str_2_Loc);
  debug_printf(" should be: DHRYSTONE PROGRAM, 2'ND STRING\n");
  debug_printf("\n");
  /* User_Time is in timer ticks; HZ ticks make one second. */
  Microseconds = ((User_Time / Number_Of_Runs) * Mic_secs_Per_Second) / HZ;
  Dhrystones_Per_Second = (HZ * Number_Of_Runs) / User_Time;
  debug_printf("Microseconds for one run through Dhrystone: %ld\n", Microseconds);
  debug_printf("Dhrystones per Second: %ld\n", Dhrystones_Per_Second);
  return 0;
}
/* Proc_1: record-manipulation workload.
 * Copies *Ptr_Glob into the record that Ptr_Val_Par links to, sets
 * Int_Comp of the argument record to 5 and propagates it to the linked
 * record, then calls Proc_3 on the linked record's Ptr_Comp. If the linked
 * record's discriminant is Ident_1 it updates that record further via
 * Proc_6 and Proc_7; otherwise it copies the linked record back over
 * *Ptr_Val_Par.
 * K&R-style definition with no declared return type (implicit int); no
 * value is returned. */
Proc_1 (Ptr_Val_Par)
/******************/
REG Rec_Pointer Ptr_Val_Par;
/* executed once */
{
REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
/* == Ptr_Glob_Next */
/* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
/* corresponds to "rename" in Ada, "with" in Pascal */
structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob);
Ptr_Val_Par->variant.var_1.Int_Comp = 5;
Next_Record->variant.var_1.Int_Comp
= Ptr_Val_Par->variant.var_1.Int_Comp;
Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
Proc_3 (&Next_Record->Ptr_Comp);
/* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
== Ptr_Glob->Ptr_Comp */
if (Next_Record->Discr == Ident_1)
/* then, executed */
{
Next_Record->variant.var_1.Int_Comp = 6;
Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp,
&Next_Record->variant.var_1.Enum_Comp);
Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
Proc_7 (Next_Record->variant.var_1.Int_Comp, 10,
&Next_Record->variant.var_1.Int_Comp);
}
else /* not executed */
structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
} /* Proc_1 */
/* Proc_2: when Ch_1_Glob is 'A', replaces *Int_Par_Ref with
 * (*Int_Par_Ref + 10 - 1) - Int_Glob and leaves the do/while loop.
 * The loop exits only once Enum_Loc has been set to Ident_1, which happens
 * only in the Ch_1_Glob == 'A' branch; if Ch_1_Glob is anything else the
 * loop condition reads Enum_Loc before it has been assigned.
 * K&R-style definition with no declared return type (implicit int); no
 * value is returned. */
Proc_2 (Int_Par_Ref)
/******************/
/* executed once */
/* *Int_Par_Ref == 1, becomes 4 */
One_Fifty *Int_Par_Ref;
{
One_Fifty Int_Loc;
Enumeration Enum_Loc;
Int_Loc = *Int_Par_Ref + 10;
do /* executed once */
if (Ch_1_Glob == 'A')
/* then, executed */
{
Int_Loc -= 1;
*Int_Par_Ref = Int_Loc - Int_Glob;
Enum_Loc = Ident_1;
} /* if */
while (Enum_Loc != Ident_1); /* true */
} /* Proc_2 */
/* Proc_3: if Ptr_Glob is non-null, stores Ptr_Glob->Ptr_Comp through
 * Ptr_Ref_Par; then calls Proc_7 to set Ptr_Glob's Int_Comp to
 * 10 + 2 + Int_Glob.
 * Note that the Proc_7 call is outside the null check, so Ptr_Glob is
 * dereferenced unconditionally.
 * K&R-style definition with no declared return type (implicit int); no
 * value is returned. */
Proc_3 (Ptr_Ref_Par)
/******************/
/* executed once */
/* Ptr_Ref_Par becomes Ptr_Glob */
Rec_Pointer *Ptr_Ref_Par;
{
if (Ptr_Glob != Null)
/* then, executed */
*Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
} /* Proc_3 */
/* Proc_4: ORs (Ch_1_Glob == 'A') into Bool_Glob and sets Ch_2_Glob to 'B'.
 * K&R-style definition with no declared return type (implicit int); no
 * value is returned. */
Proc_4 () /* without parameters */
/*******/
/* executed once */
{
Boolean Bool_Loc;
Bool_Loc = Ch_1_Glob == 'A';
Bool_Glob = Bool_Loc | Bool_Glob;
Ch_2_Glob = 'B';
} /* Proc_4 */
/* Proc_5: resets the globals Ch_1_Glob to 'A' and Bool_Glob to false.
 * K&R-style definition with no declared return type (implicit int); no
 * value is returned. */
Proc_5 () /* without parameters */
/*******/
/* executed once */
{
Ch_1_Glob = 'A';
Bool_Glob = false;
} /* Proc_5 */

15
test/dhrystone/src/util.c Normal file
View File

@ -0,0 +1,15 @@
#include "util.h"
#include <stdarg.h>
#include <stdio.h>
#include "tb_cxxrtl_io.h"
#define PRINTF_BUF_SIZE 256
/* debug_printf: printf-style output routed through the testbench.
 * Formats into a fixed PRINTF_BUF_SIZE-byte stack buffer with vsnprintf
 * (longer output is truncated to fit) and hands the resulting string to
 * tb_puts. */
void debug_printf(const char* fmt, ...) {
	char text[PRINTF_BUF_SIZE];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(text, sizeof(text), fmt, ap);
	va_end(ap);

	tb_puts(text);
}

14
test/dhrystone/src/util.h Normal file
View File

@ -0,0 +1,14 @@
#ifndef _UTIL_H
#define _UTIL_H
#include <stdint.h>
/* setStats(x) expands to nothing here, so calls to it compile away. */
#define setStats(x)
/*
 * read_csr(csrname): evaluates to the uint32_t value read from the named CSR.
 * Implemented as a GNU statement expression around a single `csrr`
 * instruction; csrname is stringified straight into the assembly text, so it
 * must be a name (or number) the assembler accepts as a CSR.
 */
#define read_csr(csrname) ({ \
uint32_t __csr_tmp_u32; \
__asm__ volatile ("csrr %0, " #csrname : "=r" (__csr_tmp_u32)); \
__csr_tmp_u32; \
})
#endif

4
test/hellow/Makefile Normal file
View File

@ -0,0 +1,4 @@
SRCS := ../common/init.S main.c
APP := hellow
include ../common/src_only_app.mk

6
test/hellow/main.c Normal file
View File

@ -0,0 +1,6 @@
#include "tb_cxxrtl_io.h"
/*
 * Smoke test: prints a greeting through the testbench and then asks the
 * testbench to exit with code 123.
 * NOTE(review): presumably tb_exit() does not return, which is why main is
 * declared void with no return statement -- confirm against tb_cxxrtl_io.h.
 */
void main() {
tb_puts("Hello world from Hazard5 + CXXRTL!\n");
tb_exit(123);
}

1
test/riscv-compliance/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
tmp

View File

@ -0,0 +1,19 @@
# Builds one riscv-compliance test, runs it on the simulator and compares the
# dumped result region with the reference output.
# Override TEST / TEST_ARCH / BIN_ARCH on the command line to pick another test.
TEST = I-ADD-01
TEST_ARCH = rv32i
BIN_ARCH = rv32i
# Simulator executable and cross-toolchain prefix; both are machine-specific.
SIM_EXEC = ../tb_cxxrtl/tb
CROSS_PREFIX = /opt/riscv/bin/riscv32-unknown-elf-
# Derived names: output basename, test source and reference vector.
TEST_BIN_NAME := $(TEST_ARCH)-$(TEST)-on-$(BIN_ARCH)
TEST_SRC := riscv-compliance/riscv-test-suite/$(TEST_ARCH)/src/$(TEST).S
TEST_VEC := riscv-compliance/riscv-test-suite/$(TEST_ARCH)/references/$(TEST).reference_output
.PHONY: all
# Steps: link with memmap.ld, disassemble, flatten to a raw binary, simulate
# while dumping 0x10000..0x10100 (the start of the RESULT region defined in
# memmap.ld), then compare the log with the reference vector.
# NOTE(review): the simulator's exit status is hidden by the pipe into tee.
all:
mkdir -p tmp
$(CROSS_PREFIX)gcc -I include -T memmap.ld -nostartfiles -march=$(BIN_ARCH) $(TEST_SRC) -o tmp/$(TEST_BIN_NAME).elf
$(CROSS_PREFIX)objdump -d tmp/$(TEST_BIN_NAME).elf > tmp/$(TEST_BIN_NAME).dis
$(CROSS_PREFIX)objcopy -O binary tmp/$(TEST_BIN_NAME).elf tmp/$(TEST_BIN_NAME).bin
$(SIM_EXEC) tmp/$(TEST_BIN_NAME).bin --dump 0x10000 0x10100 | tee tmp/$(TEST_BIN_NAME).log
./compare_testvec tmp/$(TEST_BIN_NAME).log $(TEST_VEC)

View File

@ -0,0 +1,40 @@
#!/usr/bin/env python3
import sys
def parse_model_dump(lines):
	"""Extract the dumped memory words from a simulator log.

	Everything before the first line starting with "Dumping memory" is
	ignored. After that, each line is read as space-separated hex bytes until
	the first token that is not valid hex (the bytes already read from that
	line are kept). The bytes are packed little-endian into 32-bit words; a
	trailing partial word is dropped.
	"""
	model_bytes = []
	in_testdata = False
	for l in lines:
		if l.startswith("Dumping memory"):
			in_testdata = True
			continue
		if not in_testdata:
			continue
		try:
			for x in l.split(" "):
				model_bytes.append(int(x, 16))
		except ValueError:
			break
	model = []
	for i in range(len(model_bytes) // 4):
		model.append(model_bytes[i * 4] | model_bytes[i * 4 + 1] << 8 | model_bytes[i * 4 + 2] << 16 | model_bytes[i * 4 + 3] << 24)
	return model


def parse_reference(lines):
	"""Extract the expected words from a reference_output file.

	Each line holds one or more 8-hex-digit words; the words of a line are
	taken in reverse order (rightmost word first).
	"""
	gold = []
	for l in lines:
		line_contents = []
		for index in range(0, len(l.strip()), 8):
			line_contents.append(int(l[index:index + 8], 16))
		gold.extend(reversed(line_contents))
	return gold


def compare_words(model, gold):
	"""Print one comparison line per reference word; return True if all match.

	A reference word with no corresponding model word (the dump was shorter
	than the reference) is reported as a mismatch instead of raising
	IndexError.
	"""
	all_match = True
	for i, g in enumerate(gold):
		have_model = i < len(model)
		if have_model and g == model[i]:
			eq_str = "\033[1;32m==\033[0;0m"
		else:
			eq_str = "\033[1;31m!=\033[0;0m"
			all_match = False
		model_str = "{:08x}".format(model[i]) if have_model else "-" * 8
		print("{:03x}: {} (gate) {} {:08x} (gold)".format(i * 4, model_str, eq_str, g))
	return all_match


def main(argv):
	"""argv[1] is the simulator log, argv[2] the reference_output file."""
	with open(argv[1]) as log_file:
		model = parse_model_dump(log_file)
	with open(argv[2]) as ref_file:
		gold = parse_reference(ref_file)
	if compare_words(model, gold):
		print("Test PASSED.")
	else:
		print("Test FAILED.")


if __name__ == "__main__":
	main(sys.argv)

View File

@ -0,0 +1,17 @@
#ifndef _COMPLIANCE_IO_H_
#define _COMPLIANCE_IO_H_
// IO hooks used by the compliance test sources. This target has no IO
// channel for them, so init, string output and check all expand to nothing.
#define RVTEST_IO_INIT
#define RVTEST_IO_WRITE_STR(_SP, _STR)
#define RVTEST_IO_CHECK()
// Put this info into a label name so that it can be seen in the disassembly (holy hack batman)
// LABEL_ASSERT_ pastes its arguments into a label `assert_<reg>_<val>_l<line>:`;
// LABEL_ASSERT is the extra expansion level that lets __LINE__ expand first.
#define LABEL_ASSERT_(reg, val, line) assert_ ## reg ## _ ## val ## _l ## line:
#define LABEL_ASSERT(reg, val, line) LABEL_ASSERT_(reg, val, line)
// GPR assertion: nothing is checked at run time. It only emits the label
// (the value is always spelled `xxx`; _I is unused) followed by a nop.
#define RVTEST_IO_ASSERT_GPR_EQ(_SP, _R, _I) LABEL_ASSERT(_R, xxx, __LINE__) nop
// Floating-point assertions expand to nothing.
#define RVTEST_IO_ASSERT_SFPR_EQ(_F, _R, _I)
#define RVTEST_IO_ASSERT_DFPR_EQ(_D, _R, _I)
#endif // _COMPLIANCE_IO_H_

View File

@ -0,0 +1,30 @@
#ifndef _COMPLIANCE_TEST_H_
#define _COMPLIANCE_TEST_H_
// Target hooks for the riscv-compliance framework. The RV32M marker and the
// code begin/end hooks need no setup on this target, so they expand to nothing.
#define RV_COMPLIANCE_RV32M
#define RV_COMPLIANCE_CODE_BEGIN
#define RV_COMPLIANCE_CODE_END
// Address written to end the simulation.
// NOTE(review): this looks like the testbench IO base 0x80000000 plus the
// "exit" register at offset 8 -- confirm against the testbench sources.
#define MM_IO_EXIT 0x80000008
// Halt sequence: store zero to MM_IO_EXIT, then spin forever in case the
// store did not stop the simulation. Linker relaxation is disabled around it.
.macro RV_COMPLIANCE_HALT
.option push
.option norelax
_write_io_exit:
li a0, MM_IO_EXIT
sw zero, 0(a0)
// Note we should never reach this next instruction (assuming the
// processor is working correctly!)
_end_of_test:
j _end_of_test
.option pop
.endm
// Test result data goes into its own allocatable section, .testdata.
#define RV_COMPLIANCE_DATA_BEGIN .section .testdata, "a"
#define RV_COMPLIANCE_DATA_END
#endif // _COMPLIANCE_TEST_H_

View File

@ -0,0 +1 @@
../riscv-compliance/riscv-test-env/riscv_test_macros.h

View File

@ -0,0 +1,43 @@
/* Two 64k regions: RAM at address 0 for code and data, and RESULT directly
   above it (0x10000) for the test signature. */
MEMORY
{
RAM (wx) : ORIGIN = 0x0, LENGTH = 64k
RESULT (w) : ORIGIN = ORIGIN(RAM) + LENGTH(RAM), LENGTH = 64k
}
OUTPUT_FORMAT("elf32-littleriscv", "elf32-littleriscv", "elf32-littleriscv")
OUTPUT_ARCH(riscv)
ENTRY(_start)
SECTIONS
{
.text : {
/* Padding in place of vector table (by default CPU reset vector points to
immediately after vector table) */
. = ORIGIN(RAM) + 0xc0;
/* _start is only PROVIDEd: a definition in the test source takes precedence */
PROVIDE (_start = .);
*(.text*)
. = ALIGN(4);
} > RAM
.rodata : {
*(.rodata*)
. = ALIGN(4);
} > RAM
.data : {
*(.data*)
. = ALIGN(4);
} > RAM
.bss : {
*(.bss .bss.*)
. = ALIGN(4);
} > RAM
/* Link testout section to upper memory region */
.testdata :
{
/* __testdata_start marks the first byte of the signature data */
PROVIDE(__testdata_start = .);
*(.testdata)
} > RESULT
}

1
test/rvcpp/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
rvcpp

11
test/rvcpp/Makefile Normal file
View File

@ -0,0 +1,11 @@
# Builds the rvcpp reference interpreter from every .cpp file in this directory.
SRCS=$(wildcard *.cpp)
EXECUTABLE:=rvcpp

# Disable the built-in suffix rules; everything is built by the explicit rule below.
.SUFFIXES:
.PHONY: all clean

all:
	g++ -std=c++17 -O3 -Wall -Wno-parentheses $(SRCS) -o $(EXECUTABLE)

# The executable name must be expanded with $(...): a bare (EXECUTABLE) is
# passed to the shell verbatim and is a syntax error there.
clean:
	rm -f $(EXECUTABLE)

136
test/rvcpp/mem.h Normal file
View File

@ -0,0 +1,136 @@
#ifndef _MEM_H
#define _MEM_H
#include "rv_types.h"
// Interface for anything that can sit on the 32-bit bus. The default
// implementations read as zero and ignore writes, so a device only needs to
// override the access sizes it supports.
struct MemBase32 {
	// Polymorphic base: give it a virtual destructor so that destroying a
	// device through a MemBase32 pointer is well defined.
	virtual ~MemBase32() = default;
	virtual uint8_t r8(ux_t addr) {return 0;}
	virtual void w8(ux_t addr, uint8_t data) {}
	virtual uint16_t r16(ux_t addr) {return 0;}
	virtual void w16(ux_t addr, uint16_t data) {}
	virtual uint32_t r32(ux_t addr) {return 0;}
	virtual void w32(ux_t addr, uint32_t data) {}
};
struct FlatMem32: MemBase32 {
uint32_t size;
uint32_t *mem;
FlatMem32(uint32_t size_) {
assert(size_ % sizeof(uint32_t) == 0);
size = size_;
mem = new uint32_t[size >> 2];
for (uint64_t i = 0; i < size >> 2; ++i)
mem[i] = 0;
}
~FlatMem32() {
delete mem;
}
virtual uint8_t r8(ux_t addr) {
assert(addr < size);
return mem[addr >> 2] >> 8 * (addr & 0x3) & 0xffu;
}
virtual void w8(ux_t addr, uint8_t data) {
assert(addr < size);
mem[addr >> 2] &= ~(0xffu << 8 * (addr & 0x3));
mem[addr >> 2] |= (uint32_t)data << 8 * (addr & 0x3);
}
virtual uint16_t r16(ux_t addr) {
assert(addr < size && addr + 1 < size); // careful of ~0u
assert(addr % 2 == 0);
return mem[addr >> 2] >> 8 * (addr & 0x2) & 0xffffu;
}
virtual void w16(ux_t addr, uint16_t data) {
assert(addr < size && addr + 1 < size);
assert(addr % 2 == 0);
mem[addr >> 2] &= ~(0xffffu << 8 * (addr & 0x2));
mem[addr >> 2] |= (uint32_t)data << 8 * (addr & 0x2);
}
virtual uint32_t r32(ux_t addr) {
assert(addr < size && addr + 3 < size);
assert(addr % 4 == 0);
return mem[addr >> 2];
}
virtual void w32(ux_t addr, uint32_t data) {
assert(addr < size && addr + 3 < size);
assert(addr % 4 == 0);
mem[addr >> 2] = data;
}
};
// Thrown by the testbench IO region when the program writes to the exit
// register; carries the value that was written.
struct TBExitException {
	ux_t exitcode;

	TBExitException(ux_t code) {
		exitcode = code;
	}
};
// Testbench IO registers (offsets from the start of the IO region). Only
// 32-bit writes do anything:
//   0x0: print the low byte of the data as a character
//   0x4: print the data as 8 hex digits plus newline
//   0x8: stop the simulation by throwing TBExitException(data)
// Writes to any other offset are ignored.
struct TBMemIO: MemBase32 {
	virtual void w32(ux_t addr, uint32_t data) {
		if (addr == 0x0) {
			printf("%c", (char)data);
		}
		else if (addr == 0x4) {
			printf("%08x\n", data);
		}
		else if (addr == 0x8) {
			throw TBExitException(data);
		}
	}
};
struct MemMap32: MemBase32 {
std::vector<std::tuple<uint32_t, uint32_t, MemBase32*> > memmap;
void add(uint32_t base, uint32_t size, MemBase32 *mem) {
memmap.push_back(std::make_tuple(base, size, mem));
}
std::tuple <uint32_t, MemBase32*> map_addr(uint32_t addr) {
for (auto&& [base, size, mem] : memmap) {
if (addr >= base && addr < base + size)
return std::make_tuple(addr - base, mem);
}
throw;
}
// perhaps some templatey-ness required
virtual uint8_t r8(ux_t addr) {
auto [offset, mem] = map_addr(addr);
return mem->r8(offset);
}
virtual void w8(ux_t addr, uint8_t data) {
auto [offset, mem] = map_addr(addr);
mem->w8(offset, data);
}
virtual uint16_t r16(ux_t addr) {
auto [offset, mem] = map_addr(addr);
return mem->r16(offset);
}
virtual void w16(ux_t addr, uint16_t data) {
auto [offset, mem] = map_addr(addr);
mem->w16(offset, data);
}
virtual uint32_t r32(ux_t addr) {
auto [offset, mem] = map_addr(addr);
return mem->r32(offset);
}
virtual void w32(ux_t addr, uint32_t data) {
auto [offset, mem] = map_addr(addr);
mem->w32(offset, data);
}
};
#endif

440
test/rvcpp/rv.cpp Normal file
View File

@ -0,0 +1,440 @@
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <tuple>
#include <vector>
#include "rv_types.h"
#include "mem.h"
// Minimal RISC-V interpreter, supporting RV32IM only
// Use unsigned arithmetic everywhere, with explicit sign extension as required.
// Sign-extend `bits` from bit position sign_bit: bits above sign_bit are
// discarded and replaced with copies of bit sign_bit. When sign_bit is already
// the top bit (or beyond), the value is returned unchanged.
static inline ux_t sext(ux_t bits, int sign_bit) {
	if (sign_bit < XLEN - 1) {
		const ux_t keep_mask = (1u << (sign_bit + 1)) - 1;
		const ux_t sign = bits & (1u << sign_bit);
		// Subtracting twice the sign bit's weight gives the two's complement value
		return (bits & keep_mask) - (sign << 1);
	}
	return bits;
}
// I-type immediate: instr[31:20], sign-extended from instr[31].
static inline ux_t imm_i(uint32_t instr) {
	const uint32_t field = instr >> 20;
	// instr[31] moved to bit 12: subtracting it sign-extends the 12-bit field
	const uint32_t sign_weight = (instr >> 19) & 0x1000;
	return field - sign_weight;
}
// S-type immediate: {instr[31:25], instr[11:7]}, sign-extended from instr[31].
static inline ux_t imm_s(uint32_t instr) {
	const uint32_t upper = (instr >> 20) & 0xfe0u;       // imm[11:5]
	const uint32_t lower = (instr >> 7) & 0x1fu;         // imm[4:0]
	const uint32_t sign_weight = (instr >> 19) & 0x1000u;
	return upper + lower - sign_weight;
}
// U-type immediate: instr[31:12] kept in place, low 12 bits cleared.
static inline ux_t imm_u(uint32_t instr) {
	const uint32_t upper_mask = ~0xfffu;
	return instr & upper_mask;
}
// B-type (branch) immediate: {instr[31], instr[7], instr[30:25], instr[11:8], 0},
// sign-extended from instr[31].
static inline ux_t imm_b(uint32_t instr) {
	const uint32_t bits_4_1 = (instr >> 7) & 0x1e;
	const uint32_t bits_10_5 = (instr >> 20) & 0x7e0;
	const uint32_t bit_11 = (instr << 4) & 0x800;
	const uint32_t sign_weight = (instr >> 19) & 0x1000;
	return bits_4_1 + bits_10_5 + bit_11 - sign_weight;
}
// J-type (jal) immediate: {instr[31], instr[19:12], instr[20], instr[30:21], 0},
// sign-extended from instr[31].
static inline ux_t imm_j(uint32_t instr) {
	const uint32_t bits_10_1 = (instr >> 20) & 0x7fe;
	const uint32_t bit_11 = (instr >> 9) & 0x800;
	const uint32_t bits_19_12 = instr & 0xff000;
	const uint32_t sign_weight = (instr >> 11) & 0x100000;
	return bits_10_1 + bit_11 + bits_19_12 - sign_weight;
}
// Minimal CSR file: a single counter (mcycle) that is also what reads of the
// mtime and minstret addresses return, plus mscratch. Every other CSR address
// reads as zero and ignores writes.
struct RVCSR {
	// Write operations, in the order csrrw / csrrs / csrrc
	enum {
		WRITE = 0,
		WRITE_SET = 1,
		WRITE_CLEAR = 2
	};

	// Implemented CSR addresses
	enum {
		MSCRATCH = 0x340,
		MCYCLE = 0xb00,
		MTIME = 0xb01,
		MINSTRET = 0xb02
	};

	ux_t mcycle;
	ux_t mscratch;

	RVCSR(): mcycle(0), mscratch(0) {}

	// Called once per executed instruction
	void step() {++mcycle;}

	// side_effect is accepted but currently has no influence on the result
	ux_t read(uint16_t addr, bool side_effect=true) {
		switch (addr) {
			case MCYCLE:
			case MTIME:
			case MINSTRET:
				return mcycle;
			case MSCRATCH:
				return mscratch;
			default:
				return 0;
		}
	}

	// For set/clear the new value is derived from the current one first.
	// Only mcycle and mscratch are writable.
	void write(uint16_t addr, ux_t data, uint op=WRITE) {
		switch (op) {
			case WRITE_CLEAR:
				data = read(addr, false) & ~data;
				break;
			case WRITE_SET:
				data = read(addr, false) | data;
				break;
			default:
				break;
		}
		switch (addr) {
			case MCYCLE:
				mcycle = data;
				break;
			case MSCRATCH:
				mscratch = data;
				break;
			default:
				break;
		}
	}
};
struct RVCore {
std::array<ux_t, 32> regs;
ux_t pc;
RVCSR csr;
RVCore(ux_t reset_vector=0xc0) {
std::fill(std::begin(regs), std::end(regs), 0);
pc = reset_vector;
}
enum {
OPC_LOAD = 0b00'000,
OPC_MISC_MEM = 0b00'011,
OPC_OP_IMM = 0b00'100,
OPC_AUIPC = 0b00'101,
OPC_STORE = 0b01'000,
OPC_OP = 0b01'100,
OPC_LUI = 0b01'101,
OPC_BRANCH = 0b11'000,
OPC_JALR = 0b11'001,
OPC_JAL = 0b11'011,
OPC_SYSTEM = 0b11'100
};
void step(MemBase32 &mem) {
uint32_t instr = mem.r32(pc);
std::optional<ux_t> rd_wdata;
std::optional<ux_t> pc_wdata;
uint regnum_rs1 = instr >> 15 & 0x1f;
uint regnum_rs2 = instr >> 20 & 0x1f;
uint regnum_rd = instr >> 7 & 0x1f;
ux_t rs1 = regs[regnum_rs1];
ux_t rs2 = regs[regnum_rs2];
bool instr_invalid = false;
uint opc = instr >> 2 & 0x1f;
uint funct3 = instr >> 12 & 0x7;
uint funct7 = instr >> 25 & 0x7f;
switch (opc) {
case OPC_OP: {
if (funct7 == 0b00'00000) {
if (funct3 == 0b000)
rd_wdata = rs1 + rs2;
else if (funct3 == 0b001)
rd_wdata = rs1 << (rs2 & 0x1f);
else if (funct3 == 0b010)
rd_wdata = (sx_t)rs1 < (sx_t)rs2;
else if (funct3 == 0b011)
rd_wdata = rs1 < rs2;
else if (funct3 == 0b100)
rd_wdata = rs1 ^ rs2;
else if (funct3 == 0b101)
rd_wdata = rs1 >> (rs2 & 0x1f);
else if (funct3 == 0b110)
rd_wdata = rs1 | rs2;
else if (funct3 == 0b111)
rd_wdata = rs1 & rs2;
else
instr_invalid = true;
}
else if (funct7 == 0b01'00000) {
if (funct3 == 0b000)
rd_wdata = rs1 - rs2;
else if (funct3 == 0b101)
rd_wdata = (sx_t)rs1 >> (rs2 & 0x1f);
else
instr_invalid = true;
}
else if (funct7 == 0b00'00001) {
if (funct3 < 0b100) {
sdx_t mul_op_a = rs1;
sdx_t mul_op_b = rs2;
if (funct3 != 0b011)
mul_op_a -= (mul_op_a & (1 << XLEN - 1)) << 1;
if (funct3 < 0b010)
mul_op_b -= (mul_op_b & (1 << XLEN - 1)) << 1;
sdx_t mul_result = mul_op_a * mul_op_b;
if (funct3 == 0b000)
rd_wdata = mul_result;
else
rd_wdata = mul_result >> XLEN;
}
else {
asm volatile("" : : : "memory");
if (funct3 == 0b100) {
if (rs2 == 0)
rd_wdata = -1;
else if (rs2 == ~0u)
rd_wdata = -rs1;
else
rd_wdata = (sx_t)rs1 / (sx_t)rs2;
}
else if (funct3 == 0b101) {
rd_wdata = rs2 ? rs1 / rs2 : ~0ul;
}
else if (funct3 == 0b110) {
if (rs2 == 0)
rd_wdata = rs1;
else if (rs2 == ~0u) // potential overflow of division
rd_wdata = 0;
else
rd_wdata = (sx_t)rs1 % (sx_t)rs2;
}
else if (funct3 == 0b111) {
rd_wdata = rs2 ? rs1 % rs2 : rs1;
}
}
}
else {
instr_invalid = true;
}
break;
}
case OPC_OP_IMM: {
ux_t imm = imm_i(instr);
if (funct3 == 0b000)
rd_wdata = rs1 + imm;
else if (funct3 == 0b010)
rd_wdata = !!((sx_t)rs1 < (sx_t)imm);
else if (funct3 == 0b011)
rd_wdata = !!(rs1 < imm);
else if (funct3 == 0b100)
rd_wdata = rs1 ^ imm;
else if (funct3 == 0b110)
rd_wdata = rs1 | imm;
else if (funct3 == 0b111)
rd_wdata = rs1 & imm;
else if (funct3 == 0b001 || funct3 == 0b101) {
// shamt is regnum_rs2
if (funct7 == 0b00'00000 && funct3 == 0b001) {
rd_wdata = rs1 << regnum_rs2;
}
else if (funct7 == 0b00'00000 && funct3 == 0b101) {
rd_wdata = rs1 >> regnum_rs2;
}
else if (funct7 == 0b01'00000 && funct3 == 0b101) {
rd_wdata = (sx_t)rs1 >> regnum_rs2;
}
else {
instr_invalid = true;
}
}
else {
instr_invalid = true;
}
break;
}
case OPC_BRANCH: {
ux_t target = pc + imm_b(instr);
bool taken = false;
if ((funct3 & 0b110) == 0b000)
taken = rs1 == rs2;
else if ((funct3 & 0b110) == 0b100)
taken = (sx_t)rs1 < (sx_t) rs2;
else if ((funct3 & 0b110) == 0b110)
taken = rs1 < rs2;
else
instr_invalid = true;
if (!instr_invalid && funct3 & 0b001)
taken = !taken;
if (taken)
pc_wdata = target;
break;
}
case OPC_LOAD: {
ux_t load_addr = rs1 + imm_i(instr);
if (funct3 == 0b000)
rd_wdata = sext(mem.r8(load_addr), 7);
else if (funct3 == 0b001)
rd_wdata = sext(mem.r16(load_addr), 15);
else if (funct3 == 0b010)
rd_wdata = mem.r32(load_addr);
else if (funct3 == 0b100)
rd_wdata = mem.r8(load_addr);
else if (funct3 == 0b101)
rd_wdata = mem.r16(load_addr);
else
instr_invalid = true;
break;
}
case OPC_STORE: {
ux_t store_addr = rs1 + imm_s(instr);
if (funct3 == 0b000)
mem.w8(store_addr, rs2 & 0xffu);
else if (funct3 == 0b001)
mem.w16(store_addr, rs2 & 0xffffu);
else if (funct3 == 0b010)
mem.w32(store_addr, rs2);
else
instr_invalid = true;
break;
}
case OPC_JAL:
rd_wdata = pc + 4;
pc_wdata = pc + imm_j(instr);
break;
case OPC_JALR:
rd_wdata = pc + 4;
pc_wdata = (rs1 + imm_i(instr)) & -2u;
break;
case OPC_LUI:
rd_wdata = imm_u(instr);
break;
case OPC_AUIPC:
rd_wdata = pc + imm_u(instr);
break;
case OPC_SYSTEM: {
uint16_t csr_addr = instr >> 20;
if (funct3 >= 0b001 && funct3 <= 0b011) {
// csrrw, csrrs, csrrc
uint write_op = funct3 - 0b001;
if (write_op != RVCSR::WRITE || regnum_rd != 0)
rd_wdata = csr.read(csr_addr);
if (write_op == RVCSR::WRITE || regnum_rs1 != 0)
csr.write(csr_addr, rs1, write_op);
}
else if (funct3 >= 0b101 && funct3 <= 0b111) {
// csrrwi, csrrsi, csrrci
uint write_op = funct3 - 0b101;
if (write_op != RVCSR::WRITE || regnum_rd != 0)
rd_wdata = csr.read(csr_addr);
if (write_op == RVCSR::WRITE || regnum_rs1 != 0)
csr.write(csr_addr, regnum_rs1, write_op);
}
else {
instr_invalid = true;
}
break;
}
default:
instr_invalid = true;
break;
}
if (instr_invalid)
printf("Invalid instr %08x at %08x\n", instr, pc);
if (pc_wdata)
pc = *pc_wdata;
else
pc = pc + 4;
if (rd_wdata && regnum_rd != 0)
regs[regnum_rd] = *rd_wdata;
csr.step();
}
};
// Command-line help, printed by exit_help(). The usage line lists every
// option described below it, including --memsize.
const char *help_str =
"Usage: tb binfile [--dump start end] [--cycles n] [--memsize n]\n"
"    binfile          : Binary to load into start of memory\n"
"    --dump start end : Print out memory contents between start and end (exclusive)\n"
"                       after execution finishes. Can be passed multiple times.\n"
"    --cycles n       : Maximum number of cycles to run before exiting.\n"
"    --memsize n      : Memory size in units of 1024 bytes, default is 16 MB\n"
;
// Print an optional error message followed by the usage text to stderr, then
// terminate the process with exit status -1.
void exit_help(std::string errtext = "") {
	std::cerr << errtext;
	std::cerr << help_str;
	exit(-1);
}
int main(int argc, char **argv) {
if (argc < 2)
exit_help();
std::vector<std::tuple<uint32_t, uint32_t>> dump_ranges;
int64_t max_cycles = 100000;
uint32_t ramsize = 16 * (1 << 20);
for (int i = 2; i < argc; ++i) {
std::string s(argv[i]);
if (s == "--dump") {
if (argc - i < 3)
exit_help("Option --dump requires 2 arguments\n");
dump_ranges.push_back(std::make_tuple(
std::stoul(argv[i + 1], 0, 0),
std::stoul(argv[i + 2], 0, 0)
));
i += 2;
}
else if (s == "--cycles") {
if (argc - i < 2)
exit_help("Option --cycles requires an argument\n");
max_cycles = std::stol(argv[i + 1], 0, 0);
i += 1;
}
else if (s == "--memsize") {
if (argc - i < 2)
exit_help("Option --memsize requires an argument\n");
ramsize = 1024 * std::stol(argv[i + 1], 0, 0);
i += 1;
}
else {
std::cerr << "Unrecognised argument " << s << "\n";
exit_help("");
}
}
FlatMem32 ram(ramsize);
TBMemIO io;
MemMap32 mem;
mem.add(0, ramsize, &ram);
mem.add(0x80000000u, 12, &io);
std::ifstream fd(argv[1], std::ios::binary | std::ios::ate);
std::streamsize bin_size = fd.tellg();
if (bin_size > ramsize) {
std::cerr << "Binary file (" << bin_size << " bytes) is larger than memory (" << ramsize << " bytes)\n";
return -1;
}
fd.seekg(0, std::ios::beg);
fd.read((char*)ram.mem, bin_size);
RVCore core;
int64_t cyc;
try {
for (cyc = 0; cyc < max_cycles; ++cyc)
core.step(mem);
}
catch (TBExitException e) {
printf("CPU requested halt. Exit code %d\n", e.exitcode);
printf("Ran for %ld cycles\n", cyc + 1);
}
for (auto [start, end] : dump_ranges) {
printf("Dumping memory from %08x to %08x:\n", start, end);
for (uint32_t i = 0; i < end - start; ++i)
printf("%02x%c", mem.r8(start + i), i % 16 == 15 ? '\n' : ' ');
printf("\n");
}
return 0;
}

11
test/rvcpp/rv_types.h Normal file
View File

@ -0,0 +1,11 @@
#ifndef _RV_TYPES
#define _RV_TYPES
// Register width in bits
enum {XLEN = 32};
// Unsigned and signed views of an XLEN-bit register value.
// NOTE(review): these rely on the includer having already pulled in <cstdint>.
typedef uint32_t ux_t;
typedef int32_t sx_t;
// Shorthand for unsigned int
typedef unsigned int uint;
// Signed 64-bit (double XLEN) type
typedef int64_t sdx_t;
#endif

491
test/rvpy/rvpy Executable file
View File

@ -0,0 +1,491 @@
#!/usr/bin/env python3
# Minimal RISC-V interpreter, supporting RV32I + Zcsr only, with trace disassembly
import argparse
import sys
XLEN = 32
XLEN_MASK = (1 << XLEN) - 1
def extract(bits, msb, lsb):
	"""Return bits[msb:lsb] (both ends inclusive) as an unsigned integer."""
	field_mask = (1 << (msb + 1)) - 1
	return (bits & field_mask) >> lsb
def sext(bits, sign_bit):
	"""Interpret the low sign_bit + 1 bits of `bits` as a two's complement
	number and return it as a (possibly negative) Python int. Higher bits are
	ignored."""
	magnitude = bits & ((1 << (sign_bit + 1)) - 1)
	sign = bits & (1 << sign_bit)
	return magnitude - (sign << 1)
def concat_extract(bits, msb_lsb_pairs, signed=True):
	"""Concatenate bit fields of `bits`, most significant field first.

	msb_lsb_pairs is a sequence of (msb, lsb) pairs, both inclusive. When
	`signed` is true the concatenated value is sign-extended from its top bit.
	"""
	value = 0
	total_width = 0
	for msb, lsb in msb_lsb_pairs:
		width = msb - lsb + 1
		value = (value << width) | extract(bits, msb, lsb)
		total_width += width
	if not signed:
		return value
	return sext(value, total_width - 1)
# Note these handy functions are not used much in the main loop, because CPython is unable
# to inline them. This and similar changes result in a ~3x performance increase. :(
def imm_i(instr):
	"""I-type immediate: instr[31:20] as a signed integer."""
	# Equivalent to concat_extract(instr, ((31, 20),)), spelled out for speed
	field = instr >> 20
	sign_weight = instr >> 19 & 0x1000
	return field - sign_weight
def imm_s(instr):
	"""S-type immediate: {instr[31:25], instr[11:7]} as a signed integer."""
	# Equivalent to concat_extract(instr, ((31, 25), (11, 7))), spelled out for speed
	upper = instr >> 20 & 0xfe0
	lower = instr >> 7 & 0x1f
	sign_weight = instr >> 19 & 0x1000
	return upper + lower - sign_weight
def imm_u(instr):
	"""U-type immediate: instr[31:12] << 12 as a signed 32-bit integer."""
	# Equivalent to concat_extract(instr, ((31, 12),)) << 12
	# Each term needs its own parentheses: `-` binds tighter than `&`, so
	# without them the subtraction is folded into the mask and the result is
	# never sign-extended.
	return (instr & 0xfffff000) - (instr << 1 & 0x100000000)
def imm_b(instr):
	"""B-type (branch) immediate as a signed byte offset: the fields
	instr[31], instr[7], instr[30:25], instr[11:8] concatenated and doubled."""
	fields = ((31, 31), (7, 7), (30, 25), (11, 8))
	halfword_offset = concat_extract(instr, fields)
	return halfword_offset << 1
def imm_j(instr):
	"""J-type (jal) immediate as a signed byte offset: the fields
	instr[31], instr[19:12], instr[20], instr[30:21] concatenated and doubled."""
	fields = ((31, 31), (19, 12), (20, 20), (30, 21))
	halfword_offset = concat_extract(instr, fields)
	return halfword_offset << 1
class FlatMemory:
	"""Flat little-endian memory of `size` bytes, stored as a list of 32-bit words."""

	def __init__(self, size):
		self.size = size
		self.mem = [0] * (size >> 2)

	# Reads are unsigned. Writes allow signed or unsigned values and convert
	# implicitly to unsigned. Multi-byte accesses are little-endian.

	def get8(self, addr):
		assert(addr >= 0 and addr < self.size)
		return self.mem[addr >> 2] >> (addr & 0x3) * 8 & 0xff

	def put8(self, addr, data):
		assert(addr >= 0 and addr < self.size)
		assert(data >= -1 << 7 and data < 1 << 8)
		# Clear the addressed byte lane, then merge in the new byte
		self.mem[addr >> 2] &= ~(0xff << 8 * (addr & 0x3))
		self.mem[addr >> 2] |= (data & 0xff) << 8 * (addr % 4)

	def get16(self, addr):
		# Only bit 1 of the address selects the halfword, so bit 0 is ignored
		return self.mem[addr >> 2] >> (addr & 0x2) * 8 & 0xffff

	def put16(self, addr, data):
		assert(data >= -1 << 15 and data < 1 << 16)
		for i in range(2):
			self.put8(addr + i, data >> 8 * i & 0xff)

	def get32(self, addr):
		assert(addr >= 0 and addr + 3 < self.size)
		return self.mem[addr >> 2]

	def put32(self, addr, data):
		assert(data >= -1 << 31 and data < 1 << 32)
		assert(addr >= 0 and addr + 3 < self.size)
		self.mem[addr >> 2] = data & 0xffff_ffff

	def loadbin(self, data, offs):
		"""Copy `data` into memory starting at byte offset `offs`.

		`data` is bytes/bytearray, or a file object opened in "rb" mode.
		"""
		if type(data) not in (bytes, bytearray):
			# must be fh
			assert(data.mode == "rb")
			data = data.read()
		# <= rather than <: an image that exactly fills the memory is valid
		assert(offs + len(data) <= self.size)
		for i, b in enumerate(data):
			self.put8(offs + i, b)
# Raised by MemWithTBIO.put32 when the program writes to the testbench exit
# register; the written value is the exception argument.
class TBExit(Exception):
pass
class MemWithTBIO(FlatMemory):
	"""FlatMemory plus the testbench IO registers at 0x80000000.

	Only 32-bit writes reach the IO registers: +0 prints a character, +4
	prints a 32-bit value as hex, +8 raises TBExit(data). Console output is
	formatted with io_log_fmt.
	"""

	TB_IO_BASE = 0x80000000
	TB_IO_PRINT_CHAR = TB_IO_BASE + 0x0
	TB_IO_PRINT_INT = TB_IO_BASE + 0x4
	TB_IO_EXIT = TB_IO_BASE + 0x8

	def __init__(self, size, io_log_fmt="IO: {}\n"):
		super().__init__(size)
		self.io_log_fmt = io_log_fmt

	def put32(self, addr, data):
		# Everything below the IO base is ordinary memory
		if addr < self.TB_IO_BASE:
			super().put32(addr, data)
			return
		if addr == self.TB_IO_EXIT:
			raise TBExit(data)
		if addr == self.TB_IO_PRINT_CHAR:
			sys.stdout.write(self.io_log_fmt.format(chr(data)))
			return
		if addr == self.TB_IO_PRINT_INT:
			sys.stdout.write(self.io_log_fmt.format(f"{data:08x}"))
			return
		print(f"Unknown IO address {addr:08x}")
class RVCSR:
	"""Minimal CSR file: a single counter (mcycle), which is also what reads of
	the mtime and minstret addresses return, plus mscratch."""

	# Write operations, in the order csrrw / csrrs / csrrc
	WRITE = 0
	WRITE_SET = 1
	WRITE_CLEAR = 2

	# Implemented CSR addresses
	MSCRATCH = 0x340
	MCYCLE = 0xb00
	MTIME = 0xb01
	MINSTRET = 0xb02

	def __init__(self):
		self.mcycle = 0
		self.mscratch = 0

	def step(self):
		"""Advance the counter; called once per executed instruction."""
		self.mcycle += 1

	def read(self, addr, side_effect=True):
		"""Return the CSR value, or None for an unimplemented address.

		side_effect is accepted but currently has no influence on the result.
		"""
		# Close your eyes
		if addr in (RVCSR.MCYCLE, RVCSR.MTIME, RVCSR.MINSTRET):
			return self.mcycle
		elif addr == RVCSR.MSCRATCH:
			return self.mscratch
		else:
			return None

	def write(self, addr, data, op=0):
		"""Write, set bits in, or clear bits in a CSR. Only mcycle and mscratch
		are writable; writes to any other address are ignored."""
		if op in (RVCSR.WRITE_CLEAR, RVCSR.WRITE_SET):
			old = self.read(addr, side_effect=False)
			if old is None:
				# Unimplemented CSR: treat as zero instead of failing with a
				# TypeError on `None & ...`; the result is discarded below.
				old = 0
			if op == RVCSR.WRITE_CLEAR:
				data = old & ~data
			else:
				data = old | data
		if addr == RVCSR.MCYCLE:
			self.mcycle = data
		elif addr == RVCSR.MSCRATCH:
			self.mscratch = data
class RVCore:
	"""RV32IM + CSR interpreter core with optional per-instruction trace.

	Register values are kept as signed Python ints in the range
	[-2**31, 2**31); instructions that need the unsigned view mask with
	XLEN_MASK.
	"""

	def __init__(self, mem, reset_vector=0xc0):
		self.regs = [0] * 32
		self.mem = mem
		self.pc = reset_vector
		self.csr = RVCSR()

	def step(self, instr=None, log=True):
		"""Execute one instruction.

		instr: instruction word to execute; fetched from memory at pc if None.
		log:   when true, print one line of trace disassembly.
		"""
		if instr is None:
			instr = self.mem.mem[self.pc >> 2]
		regnum_rs1 = instr >> 15 & 0x1f
		regnum_rs2 = instr >> 20 & 0x1f
		regnum_rd  = instr >> 7 & 0x1f
		rs1 = self.regs[regnum_rs1]
		rs2 = self.regs[regnum_rs2]

		# Results are collected here and committed at the end of the step
		rd_wdata = None
		pc_wdata = None
		log_disasm = None
		instr_invalid = False

		opc    = instr >> 2 & 0x1f
		funct3 = instr >> 12 & 0x7
		funct7 = instr >> 25 & 0x7f

		# Major opcodes, i.e. instruction bits 6:2
		OPC_LOAD     = 0b00_000
		OPC_MISC_MEM = 0b00_011
		OPC_OP_IMM   = 0b00_100
		OPC_AUIPC    = 0b00_101
		OPC_STORE    = 0b01_000
		OPC_OP       = 0b01_100
		OPC_LUI      = 0b01_101
		OPC_BRANCH   = 0b11_000
		OPC_JALR     = 0b11_001
		OPC_JAL      = 0b11_011
		OPC_SYSTEM   = 0b11_100

		if opc == OPC_OP:
			if log: log_reg_str = f" x{regnum_rd}, x{regnum_rs1}, x{regnum_rs2}"
			if funct7 == 0b00_00000:
				if funct3 == 0b000:
					if log: log_disasm = "add" + log_reg_str
					rd_wdata = rs1 + rs2
				elif funct3 == 0b001:
					if log: log_disasm = "sll" + log_reg_str
					rd_wdata = rs1 << (rs2 & 0x1f)
				elif funct3 == 0b010:
					if log: log_disasm = "slt" + log_reg_str
					rd_wdata = rs1 < rs2
				elif funct3 == 0b011:
					if log: log_disasm = "sltu" + log_reg_str
					rd_wdata = (rs1 & XLEN_MASK) < (rs2 & XLEN_MASK)
				elif funct3 == 0b100:
					if log: log_disasm = "xor" + log_reg_str
					rd_wdata = rs1 ^ rs2
				elif funct3 == 0b101:
					if log: log_disasm = "srl" + log_reg_str
					rd_wdata = (rs1 & XLEN_MASK) >> (rs2 & 0x1f)
				elif funct3 == 0b110:
					if log: log_disasm = "or" + log_reg_str
					rd_wdata = rs1 | rs2
				elif funct3 == 0b111:
					if log: log_disasm = "and" + log_reg_str
					rd_wdata = rs1 & rs2
				else:
					instr_invalid = True
			elif funct7 == 0b01_00000:
				if funct3 == 0b000:
					if log: log_disasm = "sub" + log_reg_str
					rd_wdata = rs1 - rs2
				elif funct3 == 0b101:
					if log: log_disasm = "sra" + log_reg_str
					rd_wdata = rs1 >> (rs2 & 0x1f)
				else:
					instr_invalid = True
			elif funct7 == 0b00_00001:
				if funct3 < 0b100:
					if log:
						mul_instr_name = {0b000: "mul", 0b001: "mulh", 0b010: "mulhsu", 0b011: "mulhu"}[funct3]
						log_disasm = f"{mul_instr_name} x{regnum_rd}, x{regnum_rs1}, x{regnum_rs2}"
					# Operands are signed by default; take the unsigned view
					# where the instruction calls for it
					mul_op_a = rs1 & XLEN_MASK if funct3 == 0b011 else rs1
					mul_op_b = rs2 & XLEN_MASK if funct3 in (0b010, 0b011) else rs2
					mul_result = mul_op_a * mul_op_b
					if funct3 != 0b000:
						mul_result >>= 32
					rd_wdata = sext(mul_result, XLEN - 1)
				else:
					if log:
						div_instr_name = {0b100: "div", 0b101: "divu", 0b110: "rem", 0b111: "remu"}[funct3]
						log_disasm = f"{div_instr_name} x{regnum_rd}, x{regnum_rs1}, x{regnum_rs2}"
					# int(a / b) truncates towards zero, unlike Python's
					# floor division a // b
					if funct3 == 0b100:
						rd_wdata = -1 if rs2 == 0 else int(rs1 / rs2)
					elif funct3 == 0b101:
						rd_wdata = -1 if rs2 == 0 else sext((rs1 & XLEN_MASK) // (rs2 & XLEN_MASK), XLEN - 1)
					elif funct3 == 0b110:
						rd_wdata = rs1 if rs2 == 0 else rs1 - int(rs1 / rs2) * rs2
					elif funct3 == 0b111:
						rd_wdata = rs1 if rs2 == 0 else sext((rs1 & XLEN_MASK) % (rs2 & XLEN_MASK), XLEN - 1)
					else:
						instr_invalid = True
			else:
				instr_invalid = True

		elif opc == OPC_OP_IMM:
			imm = (instr >> 20) - (instr >> 19 & 0x1000) # imm_i(instr)
			if funct3 == 0b000:
				if log: log_disasm = f"addi x{regnum_rd}, x{regnum_rs1}, {imm}"
				rd_wdata = rs1 + imm
			elif funct3 == 0b010:
				if log: log_disasm = f"slti x{regnum_rd}, x{regnum_rs1}, {imm}"
				rd_wdata = 1 * (rs1 < imm)
			elif funct3 == 0b011:
				if log: log_disasm = f"sltiu x{regnum_rd}, x{regnum_rs1}, {imm & XLEN_MASK}"
				rd_wdata = 1 * (rs1 & XLEN_MASK < imm & XLEN_MASK)
			elif funct3 == 0b100:
				if log: log_disasm = f"xori x{regnum_rd}, x{regnum_rs1}, 0x{imm & XLEN_MASK:x}"
				rd_wdata = rs1 ^ imm
			elif funct3 == 0b110:
				if log: log_disasm = f"ori x{regnum_rd}, x{regnum_rs1}, 0x{imm & XLEN_MASK:x}"
				rd_wdata = rs1 | imm
			elif funct3 == 0b111:
				if log: log_disasm = f"andi x{regnum_rd}, x{regnum_rs1}, 0x{imm & XLEN_MASK:x}"
				rd_wdata = rs1 & imm
			elif funct3 == 0b001 or funct3 == 0b101:
				# shamt is regnum_rs2
				if funct7 == 0b00_00000 and funct3 == 0b001:
					if log: log_disasm = f"slli x{regnum_rd}, x{regnum_rs1}, {regnum_rs2}"
					rd_wdata = rs1 << regnum_rs2
				elif funct7 == 0b00_00000 and funct3 == 0b101:
					if log: log_disasm = f"srli x{regnum_rd}, x{regnum_rs1}, {regnum_rs2}"
					rd_wdata = (rs1 & XLEN_MASK) >> regnum_rs2
				elif funct7 == 0b01_00000 and funct3 == 0b101:
					if log: log_disasm = f"srai x{regnum_rd}, x{regnum_rs1}, {regnum_rs2}"
					rd_wdata = rs1 >> regnum_rs2
				else:
					instr_invalid = True
			else:
				instr_invalid = True

		elif opc == OPC_JAL:
			rd_wdata = self.pc + 4
			# pc_wdata = self.pc + imm_j(instr)
			pc_wdata = self.pc + (instr >> 20 & 0x7fe) + (instr >> 9 & 0x800) + (instr & 0xff000) - (instr >> 11 & 0x100000)
			if log: log_disasm = f"jal x{regnum_rd}, {pc_wdata & XLEN_MASK:08x}"

		elif opc == OPC_JALR:
			imm = imm_i(instr)
			if log: log_disasm = f"jalr x{regnum_rd}, x{regnum_rs1}, {imm}"
			rd_wdata = self.pc + 4
			# JALR clears LSB always
			pc_wdata = (rs1 + imm) & -2

		elif opc == OPC_BRANCH:
			# target = self.pc + imm_b(instr)
			target = self.pc + (instr >> 7 & 0x1e) + (instr >> 20 & 0x7e0) + (instr << 4 & 0x800) - (instr >> 19 & 0x1000)
			taken = False
			if log: log_branch_str = f" x{regnum_rs1}, x{regnum_rs2}, {target:08x}"
			if funct3 == 0b000:
				if log: log_disasm = "beq" + log_branch_str
				taken = rs1 == rs2
			elif funct3 == 0b001:
				if log: log_disasm = "bne" + log_branch_str
				taken = rs1 != rs2
			elif funct3 == 0b100:
				if log: log_disasm = "blt" + log_branch_str
				taken = rs1 < rs2
			elif funct3 == 0b101:
				if log: log_disasm = "bge" + log_branch_str
				taken = rs1 >= rs2
			elif funct3 == 0b110:
				if log: log_disasm = "bltu" + log_branch_str
				taken = (rs1 & XLEN_MASK) < (rs2 & XLEN_MASK)
			elif funct3 == 0b111:
				if log: log_disasm = "bgeu" + log_branch_str
				taken = (rs1 & XLEN_MASK) >= (rs2 & XLEN_MASK)
			else:
				instr_invalid = True
			if taken:
				pc_wdata = target

		elif opc == OPC_LOAD:
			imm = imm_i(instr)
			if log: log_load_str = f" x{regnum_rd}, {imm}(x{regnum_rs1})"
			load_addr = imm + rs1 & XLEN_MASK
			if funct3 == 0b000:
				if log: log_disasm = "lb" + log_load_str
				rd_wdata = self.mem.get8(load_addr)
				rd_wdata -= rd_wdata << 1 & 0x100
			elif funct3 == 0b001:
				if log: log_disasm = "lh" + log_load_str
				rd_wdata = self.mem.get16(load_addr)
				rd_wdata -= rd_wdata << 1 & 0x10000
			elif funct3 == 0b010:
				if log: log_disasm = "lw" + log_load_str
				rd_wdata = self.mem.get32(load_addr)
				rd_wdata -= rd_wdata << 1 & 0x100000000
			elif funct3 == 0b100:
				if log: log_disasm = "lbu" + log_load_str
				rd_wdata = self.mem.get8(load_addr)
			elif funct3 == 0b101:
				if log: log_disasm = "lhu" + log_load_str
				rd_wdata = self.mem.get16(load_addr)
			else:
				instr_invalid = True

		elif opc == OPC_STORE:
			imm = imm_s(instr)
			if log: log_store_str = f" x{regnum_rs2}, {imm}(x{regnum_rs1})"
			store_addr = imm + rs1 & XLEN_MASK
			if funct3 == 0b000:
				if log: log_disasm = "sb" + log_store_str
				self.mem.put8(store_addr, rs2 & (1 << 8) - 1)
			elif funct3 == 0b001:
				if log: log_disasm = "sh" + log_store_str
				self.mem.put16(store_addr, rs2 & (1 << 16) - 1)
			elif funct3 == 0b010:
				if log: log_disasm = "sw" + log_store_str
				self.mem.put32(store_addr, rs2)
			else:
				instr_invalid = True

		elif opc == OPC_LUI:
			imm = imm_u(instr)
			if log: log_disasm = f"lui x{regnum_rd}, 0x{(imm & XLEN_MASK) >> 12:05x}"
			rd_wdata = imm

		elif opc == OPC_AUIPC:
			imm = imm_u(instr)
			if log: log_disasm = f"auipc x{regnum_rd}, 0x{(imm & XLEN_MASK) >> 12:05x}"
			rd_wdata = self.pc + imm

		elif opc == OPC_SYSTEM:
			csr_addr = extract(instr, 31, 20)
			if funct3 == 0b000 and funct7 == 0b00_00000:
				if regnum_rs2 == 0:
					if log: log_disasm = "*UNHANDLED* ecall"
					pass
				elif regnum_rs2 == 1:
					if log: log_disasm = "*UNHANDLED* ebreak"
					pass
				else:
					instr_invalid = True
			elif funct3 in (0b001, 0b010, 0b011):
				# csrrw/csrrs/csrrc: the source operand is rs1 (bits 19:15).
				# Bits 24:20 (the rs2 field) are part of the CSR address.
				if log:
					instr_name = {0b001: "csrrw", 0b010: "csrrs", 0b011: "csrrc"}[funct3]
					log_disasm = f"{instr_name} x{regnum_rd}, 0x{csr_addr:x}, x{regnum_rs1}"
				csr_write_op = funct3 - 0b001
				# The read is skipped for csrrw with rd = x0; the write is
				# skipped for csrrs/csrrc with rs1 = x0
				if csr_write_op != RVCSR.WRITE or regnum_rd != 0:
					rd_wdata = self.csr.read(csr_addr)
				if csr_write_op == RVCSR.WRITE or regnum_rs1 != 0:
					self.csr.write(csr_addr, rs1, op=csr_write_op)
			elif funct3 in (0b101, 0b110, 0b111):
				# csrrwi/csrrsi/csrrci: the rs1 field holds a 5-bit immediate
				if log:
					instr_name = {0b101: "csrrwi", 0b110: "csrrsi", 0b111: "csrrci"}[funct3]
					log_disasm = f"{instr_name} x{regnum_rd}, 0x{csr_addr:x}, 0x{regnum_rs1:x}"
				csr_write_op = funct3 - 0b101
				if csr_write_op != RVCSR.WRITE or regnum_rd != 0:
					rd_wdata = self.csr.read(csr_addr)
				if csr_write_op == RVCSR.WRITE or regnum_rs1 != 0:
					self.csr.write(csr_addr, regnum_rs1, op=csr_write_op)
			else:
				instr_invalid = True

		elif opc == OPC_MISC_MEM:
			if instr == 0b0000_0000_0000_00000_001_00000_0001111:
				if log: log_disasm = "fence.i"
				pass
			elif (instr & 0b1111_0000_0000_11111_111_11111_1111111) == 0b0000_0000_0000_00000_000_00000_0001111:
				if log: log_disasm = f"fence {extract(instr, 27, 24):04b}, {extract(instr, 23, 20):04b}"
				pass
			else:
				instr_invalid = True

		if log:
			log_str = f"{self.pc:08x}: ({instr:08x}) {log_disasm if log_disasm is not None else '':<25}"
			if rd_wdata is not None and regnum_rd != 0:
				log_str += f" : x{regnum_rd:<2} <- {rd_wdata & XLEN_MASK:08x}"
			else:
				log_str += " : " + 15 * " "
			if pc_wdata is not None:
				log_str += f" : pc <- {pc_wdata & XLEN_MASK:08x}"
			else:
				log_str += " :"
			print(log_str)

		# Report before pc is updated so that the message names the address of
		# the offending instruction rather than that of its successor
		if instr_invalid:
			print(f"Invalid instruction at {self.pc:08x}: {instr:08x}")

		# Commit: wrap the result to 32 bits and store it as a signed value
		if rd_wdata is not None and regnum_rd != 0:
			self.regs[regnum_rd] = (rd_wdata & 0xffffffff) - (rd_wdata << 1 & 0x100000000)
		if pc_wdata is None:
			self.pc = self.pc + 4
		else:
			self.pc = pc_wdata
		self.csr.step()
def anyint(x):
	"""argparse type: parse an integer literal in any base Python accepts
	(decimal, or with a 0x / 0o / 0b prefix)."""
	return int(x, base=0)
def main(argv):
	"""Load a binary at address 0, run the core for up to --cycles
	instructions or until it requests exit, then print any --dump ranges."""
	parser = argparse.ArgumentParser()
	parser.add_argument("binfile")
	parser.add_argument("--memsize", default = 1 << 24, type = anyint)
	parser.add_argument("--cycles", default = int(1e4), type = anyint)
	parser.add_argument("--dump", nargs=2, action="append", type=anyint)
	parser.add_argument("--quiet", "-q", action="store_true")
	args = parser.parse_args(argv)

	if args.quiet:
		# Quiet mode: IO output is printed raw, without the "IO: " prefix
		mem = MemWithTBIO(args.memsize, io_log_fmt="{}")
	else:
		mem = MemWithTBIO(args.memsize)
	with open(args.binfile, "rb") as binfile:
		mem.loadbin(binfile, 0)

	rv = RVCore(mem)
	# Counted explicitly so the report below is also valid when the loop body
	# never runs (--cycles 0)
	cycles_run = 0
	try:
		for _ in range(args.cycles):
			cycles_run += 1
			rv.step(log=not args.quiet)
	except TBExit as e:
		print(f"Processor halted simulation with exit code {e}")
	except BrokenPipeError as e:
		# Output was piped into something that closed early (e.g. head)
		sys.exit(0)
	print(f"Ran for {cycles_run} cycles")

	for start, end in args.dump or []:
		print(f"Dumping memory from {start:08x} to {end:08x}:")
		for i, addr in enumerate(range(start, end)):
			sep = "\n" if i % 16 == 15 else " "
			sys.stdout.write(f"{mem.get8(addr):02x}{sep}")
		print("")
if __name__ == "__main__":
main(sys.argv[1:])

2
test/tb_cxxrtl/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
tb
dut.cpp

33
test/tb_cxxrtl/Makefile Normal file
View File

@ -0,0 +1,33 @@
# Builds the CXXRTL testbench: Yosys converts the HDL into a C++ model
# (dut.cpp), which tb.cpp #includes and clang++ compiles into ./tb.

# Top-level module to elaborate, and the matching preprocessor define(s)
# passed to tb.cpp.
TOP              := hazard5_cpu_2port
CDEFINES         := DUAL_PORT

# Parameter overrides applied to $(TOP) via chparam below.
CPU_RESET_VECTOR := 32'hc0
EXTENSION_C      := 1
EXTENSION_M      := 1
MULDIV_UNROLL    := 1
MUL_FAST         := 0
REDUCED_BYPASS   := 0

# File list for the design, expanded once so it can be used both as the
# read_verilog input and as the prerequisite list of dut.cpp.
DOTF             := ../../hdl/hazard5.f
HDL_SRCS         := $(shell listfiles $(DOTF))

.PHONY: clean tb all

all: tb

SYNTH_CMD += read_verilog -I ../../hdl $(HDL_SRCS);
SYNTH_CMD += chparam -set EXTENSION_C $(EXTENSION_C) $(TOP);
SYNTH_CMD += chparam -set EXTENSION_M $(EXTENSION_M) $(TOP);
SYNTH_CMD += chparam -set CSR_COUNTER 1 $(TOP);
SYNTH_CMD += chparam -set RESET_VECTOR $(CPU_RESET_VECTOR) $(TOP);
SYNTH_CMD += chparam -set REDUCED_BYPASS $(REDUCED_BYPASS) $(TOP);
SYNTH_CMD += chparam -set MULDIV_UNROLL $(MULDIV_UNROLL) $(TOP);
SYNTH_CMD += chparam -set MUL_FAST $(MUL_FAST) $(TOP);
SYNTH_CMD += prep -flatten -top $(TOP); async2sync;
SYNTH_CMD += write_cxxrtl dut.cpp

# Regenerate the model whenever the file list or any listed HDL source changes;
# without prerequisites an existing dut.cpp would never be rebuilt after an
# HDL edit. NOTE(review): files pulled in via `include (e.g. *.vh) are only
# tracked if listfiles reports them -- confirm.
# The redirection order is deliberate-looking and kept as-is: stdout goes to
# cxxrtl.log while stderr stays on the terminal.
dut.cpp: $(DOTF) $(HDL_SRCS)
	yosys -p "$(SYNTH_CMD)" 2>&1 > cxxrtl.log

clean::
	rm -f dut.cpp cxxrtl.log tb

# tb is phony, so the testbench is recompiled on every invocation.
tb: dut.cpp
	clang++ -O3 -std=c++14 $(addprefix -D,$(CDEFINES)) -I $(shell yosys-config --datdir)/include tb.cpp -o tb

217
test/tb_cxxrtl/tb.cpp Normal file
View File

@ -0,0 +1,217 @@
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
// jesus fuck i forgot how bad iostream formatting was, give me printf or give me death
#include <stdio.h>
// Device-under-test model generated by CXXRTL:
#include "dut.cpp"
#include <backends/cxxrtl/cxxrtl_vcd.h>
// Size in bytes of the flat simulated memory (16 MiB).
static const unsigned int MEM_SIZE = 16u << 20;

// Backing store for simulated memory; the binary is loaded at offset 0 and
// bus accesses below MEM_SIZE are served from here.
uint8_t mem[MEM_SIZE];

// Base bus address of the testbench IO registers.
static const unsigned int IO_BASE = 0x80000000u;

// Byte offsets of the individual IO registers, relative to IO_BASE.
enum {
	IO_PRINT_CHAR = 0x0, // write: print the value as a character
	IO_PRINT_U32  = 0x4, // write: print the value as 8 hex digits
	IO_EXIT       = 0x8  // write: stop the simulation, value is the exit code
};
// Usage text for the testbench command line. Written to stderr by exit_help()
// whenever the arguments cannot be parsed.
const char *help_str =
"Usage: tb binfile [vcdfile] [--dump start end] [--cycles n]\n"
" binfile : Binary to load into start of memory\n"
" vcdfile : Path to dump waveforms to\n"
" --dump start end : Print out memory contents between start and end (exclusive)\n"
" after execution finishes. Can be passed multiple times.\n"
" --cycles n : Maximum number of cycles to run before exiting.\n"
;
// Report a command-line error and quit.
//
// Writes `errtext` (may be empty) followed by the usage text to stderr, then
// terminates the process with exit status -1. Does not return.
void exit_help(std::string errtext = "") {
	std::cerr << errtext;
	std::cerr << help_str;
	exit(-1);
}
// True if an access of n_bytes starting at addr lies entirely inside mem[].
static bool mem_in_range(uint32_t addr, uint32_t n_bytes) {
	return addr < MEM_SIZE && n_bytes <= MEM_SIZE - addr;
}

// Read the little-endian 32-bit word that contains addr (addr is rounded down
// to a word boundary). Accesses outside mem[] read as zero.
static uint32_t mem_read_word(uint32_t addr) {
	addr &= ~0x3u;
	if (!mem_in_range(addr, 4))
		return 0;
	// Widen each byte before shifting so the << 24 never touches the sign bit
	// of a promoted int.
	return
		static_cast<uint32_t>(mem[addr]) |
		static_cast<uint32_t>(mem[addr + 1]) << 8 |
		static_cast<uint32_t>(mem[addr + 2]) << 16 |
		static_cast<uint32_t>(mem[addr + 3]) << 24;
}

// Testbench entry point.
//
// Parses the command line (see help_str), loads the binary into mem[], then
// clocks the CXXRTL model for up to max_cycles cycles while acting as a
// zero-wait-state bus slave: addresses below MEM_SIZE hit mem[], and writes to
// the IO registers at IO_BASE print text or stop the simulation. Finally
// prints any memory ranges requested with --dump.
//
// Returns 0 on a completed run, -1 if the binary or VCD file cannot be used.
// Invalid arguments terminate the process via exit_help().
int main(int argc, char **argv) {
	if (argc < 2)
		exit_help();

	bool dump_waves = false;
	std::string waves_path;
	std::vector<std::pair<uint32_t, uint32_t>> dump_ranges;
	int64_t max_cycles = 100000;

	for (int i = 2; i < argc; ++i) {
		std::string s(argv[i]);
		if (i == 2 && s.rfind("--", 0) != 0) {
			// Optional positional argument: vcdfile
			dump_waves = true;
			waves_path = s;
		}
		else if (s == "--dump") {
			if (argc - i < 3)
				exit_help("Option --dump requires 2 arguments\n");
			// stoul throws on non-numeric or out-of-range text; turn that into
			// a usage error instead of an uncaught exception.
			try {
				dump_ranges.push_back(std::pair<uint32_t, uint32_t>(
					std::stoul(argv[i + 1], nullptr, 0),
					std::stoul(argv[i + 2], nullptr, 0)
				));
			}
			catch (const std::exception &) {
				exit_help("Option --dump requires 2 integer arguments\n");
			}
			i += 2;
		}
		else if (s == "--cycles") {
			if (argc - i < 2)
				exit_help("Option --cycles requires an argument\n");
			try {
				max_cycles = std::stol(argv[i + 1], nullptr, 0);
			}
			catch (const std::exception &) {
				exit_help("Option --cycles requires an integer argument\n");
			}
			i += 1;
		}
		else {
			std::cerr << "Unrecognised argument " << s << "\n";
			exit_help("");
		}
	}

#ifdef DUAL_PORT
	cxxrtl_design::p_hazard5__cpu__2port top;
#else
	cxxrtl_design::p_hazard5__cpu__1port top;
#endif

	std::fill(std::begin(mem), std::end(mem), 0);

	// Open at the end of the file so tellg() yields its size.
	std::ifstream fd(argv[1], std::ios::binary | std::ios::ate);
	if (!fd) {
		std::cerr << "Failed to open binary file " << argv[1] << "\n";
		return -1;
	}
	std::streamsize bin_size = fd.tellg();
	if (bin_size < 0) {
		std::cerr << "Failed to get size of binary file " << argv[1] << "\n";
		return -1;
	}
	if (bin_size > static_cast<std::streamsize>(MEM_SIZE)) {
		std::cerr << "Binary file (" << bin_size << " bytes) is larger than memory (" << MEM_SIZE << " bytes)\n";
		return -1;
	}
	fd.seekg(0, std::ios::beg);
	fd.read((char*)mem, bin_size);

	std::ofstream waves_fd;
	cxxrtl::vcd_writer vcd;
	if (dump_waves) {
		waves_fd.open(waves_path);
		if (!waves_fd) {
			std::cerr << "Failed to open VCD file " << waves_path << "\n";
			return -1;
		}
		cxxrtl::debug_items all_debug_items;
		top.debug_info(all_debug_items);
		vcd.timescale(1, "us");
		vcd.add(all_debug_items);
	}

	// Address-phase state captured on the previous cycle; it describes the
	// data phase handled on the current cycle.
	bool bus_trans = false;
	bool bus_write = false;
#ifdef DUAL_PORT
	bool bus_trans_i = false;
	uint32_t bus_addr_i = 0;
#endif
	uint32_t bus_addr = 0;
	uint8_t bus_size = 0;

	// Never generate bus stalls
#ifdef DUAL_PORT
	top.p_i__hready.set<bool>(true);
	top.p_d__hready.set<bool>(true);
#else
	top.p_ahblm__hready.set<bool>(true);
#endif

	// Reset + initial clock pulse
	top.step();
	top.p_clk.set<bool>(true);
	top.step();
	top.p_clk.set<bool>(false);
	top.p_rst__n.set<bool>(true);
	top.step();

	for (int64_t cycle = 0; cycle < max_cycles; ++cycle) {
		top.p_clk.set<bool>(false);
		top.step();
		if (dump_waves)
			vcd.sample(cycle * 2);
		top.p_clk.set<bool>(true);
		top.step();

		// Handle current data phase, then move current address phase to data phase
		uint32_t rdata = 0;
		if (bus_trans && bus_write) {
#ifdef DUAL_PORT
			uint32_t wdata = top.p_d__hwdata.get<uint32_t>();
#else
			uint32_t wdata = top.p_ahblm__hwdata.get<uint32_t>();
#endif
			if (bus_addr < MEM_SIZE) {
				unsigned int n_bytes = 1u << bus_size;
				// Drop writes that would run off the end of mem[].
				if (mem_in_range(bus_addr, n_bytes)) {
					// Note we are relying on hazard5's byte lane replication
					for (unsigned int i = 0; i < n_bytes; ++i) {
						mem[bus_addr + i] = wdata >> (8 * i) & 0xffu;
					}
				}
			}
			else if (bus_addr == IO_BASE + IO_PRINT_CHAR) {
				putchar(wdata);
			}
			else if (bus_addr == IO_BASE + IO_PRINT_U32) {
				printf("%08x\n", wdata);
			}
			else if (bus_addr == IO_BASE + IO_EXIT) {
				printf("CPU requested halt. Exit code %d\n", static_cast<int>(wdata));
				// int64_t is not `long` on every platform; print via long long.
				printf("Ran for %lld cycles\n", static_cast<long long>(cycle + 1));
				break;
			}
		}
		else if (bus_trans && !bus_write) {
			rdata = mem_read_word(bus_addr);
		}
#ifdef DUAL_PORT
		top.p_d__hrdata.set<uint32_t>(rdata);
		if (bus_trans_i) {
			// Instruction fetches outside mem[] read as zero rather than
			// indexing past the array.
			top.p_i__hrdata.set<uint32_t>(mem_read_word(bus_addr_i));
		}
#else
		top.p_ahblm__hrdata.set<uint32_t>(rdata);
#endif

		// Capture this cycle's address phase. Bit 1 of HTRANS is set for
		// active (NSEQ/SEQ) transfers.
#ifdef DUAL_PORT
		bus_trans = top.p_d__htrans.get<uint8_t>() >> 1;
		bus_write = top.p_d__hwrite.get<bool>();
		bus_size = top.p_d__hsize.get<uint8_t>();
		bus_addr = top.p_d__haddr.get<uint32_t>();
		bus_trans_i = top.p_i__htrans.get<uint8_t>() >> 1;
		bus_addr_i = top.p_i__haddr.get<uint32_t>();
#else
		bus_trans = top.p_ahblm__htrans.get<uint8_t>() >> 1;
		bus_write = top.p_ahblm__hwrite.get<bool>();
		bus_size = top.p_ahblm__hsize.get<uint8_t>();
		bus_addr = top.p_ahblm__haddr.get<uint32_t>();
#endif

		if (dump_waves) {
			// The extra step() is just here to get the bus responses to line up nicely
			// in the VCD (hopefully is a quick update)
			top.step();
			vcd.sample(cycle * 2 + 1);
			waves_fd << vcd.buffer;
			vcd.buffer.clear();
		}
	}

	if (dump_waves) {
		// An IO_EXIT write leaves the loop before the per-cycle flush, so
		// write out whatever was sampled on that final cycle.
		waves_fd << vcd.buffer;
		vcd.buffer.clear();
	}

	for (const auto &r : dump_ranges) {
		printf("Dumping memory from %08x to %08x:\n", r.first, r.second);
		// Clamp to the end of memory; an empty or reversed range prints nothing.
		const uint32_t end = std::min<uint32_t>(r.second, MEM_SIZE);
		uint32_t col = 0;
		for (uint32_t addr = r.first; addr < end; ++addr, ++col)
			printf("%02x%c", mem[addr], col % 16 == 15 ? '\n' : ' ');
		printf("\n");
	}

	return 0;
}