Hazard3/hdl/hazard3_frontend.v

404 lines
14 KiB
Verilog

/*****************************************************************************\
| Copyright (C) 2021-2022 Luke Wren |
| SPDX-License-Identifier: Apache-2.0 |
\*****************************************************************************/
`default_nettype none
// Instruction fetch frontend.
// Issues word-sized fetch requests on the mem_* interface, buffers the
// returned data as halfwords in a small FIFO, and presents the two oldest
// halfwords to decode as the current instruction register (cir). It also
// predecodes the register numbers of the instruction that will be in cir on
// the following cycle.
module hazard3_frontend #(
`include "hazard3_config.vh"
) (
input wire clk,
input wire rst_n,
// Fetch interface
// addr_vld may be asserted at any time, but after assertion,
// neither addr nor addr_vld may change until the cycle after addr_rdy.
// There is no backpressure on the data interface; the front end
// must ensure it does not request data it cannot receive.
// addr_rdy and dat_vld may be functions of hready, and
// may not be used to compute combinational outputs.
output wire mem_size, // 1'b1 -> 32 bit access
output wire [W_ADDR-1:0] mem_addr,
output wire mem_priv, // Privilege of the fetch; driven to 1 out of reset (M-mode)
output wire mem_addr_vld,
input wire mem_addr_rdy,
input wire [W_DATA-1:0] mem_data,
input wire mem_data_err, // Error flag for mem_data; stored next to each fetched halfword
input wire mem_data_vld,
// Jump/flush interface
// Processor may assert vld at any time. The request will not go through
// unless rdy is high. Processor *may* alter request during this time.
// Inputs must not be a function of hready.
input wire [W_ADDR-1:0] jump_target,
input wire jump_priv,
input wire jump_target_vld,
output wire jump_target_rdy,
// Interface to Decode
output wire [31:0] cir,
output reg [1:0] cir_vld, // number of valid halfwords in CIR
input wire [1:0] cir_use, // number of halfwords D intends to consume
// *may* be a function of hready
output wire [1:0] cir_err, // Bus error on upper/lower halfword of CIR.
input wire cir_lock,// Lock-in current contents and level of CIR.
// Assert simultaneously with a jump request,
// if decode is going to stall. This stops the CIR
// from being trashed by incoming fetch data;
// jump instructions have other side effects besides jumping!
// Provide the rs1/rs2 register numbers which will be in CIR next cycle.
// Coarse: valid if this instruction has a nonzero register operand.
// (Suitable for regfile read)
output reg [4:0] predecode_rs1_coarse,
output reg [4:0] predecode_rs2_coarse,
// Fine: like coarse, but accurate zeroing when the operand is implicit.
// (Suitable for bypass. Still not precise enough for stall logic.)
output reg [4:0] predecode_rs1_fine,
output reg [4:0] predecode_rs2_fine,
// Debugger instruction injection: instruction fetch is suppressed when in
// debug halt state, and the DM can then inject instructions into the last
// entry of the prefetch queue using the vld/rdy handshake.
input wire debug_mode,
input wire [W_DATA-1:0] dbg_instr_data,
input wire dbg_instr_data_vld,
output wire dbg_instr_data_rdy
);
`include "rv_opcodes.vh"
// This is the minimum size (in halfwords) for full fetch throughput, and
// there is little benefit to increasing it:
localparam FIFO_DEPTH = 7;
// Width of one FIFO entry: half of the fetch data bus.
localparam W_BUNDLE = W_DATA / 2;
// ----------------------------------------------------------------------------
// Fetch Queue (FIFO)
// A jump request is accepted in this cycle.
wire jump_now = jump_target_vld && jump_target_rdy;
// Note these registers have more than FIFO_DEPTH bits: these extras won't
// synthesise to registers, and are just there for loop boundary conditions.
// Errors travel alongside data until the processor actually tries to decode
// an instruction whose fetch errored. Up until this point, errors can be
// flushed harmlessly.
reg [W_BUNDLE-1:0] fifo_mem [0:FIFO_DEPTH+1];
reg [FIFO_DEPTH+1:0] fifo_err;
// fifo_valid additionally has an index -1, below the oldest entry.
reg [FIFO_DEPTH+1:-1] fifo_valid;
// Per-halfword valid flags for incoming fetch data; bit 0 is the lower
// halfword of mem_data, bit 1 the upper halfword.
wire [1:0] mem_data_hwvalid;
// NOTE(review): fifo_empty is not referenced anywhere else in the visible code.
wire fifo_empty = !fifo_valid[0];
// Full: will overflow after one 32b fetch. Almost full: after two of them.
wire fifo_full = fifo_valid[FIFO_DEPTH - 2];
wire fifo_almost_full = fifo_valid[FIFO_DEPTH - 4];
// Fetch data arriving this cycle is to be written into the FIFO.
wire fifo_push;
// Debugger vld/rdy handshake completes this cycle.
// NOTE(review): this strobe is not consumed by any logic in the visible
// code (see the "TODO: instruction injection" note at the FIFO update).
wire fifo_dbg_inject = DEBUG_SUPPORT && dbg_instr_data_vld && dbg_instr_data_rdy;
// Boundary conditions
// The FIFO arrays are declared slightly larger than the FIFO_DEPTH registered
// entries. The extra positions are driven combinationally here, so that the
// fifo_shift loops can index [i - 1] and [i + 2] without special cases.
always @ (*) begin
// Data for the two positions above the top entry. These positions are never
// marked valid (see below), so the fifo_shift loop does not select them.
fifo_mem[FIFO_DEPTH] = mem_data[31:16];
fifo_mem[FIFO_DEPTH + 1] = mem_data[31:16];
// The two positions above the top entry always read as error-free and invalid.
fifo_err[FIFO_DEPTH + 1 -: 2] = 2'b00;
fifo_valid[FIFO_DEPTH + 1 -: 2] = 2'b00;
// The position below entry 0 always reads as valid, so that entry 0 is
// treated as the first free slot when the FIFO is empty.
fifo_valid[-1] = 1'b1;
end
// Apply fetch, then shift out data consumed by decoder
// fifo_plus_fetch / fifo_err_plus_fetch: FIFO contents with incoming fetch
// data filled into the invalid slots.
// fifo_mem_next / fifo_err_next / fifo_valid_next: values for the registered
// FIFO entries on the next clock edge.
reg [W_BUNDLE-1:0] fifo_plus_fetch [0:FIFO_DEPTH+1];
reg [W_BUNDLE-1:0] fifo_mem_next [0:FIFO_DEPTH-1];
reg [FIFO_DEPTH+1:0] fifo_err_plus_fetch;
reg [FIFO_DEPTH-1:0] fifo_err_next;
reg [FIFO_DEPTH-1:0] fifo_valid_next;
always @ (*) begin: fifo_shift
integer i;
// Step 1: merge fetch data into the FIFO image. Valid slots keep their
// contents. An invalid slot directly above a valid one takes the lower
// halfword of mem_data when that halfword is valid. Every other invalid
// slot takes the upper halfword. This merge does not depend on fifo_push;
// whether the new data counts is decided by fifo_valid_next below.
for (i = 0; i < FIFO_DEPTH + 2; i = i + 1) begin
if (fifo_valid[i]) begin
fifo_plus_fetch[i] = fifo_mem[i];
fifo_err_plus_fetch[i] = fifo_err[i];
end else if (fifo_valid[i - 1] && mem_data_hwvalid[0]) begin
fifo_plus_fetch[i] = mem_data[0 * W_BUNDLE +: W_BUNDLE];
fifo_err_plus_fetch[i] = mem_data_err;
end else begin
fifo_plus_fetch[i] = mem_data[1 * W_BUNDLE +: W_BUNDLE];
fifo_err_plus_fetch[i] = mem_data_err;
end
end
// Step 2: shift the merged image down by the number of halfwords decode
// consumes: two if cir_use[1] is set, else one if cir_use[0] is set.
for (i = 0; i < FIFO_DEPTH; i = i + 1) begin
if (cir_use[1]) begin
fifo_mem_next[i] = fifo_plus_fetch[i + 2];
end else if (cir_use[0]) begin
fifo_mem_next[i] = fifo_plus_fetch[i + 1];
end else begin
fifo_mem_next[i] = fifo_plus_fetch[i];
end
end
// Step 3: next-state valid and error flags.
if (jump_now) begin
// An accepted jump clears all error flags, including those of any
// entries kept by cir_lock.
fifo_err_next = {FIFO_DEPTH{1'b0}};
if (cir_lock) begin
// Flush all but oldest instruction
// Entry 0 stays valid; entry 1 stays valid only when the low two bits
// of entry 0 are 2'b11 (a 32-bit instruction).
fifo_valid_next = {{FIFO_DEPTH-2{1'b0}}, fifo_mem[0][1:0] == 2'b11, 1'b1};
end else begin
fifo_valid_next = {FIFO_DEPTH{1'b0}};
end
end else begin
// Invert, shift left once per pushed halfword, invert again: this shifts
// a 1 into the bottom of the valid vector for each halfword pushed. The
// final right shift then drops the halfwords consumed by decode.
fifo_valid_next = ~(~fifo_valid[FIFO_DEPTH-1:0]
<< (fifo_push && mem_data_hwvalid[0])
<< (fifo_push && mem_data_hwvalid[1])
) >> cir_use;
fifo_err_next = fifo_err_plus_fetch >> cir_use;
end
end
// TODO: instruction injection.
// TODO: CIR locking.
// Registered FIFO state for entries 0 .. FIFO_DEPTH-1: data halfwords, valid
// flags and error flags. All three are cleared by the asynchronous reset, so
// that cir_err is well-defined immediately after reset and no flop in this
// block is left without a reset value.
// NOTE(review): the debugger injection strobe (fifo_dbg_inject) is not applied
// here; injection is still outstanding per the TODO above this block.
always @ (posedge clk or negedge rst_n) begin: fifo_update
	integer i;
	if (!rst_n) begin
		for (i = 0; i < FIFO_DEPTH; i = i + 1) begin
			fifo_mem[i] <= {W_BUNDLE{1'b0}};
		end
		fifo_valid[FIFO_DEPTH-1:0] <= {FIFO_DEPTH{1'b0}};
		// Previously missing: without this, the error flags came out of reset
		// as X and rst_n was inferred as an enable on these flops.
		fifo_err[FIFO_DEPTH-1:0] <= {FIFO_DEPTH{1'b0}};
	end else begin
		// Don't clock registers whose contents we won't care about on the
		// next cycle (note: inductively, if we don't care about it now we
		// will never care about it until we write new data to it.)
		for (i = 0; i < FIFO_DEPTH; i = i + 1) begin
			if (fifo_valid_next[i]) begin
				fifo_mem[i] <= fifo_mem_next[i];
			end
		end
		fifo_valid[FIFO_DEPTH-1:0] <= fifo_valid_next;
		fifo_err[FIFO_DEPTH-1:0] <= fifo_err_next;
	end
end
// ----------------------------------------------------------------------------
// Fetch Request + State Logic
// Keep track of some useful state of the memory interface
// mem_addr_hold: an address request was presented last cycle but not accepted,
// so it must be held unchanged this cycle.
reg mem_addr_hold;
// pending_fetches: number of fetch requests that have been presented and whose
// data has not yet returned.
reg [1:0] pending_fetches;
// ctr_flush_pending: number of outstanding fetches whose data must be
// discarded when it arrives, because a jump was accepted after they were
// requested.
reg [1:0] ctr_flush_pending;
// Count up when a new request is first presented (not while it is being
// held), count down when fetch data returns.
wire [1:0] pending_fetches_next = pending_fetches + (mem_addr_vld && !mem_addr_hold) - mem_data_vld;
// Debugger only injects instructions when the frontend is at rest and empty.
// As coded: rdy requires entry 0 to be invalid and no fetch data awaiting discard.
assign dbg_instr_data_rdy = DEBUG_SUPPORT && !fifo_valid[0] && ~|ctr_flush_pending;
// Returning fetch data is pushed into the FIFO unless it belongs to a flushed
// fetch, or the core is in debug mode.
assign fifo_push = mem_data_vld && ~|ctr_flush_pending
&& !(DEBUG_SUPPORT && debug_mode);
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
mem_addr_hold <= 1'b0;
pending_fetches <= 2'h0;
ctr_flush_pending <= 2'h0;
end else begin
`ifdef FORMAL
assert(ctr_flush_pending <= pending_fetches);
assert(pending_fetches < 2'd3);
assert(!(mem_data_vld && !pending_fetches));
`endif
mem_addr_hold <= mem_addr_vld && !mem_addr_rdy;
pending_fetches <= pending_fetches_next;
if (jump_now) begin
// Everything still outstanding at the time of the jump is stale, except
// data that is returning in this same cycle.
ctr_flush_pending <= pending_fetches - mem_data_vld;
end else if (|ctr_flush_pending && mem_data_vld) begin
// One stale fetch has returned and been dropped.
ctr_flush_pending <= ctr_flush_pending - 1'b1;
end
end
end
// Fetch addr runs ahead of the PC, in word increments.
// fetch_priv holds the privilege level driven on mem_priv for sequential
// fetches.
reg [W_ADDR-1:0] fetch_addr;
reg fetch_priv;
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
fetch_addr <= RESET_VECTOR;
// M-mode at reset:
fetch_priv <= 1'b1;
end else begin
if (jump_now) begin
// Post-increment if jump request is going straight through
// The target is aligned down to a word boundary. The word index is
// incremented when the jump's own address phase is accepted this cycle.
fetch_addr <= {jump_target[W_ADDR-1:2] + (mem_addr_rdy && !mem_addr_hold), 2'b00};
// Privilege is forced to 1 when U_MODE is 0.
fetch_priv <= jump_priv || !U_MODE;
end else if (mem_addr_vld && mem_addr_rdy) begin
// Sequential fetch accepted: advance by one word.
// NOTE(review): the 32'h4 literal presumably assumes W_ADDR == 32 -- confirm.
fetch_addr <= fetch_addr + 32'h4;
end
end
end
// Tracks a jump to a halfword-aligned (not word-aligned) target. The flag is
// set when such a jump is accepted and stays set until the first non-flushed
// fetch data returns, or until a word-aligned jump is accepted. While it is
// set, the lower halfword of the incoming fetch word is marked invalid.
reg  unaligned_jump_dph;
wire unaligned_jump_now = EXTENSION_C && jump_now && jump_target[1];

always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		unaligned_jump_dph <= 1'b0;
	end else if (EXTENSION_C) begin
		// Setting takes priority over clearing.
		if (unaligned_jump_now) begin
			unaligned_jump_dph <= 1'b1;
		end else if ((jump_now && !unaligned_jump_now)
			|| (mem_data_vld && ~|ctr_flush_pending)) begin
			unaligned_jump_dph <= 1'b0;
		end
	end
end

`ifdef FORMAL
// Records that the previous cycle accepted a jump to a word-aligned target.
reg property_after_aligned_jump;
always @ (posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		property_after_aligned_jump <= 1'b0;
	end else begin
		if (property_after_aligned_jump) begin
			// Make sure this clears properly (have been subtle historic bugs here)
			assert(!unaligned_jump_dph);
		end
		property_after_aligned_jump <= jump_now && !jump_target[1];
	end
end
`endif

// The upper halfword of fetch data is always valid; the lower halfword is
// dropped for the first data phase after an unaligned jump.
assign mem_data_hwvalid[1] = 1'b1;
assign mem_data_hwvalid[0] = !unaligned_jump_dph;
// Combinatorially generate the address-phase request
// reset_holdoff is 1 from reset until the first clock edge after reset is
// released. It is used to mask mem_addr_vld, so no fetch request is presented
// during that first cycle.
reg reset_holdoff;
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
reset_holdoff <= 1'b1;
end else begin
reset_holdoff <= 1'b0;
// This should be impossible, but assert to be sure, because it *will*
// change the fetch address (and we can avoid checking in hardware if
// we can prove it doesn't happen)
`ifdef FORMAL
assert(!(jump_target_vld && reset_holdoff));
`endif
end
end
// Address-phase request generation. The *_r signals are combinational
// (assigned in the always @ (*) block below), not flops.
reg                mem_addr_vld_r;
reg [W_ADDR-1:0]   mem_addr_r;
reg                mem_priv_r;

// Every downstream access is a full, aligned word.
assign mem_size        = 1'b1;
assign mem_addr        = mem_addr_r;
assign mem_priv        = mem_priv_r;
assign mem_addr_vld    = mem_addr_vld_r && !reset_holdoff;

// A jump can only be accepted when no earlier request is being held.
assign jump_target_rdy = !mem_addr_hold;

// Stall sequential fetch when the FIFO could not absorb everything already
// in flight plus one more word. The registered pending_fetches is used on
// purpose: the non-registered version would use the FIFO better, but would
// create a combinatorial path from hready to the address phase. As a result,
// at least a 2-word FIFO is needed for full fetch throughput.
wire fetch_stall =
	fifo_full
	|| (fifo_almost_full && |pending_fetches)
	|| (pending_fetches > 2'h1);

// Priority, highest first:
//   1. hold a request that has not been accepted yet
//   2. new jump target
//   3. no fetch in debug mode
//   4. sequential fetch, if not stalled
//   5. otherwise no request
always @ (*) begin
	mem_addr_vld_r = 1'b1;
	mem_addr_r     = fetch_addr;
	mem_priv_r     = fetch_priv;
	if (mem_addr_hold) begin
		mem_addr_r = fetch_addr;
	end else if (jump_target_vld) begin
		mem_addr_r = {jump_target[W_ADDR-1:2], 2'b00};
		mem_priv_r = jump_priv || !U_MODE;
	end else if (DEBUG_SUPPORT && debug_mode) begin
		mem_addr_vld_r = 1'b0;
	end else if (!fetch_stall) begin
		mem_addr_r = fetch_addr;
	end else begin
		mem_addr_vld_r = 1'b0;
	end
end
// ----------------------------------------------------------------------------
// Instruction assembly yard
// cir_vld is a registered count of valid halfwords in cir, computed from the
// next-state FIFO valid flags so that it lines up with the registered FIFO
// contents.
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
cir_vld <= 2'h0;
end else begin
`ifdef FORMAL
assert(cir_vld <= 2);
assert(cir_use <= cir_vld);
`endif
// Count encoding: 2'b10 when entry 1 will be valid, 2'b01 when only
// entry 0 will be valid, 2'b00 otherwise.
cir_vld <= {fifo_valid_next[1], fifo_valid_next[0] && !fifo_valid_next[1]};
end
end
// cir is the two oldest FIFO entries, with the oldest in the low halfword.
assign cir = {fifo_mem[1], fifo_mem[0]};
// One error flag per halfword of cir.
assign cir_err = fifo_err[1:0];
// ----------------------------------------------------------------------------
// Register number predecode
// The two halfwords that will be at the head of the FIFO (i.e. in cir) on the
// next cycle.
wire [31:0] next_instr = {fifo_mem_next[1], fifo_mem_next[0]};
// Without the C extension every instruction is treated as 32-bit.
wire next_instr_is_32bit = next_instr[1:0] == 2'b11 || ~|EXTENSION_C;
always @ (*) begin
// rs1, coarse: selected on {is_32bit, opcode quadrant [1:0], funct3 [15:13]}.
casez ({next_instr_is_32bit, next_instr[1:0], next_instr[15:13]})
{1'b1, 2'bzz, 3'bzzz}: predecode_rs1_coarse = next_instr[19:15]; // 32-bit R, S, B formats
{1'b0, 2'b00, 3'bz00}: predecode_rs1_coarse = 5'd2; // c.addi4spn + don't care
{1'b0, 2'b01, 3'b0zz}: predecode_rs1_coarse = next_instr[11:7]; // c.addi, c.addi16sp + don't care (jal, li)
{1'b0, 2'b10, 3'bz1z}: predecode_rs1_coarse = 5'd2; // c.lwsp, c.swsp + don't care
// Quadrant 2 with funct3 bit 1 clear: rs1 is the full 5-bit field in
// bits 11:7 (per the RVC encoding: c.slli, c.jr, c.mv, c.jalr, c.add).
{1'b0, 2'b10, 3'bz0z}: predecode_rs1_coarse = next_instr[11:7];
// Remaining compressed forms: 3-bit register field in bits 9:7, which
// selects x8..x15.
default: predecode_rs1_coarse = {2'b01, next_instr[9:7]};
endcase
// rs2, coarse: selected on {is_32bit, opcode quadrant [1:0]}.
casez ({next_instr_is_32bit, next_instr[1:0]})
{1'b1, 2'bzz}: predecode_rs2_coarse = next_instr[24:20];
// Quadrant 2: full 5-bit rs2 field in bits 6:2.
{1'b0, 2'b10}: predecode_rs2_coarse = next_instr[6:2];
// Other compressed forms: 3-bit register field in bits 4:2 (x8..x15).
default: predecode_rs2_coarse = {2'b01, next_instr[4:2]};
endcase
// The "fine" predecode targets those instructions which either:
// - Have an implicit zero-register operand in their expanded form (e.g. c.beqz)
// - Do not have a register operand on that port, but rely on the port being 0
// We don't care about instructions which ignore the reg ports, e.g. ebreak
// The leading bit of each case expression disables these special cases
// when EXTENSION_C is 0.
casez ({|EXTENSION_C, next_instr})
// -> addi rd, x0, imm:
{1'b1, 16'hzzzz, RV_C_LI }: predecode_rs1_fine = 5'd0;
{1'b1, 16'hzzzz, RV_C_MV }: begin
if (next_instr[6:2] == 5'd0) begin
// c.jr has rs1 as normal
predecode_rs1_fine = predecode_rs1_coarse;
end else begin
// -> add rd, x0, rs2:
predecode_rs1_fine = 5'd0;
end
end
default: predecode_rs1_fine = predecode_rs1_coarse;
endcase
casez ({|EXTENSION_C, next_instr})
{1'b1, 16'hzzzz, RV_C_BEQZ}: predecode_rs2_fine = 5'd0; // -> beq rs1, x0, label
{1'b1, 16'hzzzz, RV_C_BNEZ}: predecode_rs2_fine = 5'd0; // -> bne rs1, x0, label
default: predecode_rs2_fine = predecode_rs2_coarse;
endcase
end
endmodule
`ifndef YOSYS
`default_nettype wire
`endif