Working ECP5 debug, seems a bit slow but maybe just due to bitbanged FT231X JTAG.

This commit is contained in:
Luke Wren 2021-07-23 18:32:47 +01:00
parent 8ceae7e9e6
commit 2ae30183aa
10 changed files with 326 additions and 156 deletions

View File

@ -24,15 +24,15 @@ module fpga_icebreaker (
// No external trst_n as iCEBreaker can't easily drive it from FTDI, so we
// generate a pulse internally from FPGA PoR.
input wire tck,
input wire tms,
input wire tdi,
output wire tdo,
input wire tck,
input wire tms,
input wire tdi,
output wire tdo,
output wire mirror_tck,
output wire mirror_tms,
output wire mirror_tdi,
output wire mirror_tdo,
output wire mirror_tck,
output wire mirror_tms,
output wire mirror_tdi,
output wire mirror_tdo,
output wire uart_tx,
input wire uart_rx

View File

@ -0,0 +1,9 @@
# Source list for the ULX3S FPGA top level (fpga_ulx3s).
# NOTE(review): "file" appears to add a single source file and "list" to pull
# in another .f list, both relative to this file -- confirm against the
# script that consumes these lists.
file fpga_ulx3s.v
list ../soc/soc.f
# ECP5 DTM is not in main list because the JTAGG primitive doesn't exist on
# most platforms
list ../../hdl/debug/dtm/hazard3_ecp5_jtag_dtm.f
# Reset helpers instantiated by the top level and the SoC
file ../libfpga/common/reset_sync.v
file ../libfpga/common/fpga_reset.v

View File

@ -0,0 +1,56 @@
/**********************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2021 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*********************************************************************/
`default_nettype none
// Top level for the ULX3S (Lattice ECP5) board. Wraps example_soc with a
// locally generated reset and selects the ECP5 flavour of the debug transport
// module, so no JTAG pins appear on this module's port list.
module fpga_ulx3s (
	input  wire       clk_osc,  // board oscillator, used directly as the system clock
	output wire [7:0] dbg,      // header pins reserved for logic-analyser probing
	output wire       uart_tx,
	input  wire       uart_rx
);

wire clk_sys = clk_osc;
wire rst_n_sys;

// Nothing is routed to the debug header yet. Drive it to a known level so
// the outputs are not left undriven (which synthesises to X / tool-dependent
// behaviour). Replace individual bits with signals of interest when probing.
assign dbg = 8'h00;

// Reset generator, defined elsewhere (libfpga). force_rst_n is tied high, so
// no external reset request is ever asserted.
// NOTE(review): presumably rst_n is then derived purely from FPGA power-on
// state, with SHIFT setting the length of the pulse -- confirm in fpga_reset.v.
fpga_reset #(
	.SHIFT (3)
) rstgen (
	.clk         (clk_sys),
	.force_rst_n (1'b1),
	.rst_n       (rst_n_sys)
);

// With DTM_TYPE == "ECP5", example_soc attaches its debug transport to the
// ECP5's own TAP via a JTAGG primitive and does not use its external JTAG
// port, so the JTAG inputs are tied off and tdo is left unconnected.
example_soc #(
	.DTM_TYPE ("ECP5")
) soc_u (
	.clk     (clk_sys),
	.rst_n   (rst_n_sys),

	.tck     (1'b0),
	.trst_n  (1'b0),
	.tms     (1'b0),
	.tdi     (1'b0),
	.tdo     (/* unused */),

	.uart_tx (uart_tx),
	.uart_rx (uart_rx)
);

endmodule

View File

@ -20,7 +20,9 @@
`default_nettype none
module example_soc (
module example_soc #(
parameter DTM_TYPE = "JTAG" // can be "JTAG" or "ECP5"
) (
// System clock + reset
input wire clk,
input wire rst_n,
@ -43,10 +45,6 @@ localparam W_DATA = 32;
// ----------------------------------------------------------------------------
// Processor debug
// JTAG-DTM IDCODE, selected after TAP reset, would normally be a
// JEP106-compliant ID
localparam IDCODE = 32'hdeadbeef;
wire dmi_psel;
wire dmi_penable;
wire dmi_pwrite;
@ -68,29 +66,65 @@ reset_sync dmi_reset_sync_u (
.rst_n_out (rst_n_dmi)
);
hazard3_jtag_dtm #(
.IDCODE (IDCODE)
) inst_hazard3_jtag_dtm (
.tck (tck),
.trst_n (trst_n),
.tms (tms),
.tdi (tdi),
.tdo (tdo),
generate
if (DTM_TYPE == "JTAG") begin
.dmihardreset_req (dmihardreset_req),
// Standard RISC-V JTAG-DTM connected to external IOs.
// JTAG-DTM IDCODE should be a JEP106-compliant ID:
localparam IDCODE = 32'hdeadbeef;
.clk_dmi (clk),
.rst_n_dmi (rst_n_dmi),
hazard3_jtag_dtm #(
.IDCODE (IDCODE)
) dtm_u (
.tck (tck),
.trst_n (trst_n),
.tms (tms),
.tdi (tdi),
.tdo (tdo),
.dmihardreset_req (dmihardreset_req),
.clk_dmi (clk),
.rst_n_dmi (rst_n_dmi),
.dmi_psel (dmi_psel),
.dmi_penable (dmi_penable),
.dmi_pwrite (dmi_pwrite),
.dmi_paddr (dmi_paddr),
.dmi_pwdata (dmi_pwdata),
.dmi_prdata (dmi_prdata),
.dmi_pready (dmi_pready),
.dmi_pslverr (dmi_pslverr)
);
end else if (DTM_TYPE == "ECP5") begin
// Attach RISC-V DTM's DTMCS/DMI registers to ECP5 ER1/ER2 registers. This
// allows the processor to be debugged through the ECP5 chip TAP, using
// regular upstream OpenOCD.
// Connects to ECP5 TAP internally by instantiating a JTAGG primitive.
assign tdo = 1'b0;
hazard3_ecp5_jtag_dtm dtm_u (
.dmihardreset_req (dmihardreset_req),
.clk_dmi (clk),
.rst_n_dmi (rst_n_dmi),
.dmi_psel (dmi_psel),
.dmi_penable (dmi_penable),
.dmi_pwrite (dmi_pwrite),
.dmi_paddr (dmi_paddr),
.dmi_pwdata (dmi_pwdata),
.dmi_prdata (dmi_prdata),
.dmi_pready (dmi_pready),
.dmi_pslverr (dmi_pslverr)
);
end
endgenerate
.dmi_psel (dmi_psel),
.dmi_penable (dmi_penable),
.dmi_pwrite (dmi_pwrite),
.dmi_paddr (dmi_paddr),
.dmi_pwdata (dmi_pwdata),
.dmi_prdata (dmi_prdata),
.dmi_pready (dmi_pready),
.dmi_pslverr (dmi_pslverr)
);
localparam N_HARTS = 1;
localparam XLEN = 32;

View File

@ -0,0 +1,11 @@
# Build + programming rules for the iCEBreaker board (fpga_icebreaker top level).
CHIPNAME=fpga_icebreaker
DOTF=../fpga/fpga_icebreaker.f
SYNTH_OPT=-dsp
DEVICE=up5k
PACKAGE=sg48

# Shared iCE40 flow. The prog rule below relies on it providing the `bit`
# target, which is expected to leave $(CHIPNAME).bin in this directory.
include $(SCRIPTS)/synth_ice40.mk

# prog is a command, not a file: keep it running even if a file called
# "prog" happens to exist.
.PHONY: prog

# Build the bitstream if needed, then program the board with iceprog.
prog: bit
	iceprog $(CHIPNAME).bin

View File

@ -1,11 +1 @@
CHIPNAME=fpga_icebreaker
DOTF=../fpga/fpga_icebreaker.f
SYNTH_OPT=-dsp
DEVICE=up5k
PACKAGE=sg48
include $(SCRIPTS)/synth_ice40.mk
prog: bit
iceprog $(CHIPNAME).bin
include Icebreaker.mk

View File

@ -0,0 +1,16 @@
# Build + programming rules for the ULX3S board (fpga_ulx3s top level).
CHIPNAME=fpga_ulx3s
TOP=fpga_ulx3s
DOTF=../fpga/fpga_ulx3s.f
SYNTH_OPT=-abc9
DEVICE=um5g-85k
PACKAGE=CABGA381

# Shared ECP5 flow. The rules below rely on it providing the `bit` target,
# which is expected to leave $(CHIPNAME).bit in this directory.
include $(SCRIPTS)/synth_ecp5.mk

# prog/flash are commands, not files: keep them running even if files with
# those names happen to exist.
.PHONY: prog flash

# Build the bitstream if needed, then load it with ujprog's default target.
# NOTE(review): presumably this is a volatile load that is lost on power
# cycle -- confirm against the ujprog documentation.
prog: bit
	ujprog $(CHIPNAME).bit

# Build the bitstream if needed, then write it to the board's flash.
flash: bit
	ujprog -j flash $(CHIPNAME).bit

View File

@ -0,0 +1,32 @@
# Pin and timing constraints for the fpga_ulx3s top level. Port names here
# must match the top-level module's ports (clk_osc, uart_tx, uart_rx, dbg).
# Reference: https://github.com/emard/ulx3s/blob/master/doc/constraints/ulx3s_v20.lpf
# Oscillator input, constrained to 25 MHz
LOCATE COMP "clk_osc" SITE "G2";
IOBUF PORT "clk_osc" PULLMODE=NONE IO_TYPE=LVCMOS33;
FREQUENCY PORT "clk_osc" 25 MHZ;
# UART TX/RX (from FPGA's point of view, i.e. TX is an output)
LOCATE COMP "uart_tx" SITE "L4"; # FPGA transmits to ftdi
LOCATE COMP "uart_rx" SITE "M1"; # FPGA receives from ftdi
IOBUF PORT "uart_tx" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "uart_rx" PULLMODE=UP IO_TYPE=LVCMOS33;
# 8 pins on an IO header for bringing signals out to a logic analyser
# (trailing comments give the clock capability of the site, where any, and
# the header pin name used in the reference constraints)
LOCATE COMP "dbg[0]" SITE "C11"; # PCLK # "gn[0]"
LOCATE COMP "dbg[1]" SITE "A11"; # PCLK # "gn[1]"
LOCATE COMP "dbg[2]" SITE "B10"; # GR_PCLK # "gn[2]"
LOCATE COMP "dbg[3]" SITE "C10"; # "gn[3]"
LOCATE COMP "dbg[4]" SITE "B11"; # PCLK # "gp[0]"
LOCATE COMP "dbg[5]" SITE "A10"; # PCLK # "gp[1]"
LOCATE COMP "dbg[6]" SITE "A9"; # GR_PCLK # "gp[2]"
LOCATE COMP "dbg[7]" SITE "B9"; # "gp[3]"
IOBUF PORT "dbg[0]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[1]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[2]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[3]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[4]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[5]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[6]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[7]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;

View File

@ -0,0 +1,41 @@
# OpenOCD probe + target config specific to ULX3S.
#
# JTAG is bitbanged through the board's on-board FTDI USB-serial chip using
# OpenOCD's ft232r driver, with the JTAG signals mapped onto the auxiliary
# UART (modem control) pins.
adapter driver ft232r
ft232r_vid_pid 0x0403 0x6015

# Note the "adapter speed" setting doesn't do anything because this is
# bitbanged JTAG on aux UART pins, but... it's mandatory
adapter speed 1000

ft232r_tck_num DSR
ft232r_tms_num DCD
ft232r_tdi_num RI
ft232r_tdo_num CTS
# trst/srst are not used but must have different values than above
ft232r_trst_num RTS
ft232r_srst_num DTR

# This is the ID for the *FPGA's* chip TAP. (note this ID is for 85F version
# of ULX3S -- if you have a different ECP5 size you can either enter the
# correct ID for your ECP5, or remove the -expected-id part). We are going to
# expose processor debug through a pair of custom DRs on this TAP.
set _CHIPNAME lfe5u85
# Use $_CHIPNAME rather than repeating the literal, so the TAP name always
# matches the $_TARGETNAME derived from it below.
jtag newtap $_CHIPNAME hazard3 -expected-id 0x41113043 -irlen 8 -irmask 0xFF -ircapture 0x5

# We expose the DTMCS/DMI DRs you would find on a normal RISC-V JTAG-DTM via
# the ECP5 TAP's ER1/ER2 private instructions. As long as you use the correct
# IR length for the ECP5 TAP, and use the new instructions, the ECP5 TAP
# looks a lot like a JTAG-DTM.
set _TARGETNAME $_CHIPNAME.hazard3
target create $_TARGETNAME riscv -chain-position $_TARGETNAME
# IR values that select the DTMCS and DMI data registers on the ECP5 TAP
riscv set_ir dtmcs 0x32
riscv set_ir dmi 0x38

# That's it, it's a normal RISC-V processor now :)
gdb_report_data_abort enable

# Connect immediately and halt the core, ready for a debugger to attach
init
halt

View File

@ -58,7 +58,7 @@ module hazard3_ecp5_jtag_dtm #(
wire jtdo2;
wire jtdo1;
wire jtdi;
wire jtck;
wire jtck_posedge_dont_use;
wire jrti2;
wire jrti1;
wire jshift;
@ -71,7 +71,7 @@ JTAGG jtag_u (
.JTDO2 (jtdo2),
.JTDO1 (jtdo1),
.JTDI (jtdi),
.JTCK (jtck),
.JTCK (jtck_posedge_dont_use),
.JRTI2 (jrti2),
.JRTI1 (jrti1),
.JSHIFT (jshift),
@ -81,25 +81,49 @@ JTAGG jtag_u (
.JCE1 (jce1)
);
// JTAGG primitive asserts its signals synchronously to JTCK's posedge
// (I think), but you get weird and inconsistent results if you try to
// consume them synchronously on JTCK's posedge, possibly due to a lack of
// hold constraints in nextpnr.
//
// A quick hack is to move the sampling onto the negedge of the clock. This
// then creates more problems because we would be running our shift logic on
// a different edge from the control + CDC logic in the DTM core.
//
// So, even worse hack, move all our JTAG-domain logic onto the negedge
// (or near enough) by inverting the clock.
wire jtck = !jtck_posedge_dont_use;
localparam W_DR_SHIFT = W_ADDR + 32 + 2;
wire core_dr_wen;
wire core_dr_ren;
wire core_dr_sel_dmi_ndtmcs;
reg core_dr_wen;
reg core_dr_ren;
reg core_dr_sel_dmi_ndtmcs;
reg dr_shift_en;
wire [W_DR_SHIFT-1:0] core_dr_wdata;
wire [W_DR_SHIFT-1:0] core_dr_rdata;
// We would like to know at all times which DR is selected. Unfortunately
// JTAGG does not tell us this. Instead:
//
// - During run test/idle, jrti1/jrti2 is asserted if IR matches ER1/ER2
//
// - During CAPTURE OR SHIFT, jce1/jce2 is asserted if IR matches ER1/ER2
//
// There is no signal that is valid during UPDATE. So we make our own:
// Decode our shift controls from the interesting ECP5 ones, and re-register
// onto JTCK negedge (our posedge). Note without re-registering we observe
// them a half-cycle (effectively one cycle) too early. This is another
// consequence of the stupid JTDI thing
always @ (posedge jtck or negedge jrst_n) begin
if (!jrst_n) begin
core_dr_sel_dmi_ndtmcs <= 1'b0;
core_dr_wen <= 1'b0;
core_dr_ren <= 1'b0;
dr_shift_en <= 1'b0;
end else begin
core_dr_sel_dmi_ndtmcs <= jce1 ? 1'b0 : jce2 ? 1'b1 : dr_sel_prev;
core_dr_ren <= (jce1 || jce2) && !jshift;
core_dr_wen <= jupdate;
dr_shift_en <= jshift;
end
end
reg dr_sel_prev;
assign core_dr_sel_dmi_ndtmcs = jce1 ? 1'b0 : jce2 ? 1'b1 : dr_sel_prev;
always @ (posedge jtck or negedge jrst_n) begin
if (!jrst_n) begin
@ -109,121 +133,78 @@ always @ (posedge jtck or negedge jrst_n) begin
end
end
// This is equivalent to "in capture DR state and IR is ER1 or ER2"
assign core_dr_ren = (jce1 || jce2) && !jshift;
assign core_dr_wen = jupdate;
// Our DR shifter is made much more complex by the flop inserted by JTAGG
// between TDI and JTDI, which we have no control of. Say we have a total DR
// shift length of 42 (8 addr 32 data 2 op, in DMI) and first consider just
// SHIFT -> UPDATE:
//
// - After 42 SHIFT clocks, the 42nd data bit will be in the JTDI register
//
// - When we UPDATE, the write data must be the concatenation of the JTDI
// register and a 41 bit shift register which follows JTDI
//
// As we shift, JTDI plus 41 other flops form our 42 bit shift register. So
// far, mostly normal. The problem is that when we CAPTURE, we can't put the
// 42nd data bit into the JTDI register, because we have no control of it. We
// can't have a chain of 42 FPGA flops, because then our total scan length
// appears from the outside to be 43 bits. So the trick is:
//
// - The frontmost flop in the 42-bit scan is usually JTDI, but we have an
// additional shadow flop that is used on the first SHIFT cycle after
// CAPTURE
//
// - CAPTURE loads rdata into the shadow flop and the 41 regular shift flops
//
// - The first SHIFT clock drops the shifter LSB (which was previously on
// TDO), clocks the shadow flop down into the 41st position (which would
// normally take data from JTDI), and JTDI is swapped back in place of the
// shadow flop for UPDATE purposes
//
// - We are now in steady-state SHIFT.
//
// So before/after the first SHIFT clock the notional 42-bit register is
// {capture[41:0]} -> {JTDI reg, capture[41:1]} Where capture[41] is
// initially stored in the shadow flop, and then passes on to flop 40 of the
// main shift register. (we don't support zero-bit SHIFT, who cares!)
//
// Ok maybe that was a longwinded explanation but this really confused the
// shit out of me, so this is a gift for future Luke or other readers
reg dr_shift_head;
reg [W_DR_SHIFT-2:0] dr_shift_tail;
reg use_shift_head;
assign core_dr_wdata = core_dr_sel_dmi_ndtmcs ? {jtdi, dr_shift_tail} :
{{W_DR_SHIFT-32{1'b0}}, jtdi, dr_shift_tail[30:0]};
reg [W_DR_SHIFT-1:0] dr_shift;
assign core_dr_wdata = dr_shift;
always @ (posedge jtck or negedge jrst_n) begin
if (!jrst_n) begin
dr_shift_head <= 1'b0;
dr_shift_tail <= {W_DR_SHIFT-1{1'b0}};
use_shift_head <= 1'b0;
dr_shift <= {W_DR_SHIFT{1'b0}};
end else if (core_dr_ren) begin
use_shift_head <= 1'b1;
{dr_shift_head, dr_shift_tail} <= core_dr_rdata;
end else begin
use_shift_head <= 1'b0;
dr_shift_tail <= {
use_shift_head ? dr_shift_head : jtdi,
dr_shift_tail
} >> 1;
dr_shift <= core_dr_rdata;
end else if (dr_shift_en) begin
dr_shift <= {jtdi, dr_shift} >> 1'b1;
if (!core_dr_sel_dmi_ndtmcs)
dr_shift_tail[30] <= jtdi;
dr_shift[31] <= jtdi;
end
end
// Not documented on ECP5: as well as the posedge flop on JTDI, the ECP5 puts
// a negedge flop on JTDO1, JTDO2. (Conjecture based on dicking around with a
// logic analyser.) To get JTDOx to appear with the same timing as our shifter
// LSB (which we update on every JTCK negedge) we:
//
// - Register the LSB of the *next* value of dr_shift on the JTCK posedge, so
// half a cycle earlier than the actual dr_shift update
//
// - This then gets re-registered with the pointless JTDO negedge flops, so
// that it appears with the same timing as our DR shifter update.
reg dr_shift_next_halfcycle;
always @ (negedge jtck or negedge jrst_n) begin
if (!jrst_n) begin
dr_shift_next_halfcycle <= 1'b0;
end else begin
dr_shift_next_halfcycle <=
core_dr_ren ? core_dr_rdata[0] :
dr_shift_en ? dr_shift[1] : dr_shift[0];
end
end
// We have only a single shifter for the ER1 and ER2 chains, so these are tied
// together:
reg shift_tail_neg;
always @ (negedge jtck or negedge jrst_n) begin
if (!jrst_n) begin
shift_tail_neg <= 1'b0;
end else begin
shift_tail_neg <= dr_shift_tail[0];
end
end
assign jtdo1 = shift_tail_neg;
assign jtdo2 = shift_tail_neg;
assign jtdo1 = dr_shift_next_halfcycle;
assign jtdo2 = dr_shift_next_halfcycle;
// The actual DTM is in here:
// hazard3_jtag_dtm_core #(
// .DTMCS_IDLE_HINT(DTMCS_IDLE_HINT),
// .W_ADDR(W_ADDR),
// .W_DR_SHIFT(W_DR_SHIFT)
// ) inst_hazard3_jtag_dtm_core (
// .tck (tck),
// .trst_n (trst_n),
// .clk_dmi (clk_dmi),
// .rst_n_dmi (rst_n_dmi),
hazard3_jtag_dtm_core #(
.DTMCS_IDLE_HINT(DTMCS_IDLE_HINT),
.W_ADDR(W_ADDR),
.W_DR_SHIFT(W_DR_SHIFT)
) inst_hazard3_jtag_dtm_core (
.tck (jtck),
.trst_n (jrst_n),
// .dr_wen (core_dr_wen),
// .dr_ren (core_dr_ren),
// .dr_sel_dmi_ndtmcs (core_dr_sel_dmi_ndtmcs),
// .dr_wdata (core_dr_wdata),
// .dr_rdata (core_dr_rdata),
.clk_dmi (clk_dmi),
.rst_n_dmi (rst_n_dmi),
// .dmihardreset_req (dmihardreset_req),
.dr_wen (core_dr_wen),
.dr_ren (core_dr_ren),
.dr_sel_dmi_ndtmcs (core_dr_sel_dmi_ndtmcs),
.dr_wdata (core_dr_wdata),
.dr_rdata (core_dr_rdata),
// .dmi_psel (dmi_psel),
// .dmi_penable (dmi_penable),
// .dmi_pwrite (dmi_pwrite),
// .dmi_paddr (dmi_paddr),
// .dmi_pwdata (dmi_pwdata),
// .dmi_prdata (dmi_prdata),
// .dmi_pready (dmi_pready),
// .dmi_pslverr (dmi_pslverr)
// );
.dmihardreset_req (dmihardreset_req),
assign core_dr_rdata = 42'h555555550;
.dmi_psel (dmi_psel),
.dmi_penable (dmi_penable),
.dmi_pwrite (dmi_pwrite),
.dmi_paddr (dmi_paddr),
.dmi_pwdata (dmi_pwdata),
.dmi_prdata (dmi_prdata),
.dmi_pready (dmi_pready),
.dmi_pslverr (dmi_pslverr)
);
endmodule