Working ECP5 debug, seems a bit slow but maybe just due to bitbanged FT231X JTAG.

This commit is contained in:
Luke Wren 2021-07-23 18:32:47 +01:00
parent 8ceae7e9e6
commit 2ae30183aa
10 changed files with 326 additions and 156 deletions

View File

@ -0,0 +1,9 @@
# Source list for the ULX3S FPGA build.
# Top-level wrapper (module fpga_ulx3s):
file fpga_ulx3s.v
# The SoC itself, pulled in via its own list file:
list ../soc/soc.f
# ECP5 DTM is not in main list because the JTAGG primitive doesn't exist on
# most platforms
list ../../hdl/debug/dtm/hazard3_ecp5_jtag_dtm.f
# Reset helpers from libfpga (fpga_reset is instantiated by the top level;
# reset_sync is presumably needed by fpga_reset and/or the SoC -- confirm):
file ../libfpga/common/reset_sync.v
file ../libfpga/common/fpga_reset.v

View File

@ -0,0 +1,56 @@
/**********************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2021 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*********************************************************************/
`default_nettype none
// Top-level wrapper for the example SoC on the ULX3S board (ECP5 FPGA).
//
// Ports:
//   clk_osc  Board oscillator input; used directly as the system clock.
//   dbg      8 debug outputs. Nothing is routed to them yet, so they are
//            tied low.
//   uart_tx  UART transmit, driven by the SoC.
//   uart_rx  UART receive, passed through to the SoC.
module fpga_ulx3s (
	input  wire       clk_osc,
	output wire [7:0] dbg,

	output wire       uart_tx,
	input  wire       uart_rx
);

// No PLL: the system clock is the oscillator input, unmodified.
wire clk_sys = clk_osc;
wire rst_n_sys;

// Give the debug port a defined driver. Previously this output was left
// undriven, which tools report as a lint warning and which leaves the pin
// state up to the synthesis/PnR flow.
assign dbg = 8'h00;

// System reset generator, clocked by clk_sys. force_rst_n is tied high, so
// no external reset request is wired in at this level.
fpga_reset #(
	.SHIFT (3)
) rstgen (
	.clk         (clk_sys),
	.force_rst_n (1'b1),
	.rst_n       (rst_n_sys)
);

// DTM_TYPE "ECP5" selects the debug transport that hooks into the FPGA's own
// chip TAP rather than external JTAG pins, so the SoC's JTAG inputs are tied
// off and its TDO is left unconnected.
example_soc #(
	.DTM_TYPE ("ECP5")
) soc_u (
	.clk     (clk_sys),
	.rst_n   (rst_n_sys),

	.tck     (1'b0),
	.trst_n  (1'b0),
	.tms     (1'b0),
	.tdi     (1'b0),
	.tdo     (/* unused */),

	.uart_tx (uart_tx),
	.uart_rx (uart_rx)
);

endmodule

View File

@ -20,7 +20,9 @@
`default_nettype none `default_nettype none
module example_soc ( module example_soc #(
parameter DTM_TYPE = "JTAG" // can be "JTAG" or "ECP5"
) (
// System clock + reset // System clock + reset
input wire clk, input wire clk,
input wire rst_n, input wire rst_n,
@ -43,10 +45,6 @@ localparam W_DATA = 32;
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Processor debug // Processor debug
// JTAG-DTM IDCODE, selected after TAP reset, would normally be a
// JEP106-compliant ID
localparam IDCODE = 32'hdeadbeef;
wire dmi_psel; wire dmi_psel;
wire dmi_penable; wire dmi_penable;
wire dmi_pwrite; wire dmi_pwrite;
@ -68,9 +66,16 @@ reset_sync dmi_reset_sync_u (
.rst_n_out (rst_n_dmi) .rst_n_out (rst_n_dmi)
); );
generate
if (DTM_TYPE == "JTAG") begin
// Standard RISC-V JTAG-DTM connected to external IOs.
// JTAG-DTM IDCODE should be a JEP106-compliant ID:
localparam IDCODE = 32'hdeadbeef;
hazard3_jtag_dtm #( hazard3_jtag_dtm #(
.IDCODE (IDCODE) .IDCODE (IDCODE)
) inst_hazard3_jtag_dtm ( ) dtm_u (
.tck (tck), .tck (tck),
.trst_n (trst_n), .trst_n (trst_n),
.tms (tms), .tms (tms),
@ -92,6 +97,35 @@ hazard3_jtag_dtm #(
.dmi_pslverr (dmi_pslverr) .dmi_pslverr (dmi_pslverr)
); );
end else if (DTM_TYPE == "ECP5") begin
// Attach RISC-V DTM's DTMCS/DMI registers to ECP5 ER1/ER2 registers. This
// allows the processor to be debugged through the ECP5 chip TAP, using
// regular upstream OpenOCD.
// Connects to ECP5 TAP internally by instantiating a JTAGG primitive.
assign tdo = 1'b0;
hazard3_ecp5_jtag_dtm dtm_u (
.dmihardreset_req (dmihardreset_req),
.clk_dmi (clk),
.rst_n_dmi (rst_n_dmi),
.dmi_psel (dmi_psel),
.dmi_penable (dmi_penable),
.dmi_pwrite (dmi_pwrite),
.dmi_paddr (dmi_paddr),
.dmi_pwdata (dmi_pwdata),
.dmi_prdata (dmi_prdata),
.dmi_pready (dmi_pready),
.dmi_pslverr (dmi_pslverr)
);
end
endgenerate
localparam N_HARTS = 1; localparam N_HARTS = 1;
localparam XLEN = 32; localparam XLEN = 32;

View File

@ -0,0 +1,11 @@
# Build settings for the fpga_icebreaker top level (iCE40 up5k, sg48 package).
# The actual synthesis/PnR rules come from synth_ice40.mk, which is expected
# to provide the `bit` target used below.
CHIPNAME=fpga_icebreaker
DOTF=../fpga/fpga_icebreaker.f
SYNTH_OPT=-dsp

DEVICE=up5k
PACKAGE=sg48

include $(SCRIPTS)/synth_ice40.mk

# `prog` is an action, not a file: declare it phony so a stray file called
# "prog" in this directory can never mask the rule.
.PHONY: prog

# Program the board with iceprog once the bitstream has been built.
prog: bit
	iceprog $(CHIPNAME).bin

View File

@ -1,11 +1 @@
CHIPNAME=fpga_icebreaker include Icebreaker.mk
DOTF=../fpga/fpga_icebreaker.f
SYNTH_OPT=-dsp
DEVICE=up5k
PACKAGE=sg48
include $(SCRIPTS)/synth_ice40.mk
prog: bit
iceprog $(CHIPNAME).bin

View File

@ -0,0 +1,16 @@
# Build settings for the fpga_ulx3s top level (ECP5, CABGA381 package).
# The actual synthesis/PnR rules come from synth_ecp5.mk, which is expected
# to provide the `bit` target used below.
CHIPNAME=fpga_ulx3s
TOP=fpga_ulx3s
DOTF=../fpga/fpga_ulx3s.f
SYNTH_OPT=-abc9

DEVICE=um5g-85k
PACKAGE=CABGA381

include $(SCRIPTS)/synth_ecp5.mk

# `prog` and `flash` are actions, not files: declare them phony so stray
# files with those names can never mask the rules.
.PHONY: prog flash

# Load the bitstream with ujprog (no -j option, i.e. ujprog's default target).
prog: bit
	ujprog $(CHIPNAME).bit

# Write the bitstream to flash via ujprog's "-j flash" option.
flash: bit
	ujprog -j flash $(CHIPNAME).bit

View File

@ -0,0 +1,32 @@
# Pin and IO constraints for the fpga_ulx3s top level. Port names here must
# match the top-level module ports: clk_osc, uart_tx, uart_rx, dbg[7:0].
# Reference: https://github.com/emard/ulx3s/blob/master/doc/constraints/ulx3s_v20.lpf
# Oscillator input: pin G2, no pull, constrained as a 25 MHz clock.
LOCATE COMP "clk_osc" SITE "G2";
IOBUF PORT "clk_osc" PULLMODE=NONE IO_TYPE=LVCMOS33;
FREQUENCY PORT "clk_osc" 25 MHZ;
# UART TX/RX (from FPGA's point of view, i.e. TX is an output)
LOCATE COMP "uart_tx" SITE "L4"; # FPGA transmits to ftdi
LOCATE COMP "uart_rx" SITE "M1"; # FPGA receives from ftdi
IOBUF PORT "uart_tx" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "uart_rx" PULLMODE=UP IO_TYPE=LVCMOS33;
# 8 pins on an IO header for bringing signals out to a logic analyser
# The quoted name at the end of each line is the corresponding signal name
# in the reference constraints file (dbg[3:0] = gn[3:0], dbg[7:4] = gp[3:0]).
# PCLK / GR_PCLK tags are carried over from the reference; presumably they
# mark clock-capable pins -- confirm against the ECP5 pinout if that matters.
LOCATE COMP "dbg[0]" SITE "C11"; # PCLK # "gn[0]"
LOCATE COMP "dbg[1]" SITE "A11"; # PCLK # "gn[1]"
LOCATE COMP "dbg[2]" SITE "B10"; # GR_PCLK # "gn[2]"
LOCATE COMP "dbg[3]" SITE "C10"; # "gn[3]"
LOCATE COMP "dbg[4]" SITE "B11"; # PCLK # "gp[0]"
LOCATE COMP "dbg[5]" SITE "A10"; # PCLK # "gp[1]"
LOCATE COMP "dbg[6]" SITE "A9"; # GR_PCLK # "gp[2]"
LOCATE COMP "dbg[7]" SITE "B9"; # "gp[3]"
# All debug pins share the same electrical settings: 3.3 V LVCMOS, pull-up,
# DRIVE=4.
IOBUF PORT "dbg[0]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[1]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[2]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[3]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[4]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[5]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[6]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
IOBUF PORT "dbg[7]" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;

View File

@ -0,0 +1,41 @@
# Probe config specific to ULX3S.
#
# Uses the board's on-board FTDI USB-UART chip as a bitbanged JTAG adapter
# (OpenOCD ft232r driver), talks to the ECP5's own chip TAP, and presents the
# Hazard3 debug transport that is mapped onto that TAP as a RISC-V target.

adapter driver ft232r
ft232r_vid_pid 0x0403 0x6015
# Note the adapter speed setting doesn't do anything because this is bitbanged
# JTAG on aux UART pins, but... it's mandatory
adapter speed 1000

# JTAG signals are carried on the FTDI chip's modem-control pins:
ft232r_tck_num DSR
ft232r_tms_num DCD
ft232r_tdi_num RI
ft232r_tdo_num CTS
# trst/srst are not used but must have different values than above
ft232r_trst_num RTS
ft232r_srst_num DTR

# This is the ID for the *FPGA's* chip TAP. (note this ID is for 85F version
# of ULX3S -- if you have a different ECP5 size you can either enter the
# correct ID for your ECP5, or remove the -expected-id part). We are going to
# expose processor debug through a pair of custom DRs on this TAP.
#
# The TAP is declared via $_CHIPNAME so that its dotted name always matches
# the $_CHIPNAME.hazard3 chain position used for the target below.
set _CHIPNAME lfe5u85
jtag newtap $_CHIPNAME hazard3 -expected-id 0x41113043 -irlen 8 -irmask 0xFF -ircapture 0x5

# We expose the DTMCS/DMI DRs you would find on a normal RISC-V JTAG-DTM via
# the ECP5 TAP's ER1/ER2 private instructions. As long as you use the correct
# IR length for the ECP5 TAP, and use the new instructions, the ECP5 TAP
# looks a lot like a JTAG-DTM.
set _TARGETNAME $_CHIPNAME.hazard3
target create $_TARGETNAME riscv -chain-position $_TARGETNAME
riscv set_ir dtmcs 0x32
riscv set_ir dmi 0x38

# That's it, it's a normal RISC-V processor now :)
gdb_report_data_abort enable
init
halt

View File

@ -58,7 +58,7 @@ module hazard3_ecp5_jtag_dtm #(
wire jtdo2; wire jtdo2;
wire jtdo1; wire jtdo1;
wire jtdi; wire jtdi;
wire jtck; wire jtck_posedge_dont_use;
wire jrti2; wire jrti2;
wire jrti1; wire jrti1;
wire jshift; wire jshift;
@ -71,7 +71,7 @@ JTAGG jtag_u (
.JTDO2 (jtdo2), .JTDO2 (jtdo2),
.JTDO1 (jtdo1), .JTDO1 (jtdo1),
.JTDI (jtdi), .JTDI (jtdi),
.JTCK (jtck), .JTCK (jtck_posedge_dont_use),
.JRTI2 (jrti2), .JRTI2 (jrti2),
.JRTI1 (jrti1), .JRTI1 (jrti1),
.JSHIFT (jshift), .JSHIFT (jshift),
@ -81,25 +81,49 @@ JTAGG jtag_u (
.JCE1 (jce1) .JCE1 (jce1)
); );
// JTAGG primitive asserts its signals synchronously to JTCK's posedge
// (I think), but you get weird and inconsistent results if you try to
// consume them synchronously on JTCK's posedge, possibly due to a lack of
// hold constraints in nextpnr.
//
// A quick hack is to move the sampling onto the negedge of the clock. This
// then creates more problems because we would be running our shift logic on
// a different edge from the control + CDC logic in the DTM core.
//
// So, even worse hack, move all our JTAG-domain logic onto the negedge
// (or near enough) by inverting the clock.
wire jtck = !jtck_posedge_dont_use;
localparam W_DR_SHIFT = W_ADDR + 32 + 2; localparam W_DR_SHIFT = W_ADDR + 32 + 2;
wire core_dr_wen; reg core_dr_wen;
wire core_dr_ren; reg core_dr_ren;
wire core_dr_sel_dmi_ndtmcs; reg core_dr_sel_dmi_ndtmcs;
reg dr_shift_en;
wire [W_DR_SHIFT-1:0] core_dr_wdata; wire [W_DR_SHIFT-1:0] core_dr_wdata;
wire [W_DR_SHIFT-1:0] core_dr_rdata; wire [W_DR_SHIFT-1:0] core_dr_rdata;
// We would like to know at all times which DR is selected. Unfortunately // Decode our shift controls from the interesting ECP5 ones, and re-register
// JTAGG does not tell us this. Instead: // onto JTCK negedge (our posedge). Note without re-registering we observe
// // them a half-cycle (effectively one cycle) too early. This is another
// - During run test/idle, jrti1/jrti2 is asserted if IR matches ER1/ER2 // consequence of the stupid JTDI thing
//
// - During CAPTURE OR SHIFT, jce1/jce2 is asserted if IR matches ER1/ER2 always @ (posedge jtck or negedge jrst_n) begin
// if (!jrst_n) begin
// There is no signal that is valid during UPDATE. So we make our own: core_dr_sel_dmi_ndtmcs <= 1'b0;
core_dr_wen <= 1'b0;
core_dr_ren <= 1'b0;
dr_shift_en <= 1'b0;
end else begin
core_dr_sel_dmi_ndtmcs <= jce1 ? 1'b0 : jce2 ? 1'b1 : dr_sel_prev;
core_dr_ren <= (jce1 || jce2) && !jshift;
core_dr_wen <= jupdate;
dr_shift_en <= jshift;
end
end
reg dr_sel_prev; reg dr_sel_prev;
assign core_dr_sel_dmi_ndtmcs = jce1 ? 1'b0 : jce2 ? 1'b1 : dr_sel_prev;
always @ (posedge jtck or negedge jrst_n) begin always @ (posedge jtck or negedge jrst_n) begin
if (!jrst_n) begin if (!jrst_n) begin
@ -109,121 +133,78 @@ always @ (posedge jtck or negedge jrst_n) begin
end end
end end
// This is equivalent to "in capture DR state and IR is ER1 or ER2" reg [W_DR_SHIFT-1:0] dr_shift;
assign core_dr_ren = (jce1 || jce2) && !jshift; assign core_dr_wdata = dr_shift;
assign core_dr_wen = jupdate;
// Our DR shifter is made much more complex by the flop inserted by JTAGG
// between TDI and JTDI, which we have no control of. Say we have a total DR
// shift length of 42 (8 addr 32 data 2 op, in DMI) and first consider just
// SHIFT -> UPDATE:
//
// - After 42 SHIFT clocks, the 42nd data bit will be in the JTDI register
//
// - When we UPDATE, the write data must be the concatenation of the JTDI
// register and a 41 bit shift register which follows JTDI
//
// As we shift, JTDI plus 41 other flops form our 42 bit shift register. So
// far, mostly normal. The problem is that when we CAPTURE, we can't put the
// 42nd data bit into the JTDI register, because we have no control of it. We
// can't have a chain of 42 FPGA flops, because then our total scan length
// appears from the outside to be 43 bits. So the trick is:
//
// - The frontmost flop in the 42-bit scan is usually JTDI, but we have an
// additional shadow flop that is used on the first SHIFT cycle after
// CAPTURE
//
// - CAPTURE loads rdata into the shadow flop and the 41 regular shift flops
//
// - The first SHIFT clock drops the shifter LSB (which was previously on
// TDO), clocks the shadow flop down into the 41st position (which would
// normally take data from JTDI), and JTDI is swapped back in place of the
// shadow flop for UPDATE purposes
//
// - We are now in steady-state SHIFT.
//
// So before/after the first SHIFT clock the notional 42-bit register is
// {capture[41:0]} -> {JTDI reg, capture[41:1]} Where capture[41] is
// initially stored in the shadow flop, and then passes on to flop 40 of the
// main shift register. (we don't support zero-bit SHIFT, who cares!)
//
// Ok maybe that was a longwinded explanation but this really confused the
// shit out of me, so this is a gift for future Luke or other readers
reg dr_shift_head;
reg [W_DR_SHIFT-2:0] dr_shift_tail;
reg use_shift_head;
assign core_dr_wdata = core_dr_sel_dmi_ndtmcs ? {jtdi, dr_shift_tail} :
{{W_DR_SHIFT-32{1'b0}}, jtdi, dr_shift_tail[30:0]};
always @ (posedge jtck or negedge jrst_n) begin always @ (posedge jtck or negedge jrst_n) begin
if (!jrst_n) begin if (!jrst_n) begin
dr_shift_head <= 1'b0; dr_shift <= {W_DR_SHIFT{1'b0}};
dr_shift_tail <= {W_DR_SHIFT-1{1'b0}};
use_shift_head <= 1'b0;
end else if (core_dr_ren) begin end else if (core_dr_ren) begin
use_shift_head <= 1'b1; dr_shift <= core_dr_rdata;
{dr_shift_head, dr_shift_tail} <= core_dr_rdata; end else if (dr_shift_en) begin
end else begin dr_shift <= {jtdi, dr_shift} >> 1'b1;
use_shift_head <= 1'b0;
dr_shift_tail <= {
use_shift_head ? dr_shift_head : jtdi,
dr_shift_tail
} >> 1;
if (!core_dr_sel_dmi_ndtmcs) if (!core_dr_sel_dmi_ndtmcs)
dr_shift_tail[30] <= jtdi; dr_shift[31] <= jtdi;
end
end
// Not documented on ECP5: as well as the posedge flop on JTDI, the ECP5 puts
// a negedge flop on JTDO1, JTDO2. (Conjecture based on dicking around with a
// logic analyser.) To get JTDOx to appear with the same timing as our shifter
// LSB (which we update on every JTCK negedge) we:
//
// - Register the LSB of the *next* value of dr_shift on the JTCK posedge, so
// half a cycle earlier than the actual dr_shift update
//
// - This then gets re-registered with the pointless JTDO negedge flops, so
// that it appears with the same timing as our DR shifter update.
reg dr_shift_next_halfcycle;
always @ (negedge jtck or negedge jrst_n) begin
if (!jrst_n) begin
dr_shift_next_halfcycle <= 1'b0;
end else begin
dr_shift_next_halfcycle <=
core_dr_ren ? core_dr_rdata[0] :
dr_shift_en ? dr_shift[1] : dr_shift[0];
end end
end end
// We have only a single shifter for the ER1 and ER2 chains, so these are tied // We have only a single shifter for the ER1 and ER2 chains, so these are tied
// together: // together:
reg shift_tail_neg; assign jtdo1 = dr_shift_next_halfcycle;
assign jtdo2 = dr_shift_next_halfcycle;
always @ (negedge jtck or negedge jrst_n) begin
if (!jrst_n) begin
shift_tail_neg <= 1'b0;
end else begin
shift_tail_neg <= dr_shift_tail[0];
end
end
assign jtdo1 = shift_tail_neg;
assign jtdo2 = shift_tail_neg;
// The actual DTM is in here: // The actual DTM is in here:
// hazard3_jtag_dtm_core #( hazard3_jtag_dtm_core #(
// .DTMCS_IDLE_HINT(DTMCS_IDLE_HINT), .DTMCS_IDLE_HINT(DTMCS_IDLE_HINT),
// .W_ADDR(W_ADDR), .W_ADDR(W_ADDR),
// .W_DR_SHIFT(W_DR_SHIFT) .W_DR_SHIFT(W_DR_SHIFT)
// ) inst_hazard3_jtag_dtm_core ( ) inst_hazard3_jtag_dtm_core (
// .tck (tck), .tck (jtck),
// .trst_n (trst_n), .trst_n (jrst_n),
// .clk_dmi (clk_dmi),
// .rst_n_dmi (rst_n_dmi),
// .dr_wen (core_dr_wen), .clk_dmi (clk_dmi),
// .dr_ren (core_dr_ren), .rst_n_dmi (rst_n_dmi),
// .dr_sel_dmi_ndtmcs (core_dr_sel_dmi_ndtmcs),
// .dr_wdata (core_dr_wdata),
// .dr_rdata (core_dr_rdata),
// .dmihardreset_req (dmihardreset_req), .dr_wen (core_dr_wen),
.dr_ren (core_dr_ren),
.dr_sel_dmi_ndtmcs (core_dr_sel_dmi_ndtmcs),
.dr_wdata (core_dr_wdata),
.dr_rdata (core_dr_rdata),
// .dmi_psel (dmi_psel), .dmihardreset_req (dmihardreset_req),
// .dmi_penable (dmi_penable),
// .dmi_pwrite (dmi_pwrite),
// .dmi_paddr (dmi_paddr),
// .dmi_pwdata (dmi_pwdata),
// .dmi_prdata (dmi_prdata),
// .dmi_pready (dmi_pready),
// .dmi_pslverr (dmi_pslverr)
// );
assign core_dr_rdata = 42'h555555550; .dmi_psel (dmi_psel),
.dmi_penable (dmi_penable),
.dmi_pwrite (dmi_pwrite),
.dmi_paddr (dmi_paddr),
.dmi_pwdata (dmi_pwdata),
.dmi_prdata (dmi_prdata),
.dmi_pready (dmi_pready),
.dmi_pslverr (dmi_pslverr)
);
endmodule endmodule