One CS-framed transaction: marker + 32-bit count + MOSI + read-latency skip + MISO, MSB-first on the wire, matching OpenOCD's jtagspi so the quartiq proxy bitstreams work unchanged. Half-duplex (tx,txlen,rx,rxlen) signature, single-device chain. NOT yet validated on hardware — protocol follows the OpenOCD reference but has not been confirmed against a live proxy + flash. Validation (read JEDEC ID on the KCU105) is the next step.
347 lines
10 KiB
C
347 lines
10 KiB
C
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "bscan_spi.h"
|
|
|
|
/* JTAG byte format expected by drv_TXRX_DATA / drv_TX_TMS:
 *   bit 0 (JTAG_STR_DOUT) = TDI value
 *   bit 1 (JTAG_STR_TMS)  = TMS value
 *   bit 4 (JTAG_STR_DIN)  = TDO returned by the driver (1 if TDO was high).
 * In practice, jtag_core treats the input byte as "non-zero if TDO=1",
 * so we just check buf_in[i] != 0 on read. */
|
|
|
|
/* Tell whether `jc` can be used by the routines in this file: the handle
 * must be non-NULL and both driver callbacks they invoke (drv_TX_TMS and
 * drv_TXRX_DATA) must be set. Returns 1 if usable, 0 otherwise. */
static int drv_ok(jtag_core *jc)
{
    if (!jc) {
        return 0;
    }
    if (!jc->io_functions.drv_TX_TMS) {
        return 0;
    }
    return jc->io_functions.drv_TXRX_DATA ? 1 : 0;
}
|
|
|
|
/* --- Low-level primitives ----------------------------------------- */
|
|
|
|
/* Load `opcode` into the instruction register.
 *
 * Starting from Run-Test/Idle, walks the TAP to Shift-IR, shifts the low
 * `ir_length` bits of `opcode` LSB first (TMS is raised on the final bit
 * to leave Shift-IR), then returns to Idle through Update-IR.
 *
 *   jc        - JTAG core handle; must pass drv_ok().
 *   opcode    - instruction value; only the low `ir_length` bits are sent.
 *   ir_length - IR width in bits, 1..32.
 *
 * Returns 0 once the sequence has been handed to the driver, -1 on invalid
 * arguments. Results of the driver callbacks are not inspected.
 *
 * The per-bit driver buffer lives on the stack (ir_length <= 32), so there
 * is no allocation that could fail after the TAP has already been moved
 * out of Idle and leave it stranded in Shift-IR. */
int bscan_set_ir(jtag_core *jc, unsigned int opcode, int ir_length)
{
    unsigned char tms_buf[4];
    unsigned char data_buf[32]; /* one driver byte per IR bit */
    int i;

    if (!drv_ok(jc) || ir_length <= 0 || ir_length > 32) {
        return -1;
    }

    /* Idle -> Select-DR -> Select-IR -> Capture-IR -> Shift-IR */
    tms_buf[0] = JTAG_STR_TMS;
    tms_buf[1] = JTAG_STR_TMS;
    tms_buf[2] = 0;
    tms_buf[3] = 0;
    jc->io_functions.drv_TX_TMS(jc, tms_buf, 4);

    /* Shift IR LSB first; raise TMS on the last bit (-> Exit1-IR) */
    for (i = 0; i < ir_length; i++) {
        data_buf[i] = ((opcode >> i) & 1u) ? JTAG_STR_DOUT : 0;
    }
    data_buf[ir_length - 1] |= JTAG_STR_TMS;
    jc->io_functions.drv_TXRX_DATA(jc, data_buf, NULL, ir_length);

    /* Exit1-IR -> Update-IR -> Idle */
    tms_buf[0] = JTAG_STR_TMS;
    tms_buf[1] = 0;
    jc->io_functions.drv_TX_TMS(jc, tms_buf, 2);

    return 0;
}
|
|
|
|
/* Shift `nbits` through the currently selected data register.
 *
 *   jc    - JTAG core handle; must pass drv_ok().
 *   tdi   - bits to send, packed LSB first within each byte (bit i lives
 *           at tdi[i / 8], position i & 7). NULL sends all zeros.
 *   tdo   - if non-NULL, receives the captured bits in the same packing;
 *           the first (nbits + 7) / 8 bytes are overwritten.
 *   nbits - number of bits to shift, > 0.
 *
 * Starting from Run-Test/Idle, walks the TAP to Shift-DR, shifts the data
 * (TMS raised on the last bit), then returns to Idle through Update-DR.
 *
 * Returns 0 once the sequence has been handed to the driver, -1 on invalid
 * arguments or allocation failure. Results of the driver callbacks are not
 * inspected.
 *
 * Both scratch buffers are allocated BEFORE any TMS is sent, so an
 * allocation failure returns with the TAP still in Idle rather than
 * stranded in Shift-DR. */
int bscan_shift_dr(jtag_core *jc, const uint8_t *tdi, uint8_t *tdo, int nbits)
{
    unsigned char tms_buf[3];
    unsigned char *buf_out;
    unsigned char *buf_in = NULL;
    int i;

    if (!drv_ok(jc) || nbits <= 0) {
        return -1;
    }

    /* One driver byte per bit, in each direction. */
    buf_out = malloc((size_t)nbits);
    if (!buf_out) {
        return -1;
    }
    if (tdo) {
        /* Zeroed so that any entry the driver leaves untouched reads as
         * TDO = 0 instead of indeterminate memory. */
        buf_in = calloc((size_t)nbits, 1);
        if (!buf_in) {
            free(buf_out);
            return -1;
        }
    }

    for (i = 0; i < nbits; i++) {
        unsigned int bit = tdi ? ((tdi[i / 8] >> (i & 7)) & 1u) : 0u;

        buf_out[i] = bit ? JTAG_STR_DOUT : 0;
    }
    buf_out[nbits - 1] |= JTAG_STR_TMS; /* last bit: Shift-DR -> Exit1-DR */

    /* Idle -> Select-DR -> Capture-DR -> Shift-DR */
    tms_buf[0] = JTAG_STR_TMS;
    tms_buf[1] = 0;
    tms_buf[2] = 0;
    jc->io_functions.drv_TX_TMS(jc, tms_buf, 3);

    jc->io_functions.drv_TXRX_DATA(jc, buf_out, buf_in, nbits);

    if (buf_in) {
        /* Any non-zero driver byte counts as TDO = 1. */
        memset(tdo, 0, (size_t)((nbits + 7) / 8));
        for (i = 0; i < nbits; i++) {
            if (buf_in[i]) {
                tdo[i / 8] |= (uint8_t)(1u << (i & 7));
            }
        }
    }

    free(buf_out);
    free(buf_in);

    /* Exit1-DR -> Update-DR -> Idle */
    tms_buf[0] = JTAG_STR_TMS;
    tms_buf[1] = 0;
    jc->io_functions.drv_TX_TMS(jc, tms_buf, 2);

    return 0;
}
|
|
|
|
/* Clock `ncycles` TCK cycles with TMS held low by sending a buffer of
 * zero bytes through drv_TX_TMS.
 *
 * Returns 0 after the buffer has been handed to the driver, -1 if the
 * handle is unusable, `ncycles` is not positive, or the scratch buffer
 * cannot be allocated. The driver's result is not inspected. */
int bscan_idle_cycles(jtag_core *jc, int ncycles)
{
    unsigned char *zeros;

    if (!drv_ok(jc)) {
        return -1;
    }
    if (ncycles <= 0) {
        return -1;
    }

    zeros = malloc(ncycles);
    if (zeros == NULL) {
        return -1;
    }
    memset(zeros, 0, (size_t)ncycles); /* TMS = 0 on every cycle */

    jc->io_functions.drv_TX_TMS(jc, zeros, ncycles);

    free(zeros);
    return 0;
}
|
|
|
|
/* --- High-level operations ---------------------------------------- */
|
|
|
|
/* Return `b` with its eight bits in mirrored order (bit 0 <-> bit 7,
 * bit 1 <-> bit 6, ...). */
static uint8_t reverse_bits(uint8_t b)
{
    uint8_t mirrored = 0;
    int k;

    /* Peel bits off the low end of `b` and push them onto the low end of
     * `mirrored`; after eight rounds the order is reversed. */
    for (k = 0; k < 8; k++) {
        mirrored = (uint8_t)((mirrored << 1) | (b & 1u));
        b = (uint8_t)(b >> 1);
    }
    return mirrored;
}
|
|
|
|
/* Configure the FPGA over JTAG with a raw bitstream.
 *
 * Sequence: JPROGRAM, fixed wait, CFG_IN, shift the bit-reversed payload
 * through DR, JSTART, startup wait, then park the IR on BYPASS.
 *
 *   jc     - JTAG core handle; must pass drv_ok().
 *   t      - target description; ir_jprogram, ir_cfg_in and ir_jstart must
 *            all be non-zero, ir_length is the IR width.
 *   data   - raw bitstream bytes (not a .bit container).
 *   nbytes - payload length; must be non-zero and small enough that
 *            nbytes * 8 fits in an int, since bscan_shift_dr counts bits
 *            in an int.
 *
 * Returns 0 on success, -1 on invalid arguments, allocation failure, or
 * failure of any step before the final BYPASS load.
 *
 * All argument checking and the allocation of the bit-reversed copy happen
 * BEFORE JPROGRAM is issued, so a failure that could have been detected up
 * front does not leave the device with its configuration cleared. */
int bscan_load_bitstream(jtag_core *jc, const fpga_target *t,
                         const uint8_t *data, size_t nbytes)
{
    uint8_t *reversed;
    unsigned int bypass;
    size_t i;
    int rc = -1;

    if (!drv_ok(jc) || !t || !data || nbytes == 0) return -1;
    if (!t->ir_jprogram || !t->ir_cfg_in || !t->ir_jstart) {
        /* No configuration opcodes known for this family. */
        return -1;
    }
    /* The DR shift length is passed as an int bit count. */
    if (nbytes > (size_t)INT_MAX / 8u) return -1;

    /* Xilinx bitstream bytes must be bit-reversed before JTAG shift
     * (configuration interface latches MSB first, JTAG shifts LSB first). */
    reversed = malloc(nbytes);
    if (!reversed) return -1;
    for (i = 0; i < nbytes; i++) {
        reversed[i] = reverse_bits(data[i]);
    }

    /* JPROGRAM clears the configuration memory. Min ~10k TCK cycles
     * to wait for INIT_B to go high before CFG_IN.
     * TODO: poll INIT_B via SAMPLE instead of fixed wait. */
    if (bscan_set_ir(jc, t->ir_jprogram, t->ir_length) < 0) goto out;
    if (bscan_idle_cycles(jc, 10000) < 0) goto out; /* never skip the wait */

    /* CFG_IN routes DR shifts to the configuration interface. */
    if (bscan_set_ir(jc, t->ir_cfg_in, t->ir_length) < 0) goto out;
    if (bscan_shift_dr(jc, reversed, NULL, (int)(nbytes * 8u)) < 0) goto out;

    /* JSTART triggers the fabric startup. UG470/UG570: >=12 cycles in
     * Idle to complete the sequence. Use 2000 for margin. */
    if (bscan_set_ir(jc, t->ir_jstart, t->ir_length) < 0) goto out;
    if (bscan_idle_cycles(jc, 2000) < 0) goto out;

    /* Park on BYPASS (all 1s) so other operations don't trip on a
     * lingering instruction. Best effort: the bitstream is already loaded,
     * so a failure here does not change the result. */
    bypass = (t->ir_length >= 32) ? 0xFFFFFFFFu : ((1u << t->ir_length) - 1u);
    (void)bscan_set_ir(jc, bypass, t->ir_length);

    rc = 0;

out:
    free(reversed);
    return rc;
}
|
|
|
|
/* Locate the raw bitstream payload inside a Xilinx .bit container.
 *
 * Layout walked here:
 *   - 2-byte big-endian length, which must be 0x0009, followed by that
 *     many bytes (their content is not checked);
 *   - a 2-byte field that is skipped;
 *   - tagged sections 'a'..'d', each a 2-byte big-endian length followed
 *     by that many bytes, which are skipped;
 *   - tag 'e', a 4-byte big-endian payload length, then the payload.
 *
 *   buf, buflen - file image.
 *   out_off     - receives the payload offset within `buf`.
 *   out_len     - receives the payload length in bytes.
 *
 * Returns 0 with both outputs set, or -1 (outputs untouched) if the buffer
 * does not parse as a .bit or the declared payload does not fit.
 *
 * Every bounds check compares against the bytes REMAINING (buflen - off)
 * instead of computing off + length, so a large length field taken from
 * the file cannot wrap the sum where size_t is only 32 bits wide. */
static int xilinx_bit_payload(const uint8_t *buf, size_t buflen,
                              size_t *out_off, size_t *out_len)
{
    size_t off;
    uint16_t hdr_len;

    if (buflen < 13) return -1;

    /* First 2 bytes are big-endian length of a magic block (typically
     * 0x0009), followed by 9 magic bytes. */
    hdr_len = (uint16_t)((buf[0] << 8) | buf[1]);
    if (hdr_len != 0x0009) return -1;
    off = (size_t)2 + hdr_len;

    /* Then a 2-byte field (0x0001), skipped without inspection. */
    if (buflen - off < 2) return -1;
    off += 2;

    while (off < buflen) {
        uint8_t tag = buf[off++]; /* off <= buflen from here on */

        if (tag == 'e') {
            uint32_t bit_len;

            if (buflen - off < 4) return -1;
            bit_len = ((uint32_t)buf[off] << 24)
                    | ((uint32_t)buf[off + 1] << 16)
                    | ((uint32_t)buf[off + 2] << 8)
                    | (uint32_t)buf[off + 3];
            off += 4;
            if (bit_len > buflen - off) return -1;
            *out_off = off;
            *out_len = bit_len;
            return 0;
        }

        if (tag < 'a' || tag > 'd') return -1;

        if (buflen - off < 2) return -1;
        hdr_len = (uint16_t)((buf[off] << 8) | buf[off + 1]);
        off += 2;
        /* A section that reaches the end of the buffer leaves no room for
         * the 'e' tag, so the image cannot be a valid container. */
        if (hdr_len >= buflen - off) return -1;
        off += hdr_len;
    }
    return -1;
}
|
|
|
|
/* Read the file at `path` into memory and load it into the FPGA.
 *
 * If the image parses as a Xilinx .bit container, only its payload is
 * sent; otherwise the whole file is treated as a raw .bin.
 *
 * Returns the result of bscan_load_bitstream(), or -1 if `path` is NULL,
 * the file cannot be opened, sized, or fully read, is empty, or the
 * buffer cannot be allocated. */
int bscan_load_bitstream_file(jtag_core *jc, const fpga_target *t,
                              const char *path)
{
    FILE *fp;
    long file_len = 0;
    uint8_t *image = NULL;
    size_t start = 0;
    size_t count = 0;
    int have_image = 0;
    int rc;

    if (path == NULL) {
        return -1;
    }
    fp = fopen(path, "rb");
    if (fp == NULL) {
        return -1;
    }

    /* Size the file by seeking to its end, then slurp it in one read. */
    if (fseek(fp, 0, SEEK_END) == 0) {
        file_len = ftell(fp);
        if (file_len > 0) {
            rewind(fp);
            image = malloc((size_t)file_len);
            if (image != NULL) {
                have_image =
                    fread(image, 1, (size_t)file_len, fp) == (size_t)file_len;
            }
        }
    }

    if (!have_image) {
        free(image);
    }
    fclose(fp);
    if (!have_image) {
        return -1;
    }

    if (xilinx_bit_payload(image, (size_t)file_len, &start, &count) < 0) {
        /* Treat as raw .bin */
        start = 0;
        count = (size_t)file_len;
    }

    rc = bscan_load_bitstream(jc, t, image + start, count);
    free(image);
    return rc;
}
|
|
|
|
/* Pipeline latency between a MOSI bit going in and its MISO bit
|
|
* appearing on TDO, in TCK cycles. For a single-device chain this is
|
|
* one (the proxy registers TDO); equals jtag_tap_count_enabled() in
|
|
* OpenOCD's jtagspi. The header asserts a single-device chain. */
|
|
#define BSCAN_SPI_READ_LATENCY 1
|
|
|
|
int bscan_spi_xfer(jtag_core *jc, const fpga_target *t,
|
|
const uint8_t *tx, size_t txlen,
|
|
uint8_t *rx, size_t rxlen)
|
|
{
|
|
/* DR frame (quartiq/OpenOCD jtagspi proxy, single device):
|
|
* marker(1)=1 | count(32, MSB-first) | MOSI(txlen*8, MSB-first/byte)
|
|
* | latency skip | MISO capture(rxlen*8, MSB-first/byte)
|
|
* count = total SPI bits - 1. The skip absorbs the TDO pipeline
|
|
* delay so the captured MISO aligns to byte boundaries.
|
|
* Bits are placed LSB-first per byte, the layout bscan_shift_dr
|
|
* shifts in order; ordering them here gives MSB-first on the wire. */
|
|
size_t spi_bytes = txlen + rxlen;
|
|
uint32_t count;
|
|
int total_bits, dr_bytes, capture_start, bit, j;
|
|
size_t i;
|
|
uint8_t *dr_out, *dr_in;
|
|
|
|
if (!drv_ok(jc) || !t || !t->ir_user1 || spi_bytes == 0) return -1;
|
|
if (txlen && !tx) return -1;
|
|
if (rxlen && !rx) return -1;
|
|
|
|
count = (uint32_t)(spi_bytes * 8u) - 1u;
|
|
|
|
total_bits = 1 + 32 + (int)txlen * 8;
|
|
if (rxlen) total_bits += BSCAN_SPI_READ_LATENCY + (int)rxlen * 8;
|
|
dr_bytes = (total_bits + 7) / 8;
|
|
|
|
dr_out = calloc(1, (size_t)dr_bytes);
|
|
dr_in = rxlen ? calloc(1, (size_t)dr_bytes) : NULL;
|
|
if (!dr_out || (rxlen && !dr_in)) { free(dr_out); free(dr_in); return -1; }
|
|
|
|
#define BS_SET(buf, pos) ((buf)[(pos) >> 3] |= (uint8_t)(1u << ((pos) & 7)))
|
|
#define BS_GET(buf, pos) (((buf)[(pos) >> 3] >> ((pos) & 7)) & 1u)
|
|
|
|
bit = 0;
|
|
BS_SET(dr_out, bit); bit++; /* marker = 1 */
|
|
|
|
for (j = 31; j >= 0; j--) { /* count, MSB-first */
|
|
if (count & (1u << j)) BS_SET(dr_out, bit);
|
|
bit++;
|
|
}
|
|
|
|
for (i = 0; i < txlen; i++) { /* MOSI, MSB-first/byte */
|
|
for (j = 7; j >= 0; j--) {
|
|
if (tx[i] & (1u << j)) BS_SET(dr_out, bit);
|
|
bit++;
|
|
}
|
|
}
|
|
|
|
capture_start = -1;
|
|
if (rxlen) {
|
|
bit += BSCAN_SPI_READ_LATENCY; /* skip pipeline delay */
|
|
capture_start = bit;
|
|
bit += (int)rxlen * 8; /* MISO region (MOSI=0) */
|
|
}
|
|
|
|
if (bscan_set_ir(jc, t->ir_user1, t->ir_length) < 0 ||
|
|
bscan_shift_dr(jc, dr_out, dr_in, total_bits) < 0) {
|
|
free(dr_out); free(dr_in);
|
|
return -1;
|
|
}
|
|
|
|
if (rxlen) {
|
|
memset(rx, 0, rxlen);
|
|
for (i = 0; i < rxlen * 8; i++) {
|
|
if (BS_GET(dr_in, capture_start + (int)i)) {
|
|
rx[i >> 3] |= (uint8_t)(1u << (7 - (i & 7))); /* MSB-first */
|
|
}
|
|
}
|
|
}
|
|
|
|
#undef BS_SET
|
|
#undef BS_GET
|
|
free(dr_out);
|
|
free(dr_in);
|
|
return 0;
|
|
}
|