#include #include #include #include "bscan.h" /* JTAG byte format expected by drv_TXRX_DATA / drv_TX_TMS: * bit 0 (JTAG_STR_DOUT) = TDI value * bit 1 (JTAG_STR_TMS) = TMS value * bit 4 (JTAG_STR_DIN) = TDO returned by the driver (1 if TDO was high). * In practice, jtag_core treats the input byte as "non-zero if TDO=1", * so we just check buf_in[i] != 0 on read. */ static int drv_ok(jtag_core *jc) { return jc && jc->io_functions.drv_TX_TMS && jc->io_functions.drv_TXRX_DATA; } /* --- Low-level primitives ----------------------------------------- */ int bscan_set_ir(jtag_core *jc, unsigned int opcode, int ir_length) { unsigned char tms_buf[8]; unsigned char *data_buf; int i; if (!drv_ok(jc) || ir_length <= 0 || ir_length > 32) { return -1; } /* Idle -> Select-DR -> Select-IR -> Capture-IR -> Shift-IR */ tms_buf[0] = JTAG_STR_TMS; tms_buf[1] = JTAG_STR_TMS; tms_buf[2] = 0; tms_buf[3] = 0; jc->io_functions.drv_TX_TMS(jc, tms_buf, 4); /* Shift IR LSB first; raise TMS on the last bit (-> Exit1-IR) */ data_buf = malloc(ir_length); if (!data_buf) return -1; for (i = 0; i < ir_length; i++) { data_buf[i] = ((opcode >> i) & 1u) ? JTAG_STR_DOUT : 0; if (i == ir_length - 1) { data_buf[i] |= JTAG_STR_TMS; } } jc->io_functions.drv_TXRX_DATA(jc, data_buf, NULL, ir_length); free(data_buf); /* Exit1-IR -> Update-IR -> Idle */ tms_buf[0] = JTAG_STR_TMS; tms_buf[1] = 0; jc->io_functions.drv_TX_TMS(jc, tms_buf, 2); return 0; } int bscan_shift_dr(jtag_core *jc, const uint8_t *tdi, uint8_t *tdo, int nbits) { unsigned char tms_buf[8]; unsigned char *buf_out, *buf_in; int i; if (!drv_ok(jc) || nbits <= 0) { return -1; } /* Idle -> Select-DR -> Capture-DR -> Shift-DR */ tms_buf[0] = JTAG_STR_TMS; tms_buf[1] = 0; tms_buf[2] = 0; jc->io_functions.drv_TX_TMS(jc, tms_buf, 3); buf_out = malloc(nbits); if (!buf_out) return -1; buf_in = tdo ? malloc(nbits) : NULL; if (tdo && !buf_in) { free(buf_out); return -1; } for (i = 0; i < nbits; i++) { uint8_t bit = 0; if (tdi) { bit = (tdi[i / 8] >> (i & 7)) & 1u; } buf_out[i] = bit ? JTAG_STR_DOUT : 0; if (i == nbits - 1) { buf_out[i] |= JTAG_STR_TMS; } } jc->io_functions.drv_TXRX_DATA(jc, buf_out, buf_in, nbits); if (tdo && buf_in) { memset(tdo, 0, (size_t)((nbits + 7) / 8)); for (i = 0; i < nbits; i++) { if (buf_in[i]) { tdo[i / 8] |= (uint8_t)(1u << (i & 7)); } } } free(buf_out); free(buf_in); /* Exit1-DR -> Update-DR -> Idle */ tms_buf[0] = JTAG_STR_TMS; tms_buf[1] = 0; jc->io_functions.drv_TX_TMS(jc, tms_buf, 2); return 0; } int bscan_shift_ir(jtag_core *jc, const uint8_t *tdi, uint8_t *tdo, int nbits) { unsigned char tms_buf[8]; unsigned char *buf_out, *buf_in; int i; if (!drv_ok(jc) || nbits <= 0) { return -1; } /* Idle -> Select-DR -> Select-IR -> Capture-IR -> Shift-IR */ tms_buf[0] = JTAG_STR_TMS; tms_buf[1] = JTAG_STR_TMS; tms_buf[2] = 0; tms_buf[3] = 0; jc->io_functions.drv_TX_TMS(jc, tms_buf, 4); buf_out = malloc(nbits); if (!buf_out) return -1; buf_in = tdo ? malloc(nbits) : NULL; if (tdo && !buf_in) { free(buf_out); return -1; } for (i = 0; i < nbits; i++) { uint8_t bit = 0; if (tdi) { bit = (tdi[i / 8] >> (i & 7)) & 1u; } buf_out[i] = bit ? JTAG_STR_DOUT : 0; if (i == nbits - 1) { buf_out[i] |= JTAG_STR_TMS; /* last bit -> Exit1-IR */ } } jc->io_functions.drv_TXRX_DATA(jc, buf_out, buf_in, nbits); if (tdo && buf_in) { memset(tdo, 0, (size_t)((nbits + 7) / 8)); for (i = 0; i < nbits; i++) { if (buf_in[i]) { tdo[i / 8] |= (uint8_t)(1u << (i & 7)); } } } free(buf_out); free(buf_in); /* Exit1-IR -> Update-IR -> Idle */ tms_buf[0] = JTAG_STR_TMS; tms_buf[1] = 0; jc->io_functions.drv_TX_TMS(jc, tms_buf, 2); return 0; } int bscan_tap_reset(jtag_core *jc) { unsigned char tms_buf[8]; if (!drv_ok(jc)) return -1; /* 5 TMS=1 forces Test-Logic-Reset from any state, then 1 TMS=0 * lands in Run-Test/Idle (where the shift primitives start). */ tms_buf[0] = JTAG_STR_TMS; tms_buf[1] = JTAG_STR_TMS; tms_buf[2] = JTAG_STR_TMS; tms_buf[3] = JTAG_STR_TMS; tms_buf[4] = JTAG_STR_TMS; tms_buf[5] = 0; jc->io_functions.drv_TX_TMS(jc, tms_buf, 6); return 0; } int bscan_idle_cycles(jtag_core *jc, int ncycles) { unsigned char *buf; int i; if (!drv_ok(jc) || ncycles <= 0) { return -1; } buf = malloc(ncycles); if (!buf) return -1; for (i = 0; i < ncycles; i++) buf[i] = 0; jc->io_functions.drv_TX_TMS(jc, buf, ncycles); free(buf); return 0; } /* --- High-level operations ---------------------------------------- */ static uint8_t reverse_bits(uint8_t b) { b = (uint8_t)(((b & 0xF0u) >> 4) | ((b & 0x0Fu) << 4)); b = (uint8_t)(((b & 0xCCu) >> 2) | ((b & 0x33u) << 2)); b = (uint8_t)(((b & 0xAAu) >> 1) | ((b & 0x55u) << 1)); return b; } int bscan_load_bitstream(jtag_core *jc, const fpga_target *t, const uint8_t *data, size_t nbytes) { uint8_t *reversed; unsigned int bypass; size_t i; if (!drv_ok(jc) || !t || !data || nbytes == 0) return -1; if (!t->ir_jprogram || !t->ir_cfg_in || !t->ir_jstart) { /* No configuration opcodes known for this family. */ return -1; } /* JPROGRAM clears the configuration memory. Min ~10k TCK cycles * to wait for INIT_B to go high before CFG_IN. * TODO: poll INIT_B via SAMPLE instead of fixed wait. */ if (bscan_set_ir(jc, t->ir_jprogram, t->ir_length) < 0) return -1; bscan_idle_cycles(jc, 10000); /* CFG_IN routes DR shifts to the configuration interface. */ if (bscan_set_ir(jc, t->ir_cfg_in, t->ir_length) < 0) return -1; /* Xilinx bitstream bytes must be bit-reversed before JTAG shift * (configuration interface latches MSB first, JTAG shifts LSB first). */ reversed = malloc(nbytes); if (!reversed) return -1; for (i = 0; i < nbytes; i++) { reversed[i] = reverse_bits(data[i]); } if (bscan_shift_dr(jc, reversed, NULL, (int)(nbytes * 8)) < 0) { free(reversed); return -1; } free(reversed); /* JSTART triggers the fabric startup. UG470/UG570: ≥12 cycles in * Idle to complete the sequence. Use 2000 for margin. */ if (bscan_set_ir(jc, t->ir_jstart, t->ir_length) < 0) return -1; bscan_idle_cycles(jc, 2000); /* Park on BYPASS (all 1s) so other operations don't trip on a * lingering instruction. */ bypass = (t->ir_length >= 32) ? 0xFFFFFFFFu : ((1u << t->ir_length) - 1u); bscan_set_ir(jc, bypass, t->ir_length); return 0; } /* Parse a Xilinx .bit container; return offset and length of the raw * bitstream payload. Returns -1 if not a .bit. */ static int xilinx_bit_payload(const uint8_t *buf, size_t buflen, size_t *out_off, size_t *out_len) { size_t off = 0; uint16_t hdr_len; if (buflen < 13) return -1; /* First 2 bytes are big-endian length of a magic block (typically 0x0009), * followed by 9 magic bytes. */ hdr_len = (uint16_t)((buf[0] << 8) | buf[1]); if (hdr_len != 0x0009) return -1; off = 2 + hdr_len; /* Then 2 bytes (0x0001) and ASCII-tagged sections a/b/c/d, then 'e' * followed by 4 bytes big-endian length of the bitstream payload. */ if (off + 2 > buflen) return -1; off += 2; while (off < buflen) { uint8_t tag = buf[off++]; if (tag == 'e') { uint32_t bit_len; if (off + 4 > buflen) return -1; bit_len = ((uint32_t)buf[off] << 24) | ((uint32_t)buf[off + 1] << 16) | ((uint32_t)buf[off + 2] << 8) | (uint32_t)buf[off + 3]; off += 4; if (off + bit_len > buflen) return -1; *out_off = off; *out_len = bit_len; return 0; } if (tag >= 'a' && tag <= 'd') { if (off + 2 > buflen) return -1; hdr_len = (uint16_t)((buf[off] << 8) | buf[off + 1]); off += 2 + hdr_len; } else { return -1; } } return -1; } int bscan_load_bitstream_file(jtag_core *jc, const fpga_target *t, const char *path) { FILE *f; long size; uint8_t *buf; size_t payload_off = 0; size_t payload_len = 0; int ret; if (!path) return -1; f = fopen(path, "rb"); if (!f) return -1; if (fseek(f, 0, SEEK_END) != 0) { fclose(f); return -1; } size = ftell(f); if (size <= 0) { fclose(f); return -1; } rewind(f); buf = malloc((size_t)size); if (!buf) { fclose(f); return -1; } if (fread(buf, 1, (size_t)size, f) != (size_t)size) { free(buf); fclose(f); return -1; } fclose(f); if (xilinx_bit_payload(buf, (size_t)size, &payload_off, &payload_len) < 0) { /* Treat as raw .bin */ payload_off = 0; payload_len = (size_t)size; } ret = bscan_load_bitstream(jc, t, buf + payload_off, payload_len); free(buf); return ret; } /* Pipeline latency between a MOSI bit going in and its MISO bit * appearing on TDO, in TCK cycles. For a single-device chain this is * one (the proxy registers TDO); equals jtag_tap_count_enabled() in * OpenOCD's jtagspi. The header asserts a single-device chain. */ #define BSCAN_SPI_READ_LATENCY 1 int bscan_spi_xfer(jtag_core *jc, const fpga_target *t, const uint8_t *tx, size_t txlen, uint8_t *rx, size_t rxlen) { /* DR frame (quartiq/OpenOCD jtagspi proxy, single device): * marker(1)=1 | count(32, MSB-first) | MOSI(txlen*8, MSB-first/byte) * | latency skip | MISO capture(rxlen*8, MSB-first/byte) * count = total SPI bits - 1. The skip absorbs the TDO pipeline * delay so the captured MISO aligns to byte boundaries. * Bits are placed LSB-first per byte, the layout bscan_shift_dr * shifts in order; ordering them here gives MSB-first on the wire. */ size_t spi_bytes = txlen + rxlen; uint32_t count; int total_bits, dr_bytes, capture_start, bit, j; size_t i; uint8_t *dr_out, *dr_in; if (!drv_ok(jc) || !t || !t->ir_user1 || spi_bytes == 0) return -1; if (txlen && !tx) return -1; if (rxlen && !rx) return -1; count = (uint32_t)(spi_bytes * 8u) - 1u; total_bits = 1 + 32 + (int)txlen * 8; if (rxlen) total_bits += BSCAN_SPI_READ_LATENCY + (int)rxlen * 8; dr_bytes = (total_bits + 7) / 8; dr_out = calloc(1, (size_t)dr_bytes); dr_in = rxlen ? calloc(1, (size_t)dr_bytes) : NULL; if (!dr_out || (rxlen && !dr_in)) { free(dr_out); free(dr_in); return -1; } #define BS_SET(buf, pos) ((buf)[(pos) >> 3] |= (uint8_t)(1u << ((pos) & 7))) #define BS_GET(buf, pos) (((buf)[(pos) >> 3] >> ((pos) & 7)) & 1u) bit = 0; BS_SET(dr_out, bit); bit++; /* marker = 1 */ for (j = 31; j >= 0; j--) { /* count, MSB-first */ if (count & (1u << j)) BS_SET(dr_out, bit); bit++; } for (i = 0; i < txlen; i++) { /* MOSI, MSB-first/byte */ for (j = 7; j >= 0; j--) { if (tx[i] & (1u << j)) BS_SET(dr_out, bit); bit++; } } capture_start = -1; if (rxlen) { bit += BSCAN_SPI_READ_LATENCY; /* skip pipeline delay */ capture_start = bit; bit += (int)rxlen * 8; /* MISO region (MOSI=0) */ } if (bscan_set_ir(jc, t->ir_user1, t->ir_length) < 0 || bscan_shift_dr(jc, dr_out, dr_in, total_bits) < 0) { free(dr_out); free(dr_in); return -1; } if (rxlen) { memset(rx, 0, rxlen); for (i = 0; i < rxlen * 8; i++) { if (BS_GET(dr_in, capture_start + (int)i)) { rx[i >> 3] |= (uint8_t)(1u << (7 - (i & 7))); /* MSB-first */ } } } #undef BS_SET #undef BS_GET free(dr_out); free(dr_in); return 0; }