From fda6aed077dce6d339db35792355f8bad65db2c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois?= Date: Sun, 24 May 2026 18:23:45 +0200 Subject: [PATCH] arm_debug: cycle-exact chain-1, Thumb->ARM, sys-speed re-entry The chain-1 access is now deterministic: a bare 33-bit bscan_shift_dr (breakpoint | flip32(instr)) is exactly one debug clock per access (the Update->Run-Test/Idle transition). The earlier "+1 idle" double-clocked the pipeline and the earlier all-zero/constant reads were the core being in Thumb state. Validated on the LPC2103 by a known-pattern register round-trip (write r1..r15, read back -> exact match; r15/PC differs by the expected pipeline offset). - c1_xfer: drop the extra idle dwell (one access == one debug clock). - mem_read: detect Thumb (ITBIT) and change_to_arm in one continuous chain-1 session so no chain switch clocks the core mid-sequence. - execute_sys_speed: drop the post-RESTART idle burst and poll DBG_STATUS straight away (matches OpenOCD); the system-speed LDM now re-enters debug (DBGACK & SYSCOMP) instead of running free. WIP: the read_core_regs after a system-speed access is phase-shifted by the EmbeddedICE<->chain-1 switch, so memory reads come back misaligned (capturing injected instructions). Next step + diagnosis in the arm7-debug-dclk-timing note. Co-Authored-By: Claude Opus 4.7 --- src/modules/arm_debug/arm_debug.c | 35 +++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/modules/arm_debug/arm_debug.c b/src/modules/arm_debug/arm_debug.c index a336893..3e2983d 100644 --- a/src/modules/arm_debug/arm_debug.c +++ b/src/modules/arm_debug/arm_debug.c @@ -10,11 +10,14 @@ * primitives. Incremental bring-up: * - done: EmbeddedICE register access; halt (force DBGRQ, then debug * entry = DBGACK|INTDIS) / resume (RESTART); Thumb->ARM switch; - * instruction-injection register read/write + system-speed LDM. - * - WIP: the chain-1 access (c1_xfer) is not yet cycle-exact (one - * debug clock per access), so memory reads can be misaligned. See + * cycle-exact chain-1 access (one debug clock per access) -> debug- + * speed register read/write validated by a known-pattern round-trip; + * system-speed LDM re-enters debug (DBGACK&SYSCOMP). + * - WIP: the read_core_regs AFTER a system-speed access is phase- + * shifted by the EmbeddedICE<->chain-1 switch clocks, so memory + * reads come back misaligned (capturing injected instructions). See * the arm7-debug-dclk-timing note. - * - todo: cycle-exact c1_xfer, memory write, the arm_flash backend. + * - todo: align the post-sys-speed read, memory write, arm_flash. */ /* ARM7TDMI public JTAG instructions (IR length 4). */ @@ -216,12 +219,12 @@ static int c1_xfer(c1_ctx *c, uint32_t instr, int sysspeed, uint32_t *capture) for (i = 0; i < 32; i++) /* bits 1..32 = flip32(instr) */ if (f & (1u << i)) { int b = 1 + i; buf[b >> 3] |= (uint8_t)(1u << (b & 7)); } - /* Shift 33 bits (captures the bus at Capture-DR), then one explicit - * Run-Test/Idle clock to advance the core one debug step so the next - * access sees the next pipeline cycle. */ + /* Shift 33 bits: captures the bus at Capture-DR, applies the + * instruction at Update-DR and advances the core exactly one debug + * step via the Update->Run-Test/Idle transition. One access == one + * debug clock (an extra idle dwell would double-clock the pipeline). */ if (bscan_shift_dr(c->jc, buf, capture ? cap : NULL, 33) < 0) return -1; - bscan_idle_cycles(c->jc, 1); c->started = 1; if (capture) { @@ -320,14 +323,15 @@ static int execute_sys_speed(jtag_core *jc) int tries; if (bscan_set_ir(jc, IR_RESTART, ARM7_IR_LEN) < 0) return -1; - bscan_idle_cycles(jc, 32); + /* Poll DBG_STATUS; the EmbeddedICE scans clock the core enough to + * complete the one system-speed access and re-enter debug. (Matches + * OpenOCD: RESTART then poll, no runtest burst in between.) */ if (eice_select(jc) < 0) return -1; for (tries = 0; tries < 100; tries++) { if (eice_read(jc, EICE_DBG_STATUS, &status) < 0) return -1; if ((status & DBG_STATUS_DBGACK) && (status & DBG_STATUS_SYSCOMP)) return 0; - bscan_idle_cycles(jc, 32); } fprintf(stderr, "arm_debug: sys-speed access timed out (status 0x%08x)\n", status); return -1; @@ -338,12 +342,11 @@ static int execute_sys_speed(jtag_core *jc) * Core registers r0..r14 are clobbered (acceptable for a read-then- * power-cycle flow). The core must already be halted (DBGACK). * - * WORK IN PROGRESS: the chain-1 instruction pipeline is not yet - * cycle-exact (see the arm7-debug-dclk-timing design note). Halt, - * Thumb->ARM, RESTART and the instruction sequences are in place and - * real register data streams out, but each access must clock the core - * exactly once and `c1_xfer` (built on bscan_shift_dr) does not do that - * deterministically yet, so the captured words can be misaligned. */ + * WORK IN PROGRESS: chain-1 is now cycle-exact (register read/write + * round-trips) and the system-speed LDM re-enters debug, but the + * read_core_regs that follows execute_sys_speed is phase-shifted by the + * EmbeddedICE<->chain-1 switch, so the returned words are misaligned. + * See the arm7-debug-dclk-timing design note. */ int arm_debug_mem_read(jtag_core *jc, const jtag_target *t, unsigned long addr, void *buf, unsigned long len) {