diff --git a/src/modules/arm_debug/arm_debug.c b/src/modules/arm_debug/arm_debug.c index a336893..3e2983d 100644 --- a/src/modules/arm_debug/arm_debug.c +++ b/src/modules/arm_debug/arm_debug.c @@ -10,11 +10,14 @@ * primitives. Incremental bring-up: * - done: EmbeddedICE register access; halt (force DBGRQ, then debug * entry = DBGACK|INTDIS) / resume (RESTART); Thumb->ARM switch; - * instruction-injection register read/write + system-speed LDM. - * - WIP: the chain-1 access (c1_xfer) is not yet cycle-exact (one - * debug clock per access), so memory reads can be misaligned. See + * cycle-exact chain-1 access (one debug clock per access) -> debug- + * speed register read/write validated by a known-pattern round-trip; + * system-speed LDM re-enters debug (DBGACK&SYSCOMP). + * - WIP: the read_core_regs AFTER a system-speed access is phase- + * shifted by the EmbeddedICE<->chain-1 switch clocks, so memory + * reads come back misaligned (capturing injected instructions). See * the arm7-debug-dclk-timing note. - * - todo: cycle-exact c1_xfer, memory write, the arm_flash backend. + * - todo: align the post-sys-speed read, memory write, arm_flash. */ /* ARM7TDMI public JTAG instructions (IR length 4). */ @@ -216,12 +219,12 @@ static int c1_xfer(c1_ctx *c, uint32_t instr, int sysspeed, uint32_t *capture) for (i = 0; i < 32; i++) /* bits 1..32 = flip32(instr) */ if (f & (1u << i)) { int b = 1 + i; buf[b >> 3] |= (uint8_t)(1u << (b & 7)); } - /* Shift 33 bits (captures the bus at Capture-DR), then one explicit - * Run-Test/Idle clock to advance the core one debug step so the next - * access sees the next pipeline cycle. */ + /* Shift 33 bits: captures the bus at Capture-DR, applies the + * instruction at Update-DR and advances the core exactly one debug + * step via the Update->Run-Test/Idle transition. One access == one + * debug clock (an extra idle dwell would double-clock the pipeline). */ if (bscan_shift_dr(c->jc, buf, capture ? cap : NULL, 33) < 0) return -1; - bscan_idle_cycles(c->jc, 1); c->started = 1; if (capture) { @@ -320,14 +323,15 @@ static int execute_sys_speed(jtag_core *jc) int tries; if (bscan_set_ir(jc, IR_RESTART, ARM7_IR_LEN) < 0) return -1; - bscan_idle_cycles(jc, 32); + /* Poll DBG_STATUS; the EmbeddedICE scans clock the core enough to + * complete the one system-speed access and re-enter debug. (Matches + * OpenOCD: RESTART then poll, no runtest burst in between.) */ if (eice_select(jc) < 0) return -1; for (tries = 0; tries < 100; tries++) { if (eice_read(jc, EICE_DBG_STATUS, &status) < 0) return -1; if ((status & DBG_STATUS_DBGACK) && (status & DBG_STATUS_SYSCOMP)) return 0; - bscan_idle_cycles(jc, 32); } fprintf(stderr, "arm_debug: sys-speed access timed out (status 0x%08x)\n", status); return -1; @@ -338,12 +342,11 @@ static int execute_sys_speed(jtag_core *jc) * Core registers r0..r14 are clobbered (acceptable for a read-then- * power-cycle flow). The core must already be halted (DBGACK). * - * WORK IN PROGRESS: the chain-1 instruction pipeline is not yet - * cycle-exact (see the arm7-debug-dclk-timing design note). Halt, - * Thumb->ARM, RESTART and the instruction sequences are in place and - * real register data streams out, but each access must clock the core - * exactly once and `c1_xfer` (built on bscan_shift_dr) does not do that - * deterministically yet, so the captured words can be misaligned. */ + * WORK IN PROGRESS: chain-1 is now cycle-exact (register read/write + * round-trips) and the system-speed LDM re-enters debug, but the + * read_core_regs that follows execute_sys_speed is phase-shifted by the + * EmbeddedICE<->chain-1 switch, so the returned words are misaligned. + * See the arm7-debug-dclk-timing design note. */ int arm_debug_mem_read(jtag_core *jc, const jtag_target *t, unsigned long addr, void *buf, unsigned long len) {