diff --git a/src/modules/arm_debug/arm_debug.c b/src/modules/arm_debug/arm_debug.c index 70f1529..8b3f967 100644 --- a/src/modules/arm_debug/arm_debug.c +++ b/src/modules/arm_debug/arm_debug.c @@ -553,21 +553,43 @@ int arm_debug_mem_read(jtag_core *jc, const jtag_target *t, if (chain_select(jc, SC_EICE) < 0) return -1; if (eice_read(jc, EICE_DBG_STATUS, &status) < 0) return -1; if (chain_select(jc, SC_DEBUG) < 0) return -1; - /* Normalize the core to a known ARM pipeline state regardless of the - * halt state. In Thumb, change_to_arm (17 clocked instructions) - * switches to ARM and flushes the firmware out of the pipeline; the - * read alignment is tuned for that. In ARM, run the same NUMBER of - * clocked NOPs so the read sees the same pipeline phase (skipping it - * left the firmware's arbitrary pipeline and the read misaligned). */ - c1_init(&c1, jc); - if (status & DBG_STATUS_ITBIT) { - if (change_to_arm(&c1) < 0) return -1; - } else { - int k; - for (k = 0; k < 17; k++) - if (c1_xfer(&c1, ARM_NOP, 0, NULL) < 0) return -1; + /* Debug entry, mirroring OpenOCD's arm7_9_debug_entry to leave a + * deterministic pipeline regardless of halt state: switch Thumb->ARM + * if needed, then read all 16 core registers. That STMIA+NOP+NOP+16 + * sequence flushes the firmware out of the pipeline and ends in the + * same known state for both the Thumb and ARM paths, so the first + * system-speed read reliably re-enters debug. */ + { + uint32_t scratch[16]; + c1_init(&c1, jc); + if (status & DBG_STATUS_ITBIT) + if (change_to_arm(&c1) < 0) return -1; + memset(scratch, 0, sizeof(scratch)); + if (read_core_regs(&c1, 0, 0xffff, scratch) < 0) return -1; + c1_end(&c1); + } + + /* WARM-UP: the first system-speed read after debug entry normalizes + * the sys-speed pipeline but its own result is unreliable. Do one + * throwaway read block and discard it; every read after it is + * consistent and correct. (Like the FTDI stale-first-read, but for + * the ARM debug pipeline.) */ + { + uint32_t scratch[16]; + r0 = (uint32_t)base; + c1_init(&c1, jc); + if (write_core_regs(&c1, 0, 0x1, &r0) < 0) return -1; + if (load_word_regs(&c1, 0x7ffe) < 0) return -1; /* r1..r14 */ + c1_end(&c1); + if (execute_sys_speed(jc) < 0) return -1; + if (quiet_chain_select(jc, SC_DEBUG) < 0) return -1; + if (quiet_latch_chain1(jc, ARM_NOP) < 0) return -1; + quiet_exit(jc); + memset(scratch, 0, sizeof(scratch)); + c1_init(&c1, jc); + if (read_core_regs(&c1, 0, 0x7ffe, scratch) < 0) return -1; + c1_end(&c1); } - c1_end(&c1); r0 = (uint32_t)base;