From 61e5798e3a8aca767b9f0953f27e4e28a901576d Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Fri, 30 Jan 2026 01:19:10 +0800 Subject: [PATCH] Add BASEPRI zero-latency ISR support This implements BASEPRI-based critical sections to enable zero-latency ISRs (priority 0x0-0x2) that can preempt kernel operations. It adds cycle-accurate interrupt latency measurement infrastructure using DWT. - Add BASEPRI primitives (irq_kernel_critical_enter/exit) to irq.h - Convert scheduler critical sections from PRIMASK to BASEPRI - Convert IPC critical sections from PRIMASK to BASEPRI - Add TCB validation in IPC to prevent use-after-free during callbacks - Add irq_system_state tracking and in_isr_context() helper --- include/platform/irq-latency.h | 125 ++++++++++++++++++++ include/platform/irq.h | 76 +++++++++++- kernel/build.mk | 3 +- kernel/ipc.c | 31 +++-- kernel/kdb-latency.c | 67 +++++++++++ kernel/kdb.c | 10 ++ kernel/sched.c | 52 ++++---- platform/build.mk | 3 +- platform/irq-latency.c | 209 +++++++++++++++++++++++++++++++++ platform/irq.c | 6 + 10 files changed, 546 insertions(+), 36 deletions(-) create mode 100644 include/platform/irq-latency.h create mode 100644 kernel/kdb-latency.c create mode 100644 platform/irq-latency.c diff --git a/include/platform/irq-latency.h b/include/platform/irq-latency.h new file mode 100644 index 00000000..66d96641 --- /dev/null +++ b/include/platform/irq-latency.h @@ -0,0 +1,125 @@ +/* Copyright (c) 2026 The F9 Microkernel Project. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef PLATFORM_IRQ_LATENCY_H_ +#define PLATFORM_IRQ_LATENCY_H_ + +#include <stdint.h> + +/* DWT (Data Watchpoint and Trace) registers for cycle counting */ +#define DWT_CTRL ((volatile uint32_t *) 0xE0001000) +#define DWT_CYCCNT ((volatile uint32_t *) 0xE0001004) +#define DWT_CTRL_CYCCNTENA (1 << 0) + +#define DEMCR ((volatile uint32_t *) 0xE000EDFC) +#define DEMCR_TRCENA (1 << 24) + +/** + * @file irq-latency.h + * @brief Interrupt latency measurement and profiling infrastructure + * + * Provides cycle-accurate latency tracking for zero-latency ISRs and + * standard IRQs. Enables validation of BASEPRI-based zero-latency + * interrupt performance (<10 cycle target). + * + * Usage: + * 1. Call latency_sample_start() at ISR entry + * 2. Call latency_sample_end(priority, irq_num) at ISR exit + * 3. View statistics via KDB 'L' command + */ + +/** + * Latency statistics per interrupt priority level. + */ +typedef struct { + uint32_t count; /* Number of samples */ + uint32_t min; /* Minimum latency (cycles) */ + uint32_t max; /* Maximum latency (cycles) */ + uint32_t sum; /* Sum for average calculation */ + uint32_t avg; /* Average latency (cycles) */ +} latency_stats_t; + +/** + * Get current cycle count from DWT_CYCCNT. + * Returns 0 if DWT is not enabled. + */ +static inline uint32_t get_cycle_count(void) +{ + return *DWT_CYCCNT; +} + +/** + * Enable DWT cycle counter for latency measurements. + * Called during system initialization. + */ +void latency_init(void); + +/** + * Record latency sample for an interrupt. + * + * @param priority Interrupt priority (0x0-0xF) + * @param irq_num IRQ number (-15 to 239) + * @param cycles Measured latency in cycles + */ +void latency_record(uint8_t priority, int16_t irq_num, uint32_t cycles); + +/** + * Get latency statistics for a priority level. 
+ * + * @param priority Interrupt priority (0x0-0xF) + * @return Pointer to statistics structure + */ +const latency_stats_t *latency_get_stats(uint8_t priority); + +/** + * Get a best-effort atomic snapshot of latency statistics. + * + * Uses relaxed atomics only; intended for diagnostic reads outside ISR + * context. Returns 1 on success, 0 on invalid input. + */ +int latency_get_stats_snapshot(uint8_t priority, latency_stats_t *out); + +/** + * Reset all latency statistics. + */ +void latency_reset(void); + +/** + * Get interrupt number from IPSR. + * Returns 0 for thread mode, 1-15 for exceptions, 16+ for IRQs. + */ +static inline uint32_t get_irq_number(void) +{ + uint32_t ipsr; + __asm__ __volatile__("mrs %0, ipsr" : "=r"(ipsr)); + return ipsr & 0x1FF; +} + +/** + * Latency measurement helper - call at ISR entry. + * Returns timestamp for latency_sample_end(). + */ +static inline uint32_t latency_sample_start(void) +{ + return get_cycle_count(); +} + +/** + * Latency measurement helper - call at ISR exit. 
+ * + * @param start_cycles Timestamp from latency_sample_start() + * @param priority Interrupt priority level + * @param irq_num IRQ number from IPSR + */ +static inline void latency_sample_end(uint32_t start_cycles, + uint8_t priority, + int16_t irq_num) +{ + uint32_t end_cycles = get_cycle_count(); + uint32_t elapsed = end_cycles - start_cycles; + latency_record(priority, irq_num, elapsed); +} + +#endif /* PLATFORM_IRQ_LATENCY_H_ */ diff --git a/include/platform/irq.h b/include/platform/irq.h index 6a057c1e..efeeed58 100644 --- a/include/platform/irq.h +++ b/include/platform/irq.h @@ -15,6 +15,34 @@ void irq_init(void); +/* + * Interrupt Priority Levels (ARM Cortex-M 4-bit priorities) + */ +#define IRQ_PRIO_ZERO_LATENCY_MAX 0x2 /* Highest priority, never masked */ +#define IRQ_PRIO_SYSTICK 0x3 /* System timer */ +#define IRQ_PRIO_KERNEL_MASK 0x40 /* BASEPRI mask (0x4 << 4) */ +#define IRQ_PRIO_USER_DEFAULT 0x8 /* Default user IRQ priority */ +#define IRQ_PRIO_LOWEST 0xF /* SVCall, PendSV */ + +/* + * System state tracking for ISR context. + * 0 = Thread mode (PSP), 1+ = Handler mode (MSP, tracks nesting depth). + */ +extern volatile uint32_t irq_system_state; + +/* + * Fast ISR context check using hardware IPSR register. + * Returns: true if currently in exception handler, false if in thread mode. + * Zero overhead: Single MRS instruction, no memory access or race conditions. + */ +static inline bool in_isr_context(void) +{ + return IPSR() != 0; +} + +/* + * PRIMASK-based critical sections (blocks ALL interrupts). + */ static inline void irq_disable(void) { __asm__ __volatile__("cpsid i" ::: "memory"); @@ -45,6 +73,53 @@ static inline void irq_restore_flags(uint32_t flags) __asm__ __volatile__("msr primask, %0" ::"r"(flags) : "memory"); } +/* + * BASEPRI-based critical sections (blocks interrupts >= priority level). + * Zero-latency ISRs at priority 0x0-0x2 can preempt kernel critical sections. 
+ */ +static inline void irq_disable_below(uint8_t priority) +{ + uint32_t basepri = (priority << 4) & 0xFF; + __asm__ __volatile__("msr basepri, %0" ::"r"(basepri) : "memory"); +} + +static inline void irq_enable_all(void) +{ + __asm__ __volatile__("msr basepri, %0" ::"r"(0) : "memory"); +} + +static inline uint32_t irq_save_basepri(uint8_t priority) +{ + uint32_t prev_basepri; + uint32_t new_basepri = (priority << 4) & 0xFF; + __asm__ __volatile__( + "mrs %0, basepri\n\t" + "msr basepri, %1" + : "=r"(prev_basepri) + : "r"(new_basepri) + : "memory"); + return prev_basepri; +} + +static inline void irq_restore_basepri(uint32_t basepri) +{ + __asm__ __volatile__("msr basepri, %0" ::"r"(basepri) : "memory"); +} + +/* + * Kernel critical section (masks interrupts >= 0x4, allows 0x0-0x3). + * Use this as the default for scheduler, IPC, and memory operations. + */ +static inline uint32_t irq_kernel_critical_enter(void) +{ + return irq_save_basepri(IRQ_PRIO_KERNEL_MASK >> 4); +} + +static inline void irq_kernel_critical_exit(uint32_t basepri) +{ + irq_restore_basepri(basepri); +} + static inline void irq_svc(void) { __asm__ __volatile__("svc #0"); @@ -242,7 +317,6 @@ extern volatile uint32_t __irq_saved_regs[8]; request_schedule(); \ irq_return(); \ } - extern volatile tcb_t *current; #endif /* PLATFORM_IRQ_H_ */ diff --git a/kernel/build.mk b/kernel/build.mk index ac5d8707..7aa68255 100644 --- a/kernel/build.mk +++ b/kernel/build.mk @@ -21,7 +21,8 @@ kernel-y = \ interrupt.o KDB-$(CONFIG_KDB) = \ - kdb.o + kdb.o \ + kdb-latency.o KPROBES-$(CONFIG_KPROBES) = \ kprobes.o diff --git a/kernel/ipc.c b/kernel/ipc.c index e494acd2..a6340c90 100644 --- a/kernel/ipc.c +++ b/kernel/ipc.c @@ -214,18 +214,19 @@ static void do_ipc(tcb_t *from, tcb_t *to) * CONSTRAINT: Callback MUST NOT destroy its own TCB. 
*/ if (to->ipc_notify && to->notify_pending && to->notify_depth < 3) { - uint32_t irq_flags; + uint32_t basepri; uint8_t generation_before; notify_handler_t callback; /* Atomically increment depth and capture generation. - * IRQ masking prevents race with nested interrupt-driven IPC. + * BASEPRI masking prevents race with nested interrupt-driven IPC. + * Zero-latency ISRs (0x0-0x2) can still preempt during this operation. */ - irq_flags = irq_save_flags(); + basepri = irq_kernel_critical_enter(); to->notify_depth++; generation_before = to->notify_generation; callback = to->ipc_notify; - irq_restore_flags(irq_flags); + irq_kernel_critical_exit(basepri); /* Recursion protection: prevent unbounded callback nesting. * Max depth 3 allows: serial → network → timer notification chains. @@ -245,11 +246,27 @@ static void do_ipc(tcb_t *from, tcb_t *to) /* Atomically decrement depth only if TCB still valid. * Generation counter detects TCB destruction during callback. * If TCB was destroyed, skip depth decrement (would be use-after-free). + * + * SAFETY: We must verify 'to' is still a valid TCB before accessing it. + * Search thread_map to confirm the pointer hasn't been freed and + * reused. */ - irq_flags = irq_save_flags(); - if (to->notify_generation == generation_before) + basepri = irq_kernel_critical_enter(); + + /* Verify TCB is still valid by checking thread_map */ + int tcb_valid = 0; + for (int i = 1; i < thread_count; ++i) { + if (thread_map[i] == to) { + tcb_valid = 1; + break; + } + } + + /* Only decrement if TCB is valid AND generation hasn't changed */ + if (tcb_valid && to->notify_generation == generation_before) to->notify_depth--; - irq_restore_flags(irq_flags); + + irq_kernel_critical_exit(basepri); /* Check for preemption after notification. * Callback may have made higher-priority threads runnable. 
diff --git a/kernel/kdb-latency.c b/kernel/kdb-latency.c new file mode 100644 index 00000000..6749c70d --- /dev/null +++ b/kernel/kdb-latency.c @@ -0,0 +1,67 @@ +/* Copyright (c) 2026 The F9 Microkernel Project. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include +#include + +/** + * KDB command: Display interrupt latency statistics. + * + * Shows min/avg/max latency for each priority level, highlighting + * zero-latency ISRs (0x0-0x2) and standard user IRQs. + */ +void kdb_show_latency(void) +{ + int i; + latency_stats_t stats; + int has_data = 0; + + dbg_printf(DL_KDB, "\n=== Interrupt Latency Statistics ===\n"); + dbg_printf(DL_KDB, "Prio Type Count Min Avg Max\n"); + dbg_printf(DL_KDB, "---- ---------------- ------ ----- ----- -----\n"); + + for (i = 0; i < 16; i++) { + if (!latency_get_stats_snapshot(i, &stats)) + continue; + + if (stats.count == 0) + continue; + + has_data = 1; + + const char *type; + if (i <= 0x2) + type = "Zero-latency ISR"; + else if (i == 0x3) + type = "SysTick"; + else if (i <= 0xE) + type = "User IRQ"; + else + type = "SVCall/PendSV"; + + stats.avg = stats.count > 0 ? (stats.sum / stats.count) : 0; + dbg_printf(DL_KDB, "0x%X %-16s %6u %5u %5u %5u\n", i, type, + stats.count, stats.min, stats.avg, stats.max); + } + + if (!has_data) { + dbg_printf(DL_KDB, "(No latency samples recorded yet)\n"); + } + + dbg_printf(DL_KDB, "\nNotes:\n"); + dbg_printf(DL_KDB, " - Zero-latency ISRs (0x0-0x2) target <10 cycles\n"); + dbg_printf(DL_KDB, " - User IRQs (0x4-0xE) masked during kernel ops\n"); + dbg_printf(DL_KDB, " - Use 'r' to reset statistics\n"); + dbg_printf(DL_KDB, "\n"); +} + +/** + * KDB command: Reset latency statistics. 
+ */ +void kdb_reset_latency(void) +{ + latency_reset(); + dbg_printf(DL_KDB, "Latency statistics reset.\n"); +} diff --git a/kernel/kdb.c b/kernel/kdb.c index e711f587..502eaa2b 100644 --- a/kernel/kdb.c +++ b/kernel/kdb.c @@ -30,6 +30,8 @@ extern void kdb_dump_as(void); extern void kdb_show_sampling(void); extern void kdb_show_tickless_verify(void); extern void kdb_dump_notifications(void); +extern void kdb_show_latency(void); +extern void kdb_reset_latency(void); struct kdb_t kdb_functions[] = { {.option = 'K', @@ -84,6 +86,14 @@ struct kdb_t kdb_functions[] = { .menuentry = "show tickless scheduling stat", .function = kdb_show_tickless_verify}, #endif + {.option = 'L', + .name = "LATENCY", + .menuentry = "show interrupt latency", + .function = kdb_show_latency}, + {.option = 'r', + .name = "RESET LATENCY", + .menuentry = "reset latency statistics", + .function = kdb_reset_latency}, /* Insert KDB functions here */ }; diff --git a/kernel/sched.c b/kernel/sched.c index a3f6af4c..24e27b7a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -100,25 +100,25 @@ void sched_enqueue(tcb_t *thread) { uint8_t prio; tcb_t *head; - uint32_t flags; + uint32_t basepri; if (!thread) return; - flags = irq_save_flags(); + basepri = irq_kernel_critical_enter(); /* Strict invariant: only runnable threads in ready queues * Check inside critical section to prevent race with state changes */ if (thread->state != T_RUNNABLE) { - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); panic("SCHED: Enqueueing non-runnable thread %t (state %d)\n", thread->t_globalid, thread->state); } /* Don't double-enqueue */ if (sched_is_queued(thread)) { - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); return; } @@ -147,7 +147,7 @@ void sched_enqueue(tcb_t *thread) /* Bitmap already set - no update needed */ } - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); } /** @@ -159,16 +159,16 @@ void sched_dequeue(tcb_t *thread) { uint8_t prio; tcb_t *prev, *next; - 
uint32_t flags; + uint32_t basepri; if (!thread) return; - flags = irq_save_flags(); + basepri = irq_kernel_critical_enter(); /* Not in queue */ if (!sched_is_queued(thread)) { - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); return; } @@ -197,7 +197,7 @@ void sched_dequeue(tcb_t *thread) /* Mark as not queued */ sched_link_init(thread); - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); } /** @@ -210,15 +210,15 @@ void sched_yield(void) tcb_t *curr = thread_current(); uint8_t prio; tcb_t *head; - uint32_t flags; + uint32_t basepri; if (!curr) return; - flags = irq_save_flags(); + basepri = irq_kernel_critical_enter(); if (!sched_is_queued(curr)) { - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); return; } @@ -245,7 +245,7 @@ void sched_yield(void) ready_queue[prio] = head->sched_link.next; } - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); } /** @@ -268,15 +268,15 @@ tcb_t *schedule_select(void) uint32_t prio; tcb_t *thread; tcb_t *curr; - uint32_t flags; + uint32_t basepri; - flags = irq_save_flags(); + basepri = irq_kernel_critical_enter(); /* CLZ returns 32 if bitmap is 0 (no branches needed) */ prio = clz32(ready_bitmap); if (prio >= SCHED_PRIORITY_LEVELS) { - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); /* Not reached: idle thread should always be runnable */ panic("SCHED: Empty ready_bitmap (idle missing)\n"); return NULL; @@ -286,7 +286,7 @@ tcb_t *schedule_select(void) /* Safety check for consistency */ if (!thread) { - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); panic("SCHED: Inconsistent bitmap/queue at prio %d\n", prio); return NULL; } @@ -310,7 +310,7 @@ tcb_t *schedule_select(void) "SCHED: PTS defer prio %d (curr %t thresh %d)\n", prio, curr->t_globalid, curr->preempt_threshold); - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); return curr; /* Continue running current thread */ } } @@ -320,7 +320,7 @@ tcb_t *schedule_select(void) */ 
preempted_bitmap &= ~(1UL << (31 - prio)); - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); /* Strict invariant: queued threads are always runnable */ return thread; } @@ -335,7 +335,7 @@ tcb_t *schedule_select(void) */ void sched_set_priority(tcb_t *thread, uint8_t new_prio) { - uint32_t flags; + uint32_t basepri; int was_queued; if (!thread) @@ -344,11 +344,11 @@ void sched_set_priority(tcb_t *thread, uint8_t new_prio) if (new_prio >= SCHED_PRIORITY_LEVELS) new_prio = SCHED_PRIO_IDLE; - flags = irq_save_flags(); + basepri = irq_kernel_critical_enter(); /* Check if priority actually changes */ if (thread->priority == new_prio) { - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); return; } @@ -364,7 +364,7 @@ void sched_set_priority(tcb_t *thread, uint8_t new_prio) if (was_queued) sched_enqueue(thread); - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); } /** @@ -389,7 +389,7 @@ int sched_preemption_change(tcb_t *thread, uint8_t new_threshold, uint8_t *old_threshold) { - uint32_t flags; + uint32_t basepri; uint8_t old_thresh; int should_reschedule = 0; @@ -410,7 +410,7 @@ int sched_preemption_change(tcb_t *thread, return -1; } - flags = irq_save_flags(); + basepri = irq_kernel_critical_enter(); /* Save old threshold (return user-set value) */ old_thresh = thread->user_preempt_threshold; @@ -456,7 +456,7 @@ int sched_preemption_change(tcb_t *thread, } } - irq_restore_flags(flags); + irq_kernel_critical_exit(basepri); /* Trigger reschedule if needed (outside critical section) */ if (should_reschedule) { diff --git a/platform/build.mk b/platform/build.mk index dd4ba431..b2fdc77b 100644 --- a/platform/build.mk +++ b/platform/build.mk @@ -21,7 +21,8 @@ platform-y = \ debug_device.o \ mpu.o \ spinlock.o \ - irq.o + irq.o \ + irq-latency.o platform-$(CONFIG_DEBUG_DEV_UART) += debug_uart.o platform-$(CONFIG_DEBUG_DEV_RAM) += debug_ram.o diff --git a/platform/irq-latency.c b/platform/irq-latency.c new file mode 100644 index 
00000000..802e8a19 --- /dev/null +++ b/platform/irq-latency.c @@ -0,0 +1,209 @@ +/* Copyright (c) 2026 The F9 Microkernel Project. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include +#include +#include +#include +#include + +/** + * Latency statistics per priority level (0x0-0xF = 16 levels). + */ +static latency_stats_t latency_stats[16]; + +/** + * Enable DWT cycle counter for latency measurements. + * + * Note: Called at INIT_LEVEL_PLATFORM with interrupts disabled. + * Safe to reset statistics with plain writes (no atomic operations needed). + */ +void latency_init(void) +{ + uint32_t test_before, test_after; + + /* Enable DWT if not already enabled */ + if (!(*DEMCR & DEMCR_TRCENA)) { + *DEMCR |= DEMCR_TRCENA; /* Enable trace and debug */ + } + + /* Enable cycle counter */ + *DWT_CTRL |= DWT_CTRL_CYCCNTENA; + + /* Reset cycle counter */ + *DWT_CYCCNT = 0; + + /* Verify DWT functionality (helpful diagnostic for QEMU vs hardware) */ + test_before = *DWT_CYCCNT; + for (volatile int i = 0; i < 100; i++) + ; /* Small busy loop */ + test_after = *DWT_CYCCNT; + + if (test_after == test_before) { + /* QEMU: DWT not emulated, cycle counter stays at 0 */ + dbg_printf(DL_KDB, + "IRQ latency profiling enabled (DWT not available)\n"); + } else { + /* Hardware: DWT working, show delta to confirm */ + dbg_printf(DL_KDB, + "IRQ latency profiling enabled (DWT cycle counter active, " + "test delta=%u)\n", + test_after - test_before); + } + + /* Reset statistics - inline to avoid irq_save_flags during early boot */ + for (int i = 0; i < 16; i++) { + latency_stats[i].count = 0; + latency_stats[i].min = 0; + latency_stats[i].max = 0; + latency_stats[i].sum = 0; + latency_stats[i].avg = 0; + } +} + +/* + * Note: DWT cycle counter not emulated in QEMU (reads return 0). + * On real hardware (STM32F4), DWT provides cycle-accurate latency measurement. 
+ * System boots safely in both environments - no hang. + */ +INIT_HOOK(latency_init, INIT_LEVEL_PLATFORM); + +/** + * Record latency sample for an interrupt. + * + * CRITICAL: This function is called from ISRs, including zero-latency ISRs + * (priority 0x0-0x2). It MUST NOT use PRIMASK or any operation that blocks + * zero-latency interrupts. Uses atomic operations for lock-free updates. + * + * Note: min/max updates use atomic compare-exchange loops to ensure + * consistency even under heavy preemption from other zero-latency ISRs. + */ +void latency_record(uint8_t priority, int16_t irq_num, uint32_t cycles) +{ + latency_stats_t *stats; + uint32_t old_min, old_max; + + (void) irq_num; + + /* Validate priority (0x0-0xF) */ + if (priority >= 16) + return; + + /* Ignore obviously bogus samples (wraparound or >1M cycles). + * 1M cycles at 168MHz is ~6ms, which is a reasonable upper bound + * for most real-time ISRs. + */ + if (cycles == 0 || cycles > 1000000) + return; + + stats = &latency_stats[priority]; + + /* Atomic updates for count and sum (lock-free, no PRIMASK). */ + __atomic_add_fetch(&stats->count, 1, __ATOMIC_RELAXED); + __atomic_add_fetch(&stats->sum, cycles, __ATOMIC_RELAXED); + + /* Update min/max using atomic compare-exchange loops. + * These ensure that even under heavy preemption, we never miss a new + * min/max value. 
+ */ + + /* Update min */ + old_min = __atomic_load_n(&stats->min, __ATOMIC_RELAXED); + while ((old_min == 0 || cycles < old_min) && + !__atomic_compare_exchange_n(&stats->min, &old_min, cycles, 0, + __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { + /* If CAS failed, old_min was updated by another ISR; loop and retry */ + } + + /* Update max */ + old_max = __atomic_load_n(&stats->max, __ATOMIC_RELAXED); + while (cycles > old_max && + !__atomic_compare_exchange_n(&stats->max, &old_max, cycles, 0, + __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { + /* If CAS failed, old_max was updated by another ISR; loop and retry */ + } +} + +/** + * Get latency statistics for a priority level. + * + * Calculates average lazily on read to avoid division in ISR hot path. + */ +const latency_stats_t *latency_get_stats(uint8_t priority) +{ + latency_stats_t *stats; + + if (priority >= 16) + return NULL; + + stats = &latency_stats[priority]; + + /* NOTE: This returns live stats. Callers must use atomic loads or + * prefer latency_get_stats_snapshot() for a stable read. + */ + return stats; +} + +/** + * Get a best-effort atomic snapshot of latency statistics for a priority. + * + * Uses only relaxed atomics (single-core). We retry if count changes during + * the read. Because count and sum are updated separately, this provides a + * consistent snapshot in the common case but is still best-effort. + */ +int latency_get_stats_snapshot(uint8_t priority, latency_stats_t *out) +{ + uint32_t count_before; + uint32_t count_after; + + if (!out || priority >= 16) + return 0; + + /* + * Retry loop ensures all fields (count, sum, min, max) are from + * the same snapshot generation. Reading count before and after + * ensures no ISR updated the stats during our reads. 
+ */ + do { + count_before = + __atomic_load_n(&latency_stats[priority].count, __ATOMIC_RELAXED); + out->sum = + __atomic_load_n(&latency_stats[priority].sum, __ATOMIC_RELAXED); + out->min = + __atomic_load_n(&latency_stats[priority].min, __ATOMIC_RELAXED); + out->max = + __atomic_load_n(&latency_stats[priority].max, __ATOMIC_RELAXED); + count_after = + __atomic_load_n(&latency_stats[priority].count, __ATOMIC_RELAXED); + } while (count_before != count_after); + + out->count = count_after; + + /* avg is computed by the caller to avoid shared writes. */ + out->avg = 0; + + return 1; +} + +/** + * Reset all latency statistics. + */ +void latency_reset(void) +{ + uint32_t flags; + int i; + + flags = irq_save_flags(); + + for (i = 0; i < 16; i++) { + latency_stats[i].count = 0; + latency_stats[i].min = 0; + latency_stats[i].max = 0; + latency_stats[i].sum = 0; + latency_stats[i].avg = 0; + } + + irq_restore_flags(flags); +} diff --git a/platform/irq.c b/platform/irq.c index 79eb2d96..6afac92f 100644 --- a/platform/irq.c +++ b/platform/irq.c @@ -13,6 +13,12 @@ */ volatile uint32_t __irq_saved_regs[8]; +/* + * System state tracking for ISR context. + * 0 = Thread mode (PSP), 1+ = Handler mode (MSP). + */ +volatile uint32_t irq_system_state = 0; + void irq_init(void) { /* Set all 4-bit to pre-emption priority bit */