diff --git a/crates/sandlock-core/src/cow/dispatch.rs b/crates/sandlock-core/src/cow/dispatch.rs
index a183f46..98ed9f0 100644
--- a/crates/sandlock-core/src/cow/dispatch.rs
+++ b/crates/sandlock-core/src/cow/dispatch.rs
@@ -8,14 +8,39 @@ use std::path::{Component, Path, PathBuf};
 use std::sync::Arc;
 
 use tokio::sync::Mutex;
+use tokio::sync::Mutex as AsyncMutex;
 
 use crate::arch;
 use crate::cow::seccomp::SeccompCowBranch;
 use crate::procfs::{build_dirent64, DT_DIR, DT_LNK, DT_REG};
 use crate::seccomp::notif::{read_child_mem, write_child_mem, NotifAction};
-use crate::seccomp::state::{CowState, PidKey};
+use crate::seccomp::state::{CowState, PerProcessState, ProcessIndex};
 use crate::sys::structs::SeccompNotif;
 
+/// Look up the per-process state handle for `pid`. Returns None if
+/// the pid isn't tracked (pidfd_open failed when it was registered,
+/// or the process is gone) or doesn't fit in an `i32` — callers
+/// should fall back to `NotifAction::Continue`.
+fn pp_handle(
+    processes: &Arc<ProcessIndex>,
+    pid: u32,
+) -> Option<Arc<AsyncMutex<PerProcessState>>> {
+    processes
+        .entry_for(i32::try_from(pid).ok()?)
+        .map(|(_, s)| s)
+}
+
+/// Clone the virtual cwd stored for `pid`. None if the pid isn't tracked or no
+/// virtual cwd is set (handle_cow_chdir sets it after a COW-only chdir).
+async fn current_virtual_cwd(
+    processes: &Arc<ProcessIndex>,
+    pid: u32,
+) -> Option<String> {
+    let handle = pp_handle(processes, pid)?;
+    let cwd = handle.lock().await.virtual_cwd.clone();
+    cwd
+}
+
 /// Read a NUL-terminated path from child memory (up to 4096 bytes for filesystem paths).
 ///
 /// Reads page-by-page to avoid crossing into unmapped memory (e.g. when the path
@@ -100,29 +125,6 @@ fn map_cow_upper_path(cow: &SeccompCowBranch, path: &str) -> String {
     normalize_path(path).to_string_lossy().into_owned()
 }
 
-fn read_pid_start_time(pid: u32) -> Option<u64> {
-    let stat = std::fs::read_to_string(format!("/proc/{}/stat", pid)).ok()?;
-    let rest = stat.rsplit_once(") ")?.1;
-    // starttime is field 22; after "pid (comm)" the first token is field 3.
-    rest.split_whitespace().nth(19)?.parse().ok()
-}
-
-fn cow_pid_key(pid: u32) -> Option<PidKey> {
-    Some(PidKey {
-        pid: i32::try_from(pid).ok()?,
-        start_time: read_pid_start_time(pid)?,
-    })
-}
-
-fn current_virtual_cwd(st: &mut CowState, pid: u32) -> Option<String> {
-    if st.virtual_cwds.is_empty() {
-        return None;
-    }
-    let pid_key = cow_pid_key(pid)?;
-    st.prune_reused_pid(pid_key);
-    st.virtual_cwds.get(&pid_key).cloned()
-}
-
 // ============================================================
 // openat handler
 // ============================================================
@@ -132,6 +134,7 @@ fn current_virtual_cwd(st: &mut CowState, pid: u32) -> Option<String> {
 pub(crate) async fn handle_cow_open(
     notif: &SeccompNotif,
     cow_state: &Arc<Mutex<CowState>>,
+    processes: &Arc<ProcessIndex>,
     notif_fd: RawFd,
 ) -> NotifAction {
     use crate::cow::seccomp::CowOpenPlan;
@@ -151,8 +154,7 @@ pub(crate) async fn handle_cow_open(
         None => return NotifAction::Continue,
     };
     let virtual_cwd = if (dirfd as i32) == libc::AT_FDCWD && !Path::new(&rel_path).is_absolute() {
-        let mut st = cow_state.lock().await;
-        current_virtual_cwd(&mut st, notif.pid)
+        current_virtual_cwd(processes, notif.pid).await
     } else {
         None
     };
@@ -476,12 +478,10 @@ async fn execute_deferred_copy(
 pub(crate) async fn handle_cow_write(
     notif: &SeccompNotif,
     cow_state: &Arc<Mutex<CowState>>,
+    processes: &Arc<ProcessIndex>,
     notif_fd: RawFd,
 ) -> NotifAction {
-    let virtual_cwd = {
-        let mut st = cow_state.lock().await;
-        current_virtual_cwd(&mut st, notif.pid)
-    };
+    let virtual_cwd = current_virtual_cwd(processes, notif.pid).await;
     let mut op = match parse_cow_write(notif, notif_fd, virtual_cwd.as_deref()) {
         Some(op) => op,
         None => return NotifAction::Continue,
@@ -573,17 +573,15 @@ pub(crate) const SYS_FACCESSAT2: i64 = 439;
 pub(crate) async fn handle_cow_access(
     notif: &SeccompNotif,
     cow_state: &Arc<Mutex<CowState>>,
+    processes: &Arc<ProcessIndex>,
     notif_fd: RawFd,
 ) -> NotifAction {
     let nr = notif.data.nr as i64;
+    let virtual_cwd = current_virtual_cwd(processes, notif.pid).await;
 
     // access(pathname, mode): args[0]=path, args[1]=mode
     // faccessat(dirfd, pathname, mode, flags): args[0]=dirfd, args[1]=path, args[2]=mode
     let (path, mode) = if Some(nr) == arch::SYS_ACCESS {
-        let virtual_cwd = {
-            let mut st = cow_state.lock().await;
-            current_virtual_cwd(&mut st, notif.pid)
-        };
         let p = match read_path(notif, notif.data.args[0], notif_fd) {
             Some(p) => resolve_at_path_with_virtual(
                 notif,
@@ -596,10 +594,6 @@ pub(crate) async fn handle_cow_access(
         (p, notif.data.args[1] as i32)
     } else {
         let dirfd = notif.data.args[0] as i64;
-        let virtual_cwd = {
-            let mut st = cow_state.lock().await;
-            current_virtual_cwd(&mut st, notif.pid)
-        };
         let p = match read_path(notif, notif.data.args[1], notif_fd) {
             Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()),
             None => return NotifAction::Continue,
@@ -642,6 +636,7 @@ pub(crate) async fn handle_cow_access(
 pub(crate) async fn handle_cow_utimensat(
     notif: &SeccompNotif,
     cow_state: &Arc<Mutex<CowState>>,
+    processes: &Arc<ProcessIndex>,
     notif_fd: RawFd,
 ) -> NotifAction {
     let dirfd = notif.data.args[0] as i64;
@@ -653,10 +648,7 @@ pub(crate) async fn handle_cow_utimensat(
         return NotifAction::Continue;
     }
 
-    let virtual_cwd = {
-        let mut st = cow_state.lock().await;
-        current_virtual_cwd(&mut st, notif.pid)
-    };
+    let virtual_cwd = current_virtual_cwd(processes, notif.pid).await;
     let path = match read_path(notif, path_ptr, notif_fd) {
         Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()),
         None => return NotifAction::Continue,
@@ -717,6 +709,7 @@ pub(crate) async fn handle_cow_utimensat(
 pub(crate) async fn handle_cow_stat(
     notif: &SeccompNotif,
     cow_state: &Arc<Mutex<CowState>>,
+    processes: &Arc<ProcessIndex>,
     notif_fd: RawFd,
 ) -> NotifAction {
     let nr = notif.data.nr as i64;
@@ -724,10 +717,7 @@ pub(crate) async fn handle_cow_stat(
     // newfstatat(dirfd, pathname, statbuf, flags)
     // faccessat(dirfd, pathname, mode, flags)
     let dirfd = notif.data.args[0] as i64;
-    let virtual_cwd = {
-        let mut st = cow_state.lock().await;
-        current_virtual_cwd(&mut st, notif.pid)
-    };
+    let virtual_cwd = current_virtual_cwd(processes, notif.pid).await;
     let path = match read_path(notif, notif.data.args[1], notif_fd) {
         Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()),
         None => return NotifAction::Continue,
@@ -794,14 +784,12 @@ pub(crate) async fn handle_cow_stat(
 pub(crate) async fn handle_cow_statx(
     notif: &SeccompNotif,
     cow_state: &Arc<Mutex<CowState>>,
+    processes: &Arc<ProcessIndex>,
     notif_fd: RawFd,
 ) -> NotifAction {
     // statx(dirfd, pathname, flags, mask, statxbuf)
     let dirfd = notif.data.args[0] as i64;
-    let virtual_cwd = {
-        let mut st = cow_state.lock().await;
-        current_virtual_cwd(&mut st, notif.pid)
-    };
+    let virtual_cwd = current_virtual_cwd(processes, notif.pid).await;
     let path = match read_path(notif, notif.data.args[1], notif_fd) {
         Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()),
         None => return NotifAction::Continue,
@@ -828,14 +816,12 @@ pub(crate) async fn handle_cow_statx(
 pub(crate) async fn handle_cow_readlink(
     notif: &SeccompNotif,
     cow_state: &Arc<Mutex<CowState>>,
+    processes: &Arc<ProcessIndex>,
     notif_fd: RawFd,
 ) -> NotifAction {
     // readlinkat(dirfd, pathname, buf, bufsiz)
     let dirfd = notif.data.args[0] as i64;
-    let virtual_cwd = {
-        let mut st = cow_state.lock().await;
-        current_virtual_cwd(&mut st, notif.pid)
-    };
+    let virtual_cwd = current_virtual_cwd(processes, notif.pid).await;
     let path = match read_path(notif, notif.data.args[1], notif_fd) {
         Some(p) => resolve_at_path_with_virtual(notif, dirfd, &p, virtual_cwd.as_deref()),
         None => return NotifAction::Continue,
@@ -875,96 +861,107 @@ pub(crate) async fn handle_cow_readlink(
 pub(crate) async fn handle_cow_getdents(
     notif: &SeccompNotif,
     cow_state: &Arc<Mutex<CowState>>,
+    processes: &Arc<ProcessIndex>,
     notif_fd: RawFd,
 ) -> NotifAction {
     let pid = notif.pid;
     let child_fd = (notif.data.args[0] & 0xFFFFFFFF) as u32;
     let buf_addr = notif.data.args[1];
     let buf_size = (notif.data.args[2] & 0xFFFFFFFF) as usize;
-    let pid_key = match cow_pid_key(pid) {
-        Some(key) => key,
-        None => return NotifAction::Continue,
-    };
 
-    // Check if fd points to a COW-managed directory
+    // Check if fd points to a COW-managed directory.
     let link_path = format!("/proc/{}/fd/{}", pid, child_fd);
     let target = match std::fs::read_link(&link_path) {
         Ok(t) => t.to_string_lossy().into_owned(),
         Err(_) => return NotifAction::Continue,
     };
 
-    let mut st = cow_state.lock().await;
-    st.prune_reused_pid(pid_key);
-    let cow = match st.branch.as_ref() {
-        Some(c) => c,
-        None => return NotifAction::Continue,
-    };
-
-    if !cow.has_changes() {
-        return NotifAction::Continue;
-    }
-
-    let target_path = Path::new(&target);
-    let rel_path = if cow.matches(&target) {
-        cow.safe_rel(&target).unwrap_or_else(|| ".".to_string())
-    } else if let Ok(rel) = target_path.strip_prefix(cow.upper_dir()) {
-        let rel = rel.to_string_lossy();
-        if rel.is_empty() {
-            ".".to_string()
+    // Compute rel_path under the global COW lock, but do not hold it
+    // across the per-process lock acquired below.
+    let rel_path = {
+        let st = cow_state.lock().await;
+        let cow = match st.branch.as_ref() {
+            Some(c) => c,
+            None => return NotifAction::Continue,
+        };
+        if !cow.has_changes() {
+            return NotifAction::Continue;
+        }
+        let target_path = Path::new(&target);
+        if cow.matches(&target) {
+            cow.safe_rel(&target).unwrap_or_else(|| ".".to_string())
+        } else if let Ok(rel) = target_path.strip_prefix(cow.upper_dir()) {
+            let rel = rel.to_string_lossy();
+            if rel.is_empty() {
+                ".".to_string()
+            } else {
+                rel.into_owned()
+            }
         } else {
-            rel.into_owned()
+            return NotifAction::Continue;
         }
-    } else {
-        return NotifAction::Continue;
     };
 
-    // Build cache on first call; invalidate if fd was reused for a different dir.
-    let cache_key = (pid_key, child_fd);
-    if let Some((cached_target, entries)) = st.dir_cache.get(&cache_key) {
+    // Per-process dir cache lookup.
+    let pp = match pp_handle(processes, pid) {
+        Some(h) => h,
+        None => return NotifAction::Continue,
+    };
+    let mut perproc = pp.lock().await;
+
+    // Invalidate stale cache (fd reused for a different directory),
+    // and short-circuit EOF on a previously fully-drained entry.
+    if let Some((cached_target, entries)) = perproc.cow_dir_cache.get(&child_fd) {
         if *cached_target != target {
-            // fd reused for a different directory — rebuild.
-            st.dir_cache.remove(&cache_key);
+            perproc.cow_dir_cache.remove(&child_fd);
         } else if entries.is_empty() {
-            // Previously fully drained — return end-of-directory and clean up.
-            st.dir_cache.remove(&cache_key);
+            perproc.cow_dir_cache.remove(&child_fd);
             return NotifAction::ReturnValue(0);
         }
     }
-    if !st.dir_cache.contains_key(&cache_key) {
-        let cow = st.branch.as_ref().unwrap();
-        let merged = cow.list_merged_dir(&rel_path);
-
-        let upper_dir = cow.upper_dir().join(&rel_path);
-        let lower_dir = cow.workdir().join(&rel_path);
-
-        let mut entries = Vec::new();
-        let mut d_off: i64 = 0;
-        for name in &merged {
-            d_off += 1;
-            let upper_p = upper_dir.join(name);
-            let lower_p = lower_dir.join(name);
-            let check = if upper_p.exists() || upper_p.is_symlink() {
-                &upper_p
-            } else {
-                &lower_p
-            };
-            let d_type = if check.is_dir() {
-                DT_DIR
-            } else if check.is_symlink() {
-                DT_LNK
-            } else {
-                DT_REG
+
+    // Build cache on first call.
+    if !perproc.cow_dir_cache.contains_key(&child_fd) {
+        let entries = {
+            let st = cow_state.lock().await;
+            let cow = match st.branch.as_ref() {
+                Some(c) => c,
+                None => return NotifAction::Continue,
             };
-            use std::os::unix::fs::MetadataExt;
-            let d_ino = std::fs::symlink_metadata(check)
-                .map(|m| m.ino())
-                .unwrap_or(0);
-            entries.push(build_dirent64(d_ino, d_off, d_type, name));
-        }
-        st.dir_cache.insert(cache_key, (target.clone(), entries));
+            let merged = cow.list_merged_dir(&rel_path);
+            let upper_dir = cow.upper_dir().join(&rel_path);
+            let lower_dir = cow.workdir().join(&rel_path);
+
+            let mut out = Vec::new();
+            let mut d_off: i64 = 0;
+            for name in &merged {
+                d_off += 1;
+                let upper_p = upper_dir.join(name);
+                let lower_p = lower_dir.join(name);
+                let check = if upper_p.exists() || upper_p.is_symlink() {
+                    &upper_p
+                } else {
+                    &lower_p
+                };
+                let d_type = if check.is_dir() {
+                    DT_DIR
+                } else if check.is_symlink() {
+                    DT_LNK
+                } else {
+                    DT_REG
+                };
+                use std::os::unix::fs::MetadataExt;
+                let d_ino = std::fs::symlink_metadata(check)
+                    .map(|m| m.ino())
+                    .unwrap_or(0);
+                out.push(build_dirent64(d_ino, d_off, d_type, name));
+            }
+            out
+        };
+        perproc.cow_dir_cache.insert(child_fd, (target.clone(), entries));
     }
 
-    let entries = match st.dir_cache.get_mut(&cache_key) {
+    let entries = match perproc.cow_dir_cache.get_mut(&child_fd) {
         Some((_, e)) => e,
         None => return NotifAction::Continue,
     };
@@ -982,12 +979,7 @@ pub(crate) async fn handle_cow_getdents(
     if consumed > 0 {
         entries.drain(..consumed);
     }
-    if entries.is_empty() {
-        // Mark as fully read by leaving an empty entry list in the cache.
-        // This prevents rebuilding the cache on the next call — the empty
-        // cache will produce ReturnValue(0) which signals end-of-directory.
-    }
-    drop(st);
+    drop(perproc);
 
     if !result.is_empty() {
         if write_child_mem(notif_fd, notif.id, pid, buf_addr, &result).is_err() {
@@ -1006,6 +998,7 @@ pub(crate) async fn handle_cow_getdents(
 pub(crate) async fn handle_cow_chdir(
     notif: &SeccompNotif,
     cow_state: &Arc<Mutex<CowState>>,
+    processes: &Arc<ProcessIndex>,
     notif_fd: RawFd,
 ) -> NotifAction {
     let path_ptr = notif.data.args[0];
@@ -1015,31 +1008,31 @@ pub(crate) async fn handle_cow_chdir(
     };
     let orig_path_buf_len = path.len() + 1; // NUL-terminated size in child memory
 
-    let mut st = cow_state.lock().await;
-    let virtual_cwd = current_virtual_cwd(&mut st, notif.pid);
-    let abs_path = resolve_at_path_with_virtual(
+    let virtual_cwd = current_virtual_cwd(processes, notif.pid).await;
+    let resolved = resolve_at_path_with_virtual(
         notif,
         libc::AT_FDCWD as i64,
         &path,
         virtual_cwd.as_deref(),
     );
-    let cow = match st.branch.as_ref() {
-        Some(c) => c,
-        None => return NotifAction::Continue,
-    };
-
-    let abs_path = map_cow_upper_path(cow, &abs_path);
-    if !cow.matches(&abs_path) {
-        return NotifAction::Continue;
-    }
 
-    // Check if it exists in the upper layer.
-    let rel = match cow.safe_rel(&abs_path) {
-        Some(r) => r,
-        None => return NotifAction::Continue,
+    let (abs_path, upper_path) = {
+        let st = cow_state.lock().await;
+        let cow = match st.branch.as_ref() {
+            Some(c) => c,
+            None => return NotifAction::Continue,
+        };
+        let abs_path = map_cow_upper_path(cow, &resolved);
+        if !cow.matches(&abs_path) {
+            return NotifAction::Continue;
+        }
+        let rel = match cow.safe_rel(&abs_path) {
+            Some(r) => r,
+            None => return NotifAction::Continue,
+        };
+        let upper_path = cow.upper_dir().join(&rel);
+        (abs_path, upper_path)
     };
-    let upper_path = cow.upper_dir().join(&rel);
-    drop(st);
 
     // If the directory exists on the real filesystem, let the kernel handle it.
     if std::path::Path::new(&abs_path).is_dir() {
@@ -1096,10 +1089,17 @@ pub(crate) async fn handle_cow_chdir(
         return NotifAction::Errno(libc::EFAULT);
     }
 
-    if let Some(pid_key) = cow_pid_key(notif.pid) {
-        let mut st = cow_state.lock().await;
-        st.prune_reused_pid(pid_key);
-        st.virtual_cwds.insert(pid_key, abs_path);
+    // We insert the virtual cwd here, before returning Continue and
+    // letting the kernel run the rewritten chdir. We can't observe
+    // the kernel's verdict without polling, but at this point we've
+    // verified upper_path is a directory, the addfd ioctl succeeded,
+    // and write_child_mem rewrote the path argument — so a kernel
+    // chdir to /proc/self/fd/N is essentially guaranteed. If it does
+    // somehow fail, the per-child pidfd watcher will drop this entry
+    // when the process exits, so the inconsistency is bounded by
+    // process lifetime.
+    if let Some(pp) = pp_handle(processes, notif.pid) {
+        pp.lock().await.virtual_cwd = Some(abs_path);
     }
 
     NotifAction::Continue
@@ -1109,21 +1109,21 @@ pub(crate) async fn handle_cow_chdir(
 pub(crate) async fn handle_cow_getcwd(
     notif: &SeccompNotif,
     cow_state: &Arc<Mutex<CowState>>,
+    processes: &Arc<ProcessIndex>,
     notif_fd: RawFd,
 ) -> NotifAction {
     let buf_addr = notif.data.args[0];
     let buf_size = (notif.data.args[1] & 0xFFFF_FFFF) as usize;
 
-    let mut st = cow_state.lock().await;
-    let cached_virtual_cwd = current_virtual_cwd(&mut st, notif.pid);
-    let cow = match st.branch.as_ref() {
-        Some(c) => c,
-        None => return NotifAction::Continue,
-    };
-
+    let cached_virtual_cwd = current_virtual_cwd(processes, notif.pid).await;
     let virtual_cwd = if let Some(cwd) = cached_virtual_cwd {
         cwd
     } else {
+        let st = cow_state.lock().await;
+        let cow = match st.branch.as_ref() {
+            Some(c) => c,
+            None => return NotifAction::Continue,
+        };
         let cwd = match std::fs::read_link(format!("/proc/{}/cwd", notif.pid)) {
             Ok(c) => c,
             Err(_) => return NotifAction::Continue,
@@ -1133,7 +1133,6 @@ pub(crate) async fn handle_cow_getcwd(
             Err(_) => return NotifAction::Continue,
         }
     };
-    drop(st);
 
     let cwd_bytes = virtual_cwd.as_bytes();
     if cwd_bytes.len() + 1 > buf_size {
diff --git a/crates/sandlock-core/src/procfs.rs b/crates/sandlock-core/src/procfs.rs
index 9878150..f49725a 100644
--- a/crates/sandlock-core/src/procfs.rs
+++ b/crates/sandlock-core/src/procfs.rs
@@ -12,7 +12,7 @@ use std::sync::Arc;
 use tokio::sync::Mutex;
 
 use crate::seccomp::notif::{read_child_cstr, write_child_mem, NotifAction, NotifPolicy};
-use crate::seccomp::state::{NetworkState, ProcfsState};
+use crate::seccomp::state::{NetworkState, ProcessIndex};
 use crate::sys::structs::{SeccompNotif, EACCES};
 use crate::sys::syscall;
 
@@ -380,7 +380,7 @@ fn read_path(notif: &SeccompNotif, addr: u64, notif_fd: RawFd) -> Option<String>
 /// - Lets everything else through.
 pub(crate) async fn handle_proc_open(
     notif: &SeccompNotif,
-    procfs: &Arc<Mutex<ProcfsState>>,
+    processes: &Arc<ProcessIndex>,
     resource: &Arc<Mutex<crate::seccomp::state::ResourceState>>,
     network: &Arc<Mutex<NetworkState>>,
     policy: &NotifPolicy,
@@ -404,8 +404,7 @@ pub(crate) async fn handle_proc_open(
     // already hide non-sandbox PIDs, but without this check a process
     // could still open /proc/{ppid}/cmdline (or any guessed PID) directly.
     if let Some(pid) = extract_proc_pid(&path) {
-        let pfs = procfs.lock().await;
-        if !pfs.proc_pids.contains(&pid) {
+        if !processes.contains(pid) {
             return NotifAction::Errno(EACCES);
         }
     }
@@ -435,11 +434,10 @@ pub(crate) async fn handle_proc_open(
 
     // Virtualize /proc/loadavg when proc virtualization is active.
     if path == "/proc/loadavg" {
-        let pfs = procfs.lock().await;
+        let total = processes.len() as u32;
+        let last_pid = processes.max_pid().unwrap_or(0);
         let rs = resource.lock().await;
-        let total = pfs.proc_pids.len() as u32;
         let running = rs.proc_count;
-        let last_pid = pfs.proc_pids.iter().max().copied().unwrap_or(0);
         let content = generate_loadavg(&rs.load_avg, running, total, last_pid);
         return inject_memfd(&content);
     }
@@ -612,7 +610,7 @@ pub(crate) fn handle_etc_hosts_open(
 /// regardless of filesystem internals.
 pub(crate) async fn handle_sorted_getdents(
     notif: &SeccompNotif,
-    procfs: &Arc<Mutex<ProcfsState>>,
+    processes: &Arc<ProcessIndex>,
     notif_fd: RawFd,
 ) -> NotifAction {
     let pid = notif.pid;
@@ -625,16 +623,17 @@ pub(crate) async fn handle_sorted_getdents(
         Ok(t) => t,
         Err(_) => return NotifAction::Continue,
     };
-    let cache_key = (
-        pid as i32,
-        child_fd,
-        dir_path.to_string_lossy().into_owned(),
-    );
-    let mut pfs = procfs.lock().await;
+
+    let entry = match processes.entry_for(pid as i32) {
+        Some(e) => e,
+        None => return NotifAction::Continue,
+    };
+    let cache_key = (child_fd, dir_path.to_string_lossy().into_owned());
+    let mut perproc = entry.1.lock().await;
 
     // Build and cache sorted entries on first call for this open directory.
     // Remove an empty cache on EOF so later fd reuse can rebuild entries.
-    if !pfs.getdents_cache.contains_key(&cache_key) {
+    if !perproc.procfs_dir_cache.contains_key(&cache_key) {
         let dir = match std::fs::read_dir(&dir_path) {
             Ok(d) => d,
             Err(_) => return NotifAction::Continue,
@@ -679,17 +678,17 @@ pub(crate) async fn handle_sorted_getdents(
             })
             .collect();
 
-        pfs.getdents_cache.insert(cache_key.clone(), entries);
+        perproc.procfs_dir_cache.insert(cache_key.clone(), entries);
     }
 
-    let entries = match pfs.getdents_cache.get_mut(&cache_key) {
+    let entries = match perproc.procfs_dir_cache.get_mut(&cache_key) {
         Some(e) => e,
         None => return NotifAction::Continue,
     };
 
     // Empty cache = already fully drained on a prior call → return 0 (EOF).
     if entries.is_empty() {
-        pfs.getdents_cache.remove(&cache_key);
+        perproc.procfs_dir_cache.remove(&cache_key);
         return NotifAction::ReturnValue(0);
     }
 
@@ -708,7 +707,7 @@ pub(crate) async fn handle_sorted_getdents(
         entries.drain(..consumed);
     }
 
-    drop(pfs);
+    drop(perproc);
 
     if !result.is_empty() {
         if write_child_mem(notif_fd, notif.id, pid, buf_addr, &result).is_err() {
@@ -795,7 +794,7 @@ fn build_filtered_dirents(sandbox_pids: &HashSet<i32>) -> Vec<Vec<u8>> {
 /// set of entries that hides PIDs not belonging to the sandbox.
 pub(crate) async fn handle_getdents(
     notif: &SeccompNotif,
-    procfs: &Arc<Mutex<ProcfsState>>,
+    processes: &Arc<ProcessIndex>,
     _policy: &NotifPolicy,
     notif_fd: RawFd,
 ) -> NotifAction {
@@ -814,16 +813,24 @@ pub(crate) async fn handle_getdents(
         return NotifAction::Continue;
     }
 
-    let cache_key = (pid as i32, child_fd, target.to_string_lossy().into_owned());
-    let mut pfs = procfs.lock().await;
+    let entry = match processes.entry_for(pid as i32) {
+        Some(e) => e,
+        None => return NotifAction::Continue,
+    };
+    let cache_key = (child_fd, target.to_string_lossy().into_owned());
+    let mut perproc = entry.1.lock().await;
 
-    // Build and cache entries on first call for this (pid, fd) pair.
-    if !pfs.getdents_cache.contains_key(&cache_key) {
-        let entries = build_filtered_dirents(&pfs.proc_pids);
-        pfs.getdents_cache.insert(cache_key.clone(), entries);
+    // Build and cache entries on first call for this (fd, target) pair.
+    if !perproc.procfs_dir_cache.contains_key(&cache_key) {
+        // Snapshot sandbox PIDs while still holding this process's
+        // lock; that is cheap because pids_snapshot only takes the
+        // ProcessIndex read lock briefly.
+        let snapshot = processes.pids_snapshot();
+        let entries = build_filtered_dirents(&snapshot);
+        perproc.procfs_dir_cache.insert(cache_key.clone(), entries);
     }
 
-    let entries = match pfs.getdents_cache.get_mut(&cache_key) {
+    let entries = match perproc.procfs_dir_cache.get_mut(&cache_key) {
         Some(e) => e,
         None => return NotifAction::Continue,
     };
@@ -841,7 +848,7 @@ pub(crate) async fn handle_getdents(
 
     // Empty cache = already fully drained on a prior call → return 0 (EOF).
     if entries.is_empty() {
-        pfs.getdents_cache.remove(&cache_key);
+        perproc.procfs_dir_cache.remove(&cache_key);
         return NotifAction::ReturnValue(0);
     }
 
@@ -849,7 +856,7 @@ pub(crate) async fn handle_getdents(
         entries.drain(..consumed);
     }
 
-    drop(pfs);
+    drop(perproc);
 
     // Write the result into the child's buffer and return the byte count.
     if !result.is_empty() {
diff --git a/crates/sandlock-core/src/resource.rs b/crates/sandlock-core/src/resource.rs
index cab2be8..00867ca 100644
--- a/crates/sandlock-core/src/resource.rs
+++ b/crates/sandlock-core/src/resource.rs
@@ -3,8 +3,9 @@
 use std::sync::Arc;
 use tokio::sync::Mutex;
 
-use crate::seccomp::notif::{NotifAction, NotifPolicy};
-use crate::seccomp::state::{ProcfsState, ResourceState};
+use crate::seccomp::ctx::SupervisorCtx;
+use crate::seccomp::notif::{spawn_pid_watcher, NotifAction, NotifPolicy};
+use crate::seccomp::state::ResourceState;
 use crate::sys::structs::{
     SeccompNotif, CLONE_NS_FLAGS, EAGAIN, EPERM,
 };
@@ -17,13 +18,17 @@ const MAP_ANONYMOUS: u64 = 0x20;
 
 /// Handle fork/clone/vfork notifications.
 ///
-/// Enforces namespace creation ban, process limits, and checkpoint hold.
-/// Needs both `ResourceState` (for proc_count, hold_forks, etc.) and
-/// `ProcfsState` (for proc_pids).
+/// Enforces namespace creation ban and process limits and bumps
+/// `proc_count`. It does not touch `ProcessIndex`: it receives no
+/// handle to it, and the child's pid does not exist yet (see below).
+///
+/// Note: `notif.pid` here is the *parent* (the task issuing
+/// clone/fork). The kernel hasn't run the syscall yet, so we don't
+/// know the child's pid. The child is discovered and registered later,
+/// on its first own seccomp notification, via `register_child_if_new`.
 pub(crate) async fn handle_fork(
     notif: &SeccompNotif,
     resource: &Arc<Mutex<ResourceState>>,
-    procfs: &Arc<Mutex<ProcfsState>>,
     _policy: &NotifPolicy,
 ) -> NotifAction {
     let nr = notif.data.nr as i64;
@@ -55,12 +60,39 @@ pub(crate) async fn handle_fork(
     }
 
     rs.proc_count += 1;
-    drop(rs);
+    NotifAction::Continue
+}
+
+/// If `pid` is not yet tracked in the ProcessIndex, register it:
+/// open a pidfd, record the canonical PidKey, and spawn the exit
+/// watcher. Called from the supervisor's notification dispatcher
+/// before per-syscall handlers run. Best-effort: if pidfd_open or
+/// `register` fails, the pid stays untracked and nothing is spawned.
+///
+/// The fast path is a single `RwLock` read: if the pid is already
+/// tracked, we trust the entry. PID-identity correctness comes from
+/// the per-child pidfd watcher — a process can't issue notifications
+/// after it has exited, and the kernel won't recycle a PID until the
+/// parent has waited (which we observe), so a stale entry has no
+/// window in which to be hit. We deliberately do *not* re-stat
+/// /proc/<pid>/stat on every notification.
+pub(crate) async fn register_child_if_new(ctx: &Arc<SupervisorCtx>, pid: i32) {
+    if ctx.processes.contains(pid) {
+        return;
+    }
 
-    let mut pfs = procfs.lock().await;
-    pfs.proc_pids.insert(notif.pid as i32);
+    let pidfd = match crate::sys::syscall::pidfd_open(pid as u32, 0) {
+        Ok(fd) => fd,
+        Err(_) => return, // old kernel or process gone — GC backstop will clean up
+    };
 
-    NotifAction::Continue
+    let key = match ctx.processes.register(pid) {
+        Some(k) => k,
+        None => return, // exited between pidfd_open and stat read; NOTE(review): is pidfd closed here?
+    };
+
+    // Hand the pidfd to the watcher; it owns the fd's lifetime now.
+    spawn_pid_watcher(Arc::clone(ctx), key, pidfd);
 }
 
 /// Handle wait4/waitid notifications — decrement the concurrent process count.
@@ -82,14 +114,14 @@ pub(crate) async fn handle_wait(
 /// Tracks anonymous memory usage and enforces the configured memory limit.
 pub(crate) async fn handle_memory(
     notif: &SeccompNotif,
-    resource: &Arc<Mutex<ResourceState>>,
+    ctx: &Arc<SupervisorCtx>,
     policy: &NotifPolicy,
 ) -> NotifAction {
     let nr = notif.data.nr as i64;
     let args = &notif.data.args;
     let limit = policy.max_memory_bytes;
 
-    let mut st = resource.lock().await;
+    let mut st = ctx.resource.lock().await;
 
     let kill = NotifAction::Kill { sig: libc::SIGKILL, pgid: notif.pid as i32 };
 
@@ -110,26 +142,36 @@ pub(crate) async fn handle_memory(
     } else if nr == libc::SYS_brk {
         // args[0] = new_brk
         let new_brk = args[0];
-        let pid = notif.pid as i32;
 
         if new_brk == 0 {
             // Query: return Continue, kernel handles it.
             return NotifAction::Continue;
         }
 
-        let base = *st.brk_bases.entry(pid).or_insert(new_brk);
-
+        // Per-process brk base is in PerProcessState. Drop the global
+        // ResourceState lock first to avoid lock ordering issues with
+        // the per-process lock acquired below (per-process first,
+        // then global, when both are needed).
+        drop(st);
+        let entry = match ctx.processes.entry_for(notif.pid as i32) {
+            Some(e) => e,
+            None => return NotifAction::Continue,
+        };
+        let mut perproc = entry.1.lock().await;
+        let mut st = ctx.resource.lock().await;
+
+        let base = *perproc.brk_base.get_or_insert(new_brk);
         if new_brk > base {
             let delta = new_brk - base;
             if st.mem_used.saturating_add(delta) > limit {
                 return kill;
             }
             st.mem_used += delta;
-            st.brk_bases.insert(pid, new_brk);
+            perproc.brk_base = Some(new_brk);
         } else if new_brk < base {
             let delta = base - new_brk;
             st.mem_used = st.mem_used.saturating_sub(delta);
-            st.brk_bases.insert(pid, new_brk);
+            perproc.brk_base = Some(new_brk);
         }
     } else if nr == libc::SYS_mremap {
         // args[1] = old_len, args[2] = new_len
diff --git a/crates/sandlock-core/src/sandbox.rs b/crates/sandlock-core/src/sandbox.rs
index 81dead1..190dc2f 100644
--- a/crates/sandlock-core/src/sandbox.rs
+++ b/crates/sandlock-core/src/sandbox.rs
@@ -957,9 +957,8 @@ impl Sandbox {
                 net_state.port_map.on_bind = Some(cb);
             }
 
-            // ProcfsState
-            let mut procfs_state = ProcfsState::new();
-            procfs_state.proc_pids.insert(pid);
+            // ProcfsState (sandbox membership lives in ProcessIndex now).
+            let procfs_state = ProcfsState::new();
 
             // ResourceState
             let mut res_state = ResourceState::new(
@@ -1029,6 +1028,9 @@ impl Sandbox {
             let time_random_state = Arc::new(Mutex::new(time_random_state));
             let policy_fn_state = Arc::new(Mutex::new(policy_fn_state));
             let chroot_state = Arc::new(Mutex::new(chroot_state));
+            // Root child is registered (with watcher) on its first
+            // notification, the same path grandchildren take.
+            let processes = Arc::new(crate::seccomp::state::ProcessIndex::new());
 
             let ctx = Arc::new(SupervisorCtx {
                 resource: Arc::clone(&res_state),
@@ -1039,6 +1041,7 @@ impl Sandbox {
                 policy_fn: Arc::clone(&policy_fn_state),
                 chroot: Arc::clone(&chroot_state),
                 netlink: Arc::new(crate::netlink::NetlinkState::new()),
+                processes: Arc::clone(&processes),
                 policy: Arc::new(notif_policy),
                 child_pidfd: child_pidfd_raw,
                 notif_fd: notif_raw_fd,
diff --git a/crates/sandlock-core/src/seccomp/ctx.rs b/crates/sandlock-core/src/seccomp/ctx.rs
index e415ae9..bb1be30 100644
--- a/crates/sandlock-core/src/seccomp/ctx.rs
+++ b/crates/sandlock-core/src/seccomp/ctx.rs
@@ -3,7 +3,10 @@ use std::sync::Arc;
 use tokio::sync::Mutex;
 
 use super::notif::NotifPolicy;
-use super::state::{ChrootState, CowState, NetworkState, PolicyFnState, ProcfsState, ResourceState, TimeRandomState};
+use super::state::{
+    ChrootState, CowState, NetworkState, PolicyFnState, ProcessIndex, ProcfsState, ResourceState,
+    TimeRandomState,
+};
 
 /// Holds all supervisor state and policy. Passed to every handler.
 pub struct SupervisorCtx {
@@ -23,6 +26,11 @@ pub struct SupervisorCtx {
     pub chroot: Arc<Mutex<ChrootState>>,
     /// NETLINK_ROUTE virtualization state.
     pub netlink: Arc<crate::netlink::NetlinkState>,
+    /// Per-process registry: pid → PidKey plus that process's
+    /// `PerProcessState`. Source of truth for "which processes are in
+    /// the sandbox" and the anchor for unified per-process state
+    /// cleanup. Wraps an internal RwLock, so handlers can query it
+    /// synchronously without `.await`.
     /// Immutable policy — no lock needed.
     pub policy: Arc<NotifPolicy>,
     /// pidfd for the child process (immutable after spawn).
diff --git a/crates/sandlock-core/src/seccomp/dispatch.rs b/crates/sandlock-core/src/seccomp/dispatch.rs
index 8e1479f..34e1178 100644
--- a/crates/sandlock-core/src/seccomp/dispatch.rs
+++ b/crates/sandlock-core/src/seccomp/dispatch.rs
@@ -103,12 +103,11 @@ pub fn build_dispatch_table(
     for nr in fork_nrs {
         let policy = Arc::clone(policy);
         let resource = Arc::clone(resource);
-        table.register(nr, Box::new(move |notif, ctx, _notif_fd| {
+        table.register(nr, Box::new(move |notif, _ctx, _notif_fd| {
             let policy = Arc::clone(&policy);
             let resource = Arc::clone(&resource);
-            let procfs_inner = Arc::clone(&ctx.procfs);
             Box::pin(async move {
-                crate::resource::handle_fork(&notif, &resource, &procfs_inner, &policy).await
+                crate::resource::handle_fork(&notif, &resource, &policy).await
             })
         }));
     }
@@ -135,12 +134,10 @@ pub fn build_dispatch_table(
             libc::SYS_mremap, libc::SYS_shmget,
         ] {
             let policy = Arc::clone(policy);
-            let resource = Arc::clone(resource);
-            table.register(nr, Box::new(move |notif, _ctx, _notif_fd| {
+            table.register(nr, Box::new(move |notif, ctx, _notif_fd| {
                 let policy = Arc::clone(&policy);
-                let resource = Arc::clone(&resource);
                 Box::pin(async move {
-                    crate::resource::handle_memory(&notif, &resource, &policy).await
+                    crate::resource::handle_memory(&notif, &ctx, &policy).await
                 })
             }));
         }
@@ -233,10 +230,10 @@ pub fn build_dispatch_table(
         table.register(libc::SYS_openat, Box::new(move |notif, ctx, notif_fd| {
             let policy = Arc::clone(&policy);
             let resource = Arc::clone(&resource);
-            let procfs_inner = Arc::clone(&ctx.procfs);
+            let processes = Arc::clone(&ctx.processes);
             let network = Arc::clone(&ctx.network);
             Box::pin(async move {
-                crate::procfs::handle_proc_open(&notif, &procfs_inner, &resource, &network, &policy, notif_fd).await
+                crate::procfs::handle_proc_open(&notif, &processes, &resource, &network, &policy, notif_fd).await
             })
         }));
     }
@@ -248,9 +245,9 @@ pub fn build_dispatch_table(
         let policy = Arc::clone(policy);
         table.register(nr, Box::new(move |notif, ctx, notif_fd| {
             let policy = Arc::clone(&policy);
-            let procfs_inner = Arc::clone(&ctx.procfs);
+            let processes = Arc::clone(&ctx.processes);
             Box::pin(async move {
-                crate::procfs::handle_getdents(&notif, &procfs_inner, &policy, notif_fd).await
+                crate::procfs::handle_getdents(&notif, &processes, &policy, notif_fd).await
             })
         }));
     }
@@ -317,9 +314,9 @@ pub fn build_dispatch_table(
         }
         for nr in getdents_nrs {
             table.register(nr, Box::new(|notif, ctx, notif_fd| {
-                let procfs_inner = Arc::clone(&ctx.procfs);
+                let processes = Arc::clone(&ctx.processes);
                 Box::pin(async move {
-                    crate::procfs::handle_sorted_getdents(&notif, &procfs_inner, notif_fd).await
+                    crate::procfs::handle_sorted_getdents(&notif, &processes, notif_fd).await
                 })
             }));
         }
@@ -606,6 +603,17 @@ fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc<NotifPolicy>
 // ============================================================
 
 fn register_cow_handlers(table: &mut DispatchTable) {
+    // Boxes a COW handler as a dispatch closure, handing it clones of ctx.cow and ctx.processes.
+    macro_rules! cow_call {
+        ($handler:expr) => {
+            Box::new(|notif, ctx, notif_fd| {
+                let cow = Arc::clone(&ctx.cow);
+                let processes = Arc::clone(&ctx.processes);
+                Box::pin(async move { $handler(&notif, &cow, &processes, notif_fd).await })
+            })
+        };
+    }
+
     // Write syscalls (*at variants + legacy)
     let mut write_nrs = vec![
         libc::SYS_unlinkat, libc::SYS_mkdirat, libc::SYS_renameat2,
@@ -618,108 +626,43 @@ fn register_cow_handlers(table: &mut DispatchTable) {
         arch::SYS_LCHOWN,
     ].into_iter().flatten());
     for nr in write_nrs {
-        table.register(nr, Box::new(|notif, ctx, notif_fd| {
-            let cow = Arc::clone(&ctx.cow);
-            Box::pin(async move {
-                crate::cow::dispatch::handle_cow_write(&notif, &cow, notif_fd).await
-            })
-        }));
+        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_write));
     }
 
-    // utimensat — unconditional return
-    table.register(libc::SYS_utimensat, Box::new(|notif, ctx, notif_fd| {
-        let cow = Arc::clone(&ctx.cow);
-        Box::pin(async move {
-            crate::cow::dispatch::handle_cow_utimensat(&notif, &cow, notif_fd).await
-        })
-    }));
+    table.register(libc::SYS_utimensat, cow_call!(crate::cow::dispatch::handle_cow_utimensat));
 
-    // faccessat/access — fallthrough
-    let mut access_nrs = vec![
-        libc::SYS_faccessat,
-        crate::cow::dispatch::SYS_FACCESSAT2,
-    ];
+    let mut access_nrs = vec![libc::SYS_faccessat, crate::cow::dispatch::SYS_FACCESSAT2];
     access_nrs.extend(arch::SYS_ACCESS);
     for nr in access_nrs {
-        table.register(nr, Box::new(|notif, ctx, notif_fd| {
-            let cow = Arc::clone(&ctx.cow);
-            Box::pin(async move {
-                crate::cow::dispatch::handle_cow_access(&notif, &cow, notif_fd).await
-            })
-        }));
+        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_access));
     }
 
-    // openat/open — fallthrough
     let mut open_nrs = vec![libc::SYS_openat];
     open_nrs.extend(arch::SYS_OPEN);
     for nr in open_nrs {
-        table.register(nr, Box::new(|notif, ctx, notif_fd| {
-            let cow = Arc::clone(&ctx.cow);
-            Box::pin(async move {
-                crate::cow::dispatch::handle_cow_open(&notif, &cow, notif_fd).await
-            })
-        }));
+        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_open));
     }
 
-    // stat family — fallthrough
-    let mut stat_nrs = vec![
-        libc::SYS_newfstatat, libc::SYS_faccessat,
-    ];
+    let mut stat_nrs = vec![libc::SYS_newfstatat, libc::SYS_faccessat];
     stat_nrs.extend([arch::SYS_STAT, arch::SYS_LSTAT, arch::SYS_ACCESS].into_iter().flatten());
     for nr in stat_nrs {
-        table.register(nr, Box::new(|notif, ctx, notif_fd| {
-            let cow = Arc::clone(&ctx.cow);
-            Box::pin(async move {
-                crate::cow::dispatch::handle_cow_stat(&notif, &cow, notif_fd).await
-            })
-        }));
+        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_stat));
     }
 
-    // statx — fallthrough
-    table.register(libc::SYS_statx, Box::new(|notif, ctx, notif_fd| {
-        let cow = Arc::clone(&ctx.cow);
-        Box::pin(async move {
-            crate::cow::dispatch::handle_cow_statx(&notif, &cow, notif_fd).await
-        })
-    }));
+    table.register(libc::SYS_statx, cow_call!(crate::cow::dispatch::handle_cow_statx));
 
-    // readlink — fallthrough
     let mut readlink_nrs = vec![libc::SYS_readlinkat];
     readlink_nrs.extend(arch::SYS_READLINK);
     for nr in readlink_nrs {
-        table.register(nr, Box::new(|notif, ctx, notif_fd| {
-            let cow = Arc::clone(&ctx.cow);
-            Box::pin(async move {
-                crate::cow::dispatch::handle_cow_readlink(&notif, &cow, notif_fd).await
-            })
-        }));
+        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_readlink));
     }
 
-    // getdents — fallthrough
     let mut getdents_nrs = vec![libc::SYS_getdents64];
     getdents_nrs.extend(arch::SYS_GETDENTS);
     for nr in getdents_nrs {
-        table.register(nr, Box::new(|notif, ctx, notif_fd| {
-            let cow = Arc::clone(&ctx.cow);
-            Box::pin(async move {
-                crate::cow::dispatch::handle_cow_getdents(&notif, &cow, notif_fd).await
-            })
-        }));
+        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_getdents));
     }
 
-    // chdir — redirect to upper dir if target was created by COW
-    table.register(libc::SYS_chdir, Box::new(|notif, ctx, notif_fd| {
-        let cow = Arc::clone(&ctx.cow);
-        Box::pin(async move {
-            crate::cow::dispatch::handle_cow_chdir(&notif, &cow, notif_fd).await
-        })
-    }));
-
-    // getcwd — return logical workdir path after chdir into a COW-only dir
-    table.register(libc::SYS_getcwd, Box::new(|notif, ctx, notif_fd| {
-        let cow = Arc::clone(&ctx.cow);
-        Box::pin(async move {
-            crate::cow::dispatch::handle_cow_getcwd(&notif, &cow, notif_fd).await
-        })
-    }));
+    table.register(libc::SYS_chdir, cow_call!(crate::cow::dispatch::handle_cow_chdir));
+    table.register(libc::SYS_getcwd, cow_call!(crate::cow::dispatch::handle_cow_getcwd));
 }
diff --git a/crates/sandlock-core/src/seccomp/notif.rs b/crates/sandlock-core/src/seccomp/notif.rs
index 03a1981..71c4e34 100644
--- a/crates/sandlock-core/src/seccomp/notif.rs
+++ b/crates/sandlock-core/src/seccomp/notif.rs
@@ -862,6 +862,12 @@ async fn handle_notification(
 ) {
     let policy = &ctx.policy;
 
+    // Ensure every pid that produces a notification is tracked in the
+    // ProcessIndex with an exit watcher. The fork handler runs on the
+    // *parent* pid (the child doesn't exist yet at clone-time), so the
+    // child gets registered the first time it issues its own syscall.
+    crate::resource::register_child_if_new(ctx, notif.pid as i32).await;
+
     // Re-patch vDSO if needed (exec replaces it with a fresh copy).
     if policy.has_time_start || policy.has_random_seed {
         let mut pfs = ctx.procfs.lock().await;
@@ -950,9 +956,79 @@ pub async fn supervisor(
         }
     });
 
+    // Periodic sweep as a defensive backstop in case pidfd-based
+    // lifecycle cleanup misses an entry (e.g. pidfd_open failed for a
+    // child on an old kernel, or its watcher panicked). At 5 minutes
+    // this is cheap enough to leave on; the primary cleanup path is
+    // still per-child pidfd readiness in `spawn_pid_watcher`.
+    let gc = tokio::spawn(process_index_gc(Arc::clone(&ctx.processes)));
+
     while let Some(notif) = rx.recv().await {
         handle_notification(notif, &ctx, &dispatch_table, fd).await;
     }
+
+    gc.abort();
+}
+
+/// Periodic sweep that drops `ProcessIndex` entries for exited PIDs.
+/// Per-process state hangs off these entries via `Arc`, so dropping
+/// them releases everything in one step.
+async fn process_index_gc(processes: Arc<super::state::ProcessIndex>) {
+    loop {
+        tokio::time::sleep(std::time::Duration::from_secs(300)).await;
+        if processes.len() > 0 {
+            // `prune_dead` does blocking /proc reads; keep them off the async workers.
+            let idx = Arc::clone(&processes);
+            let _ = tokio::task::spawn_blocking(move || idx.prune_dead()).await;
+        }
+    }
+}
+
+/// Spawn a per-child task that awaits the pidfd becoming readable
+/// (process exit) and then runs unified cleanup across every
+/// per-process supervisor map.
+///
+/// The watcher *owns* the pidfd via `AsyncFd<OwnedFd>` — the kernel
+/// fd stays alive for as long as tokio's IO driver has it registered,
+/// and is closed exactly once when the watcher task ends. This avoids
+/// a TOCTOU where dropping the fd from a separate map could let a
+/// recycled fd be deregistered from epoll.
+pub(crate) fn spawn_pid_watcher(
+    ctx: Arc<super::ctx::SupervisorCtx>,
+    key: super::state::PidKey,
+    pidfd: std::os::unix::io::OwnedFd,
+) {
+    tokio::spawn(async move {
+        let async_fd = match tokio::io::unix::AsyncFd::with_interest(
+            pidfd,
+            tokio::io::Interest::READABLE,
+        ) {
+            Ok(f) => f,
+            Err(_) => {
+                // AsyncFd registration failed (extremely unusual);
+                // fall back to immediate cleanup so we don't leak the
+                // index entry. The OwnedFd was moved into
+                // `with_interest`, which only hands back an io::Error,
+                // so the pidfd has already been dropped (closed).
+                cleanup_pid(&ctx, key).await;
+                return;
+            }
+        };
+        // pidfd becomes readable when the process exits. Nothing is read and an
+        // error from `readable()` is ignored: either way we proceed to cleanup.
+        let _ = async_fd.readable().await;
+        cleanup_pid(&ctx, key).await;
+        // async_fd drops here, closing the pidfd.
+    });
+}
+
+/// Drop the supervisor's per-process state for `key`: a single
+/// `ProcessIndex::unregister`, which is a no-op when the pid slot now
+/// holds a different `PidKey` (recycled pid). The index's `Arc` drops
+/// here; clones held by in-flight handlers drop with their tasks,
+/// which then frees the `PerProcessState`.
+pub(crate) async fn cleanup_pid(ctx: &super::ctx::SupervisorCtx, key: super::state::PidKey) {
+    ctx.processes.unregister(key);
 }
 
 // ============================================================
@@ -997,7 +1073,6 @@ mod tests {
         assert_eq!(rs.mem_used, 0);
         assert_eq!(rs.max_memory_bytes, 1024 * 1024);
         assert_eq!(rs.max_processes, 10);
-        assert!(rs.brk_bases.is_empty());
         assert!(!rs.hold_forks);
         assert!(rs.held_notif_ids.is_empty());
     }
diff --git a/crates/sandlock-core/src/seccomp/state.rs b/crates/sandlock-core/src/seccomp/state.rs
index b9058e2..d81f207 100644
--- a/crates/sandlock-core/src/seccomp/state.rs
+++ b/crates/sandlock-core/src/seccomp/state.rs
@@ -1,7 +1,11 @@
 // Domain-specific state structs — each domain is locked independently so
-// handlers only contend on the state they actually need.
+// handlers only contend on the state they actually need. Per-process
+// state is bundled into a single `PerProcessState` owned by
+// `ProcessIndex`; cleanup on exit is just dropping the entry's `Arc`.
 
 use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+use tokio::sync::Mutex as AsyncMutex;
 
 /// Resource-limit runtime state shared across notification handlers.
 pub struct ResourceState {
@@ -13,8 +17,6 @@ pub struct ResourceState {
     pub mem_used: u64,
     /// Maximum allowed anonymous memory (bytes).
     pub max_memory_bytes: u64,
-    /// Per-PID brk base addresses for memory tracking.
-    pub brk_bases: HashMap<i32, u64>,
     /// Whether fork notifications should be held (checkpoint/freeze).
     pub hold_forks: bool,
     /// Notification IDs held during a checkpoint freeze.
@@ -33,7 +35,6 @@ impl ResourceState {
             max_processes,
             mem_used: 0,
             max_memory_bytes,
-            brk_bases: HashMap::new(),
             hold_forks: false,
             held_notif_ids: Vec::new(),
             load_avg: crate::procfs::LoadAvg::new(),
@@ -46,13 +47,11 @@ impl ResourceState {
 // ProcfsState — /proc virtualization state
 // ============================================================
 
-/// /proc virtualization runtime state.
+/// /proc virtualization runtime state. Sandbox membership lives in
+/// `ProcessIndex`; per-process getdents caches live in
+/// `PerProcessState::procfs_dir_cache`. This struct only holds
+/// truly global virtualization state.
 pub struct ProcfsState {
-    /// PIDs belonging to the sandbox (for /proc PID filtering).
-    pub proc_pids: HashSet<i32>,
-    /// Cache of filtered dirent entries keyed by (pid, fd, directory target).
-    /// Populated on first getdents64 call for a /proc directory, drained on subsequent calls.
-    pub getdents_cache: HashMap<(i32, u32, String), Vec<Vec<u8>>>,
     /// Base address of the last vDSO we patched (0 = not yet patched).
     pub vdso_patched_addr: u64,
 }
@@ -60,18 +59,18 @@ pub struct ProcfsState {
 impl ProcfsState {
     pub fn new() -> Self {
         Self {
-            proc_pids: HashSet::new(),
-            getdents_cache: HashMap::new(),
             vdso_patched_addr: 0,
         }
     }
 }
 
 // ============================================================
-// CowState — copy-on-write filesystem state
+// PidKey — stable per-process identity
 // ============================================================
 
-/// Stable process identity for per-process COW state.
+/// Stable process identity. Numeric pid plus the start_time that
+/// distinguishes a specific process instance from any future recycle
+/// of the same pid slot.
 #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
 pub struct PidKey {
     /// Numeric PID observed by seccomp notification.
@@ -80,57 +79,209 @@ pub struct PidKey {
     pub start_time: u64,
 }
 
-/// Copy-on-write filesystem state.
-pub struct CowState {
-    /// Seccomp-based COW branch (None if COW disabled).
-    pub branch: Option<crate::cow::seccomp::SeccompCowBranch>,
-    /// Getdents cache for COW directories.
-    /// Value is (host_path, entries) to detect fd reuse and invalidate stale entries.
-    pub dir_cache: HashMap<(PidKey, u32), (String, Vec<Vec<u8>>)>,
-    /// Logical cwd for processes that chdir into COW-only directories.
-    pub virtual_cwds: HashMap<PidKey, String>,
+/// Read the process start time (field 22 of /proc/<pid>/stat) for `pid`.
+/// Returns None if the process is gone or /proc is not readable.
+pub(crate) fn read_pid_start_time(pid: i32) -> Option<u64> {
+    let stat = std::fs::read_to_string(format!("/proc/{}/stat", pid)).ok()?;
+    // Skip past "pid (comm)" — comm may contain spaces and parens, but the
+    // last ") " in the line ends the comm field.
+    let rest = stat.rsplit_once(") ")?.1;
+    // The first token after "(comm) " is field 3; field 22 is therefore nth(19).
+    rest.split_whitespace().nth(19)?.parse().ok()
 }
 
-impl CowState {
+// ============================================================
+// PerProcessState — bundled per-process supervisor state
+// ============================================================
+
+/// All per-process supervisor state for one tracked child. One
+/// instance lives per `PidKey`, owned by `ProcessIndex` behind an
+/// `Arc<AsyncMutex<…>>`. Cleanup on process exit is one operation:
+/// `ProcessIndex::unregister` drops the index's `Arc`, and the
+/// supervisor's per-handler clones drop along with their tasks.
+#[derive(Default)]
+pub struct PerProcessState {
+    /// Logical cwd while the process is chdir'd into a COW-only
+    /// directory. None means "use kernel-reported cwd".
+    pub virtual_cwd: Option<String>,
+    /// Recorded brk base for memory accounting. None until first brk.
+    pub brk_base: Option<u64>,
+    /// COW directory dirent cache. Keyed by child's fd; value is
+    /// (host target path, sorted dirent bytes left to return).
+    /// Entries are invalidated when the fd is reused for a different
+    /// directory.
+    pub cow_dir_cache: HashMap<u32, (String, Vec<Vec<u8>>)>,
+    /// /proc directory dirent cache. Keyed by (child fd, target
+    /// path); same drain-on-EOF semantics as cow_dir_cache.
+    pub procfs_dir_cache: HashMap<(u32, String), Vec<Vec<u8>>>,
+}
+
+// ============================================================
+// ProcessIndex — sandbox membership + per-process state
+// ============================================================
+
+/// Source-of-truth registry for processes inside the sandbox.
+///
+/// Maps the kernel's numeric `pid` (the value that arrives in seccomp
+/// notifications) to the canonical `PidKey` plus an
+/// `Arc<AsyncMutex<PerProcessState>>` holding everything per-process.
+/// Held behind an internal `std::sync::RwLock` so the read-mostly hot
+/// paths (`key_for`, `contains`, `entry_for`, `/proc` virtualization)
+/// avoid an async mutex on every notification, and so `ProcessIndex`
+/// doesn't need its own outer wrapper in `SupervisorCtx`. Lock guards
+/// are `!Send`, so a future holding one across an `.await` is itself
+/// `!Send` and cannot be passed to `tokio::spawn`.
+///
+/// Ownership of each child's pidfd lives with the per-child watcher
+/// task, not with this index. That keeps the kernel fd alive for as
+/// long as the `AsyncFd` registration in the tokio IO driver does,
+/// and avoids a race where dropping the fd from the index could
+/// deregister a recycled fd from epoll.
+pub struct ProcessIndex {
+    inner: std::sync::RwLock<HashMap<i32, ProcessEntry>>,
+}
+
+#[derive(Clone)]
+struct ProcessEntry {
+    key: PidKey,
+    state: Arc<AsyncMutex<PerProcessState>>,
+}
+
+impl ProcessIndex {
     pub fn new() -> Self {
         Self {
-            branch: None,
-            dir_cache: HashMap::new(),
-            virtual_cwds: HashMap::new(),
+            inner: std::sync::RwLock::new(HashMap::new()),
+        }
+    }
+
+    /// Register a process by reading its start_time once and
+    /// allocating its `PerProcessState`. Returns the canonical key,
+    /// or None if the process is already gone or the lock is poisoned.
+    /// Re-registering a live process keeps its existing state; only a
+    /// stale entry (same pid, different start_time) is replaced. The
+    /// caller keeps the pidfd alive (watcher task's `AsyncFd<OwnedFd>`).
+    pub fn register(&self, pid: i32) -> Option<PidKey> {
+        let key = PidKey { pid, start_time: read_pid_start_time(pid)? };
+        let mut map = self.inner.write().ok()?;
+        if map.get(&pid).map(|e| e.key) != Some(key) {
+            let state = Arc::new(AsyncMutex::new(PerProcessState::default()));
+            map.insert(pid, ProcessEntry { key, state });
+        }
+        Some(key)
+    }
+
+    /// Look up the canonical PidKey for a notification's raw pid.
+    /// Returns None if this pid was never registered (e.g. pidfd_open
+    /// failed at fork) — callers should fall back to a no-op.
+    pub fn key_for(&self, pid: i32) -> Option<PidKey> {
+        self.inner.read().ok()?.get(&pid).map(|e| e.key)
+    }
+
+    /// Look up both the PidKey and the per-process state handle for
+    /// `pid`. Returns None if the pid isn't tracked. The caller locks
+    /// the returned `Arc<AsyncMutex<…>>` to read or mutate.
+    pub fn entry_for(&self, pid: i32) -> Option<(PidKey, Arc<AsyncMutex<PerProcessState>>)> {
+        self.inner
+            .read()
+            .ok()?
+            .get(&pid)
+            .map(|e| (e.key, Arc::clone(&e.state)))
+    }
+
+    /// Cheap membership test — used by /proc virtualization to gate
+    /// access to `/proc/<pid>/...` paths and by getdents filtering.
+    pub fn contains(&self, pid: i32) -> bool {
+        self.inner
+            .read()
+            .map(|g| g.contains_key(&pid))
+            .unwrap_or(false)
+    }
+
+    /// Number of tracked processes (for /proc/loadavg total).
+    pub fn len(&self) -> usize {
+        self.inner.read().map(|g| g.len()).unwrap_or(0)
+    }
+
+    /// Largest tracked pid (for /proc/loadavg last_pid).
+    pub fn max_pid(&self) -> Option<i32> {
+        self.inner.read().ok()?.keys().copied().max()
+    }
+
+    /// Snapshot the set of tracked pids. Used by getdents filtering
+    /// where the caller needs O(1) lookups inside a loop and would
+    /// otherwise have to re-acquire the read lock per entry.
+    pub fn pids_snapshot(&self) -> HashSet<i32> {
+        self.inner
+            .read()
+            .map(|g| g.keys().copied().collect())
+            .unwrap_or_default()
+    }
+
+    /// Remove a process from the index. The per-process state's
+    /// `Arc` reference held by the index drops here; remaining clones
+    /// (e.g. a handler that's mid-execution for that pid) will drop
+    /// when they go out of scope, and the inner `PerProcessState`
+    /// frees automatically.
+    pub fn unregister(&self, key: PidKey) {
+        if let Ok(mut g) = self.inner.write() {
+            // Only clear if the entry still points at this key. A PID
+            // recycled with a fresh start_time may already have
+            // overwritten the entry via register(); we must not stomp it.
+            if g.get(&key.pid).map(|e| e.key) == Some(key) {
+                g.remove(&key.pid);
+            }
         }
     }
 
-    /// Drop COW per-process entries for an older process that used the same numeric PID.
-    pub(crate) fn prune_reused_pid(&mut self, current: PidKey) {
-        self.virtual_cwds
-            .retain(|key, _| key.pid != current.pid || *key == current);
-        self.dir_cache
-            .retain(|(key, _), _| key.pid != current.pid || *key == current);
+    /// Defensive sweep: drop entries whose process is gone (or whose
+    /// start_time has changed). Called from a low-frequency backstop
+    /// task in case a pidfd watcher failed to spawn or the kernel
+    /// didn't deliver the readability event.
+    pub fn prune_dead(&self) {
+        let candidates: Vec<(i32, PidKey)> = match self.inner.read() {
+            Ok(g) => g.iter().map(|(p, e)| (*p, e.key)).collect(),
+            Err(_) => return,
+        };
+        let mut dead = Vec::new();
+        for (pid, key) in candidates {
+            match read_pid_start_time(pid) {
+                Some(st) if st == key.start_time => continue,
+                _ => dead.push(key),
+            }
+        }
+        if dead.is_empty() {
+            return;
+        }
+        if let Ok(mut g) = self.inner.write() {
+            for key in dead {
+                if g.get(&key.pid).map(|e| e.key) == Some(key) {
+                    g.remove(&key.pid);
+                }
+            }
+        }
     }
 }
 
-#[cfg(test)]
-mod tests {
-    use super::*;
+impl Default for ProcessIndex {
+    fn default() -> Self {
+        Self::new()
+    }
+}
 
-    #[test]
-    fn cow_state_prunes_entries_for_reused_pid() {
-        let old = PidKey { pid: 42, start_time: 1 };
-        let current = PidKey { pid: 42, start_time: 2 };
-        let other = PidKey { pid: 43, start_time: 1 };
-        let mut state = CowState::new();
-
-        state.virtual_cwds.insert(old, "/old".to_string());
-        state.virtual_cwds.insert(other, "/other".to_string());
-        state.dir_cache.insert((old, 7), ("/old".to_string(), Vec::new()));
-        state.dir_cache.insert((other, 7), ("/other".to_string(), Vec::new()));
-
-        state.prune_reused_pid(current);
-
-        assert!(!state.virtual_cwds.contains_key(&old));
-        assert!(!state.dir_cache.contains_key(&(old, 7)));
-        assert_eq!(state.virtual_cwds.get(&other), Some(&"/other".to_string()));
-        assert!(state.dir_cache.contains_key(&(other, 7)));
+// ============================================================
+// CowState — copy-on-write filesystem state (global only)
+// ============================================================
+
+/// Global COW state. Per-process COW state (virtual cwd, dir cache)
+/// lives in `PerProcessState`.
+pub struct CowState {
+    /// Seccomp-based COW branch (None if COW disabled).
+    pub branch: Option<crate::cow::seccomp::SeccompCowBranch>,
+}
+
+impl CowState {
+    pub fn new() -> Self {
+        Self { branch: None }
     }
 }
 
@@ -176,13 +327,11 @@ impl NetworkState {
         pid: u32,
         live_policy: Option<&std::sync::Arc<std::sync::RwLock<crate::policy_fn::LivePolicy>>>,
     ) -> crate::seccomp::notif::NetworkPolicy {
-        // Per-PID override takes priority
         if let Ok(overrides) = self.pid_ip_overrides.read() {
             if let Some(ips) = overrides.get(&pid) {
                 return crate::seccomp::notif::NetworkPolicy::AllowList(ips.clone());
             }
         }
-        // Live policy (dynamic updates from policy_fn)
         if let Some(lp) = live_policy {
             if let Ok(live) = lp.read() {
                 if !live.allowed_ips.is_empty() {
@@ -190,7 +339,6 @@ impl NetworkState {
                 }
             }
         }
-        // Global policy
         self.network_policy.clone()
     }
 }
@@ -263,3 +411,115 @@ impl ChrootState {
         Self { chroot_exe: None }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn process_index_register_lookup_unregister() {
+        let self_pid = unsafe { libc::getpid() }; // SAFETY: getpid() cannot fail.
+        let idx = ProcessIndex::new();
+        let key = idx
+            .register(self_pid)
+            .expect("register should succeed for live pid");
+        assert_eq!(key.pid, self_pid);
+
+        assert_eq!(idx.key_for(self_pid), Some(key));
+        assert!(idx.contains(self_pid));
+        assert_eq!(idx.key_for(self_pid + 999_999), None); // a pid that was never registered
+        assert!(!idx.contains(self_pid + 999_999));
+        assert_eq!(idx.len(), 1);
+        assert_eq!(idx.max_pid(), Some(self_pid));
+
+        idx.unregister(key);
+        assert_eq!(idx.key_for(self_pid), None);
+        assert!(!idx.contains(self_pid));
+        assert_eq!(idx.len(), 0);
+        assert_eq!(idx.max_pid(), None);
+    }
+
+    #[test]
+    fn process_index_register_overwrites_stale_entry_for_recycled_pid() {
+        let self_pid = unsafe { libc::getpid() }; // SAFETY: getpid() cannot fail.
+        let idx = ProcessIndex::new();
+        // Forge a stale entry (start_time 0) by inserting directly under the write lock.
+        {
+            let stale_key = PidKey { pid: self_pid, start_time: 0 };
+            let stale = ProcessEntry {
+                key: stale_key,
+                state: Arc::new(AsyncMutex::new(PerProcessState::default())),
+            };
+            idx.inner.write().unwrap().insert(self_pid, stale);
+        }
+
+        let new_key = idx.register(self_pid).unwrap();
+        assert_ne!(new_key.start_time, 0); // fresh key does not carry the forged start_time
+        assert_eq!(idx.key_for(self_pid), Some(new_key));
+
+        // Unregistering by the stale key must NOT clobber the fresh
+        // registration; only an exact-match unregister wins.
+        let stale_key = PidKey { pid: self_pid, start_time: 0 };
+        idx.unregister(stale_key);
+        assert_eq!(idx.key_for(self_pid), Some(new_key));
+    }
+
+    #[tokio::test]
+    async fn process_index_entry_for_returns_shared_handle() {
+        let self_pid = unsafe { libc::getpid() }; // SAFETY: getpid() cannot fail.
+        let idx = ProcessIndex::new();
+        let key = idx.register(self_pid).unwrap();
+
+        let (k1, s1) = idx.entry_for(self_pid).unwrap();
+        let (k2, s2) = idx.entry_for(self_pid).unwrap();
+        assert_eq!(k1, key);
+        assert_eq!(k2, key);
+
+        // Two clones of the same Arc — writes through one are visible
+        // through the other.
+        s1.lock().await.brk_base = Some(0xdead_beef);
+        assert_eq!(s2.lock().await.brk_base, Some(0xdead_beef));
+
+        // After unregister, entry_for returns None but existing Arc
+        // clones stay valid (kept alive by callers).
+        idx.unregister(key);
+        assert!(idx.entry_for(self_pid).is_none());
+        assert_eq!(s1.lock().await.brk_base, Some(0xdead_beef));
+    }
+
+    #[test]
+    fn process_index_pids_snapshot_is_independent() {
+        let self_pid = unsafe { libc::getpid() }; // SAFETY: getpid() cannot fail.
+        let idx = ProcessIndex::new();
+        let key = idx.register(self_pid).unwrap();
+        let snap = idx.pids_snapshot(); // taken while self_pid is still registered
+        idx.unregister(key);
+        assert!(snap.contains(&self_pid));
+        assert!(!idx.contains(self_pid));
+    }
+
+    #[test]
+    fn process_index_prune_dead_drops_recycled_entries() {
+        let self_pid = unsafe { libc::getpid() }; // SAFETY: getpid() cannot fail.
+        let idx = ProcessIndex::new();
+        // Insert a stale entry for our own pid with a forged start_time of 0.
+        let stale_key = PidKey { pid: self_pid, start_time: 0 };
+        let stale = ProcessEntry {
+            key: stale_key,
+            state: Arc::new(AsyncMutex::new(PerProcessState::default())),
+        };
+        idx.inner.write().unwrap().insert(self_pid, stale);
+
+        idx.prune_dead(); // expected to drop the forged entry, per the assert below
+        assert!(!idx.contains(self_pid));
+    }
+
+    #[test]
+    fn process_index_prune_dead_keeps_live_entries() {
+        let self_pid = unsafe { libc::getpid() }; // SAFETY: getpid() cannot fail.
+        let idx = ProcessIndex::new();
+        let key = idx.register(self_pid).unwrap();
+        idx.prune_dead(); // a freshly registered live pid must survive pruning
+        assert_eq!(idx.key_for(self_pid), Some(key));
+    }
+}