From 6ecd5106e33edb39680ac5c93637e7ac3105fbaa Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Thu, 23 Apr 2026 22:38:59 -0400 Subject: [PATCH 01/32] Fix TUI bugs and UX issues from real hardware testing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses 27 user-reported issues from live testing on an RTX 3080 system booting from USB. All changes deployed and verified on hardware. Crash handling: - Override App._handle_exception() to capture Textual runtime crashes - Write crash dumps to persistent disk (/var/lib/neuraldrive/logs/) - Screenshots routed to persistent disk via TEXTUAL_SCREENSHOT_LOCATION - Outer try/except in __main__ catches startup crashes Chat screen: - Fix TypeError from RichLog.write(end='') — removed invalid param - Move streaming response to @work(exclusive=True) to unblock UI - Add on_screen_resume to refresh model list on every screen visit - Add model selector (Select widget) on dedicated row with amber border Models screen: - Rewrite catalog with two-zone keyboard navigation (list + buttons) - Arrow keys navigate, Enter/Space toggle, PgUp/PgDn page jump - Add download cancel button with worker cancellation - Handle asyncio.CancelledError in _start_pull - Add model load/unload via Ollama generate API (keep_alive) - Show both Load and Unload buttons per model (disable irrelevant one) - Fix ModelItem._size/_name collision with Textual Widget internals Services screen: - Fix DuplicateIds crash: await remove_children() before mounting - Use sudo systemctl for service start/stop/restart - Arrow-key service selection with yellow highlight - Use Binding() objects for show/priority params (not 4-element tuples) Dashboard: - Expand GPU StatsBox to show Device, VRAM, Temp, Utilization - Rename 'Loaded Models' to 'Active Models (VRAM)' Wizard: - Rewrite _create_persistence_partition(): fix parted start position, detect actual free space, immediate mount, correct 
Ollama dirs, proper ownership, restart Ollama after partition creation - Add YAML config persistence (persistent disk with overlay fallback) Navigation: - Replace single-letter hotkeys with F2-F6 function keys (priority=True) - Remove old silent hotkeys entirely - Disable command palette via ENABLE_COMMAND_PALETTE=False (COMMAND_PALETTE_BINDING=None crashes Textual 8.2.4) Security: - Add scoped NOPASSWD sudoers (/etc/sudoers.d/neuraldrive-tui) that survives wizard _finalize() stripping NOPASSWD from neuraldrive-admin - Covers systemctl, parted, mkfs, mount, chpasswd, and file ops New files: - utils/config.py: YAML config read/write with persistent/overlay fallback - utils/hardware.py: Boot device detection, partition enumeration - etc/sudoers.d/neuraldrive-tui: Scoped NOPASSWD rules for TUI ops - dev-reset.sh: Development reset script (password, NOPASSWD, sentinel) Build: - Add pyyaml to TUI venv dependencies - Set neuraldrive-tui sudoers permissions in build hook --- config/hooks/live/01-setup-system.chroot | 6 + .../hooks/live/04-install-python-apps.chroot | 2 +- .../etc/sudoers.d/neuraldrive-tui | 30 ++ .../usr/lib/neuraldrive/dev-reset.sh | 38 ++ .../usr/lib/neuraldrive/tui/main.py | 93 +++- .../usr/lib/neuraldrive/tui/screens/chat.py | 75 ++- .../lib/neuraldrive/tui/screens/dashboard.py | 20 +- .../usr/lib/neuraldrive/tui/screens/models.py | 370 ++++++++++++- .../lib/neuraldrive/tui/screens/services.py | 117 +++-- .../usr/lib/neuraldrive/tui/screens/wizard.py | 490 +++++++++++++++--- .../usr/lib/neuraldrive/tui/styles.tcss | 182 ++++++- .../lib/neuraldrive/tui/utils/api_client.py | 24 + .../usr/lib/neuraldrive/tui/utils/config.py | 84 +++ .../usr/lib/neuraldrive/tui/utils/hardware.py | 92 ++++ .../lib/neuraldrive/tui/widgets/model_item.py | 27 +- 15 files changed, 1476 insertions(+), 174 deletions(-) create mode 100644 config/includes.chroot/etc/sudoers.d/neuraldrive-tui create mode 100755 config/includes.chroot/usr/lib/neuraldrive/dev-reset.sh create mode 
100644 config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py diff --git a/config/hooks/live/01-setup-system.chroot b/config/hooks/live/01-setup-system.chroot index 8f1a8aa..b278e7a 100755 --- a/config/hooks/live/01-setup-system.chroot +++ b/config/hooks/live/01-setup-system.chroot @@ -15,6 +15,12 @@ echo "neuraldrive-admin:neuraldrive" | chpasswd echo "neuraldrive-admin ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/neuraldrive-admin chmod 440 /etc/sudoers.d/neuraldrive-admin +# Scoped NOPASSWD rules for TUI — survives wizard _finalize() which only +# modifies neuraldrive-admin. File is baked in via includes.chroot; just +# ensure correct ownership and permissions here. +chmod 440 /etc/sudoers.d/neuraldrive-tui +chown root:root /etc/sudoers.d/neuraldrive-tui + mkdir -p /etc/neuraldrive/tls \ /var/lib/neuraldrive/models/{manifests,blobs} \ /var/lib/neuraldrive/ollama \ diff --git a/config/hooks/live/04-install-python-apps.chroot b/config/hooks/live/04-install-python-apps.chroot index 6efc76f..80605e4 100755 --- a/config/hooks/live/04-install-python-apps.chroot +++ b/config/hooks/live/04-install-python-apps.chroot @@ -118,7 +118,7 @@ git clone --depth 1 https://github.com/psalias2006/gpu-hot.git /usr/lib/neuraldr # --- TUI (terminal interface) --- python3 -m venv /usr/lib/neuraldrive/tui/venv /usr/lib/neuraldrive/tui/venv/bin/pip install --no-cache-dir --upgrade pip -/usr/lib/neuraldrive/tui/venv/bin/pip install --no-cache-dir textual psutil httpx rich +/usr/lib/neuraldrive/tui/venv/bin/pip install --no-cache-dir textual psutil httpx rich pyyaml cat > /usr/local/bin/neuraldrive-tui << 'LAUNCHER' #!/bin/sh diff --git a/config/includes.chroot/etc/sudoers.d/neuraldrive-tui b/config/includes.chroot/etc/sudoers.d/neuraldrive-tui new file mode 100644 index 0000000..cc2fbf7 --- /dev/null +++ b/config/includes.chroot/etc/sudoers.d/neuraldrive-tui @@ -0,0 +1,30 @@ +# Scoped NOPASSWD rules for NeuralDrive TUI operations. 
+# This file is NOT modified by the first-boot wizard's _finalize() +# (which only touches /etc/sudoers.d/neuraldrive-admin). +# Processed AFTER neuraldrive-admin (alphabetical), so these NOPASSWD +# rules override the password-required ALL rule for matched commands. + +# Service management +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/systemctl start neuraldrive-* +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/systemctl stop neuraldrive-* +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/systemctl restart neuraldrive-* +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/systemctl is-active neuraldrive-* + +# Partition creation and storage management +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/sbin/parted * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/sbin/mkfs.ext4 * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/mount * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/umount * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/sbin/partprobe * + +# File operations (wizard config writing, directory setup) +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/tee * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/mkdir * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/chmod * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/chown * + +# Password management (wizard security step) +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/sbin/chpasswd + +# Sudoers self-read (wizard _finalize reads neuraldrive-admin to strip NOPASSWD) +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/cat /etc/sudoers.d/neuraldrive-admin diff --git a/config/includes.chroot/usr/lib/neuraldrive/dev-reset.sh b/config/includes.chroot/usr/lib/neuraldrive/dev-reset.sh new file mode 100755 index 0000000..0b1bd96 --- /dev/null +++ b/config/includes.chroot/usr/lib/neuraldrive/dev-reset.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# /usr/lib/neuraldrive/dev-reset.sh +# Development reset script — restores a post-wizard system to a +# development-friendly state. Included in builds for convenience. 
+# +# Usage: sudo /usr/lib/neuraldrive/dev-reset.sh + +set -e + +echo "=== NeuralDrive Development Reset ===" +echo "" + +# 1. Reset admin password to the build default +echo "neuraldrive-admin:neuraldrive" | chpasswd +echo "[ok] Admin password reset to 'neuraldrive'" + +# 2. Restore blanket NOPASSWD for development +echo "neuraldrive-admin ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/neuraldrive-admin +chmod 440 /etc/sudoers.d/neuraldrive-admin +echo "[ok] Blanket NOPASSWD sudo restored" + +# 3. Remove wizard sentinel so it runs again on next TUI start +rm -f /etc/neuraldrive/first-boot-complete +echo "[ok] Wizard sentinel removed" + +# 4. Clear config files so wizard starts fresh +rm -f /var/lib/neuraldrive/config/config.yaml +rm -f /etc/neuraldrive/config.yaml +echo "[ok] Config files cleared" + +# 5. Clear generated credentials +rm -f /etc/neuraldrive/api.key +rm -f /etc/neuraldrive/credentials.conf +echo "[ok] API key and credentials cleared" + +echo "" +echo "Development reset complete." +echo "Restart the TUI to re-run the first-boot wizard." 
diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/main.py b/config/includes.chroot/usr/lib/neuraldrive/tui/main.py index 4eac789..663595a 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/main.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/main.py @@ -10,20 +10,73 @@ from screens.wizard import FirstBootWizard import os +import sys +import traceback +from datetime import datetime + +from utils import config + +PERSIST_DIR = "/var/lib/neuraldrive" +OVERLAY_LOG_DIR = "/var/log/neuraldrive" + + +def _persistent_available() -> bool: + return os.path.ismount(PERSIST_DIR) + + +def _log_dir() -> str: + if _persistent_available(): + p = os.path.join(PERSIST_DIR, "logs") + try: + os.makedirs(p, exist_ok=True) + return p + except PermissionError: + pass + os.makedirs(OVERLAY_LOG_DIR, exist_ok=True) + return OVERLAY_LOG_DIR + + +def _screenshot_dir() -> str: + if _persistent_available(): + p = os.path.join(PERSIST_DIR, "screenshots") + try: + os.makedirs(p, exist_ok=True) + return p + except PermissionError: + pass + os.makedirs(OVERLAY_LOG_DIR, exist_ok=True) + return OVERLAY_LOG_DIR + + +def _write_crash_dump(error: BaseException) -> str | None: + try: + crash_dir = _log_dir() + ts = datetime.now().strftime("%Y%m%d-%H%M%S") + dump_path = os.path.join(crash_dir, f"tui-crash-{ts}.log") + with open(dump_path, "w") as f: + f.write(f"NeuralDrive TUI crash at {ts}\n") + f.write(f"Python: {sys.version}\n") + f.write(f"Args: {sys.argv}\n\n") + traceback.print_exception(type(error), error, error.__traceback__, file=f) + return dump_path + except Exception: + return None class NeuralDriveTUI(App): CSS_PATH = "styles.tcss" TITLE = "NeuralDrive" + ENABLE_COMMAND_PALETTE = False BINDINGS = [ - Binding("m", "switch_screen('models')", "Models"), - Binding("s", "switch_screen('services')", "Services"), - Binding("n", "switch_screen('network')", "Network"), - Binding("l", "switch_screen('logs')", "Logs"), - Binding("c", "switch_screen('chat')", "Chat"), - 
Binding("d", "switch_screen('dashboard')", "Dashboard"), + Binding("f2", "switch_screen('dashboard')", "F2 Dash", priority=True), + Binding("f3", "switch_screen('models')", "F3 Models", priority=True), + Binding("f4", "switch_screen('services')", "F4 Svc", priority=True), + Binding("f5", "switch_screen('chat')", "F5 Chat", priority=True), + Binding("f6", "switch_screen('logs')", "F6 Logs", priority=True), Binding("q", "quit", "Quit"), + Binding("up", "focus_previous", "Previous", show=False), + Binding("down", "focus_next", "Next", show=False), ] SCREENS = { @@ -37,14 +90,36 @@ class NeuralDriveTUI(App): def on_mount(self) -> None: self.push_screen(DashboardScreen()) - if not os.path.exists("/etc/neuraldrive/first-boot-complete"): + sentinel_exists = os.path.exists("/etc/neuraldrive/first-boot-complete") + if not sentinel_exists and not config.wizard_complete(): self.push_screen(FirstBootWizard()) + def _handle_exception(self, error: Exception) -> None: + dump_path = _write_crash_dump(error) + if dump_path: + self.log(f"Crash dump saved to {dump_path}") + super()._handle_exception(error) + + def action_focus_next(self) -> None: + self.screen.focus_next() + + def action_focus_previous(self) -> None: + self.screen.focus_previous() + def action_switch_screen(self, screen_name: str) -> None: if screen_name in self.SCREENS: self.switch_screen(screen_name) if __name__ == "__main__": - app = NeuralDriveTUI() - app.run(mouse=False) + screenshot_dir = _screenshot_dir() + os.environ["TEXTUAL_SCREENSHOT_LOCATION"] = screenshot_dir + try: + app = NeuralDriveTUI() + app.run(mouse=False) + except Exception as exc: + dump_path = _write_crash_dump(exc) + traceback.print_exc() + if dump_path: + print(f"\nCrash dump saved to {dump_path}") + sys.exit(1) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py index d5716f4..6f4c88c 100644 --- 
a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py @@ -2,6 +2,7 @@ import json +from textual import work from textual.app import ComposeResult from textual.containers import Horizontal from textual.screen import Screen @@ -19,9 +20,9 @@ def __init__(self) -> None: def compose(self) -> ComposeResult: yield Header() - with Horizontal(): - yield Static(" Model: ", classes="label") - yield Select([], id="chat-model-select") + yield Static(" Model", classes="heading") + yield Select([], id="chat-model-select", prompt="Choose a model…") + yield Static("", id="chat-notice") yield RichLog(highlight=True, markup=False, id="chat-log") with Horizontal(id="chat-input-row"): yield Input(placeholder="Type a message…", id="chat-input") @@ -31,23 +32,52 @@ def compose(self) -> ComposeResult: def on_mount(self) -> None: self.app.call_later(self._load_model_options) + def on_screen_resume(self) -> None: + self.app.call_later(self._load_model_options) + async def _load_model_options(self) -> None: - models = await api_client.list_models() + notice = self.query_one("#chat-notice", Static) select = self.query_one("#chat-model-select", Select) + send_btn = self.query_one("#chat-send", Button) + chat_input = self.query_one("#chat-input", Input) + + available = await api_client.ollama_available() + if not available: + notice.update(" Ollama is not running. Start it from the Services screen.") + notice.add_class("error") + send_btn.disabled = True + chat_input.disabled = True + return + + models = await api_client.list_models() options = [(m.get("name", "?"), m.get("name", "?")) for m in models] select.set_options(options) - if options: + + if not options: + notice.update( + " No models installed. Pull a model from the Models screen (press M)." 
+ ) + notice.add_class("warn") + send_btn.disabled = True + chat_input.disabled = True + return + + notice.update("") + notice.remove_class("error", "warn") + send_btn.disabled = False + chat_input.disabled = False + if select.value is Select.BLANK: select.value = options[0][1] async def on_button_pressed(self, event: Button.Pressed) -> None: if event.button.id == "chat-send": - await self._send_message() + self._do_send() async def on_input_submitted(self, event: Input.Submitted) -> None: if event.input.id == "chat-input": - await self._send_message() + self._do_send() - async def _send_message(self) -> None: + def _do_send(self) -> None: input_widget = self.query_one("#chat-input", Input) text = input_widget.value.strip() if not text: @@ -56,6 +86,8 @@ async def _send_message(self) -> None: select = self.query_one("#chat-model-select", Select) model = str(select.value) if select.value is not Select.BLANK else "" if not model: + log = self.query_one("#chat-log", RichLog) + log.write("[error] No model selected. 
Choose a model from the dropdown.") return log = self.query_one("#chat-log", RichLog) @@ -63,7 +95,17 @@ async def _send_message(self) -> None: input_widget.value = "" self._messages.append({"role": "user", "content": text}) - log.write(f"\n[{model}] ", end="") + self._stream_response(model) + + @work(exclusive=True) + async def _stream_response(self, model: str) -> None: + log = self.query_one("#chat-log", RichLog) + send_btn = self.query_one("#chat-send", Button) + chat_input = self.query_one("#chat-input", Input) + + send_btn.disabled = True + chat_input.disabled = True + log.write(f"[{model}] ...") assistant_text = "" try: @@ -73,11 +115,22 @@ async def _send_message(self) -> None: chunk = data.get("message", {}).get("content", "") if chunk: assistant_text += chunk - log.write(chunk, end="") except json.JSONDecodeError: pass - log.write("") + if assistant_text: + log.clear() + for msg in self._messages: + role = "You" if msg["role"] == "user" else model + log.write(f"[{role}] {msg['content']}") + log.write(f"[{model}] {assistant_text}") self._messages.append({"role": "assistant", "content": assistant_text}) + else: + log.write(f"[{model}] (no response)") except Exception as exc: log.write(f"\n[error] {exc}") + if self._messages and self._messages[-1]["role"] == "user": + self._messages.pop() + finally: + send_btn.disabled = False + chat_input.disabled = False diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py index fcf0a5c..0f7c32e 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py @@ -20,8 +20,12 @@ def compose(self) -> ComposeResult: yield StatsBox("CPU", [("Usage", "…")], id="box-cpu") yield StatsBox("Memory", [("Used", "…"), ("Total", "…")], id="box-mem") yield StatsBox("Disk", [("Used", "…"), ("Free", "…")], id="box-disk") - yield StatsBox("GPU", 
[("Vendor", "…")], id="box-gpu") - yield Static("Loaded Models", classes="heading") + yield StatsBox( + "GPU", + [("Device", "…"), ("VRAM", "…"), ("Temp", "…"), ("Util", "…")], + id="box-gpu", + ) + yield Static("Active Models (VRAM)", classes="heading") yield Vertical(id="loaded-models") yield Static("Services", classes="heading") yield Vertical(id="service-badges") @@ -56,12 +60,16 @@ def _refresh_system(self) -> None: gpu = hardware.get_gpu_info() box_gpu = self.query_one("#box-gpu", StatsBox) - box_gpu.update_row("Vendor", gpu["vendor"]) if gpu["devices"]: dev = gpu["devices"][0] - box_gpu.update_row( - "Vendor", f"{dev['name']} {dev['temp_c']}°C {dev['util_percent']}%" - ) + box_gpu.update_row("Device", dev["name"]) + vram_total = dev["vram_total_mb"] + vram_used = dev["vram_used_mb"] + box_gpu.update_row("VRAM", f"{vram_used} / {vram_total} MB") + box_gpu.update_row("Temp", f"{dev['temp_c']}\u00b0C") + box_gpu.update_row("Util", f"{dev['util_percent']}%") + else: + box_gpu.update_row("Device", gpu["vendor"]) container = self.query_one("#service-badges", Vertical) container.remove_children() diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index df73348..195bef3 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -1,15 +1,236 @@ from __future__ import annotations +import asyncio import json +from textual import work from textual.app import ComposeResult -from textual.containers import Vertical, VerticalScroll +from textual.containers import Horizontal, Vertical, VerticalScroll from textual.screen import Screen -from textual.widgets import Button, Footer, Header, Input, Static +from textual.widgets import Button, Footer, Header, Input, ProgressBar, Static + +from textual.binding import Binding from utils import api_client from widgets.model_item import ModelItem 
+CURATED_MODELS = [ + ( + "CPU / ≤4 GB VRAM", + [ + ("qwen2.5:3b", "1.9 GB", "Fast general-purpose"), + ("phi3:mini", "2.3 GB", "Microsoft reasoning model"), + ("gemma2:2b", "1.6 GB", "Google lightweight"), + ], + ), + ( + "6 GB VRAM", + [ + ("llama3.2:3b", "2.0 GB", "Meta compact model"), + ("mistral:7b", "4.1 GB", "Mistral AI flagship"), + ("qwen2.5:7b", "4.7 GB", "Strong multilingual"), + ], + ), + ( + "8 GB VRAM", + [ + ("llama3.1:8b", "4.7 GB", "Meta general-purpose"), + ("gemma2:9b", "5.4 GB", "Google mid-range"), + ("deepseek-coder-v2:lite", "5.0 GB", "Code-focused"), + ], + ), + ( + "12 GB VRAM", + [ + ("codestral:latest", "12 GB", "Mistral code generation"), + ("llama3.1:8b-instruct-q8_0", "8.5 GB", "High-quality quantization"), + ("qwen2.5:14b", "9.0 GB", "Strong reasoning"), + ], + ), + ( + "24 GB+ VRAM", + [ + ("llama3.1:70b", "40 GB", "Meta flagship (Q4)"), + ("qwen2.5:32b", "20 GB", "Top-tier multilingual"), + ("deepseek-coder-v2:16b", "8.9 GB", "Full code model"), + ], + ), +] + + +class ModelCatalog(Screen): + BINDINGS = [ + ("escape", "cancel", "Back"), + Binding("up", "nav_up", show=False, priority=True), + Binding("down", "nav_down", show=False, priority=True), + Binding("pageup", "page_up", show=False, priority=True), + Binding("pagedown", "page_down", show=False, priority=True), + Binding("enter", "activate", show=False, priority=True), + Binding("space", "activate", show=False, priority=True), + Binding("tab", "next_zone", show=False, priority=True), + Binding("shift+tab", "prev_zone", show=False, priority=True), + ] + + def __init__(self, installed_names: set[str]) -> None: + super().__init__() + self._installed = installed_names + self._selected: set[str] = set() + self._catalog_buttons: list[Button] = [] + self._highlight_index = 0 + self._zone = "list" + + def compose(self) -> ComposeResult: + yield Header() + yield Static( + " ↑↓ Navigate Enter Select Tab Actions Esc Back", classes="muted" + ) + with VerticalScroll(id="catalog-scroll"): + 
for tier_label, models in CURATED_MODELS: + yield Static(f" {tier_label}", classes="tier-heading") + for model_name, size, desc in models: + installed = any( + model_name == n or model_name == n.split(":")[0] + for n in self._installed + ) + if installed: + label = f" ✓ {model_name} ({size}) — {desc} [installed]" + btn = Button( + label, + id=f"cat-{model_name.replace(':', '--').replace('.', '-')}", + classes="catalog-item catalog-installed", + disabled=True, + ) + else: + label = f" ○ {model_name} ({size}) — {desc}" + btn = Button( + label, + id=f"cat-{model_name.replace(':', '--').replace('.', '-')}", + classes="catalog-item", + ) + btn.tooltip = model_name + btn.can_focus = False + yield btn + with Horizontal(id="catalog-buttons"): + yield Button("Download Selected", id="download-selected", variant="primary") + yield Button("Cancel", id="catalog-cancel") + yield Footer() + + def on_mount(self) -> None: + self._catalog_buttons = list(self.query("Button.catalog-item")) + self._zone = "list" + self._highlight_index = 0 + self.set_focus(None) + if self._catalog_buttons: + self._apply_highlight() + + def _apply_highlight(self) -> None: + for i, btn in enumerate(self._catalog_buttons): + if i == self._highlight_index: + btn.add_class("catalog-highlighted") + btn.scroll_visible() + else: + btn.remove_class("catalog-highlighted") + + def _clear_highlight(self) -> None: + for btn in self._catalog_buttons: + btn.remove_class("catalog-highlighted") + + def _toggle_highlighted(self) -> None: + if not self._catalog_buttons: + return + btn = self._catalog_buttons[self._highlight_index] + if btn.disabled: + return + model_name = btn.tooltip or "" + if not model_name: + return + if model_name in self._selected: + self._selected.discard(model_name) + btn.label = str(btn.label).replace(" ✓ ", " ○ ") + btn.remove_class("catalog-checked") + else: + self._selected.add(model_name) + btn.label = str(btn.label).replace(" ○ ", " ✓ ") + btn.add_class("catalog-checked") + + def 
action_nav_up(self) -> None: + if self._zone == "buttons": + self._zone = "list" + self.set_focus(None) + self._apply_highlight() + return + if self._catalog_buttons and self._highlight_index > 0: + self._highlight_index -= 1 + self._apply_highlight() + + def action_nav_down(self) -> None: + if self._zone == "list" and self._catalog_buttons: + if self._highlight_index < len(self._catalog_buttons) - 1: + self._highlight_index += 1 + self._apply_highlight() + + def action_page_up(self) -> None: + if self._zone == "buttons": + self._zone = "list" + self.set_focus(None) + self._apply_highlight() + return + if not self._catalog_buttons: + return + scroll = self.query_one("#catalog-scroll", VerticalScroll) + page_size = max(1, scroll.size.height // 3) + self._highlight_index = max(0, self._highlight_index - page_size) + self._apply_highlight() + + def action_page_down(self) -> None: + if not self._catalog_buttons: + return + if self._zone == "buttons": + return + scroll = self.query_one("#catalog-scroll", VerticalScroll) + page_size = max(1, scroll.size.height // 3) + last = len(self._catalog_buttons) - 1 + self._highlight_index = min(last, self._highlight_index + page_size) + self._apply_highlight() + + def action_activate(self) -> None: + if self._zone == "list": + self._toggle_highlighted() + else: + focused = self.focused + if focused and focused.id == "download-selected": + self.dismiss(list(self._selected)) + elif focused and focused.id == "catalog-cancel": + self.dismiss([]) + + def action_next_zone(self) -> None: + if self._zone == "list": + self._zone = "buttons" + self._clear_highlight() + self.query_one("#download-selected", Button).focus() + else: + focused = self.focused + if focused and focused.id == "download-selected": + self.query_one("#catalog-cancel", Button).focus() + else: + self.query_one("#download-selected", Button).focus() + + def action_prev_zone(self) -> None: + if self._zone == "buttons": + self._zone = "list" + self.set_focus(None) + 
self._apply_highlight() + + def action_cancel(self) -> None: + self.dismiss([]) + + def on_button_pressed(self, event: Button.Pressed) -> None: + btn_id = event.button.id or "" + if btn_id == "download-selected": + self.dismiss(list(self._selected)) + elif btn_id == "catalog-cancel": + self.dismiss([]) + class ModelsScreen(Screen): BINDINGS = [("r", "refresh", "Refresh")] @@ -19,13 +240,28 @@ def compose(self) -> ComposeResult: with VerticalScroll(): yield Static("Installed Models", classes="heading") yield Vertical(id="model-list") - yield Static("", id="model-status") - yield Static("Pull Model", classes="heading") + yield Static("", classes="heading") + yield Button( + "Browse Available Models", + id="open-catalog", + variant="primary", + classes="primary", + ) + yield Static("", classes="heading") + yield Static("Pull by Name", classes="heading") yield Input(placeholder="e.g. llama3:8b", id="pull-input") - yield Button("Pull", variant="primary", id="pull-btn", classes="primary") + yield Button("Pull", id="pull-btn") + yield Static("", id="model-status") + with Horizontal(id="pull-row"): + yield ProgressBar(total=100, show_eta=True, id="pull-progress") + yield Button("Cancel", id="cancel-pull", variant="error") yield Footer() def on_mount(self) -> None: + self.query_one("#pull-progress", ProgressBar).display = False + self.query_one("#cancel-pull", Button).display = False + self._pull_queue: list[str] = [] + self._pulling = False self.action_refresh() def action_refresh(self) -> None: @@ -41,27 +277,73 @@ async def _load_models(self) -> None: if not all_models: container.mount(Static(" No models installed", classes="muted")) - return - - for m in all_models: - name = m.get("name", "unknown") - size_bytes = m.get("size", 0) - size_str = f"{size_bytes / (1024**3):.1f} GB" if size_bytes else "—" - loaded = name in running_names - container.mount(ModelItem(name, size_str, loaded)) + else: + for m in all_models: + name = m.get("name", "unknown") + size_bytes = 
m.get("size", 0) + size_str = f"{size_bytes / (1024**3):.1f} GB" if size_bytes else "—" + loaded = name in running_names + container.mount(ModelItem(name, size_str, loaded)) async def on_button_pressed(self, event: Button.Pressed) -> None: - if event.button.id == "pull-btn": - await self._pull_model() + btn = event.button + btn_id = btn.id or "" + if btn_id == "pull-btn": + name = self.query_one("#pull-input", Input).value.strip() + if name: + self._start_pull(name) + elif btn_id == "open-catalog": + installed = {m.get("name", "") for m in await api_client.list_models()} + self.app.push_screen(ModelCatalog(installed), self._on_catalog_result) + elif btn_id == "cancel-pull": + self._cancel_pull() + elif btn.has_class("model-load"): + self._load_to_vram(btn.name or "") + elif btn.has_class("model-unload"): + self._unload_from_vram(btn.name or "") - async def _pull_model(self) -> None: - name_input = self.query_one("#pull-input", Input) - model_name = name_input.value.strip() - if not model_name: + def _cancel_pull(self) -> None: + self._pull_queue.clear() + self.workers.cancel_group(self, "default") + self._pulling = False + status = self.query_one("#model-status", Static) + status.update(" Download cancelled") + self.query_one("#pull-progress", ProgressBar).display = False + self.query_one("#cancel-pull", Button).display = False + self.query_one("#pull-btn", Button).disabled = False + self.query_one("#open-catalog", Button).disabled = False + + def _on_catalog_result(self, selected: list[str]) -> None: + if not selected: + return + self._pull_queue = list(selected) + self._pull_next() + + def _pull_next(self) -> None: + if not self._pull_queue: + self.app.call_later(self._load_models) return + model_name = self._pull_queue.pop(0) + self._start_pull(model_name) + @work(exclusive=True) + async def _start_pull(self, model_name: str) -> None: status = self.query_one("#model-status", Static) - status.update(f"Pulling {model_name}...") + progress = 
self.query_one("#pull-progress", ProgressBar) + cancel_btn = self.query_one("#cancel-pull", Button) + pull_btn = self.query_one("#pull-btn", Button) + catalog_btn = self.query_one("#open-catalog", Button) + + pull_btn.disabled = True + catalog_btn.disabled = True + progress.display = True + cancel_btn.display = True + self._pulling = True + progress.update(total=100, progress=0) + + remaining = len(self._pull_queue) + queue_msg = f" (+{remaining} queued)" if remaining else "" + status.update(f"Pulling {model_name}...{queue_msg}") try: async for line in api_client.pull_model(model_name): @@ -72,13 +354,53 @@ async def _pull_model(self) -> None: completed = data.get("completed", 0) if total: pct = int(completed / total * 100) - status.update(f"{msg} {pct}%") + progress.update(total=100, progress=pct) + size_mb = total / (1024 * 1024) + done_mb = completed / (1024 * 1024) + status.update( + f"{msg} {done_mb:.0f}/{size_mb:.0f} MB ({pct}%){queue_msg}" + ) else: - status.update(msg) + status.update(f"{msg}{queue_msg}") except json.JSONDecodeError: pass status.update(f"✓ {model_name} pulled successfully") - name_input.value = "" - await self._load_models() + self.query_one("#pull-input", Input).value = "" + except asyncio.CancelledError: + status.update(f" Download of {model_name} cancelled") + return except Exception as exc: status.update(f"✗ Pull failed: {exc}") + finally: + self._pulling = False + pull_btn.disabled = False + catalog_btn.disabled = False + progress.display = False + cancel_btn.display = False + + if self._pull_queue: + self._pull_next() + else: + await self._load_models() + + @work() + async def _load_to_vram(self, model_name: str) -> None: + status = self.query_one("#model-status", Static) + status.update(f"Loading {model_name} into VRAM...") + success = await api_client.load_model(model_name) + if success: + status.update(f" \u2713 {model_name} loaded into VRAM") + else: + status.update(f" \u2717 Failed to load {model_name}") + await 
self._load_models() + + @work() + async def _unload_from_vram(self, model_name: str) -> None: + status = self.query_one("#model-status", Static) + status.update(f"Unloading {model_name}...") + success = await api_client.unload_model(model_name) + if success: + status.update(f" \u2713 {model_name} unloaded from VRAM") + else: + status.update(f" \u2717 Failed to unload {model_name}") + await self._load_models() diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py index 61fe215..ddca6d3 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py @@ -2,78 +2,119 @@ import subprocess +from textual import work from textual.app import ComposeResult from textual.containers import Horizontal, Vertical, VerticalScroll from textual.screen import Screen from textual.widgets import Button, Footer, Header, Static -from utils import hardware - +from textual.binding import Binding -class ServiceRow(Horizontal): - def __init__(self, service: str, status: str) -> None: - super().__init__(classes="service-row") - self.service_name = service - self.service_status = status - - def compose(self) -> ComposeResult: - short = self.service_name.replace("neuraldrive-", "") - cls = "ok" if self.service_status == "active" else "error" - yield Static( - f"{'●' if self.service_status == 'active' else '○'} {short}", classes=cls - ) - yield Static("", classes="value") - yield Button("Start", id=f"start-{self.service_name}") - yield Button("Stop", id=f"stop-{self.service_name}") - yield Button("Restart", id=f"restart-{self.service_name}") +from utils import hardware class ServicesScreen(Screen): - BINDINGS = [("r", "refresh", "Refresh")] + BINDINGS = [ + ("r", "refresh", "Refresh"), + Binding("up", "move_up", "Up", show=False), + Binding("down", "move_down", "Down", show=False), + ] def compose(self) -> 
ComposeResult: yield Header() with VerticalScroll(): yield Static("NeuralDrive Services", classes="heading") yield Vertical(id="service-list") - yield Static("", id="svc-status") + yield Static("", id="svc-status") + with Horizontal(id="svc-actions"): + yield Button("Start", id="svc-start", variant="primary") + yield Button("Stop", id="svc-stop", variant="error") + yield Button("Restart", id="svc-restart") yield Footer() def on_mount(self) -> None: - self._load_services() + self._selected_index = 0 + self._services: list[tuple[str, str]] = [] + self.app.call_later(self._load_services) + + def on_screen_resume(self) -> None: + self.app.call_later(self._load_services) - def _load_services(self) -> None: + async def _load_services(self) -> None: container = self.query_one("#service-list", Vertical) - container.remove_children() + await container.remove_children() + self._services = [] for svc in hardware.NEURALDRIVE_SERVICES: status = hardware.get_service_status(svc) - container.mount(ServiceRow(svc, status)) + self._services.append((svc, status)) + + for i, (svc, status) in enumerate(self._services): + short = svc.replace("neuraldrive-", "") + if status == "active": + indicator = "●" + cls = "svc-row svc-active" + else: + indicator = "○" + cls = "svc-row svc-inactive" + if i == self._selected_index: + cls += " svc-selected" + row = Static( + f" {indicator} {short:<20} {status}", classes=cls, id=f"svc-{i}" + ) + await container.mount(row) + + self._update_action_buttons() + + def _update_action_buttons(self) -> None: + if not self._services: + return + _, status = self._services[self._selected_index] + self.query_one("#svc-start", Button).disabled = status == "active" + self.query_one("#svc-stop", Button).disabled = status != "active" + + def action_move_up(self) -> None: + if self._selected_index > 0: + self._selected_index -= 1 + self.app.call_later(self._load_services) + + def action_move_down(self) -> None: + if self._selected_index < len(self._services) - 1: + 
self._selected_index += 1 + self.app.call_later(self._load_services) def on_button_pressed(self, event: Button.Pressed) -> None: btn_id = event.button.id or "" - for action in ("start", "stop", "restart"): - prefix = f"{action}-" - if btn_id.startswith(prefix): - svc = btn_id[len(prefix) :] - self._run_systemctl(action, svc) - return - - def _run_systemctl(self, action: str, service: str) -> None: + if btn_id == "svc-start": + self._run_action("start") + elif btn_id == "svc-stop": + self._run_action("stop") + elif btn_id == "svc-restart": + self._run_action("restart") + + @work(exclusive=True) + async def _run_action(self, action: str) -> None: + if not self._services: + return + svc, _ = self._services[self._selected_index] + short = svc.replace("neuraldrive-", "") status_widget = self.query_one("#svc-status", Static) + status_widget.update(f" {action.title()}ing {short}...") + try: res = subprocess.run( - ["systemctl", action, service], + ["sudo", "systemctl", action, svc], capture_output=True, text=True, timeout=15, ) if res.returncode == 0: - status_widget.update(f"✓ {action} {service}") + status_widget.update(f" ✓ {short} {action}ed") else: - status_widget.update(f"✗ {action} {service}: {res.stderr.strip()}") + status_widget.update(f" ✗ {short}: {res.stderr.strip()}") except subprocess.TimeoutExpired: - status_widget.update(f"✗ {action} {service}: timeout") - self._load_services() + status_widget.update(f" ✗ {short}: timeout") + + self.app.call_later(self._load_services) def action_refresh(self) -> None: - self._load_services() + self.app.call_later(self._load_services) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py index d2eed88..3766333 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py @@ -9,13 +9,19 @@ from textual.screen import Screen from textual.widgets import 
Button, Input, Static +from utils import config + SENTINEL = "/etc/neuraldrive/first-boot-complete" CREDENTIALS_PATH = "/etc/neuraldrive/credentials.conf" API_KEY_PATH = "/etc/neuraldrive/api.key" SUDOERS_PATH = "/etc/sudoers.d/neuraldrive-admin" +PERSISTENCE_MOUNT = "/var/lib/neuraldrive" +PERSISTENCE_CONF_CONTENT = "/var/lib/neuraldrive union\n/etc/neuraldrive union\n/var/log/neuraldrive union\n/home union\n" class FirstBootWizard(Screen): + """Step order: Welcome → Storage → Security → Network → Models → Done""" + BINDINGS = [("escape", "cancel_wizard", "Skip")] def __init__(self) -> None: @@ -25,6 +31,10 @@ def __init__(self) -> None: self._wifi_ssid = "" self._wifi_psk = "" self._generated_api_key = "" + self._boot_device: str | None = None + self._unpartitioned_bytes = 0 + self._has_persistence = False + self._awaiting_confirm = False def compose(self) -> ComposeResult: with Center(id="wizard-container"): @@ -37,6 +47,12 @@ def compose(self) -> ComposeResult: yield Button("Next →", id="wiz-next", classes="primary") yield Button("Skip", id="wiz-skip") + def on_input_submitted(self, event: Input.Submitted) -> None: + if self._awaiting_confirm: + self._handle_storage_confirm() + else: + self.focus_next() + def on_mount(self) -> None: self._show_step() @@ -55,18 +71,22 @@ def _show_step(self) -> None: error.update("") inp.value = "" inp2.value = "" + self._awaiting_confirm = False if self._step == 0: title.update("Welcome to NeuralDrive") body.update( "This wizard will configure your system.\n\n" - "Steps: Security → WiFi → Network → Storage → Models → Done" + "Steps: Storage → Security → Network → Models → Done" ) next_btn.label = "Begin →" elif self._step == 1: - title.update("Step 1: Security") - body.update("Set an admin password for the 'neuraldrive' user.") + self._show_storage_step(title, body, inp, next_btn, skip_btn) + + elif self._step == 2: + title.update("Step 2: Security") + body.update("Set an admin password for the 'neuraldrive-admin' user.") 
inp.display = True inp.placeholder = "New password" inp.password = True @@ -74,8 +94,8 @@ def _show_step(self) -> None: inp2.placeholder = "Confirm password" next_btn.label = "Set Password →" - elif self._step == 2: - title.update("Step 2: WiFi (Optional)") + elif self._step == 3: + title.update("Step 3: Network (Optional)") body.update("Enter WiFi credentials, or skip for wired-only.") inp.display = True inp.placeholder = "SSID" @@ -85,34 +105,8 @@ def _show_step(self) -> None: skip_btn.display = True next_btn.label = "Connect →" - elif self._step == 3: - title.update("Step 3: Network") - from utils import hardware - - ip = hardware.get_ip_address() - hostname = hardware.get_hostname() - body.update( - f"Current configuration:\n" - f" Hostname: {hostname}\n" - f" IP: {ip}\n\n" - "DHCP is active. Static IP can be configured later." - ) - next_btn.label = "Next →" - elif self._step == 4: - title.update("Step 4: Storage") - from utils import hardware - - disk = hardware.get_disk_info() - body.update( - f"Storage: {disk['free_gb']} GB free of {disk['total_gb']} GB\n" - f"Path: {disk['path']}\n\n" - "Models will be stored at /var/lib/neuraldrive/models." 
- ) - next_btn.label = "Next →" - - elif self._step == 5: - title.update("Step 5: Models") + title.update("Step 4: Models") body.update( "Models can be pulled after setup from:\n" " • This TUI (press M for Models)\n" @@ -121,7 +115,7 @@ def _show_step(self) -> None: ) next_btn.label = "Next →" - elif self._step == 6: + elif self._step == 5: self._generated_api_key = secrets.token_urlsafe(32) title.update("Setup Complete") body.update( @@ -132,28 +126,323 @@ def _show_step(self) -> None: ) next_btn.label = "Finish ✓" + if inp.display: + inp.focus() + else: + next_btn.focus() + + def _show_storage_step( + self, + title: Static, + body: Static, + inp: Input, + next_btn: Button, + skip_btn: Button, + ) -> None: + title.update("Step 1: Storage & Persistence") + + from utils import hardware + + self._boot_device = hardware.get_boot_device() + if not self._boot_device: + body.update( + "Could not detect boot device.\n\n" + "Persistence partition cannot be created automatically.\n" + "Data will be stored on the ephemeral overlay (lost on reboot).\n\n" + "You can create a persistence partition manually later\n" + "using: sudo /usr/lib/neuraldrive/prepare-usb.sh /dev/sdX" + ) + next_btn.label = "Next →" + return + + partitions = hardware.get_disk_partitions(self._boot_device) + self._has_persistence = any(p.get("label") == "persistence" for p in partitions) + total_bytes = hardware.get_device_size(self._boot_device) + total_gb = total_bytes / (1024**3) if total_bytes else 0 + + if self._has_persistence: + pers = next(p for p in partitions if p.get("label") == "persistence") + pers_gb = pers["size_bytes"] / (1024**3) + body.update( + f"Boot device: {self._boot_device} ({total_gb:.0f} GB)\n\n" + f"✓ Persistence partition found: {pers_gb:.1f} GB\n" + f" Models, config, and logs will survive reboots.\n\n" + "No action needed." 
+ ) + next_btn.label = "Next →" + return + + self._unpartitioned_bytes = hardware.get_unpartitioned_space(self._boot_device) + free_gb = self._unpartitioned_bytes / (1024**3) + + if self._unpartitioned_bytes < 1024 * 1024 * 1024: + body.update( + f"Boot device: {self._boot_device} ({total_gb:.0f} GB)\n\n" + "No persistence partition found.\n" + f"Only {free_gb:.1f} GB unpartitioned space available\n" + "(minimum 1 GB required).\n\n" + "Data will be stored on the ephemeral overlay (lost on reboot)." + ) + next_btn.label = "Next →" + return + + body.update( + f"Boot device: {self._boot_device} ({total_gb:.0f} GB)\n\n" + "No persistence partition found.\n" + f"Available space: {free_gb:.1f} GB\n\n" + "A persistence partition stores your models, config,\n" + "and logs so they survive reboots.\n\n" + "Type 'yes' to create it, or skip to use\n" + "ephemeral overlay storage." + ) + inp.display = True + inp.placeholder = "Type 'yes' to create persistence partition" + inp.password = False + self._awaiting_confirm = True + skip_btn.display = True + next_btn.label = "Create Partition →" + def on_button_pressed(self, event: Button.Pressed) -> None: if event.button.id == "wiz-skip": + self._awaiting_confirm = False self._step += 1 self._show_step() return if event.button.id == "wiz-next": - if self._step == 1: + if self._step == 1 and self._awaiting_confirm: + self._handle_storage_confirm() + return + if self._step == 2: if not self._validate_password(): return - elif self._step == 2: + elif self._step == 3: self._configure_wifi() - elif self._step == 6: + elif self._step == 5: self._finalize() return self._step += 1 - if self._step > 6: + if self._step > 5: self._finalize() else: self._show_step() + def _handle_storage_confirm(self) -> None: + inp = self.query_one("#wiz-input", Input) + error = self.query_one("#wiz-error", Static) + + if inp.value.strip().lower() != "yes": + error.update("Type 'yes' to confirm, or press Skip.") + return + + self._awaiting_confirm = False + 
body = self.query_one("#wiz-body", Static) + body.update("Creating persistence partition...\nThis may take a moment.") + self.query_one("#wiz-next", Button).disabled = True + self.query_one("#wiz-skip", Button).display = False + inp.display = False + + err = self._create_persistence_partition() + self.query_one("#wiz-next", Button).disabled = False + + if err: + error.update(f"Partition creation failed: {err}") + body.update( + "Partition creation failed.\n" + "Data will use the ephemeral overlay.\n" + "You can retry manually later." + ) + self.query_one("#wiz-next", Button).label = "Next →" + else: + body.update( + "✓ Persistence partition created and mounted.\n\n" + "Models, config, and logs will now survive reboots." + ) + self.query_one("#wiz-next", Button).label = "Next →" + + def _create_persistence_partition(self) -> str | None: + if not self._boot_device: + return "No boot device detected" + + try: + res = subprocess.run( + [ + "sudo", + "parted", + "-m", + self._boot_device, + "unit", + "B", + "print", + "free", + ], + capture_output=True, + text=True, + timeout=10, + ) + if res.returncode != 0: + return f"parted print failed: {res.stderr.strip()}" + + free_start = None + free_end = None + for line in res.stdout.strip().splitlines(): + if ":free;" in line: + parts = line.split(":") + if len(parts) >= 3: + start_b = int(parts[1].rstrip("B")) + end_b = int(parts[2].rstrip("B")) + size_b = end_b - start_b + if size_b > 1024 * 1024 * 1024: + free_start = parts[1] + free_end = parts[2] + + if not free_start or not free_end: + return "No free space block large enough found" + + proc = subprocess.run( + [ + "sudo", + "parted", + self._boot_device, + "--script", + "--", + "mkpart", + "primary", + "ext4", + free_start, + free_end, + ], + capture_output=True, + text=True, + timeout=30, + ) + if proc.returncode != 0: + return proc.stderr.strip() + + subprocess.run( + ["sudo", "partprobe", self._boot_device], + capture_output=True, + timeout=10, + ) + + import time + 
+ time.sleep(2) + + res = subprocess.run( + ["lsblk", "-ln", "-o", "NAME", self._boot_device], + capture_output=True, + text=True, + timeout=5, + ) + if res.returncode != 0: + return "Could not determine new partition device" + parts = res.stdout.strip().splitlines() + if not parts: + return "No partitions found after creation" + new_part = f"/dev/{parts[-1].strip()}" + + proc = subprocess.run( + [ + "sudo", + "mkfs.ext4", + "-L", + "persistence", + "-m", + "1", + new_part, + ], + capture_output=True, + text=True, + timeout=120, + ) + if proc.returncode != 0: + return f"mkfs.ext4 failed: {proc.stderr.strip()}" + + subprocess.run( + ["sudo", "mkdir", "-p", "/mnt/persistence"], + capture_output=True, + timeout=5, + ) + proc = subprocess.run( + ["sudo", "mount", new_part, "/mnt/persistence"], + capture_output=True, + text=True, + timeout=10, + ) + if proc.returncode != 0: + return f"Mount failed: {proc.stderr.strip()}" + + proc = subprocess.run( + ["sudo", "tee", "/mnt/persistence/persistence.conf"], + input=PERSISTENCE_CONF_CONTENT.encode(), + capture_output=True, + timeout=5, + ) + if proc.returncode != 0: + return "Failed to write persistence.conf" + + for d in [ + "/mnt/persistence/var/lib/neuraldrive/ollama/.ollama", + "/mnt/persistence/var/lib/neuraldrive/models", + "/mnt/persistence/var/lib/neuraldrive/config", + "/mnt/persistence/var/log/neuraldrive", + "/mnt/persistence/etc/neuraldrive", + "/mnt/persistence/home", + ]: + subprocess.run( + ["sudo", "mkdir", "-p", d], + capture_output=True, + timeout=5, + ) + + subprocess.run( + [ + "sudo", + "chown", + "-R", + "neuraldrive-ollama:neuraldrive-ollama", + "/mnt/persistence/var/lib/neuraldrive/ollama", + ], + capture_output=True, + timeout=5, + ) + + subprocess.run( + ["sudo", "umount", "/mnt/persistence"], + capture_output=True, + timeout=10, + ) + + subprocess.run( + ["sudo", "mkdir", "-p", PERSISTENCE_MOUNT], + capture_output=True, + timeout=5, + ) + proc = subprocess.run( + ["sudo", "mount", new_part, 
PERSISTENCE_MOUNT], + capture_output=True, + text=True, + timeout=10, + ) + if proc.returncode != 0: + return f"Mount at {PERSISTENCE_MOUNT} failed: {proc.stderr.strip()}" + + subprocess.run( + ["sudo", "systemctl", "restart", "neuraldrive-ollama"], + capture_output=True, + timeout=30, + ) + + self._has_persistence = True + return None + + except subprocess.TimeoutExpired: + return "Operation timed out" + except FileNotFoundError as e: + return f"Required tool not found: {e}" + def _validate_password(self) -> bool: error = self.query_one("#wiz-error", Static) pw = self.query_one("#wiz-input", Input).value @@ -185,44 +474,101 @@ def _configure_wifi(self) -> None: except (subprocess.TimeoutExpired, FileNotFoundError): pass - def _finalize(self) -> None: + def _sudo_write(self, path: str, content: str, mode: str = "0644") -> str | None: try: - if self._admin_password: - proc = subprocess.Popen( - ["chpasswd"], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - proc.communicate( - input=f"neuraldrive:{self._admin_password}\n".encode(), + subprocess.run( + ["sudo", "mkdir", "-p", os.path.dirname(path)], + capture_output=True, + timeout=5, + ) + proc = subprocess.run( + ["sudo", "tee", path], + input=content.encode(), + capture_output=True, + timeout=5, + ) + if proc.returncode != 0: + return f"Failed to write {path}: {proc.stderr.decode().strip()}" + subprocess.run( + ["sudo", "chmod", mode, path], + capture_output=True, + timeout=5, + ) + return None + except (subprocess.TimeoutExpired, FileNotFoundError) as e: + return f"Failed to write {path}: {e}" + + def _finalize(self) -> None: + errors: list[str] = [] + + if self._admin_password: + try: + proc = subprocess.run( + ["sudo", "chpasswd"], + input=f"neuraldrive-admin:{self._admin_password}\n".encode(), + capture_output=True, timeout=10, ) - - if os.path.exists(SUDOERS_PATH): - with open(SUDOERS_PATH, "r") as f: - content = f.read() - content = content.replace("NOPASSWD:", "") - with 
open(SUDOERS_PATH, "w") as f: - f.write(content) - - if self._generated_api_key: - os.makedirs(os.path.dirname(API_KEY_PATH), exist_ok=True) - with open(API_KEY_PATH, "w") as f: - f.write(self._generated_api_key + "\n") - os.chmod(API_KEY_PATH, 0o600) - - os.makedirs(os.path.dirname(CREDENTIALS_PATH), exist_ok=True) - with open(CREDENTIALS_PATH, "w") as f: - f.write(f"api_key={self._generated_api_key}\n") - os.chmod(CREDENTIALS_PATH, 0o600) - - os.makedirs(os.path.dirname(SENTINEL), exist_ok=True) - with open(SENTINEL, "w") as f: - f.write("") - - except Exception: - pass + if proc.returncode != 0: + errors.append( + f"Password change failed: {proc.stderr.decode().strip()}" + ) + except (subprocess.TimeoutExpired, FileNotFoundError) as e: + errors.append(f"Password change failed: {e}") + + if self._generated_api_key: + err = self._sudo_write(API_KEY_PATH, self._generated_api_key + "\n", "0600") + if err: + errors.append(err) + + err = self._sudo_write( + CREDENTIALS_PATH, + f"api_key={self._generated_api_key}\n", + "0600", + ) + if err: + errors.append(err) + + cfg_data = config.load() + cfg_data["wizard_complete"] = True + if self._admin_password: + cfg_data["security"] = {"password_set": True} + if self._wifi_ssid: + cfg_data["network"] = {"wifi_ssid": self._wifi_ssid} + if self._generated_api_key: + cfg_data["api"] = {"key_generated": True} + if self._has_persistence: + cfg_data["storage"] = {"persistence": True} + cfg_err = config.save(cfg_data) + if cfg_err: + errors.append(cfg_err) + + err = self._sudo_write(SENTINEL, "") + if err: + errors.append(err) + + # Remove NOPASSWD LAST — after all other sudo operations are done, + # since removing it makes subsequent sudo calls require a TTY password prompt + if self._admin_password: + try: + result = subprocess.run( + ["sudo", "cat", SUDOERS_PATH], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0 and "NOPASSWD:" in result.stdout: + new_content = result.stdout.replace("NOPASSWD:", "") 
+ err = self._sudo_write(SUDOERS_PATH, new_content, "0440") + if err: + errors.append(err) + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + + if errors: + error_widget = self.query_one("#wiz-error", Static) + error_widget.update("\n".join(errors)) + return self.app.pop_screen() diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss index 258ccbf..0737277 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss @@ -104,17 +104,70 @@ Static.muted { width: 10; } -.service-row { - layout: horizontal; +Button.model-load { + background: #1F1F1F; + color: #10B981; + border: solid #10B981; + min-width: 10; + width: 10; + height: 3; +} + +Button.model-load:hover { + background: #0A1F0A; +} + +Button.model-unload { + background: #1F1F1F; + color: #F97316; + border: solid #F97316; + min-width: 10; + width: 10; + height: 3; +} + +Button.model-unload:hover { + background: #1F0A0A; +} + +.svc-row { height: 3; padding: 0 2; border: solid #2E2E2E; - margin: 0 0 1 0; + margin: 0 0 0 0; background: #141414; + content-align: left middle; +} + +.svc-active { + color: #10B981; } -.service-row:hover { +.svc-inactive { + color: #EF4444; +} + +.svc-selected { background: #1F1F1F; + border: solid #F59E0B; +} + +#svc-status { + height: 1; + padding: 0 2; + dock: bottom; + offset: 0 -4; +} + +#svc-actions { + height: auto; + padding: 0 1; + dock: bottom; + align: center middle; +} + +#svc-actions Button { + margin: 0 1; } Button { @@ -165,6 +218,14 @@ Select { color: #FFFFFF; } +#chat-model-select { + width: 100%; + margin: 0 2; + border: solid #F59E0B; + background: #141414; + height: 3; +} + #wizard-container { align: center middle; padding: 2 4; @@ -204,3 +265,116 @@ Select { .badge-offline { color: #EF4444; } + +#pull-progress { + margin: 1 0; + width: 1fr; +} + +#pull-row { + height: auto; + layout: horizontal; +} 
+ +#cancel-pull { + width: 12; + margin: 1 0 1 1; +} + +ProgressBar Bar { + color: #F59E0B; + background: #2E2E2E; +} + +ProgressBar PercentageStatus { + color: #A1A1AA; +} + +#chat-notice { + padding: 0 2; + height: auto; +} + +Static.tier-heading { + color: #F59E0B; + text-style: bold; +} + +Button.model-pick { + background: #141414; + color: #FFFFFF; + border: solid #2E2E2E; + width: 100%; + height: 3; + content-align: left middle; + text-align: left; +} + +Button.model-pick:hover { + background: #1F1F1F; + border: solid #F59E0B; +} + +Button.model-pick:disabled { + background: #0A0A0A; + color: #52525B; + border: solid #1A1A1A; +} + +#catalog-scroll { + height: 1fr; + border: solid #2E2E2E; + scrollbar-background: #141414; + scrollbar-color: #2E2E2E; +} + +Button.catalog-item { + background: #141414; + color: #A1A1AA; + border: solid #2E2E2E; + width: 100%; + height: 3; + content-align: left middle; + text-align: left; +} + +Button.catalog-item:hover { + background: #1F1F1F; + border: solid #F59E0B; +} + +Button.catalog-item:focus { + border: solid #F59E0B; +} + +Button.catalog-highlighted { + background: #1F1F1F; + border: solid #F59E0B; +} + +Button.catalog-highlighted.catalog-installed { + background: #0A0A0A; + border: solid #52525B; +} + +Button.catalog-checked { + color: #10B981; + background: #0A1F0A; + border: solid #10B981; +} + +Button.catalog-installed { + color: #52525B; + background: #0A0A0A; + border: solid #1A1A1A; +} + +#catalog-buttons { + height: auto; + padding: 1 0; + align: center middle; +} + +#catalog-buttons Button { + margin: 0 2; +} diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py index 11a4fd6..e862a87 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py @@ -54,6 +54,30 @@ async def delete_model(name: str) -> bool: return False +async def 
load_model(name: str, keep_alive: str = "5m") -> bool: + try: + async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, read=300.0)) as client: + resp = await client.post( + f"{OLLAMA_URL}/api/generate", + json={"model": name, "prompt": "", "keep_alive": keep_alive}, + ) + return resp.status_code == 200 + except (httpx.ConnectError, httpx.TimeoutException, httpx.HTTPError): + return False + + +async def unload_model(name: str) -> bool: + try: + async with httpx.AsyncClient(timeout=TIMEOUT) as client: + resp = await client.post( + f"{OLLAMA_URL}/api/generate", + json={"model": name, "prompt": "", "keep_alive": 0}, + ) + return resp.status_code == 200 + except (httpx.ConnectError, httpx.TimeoutException, httpx.HTTPError): + return False + + async def chat_stream(model: str, messages: list[dict]): payload = {"model": model, "messages": messages, "stream": True} async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, read=600.0)) as client: diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py new file mode 100644 index 0000000..4e7cd8e --- /dev/null +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +import os +import subprocess +from typing import Any + +import yaml + +PERSISTENT_CONFIG = "/var/lib/neuraldrive/config/config.yaml" +OVERLAY_CONFIG = "/etc/neuraldrive/config.yaml" + + +def _config_path() -> str: + persistent_dir = os.path.dirname(PERSISTENT_CONFIG) + if os.path.isdir(persistent_dir) and os.access(persistent_dir, os.W_OK): + return PERSISTENT_CONFIG + return OVERLAY_CONFIG + + +def load() -> dict[str, Any]: + for path in (PERSISTENT_CONFIG, OVERLAY_CONFIG): + if os.path.exists(path): + try: + with open(path) as f: + data = yaml.safe_load(f) + if isinstance(data, dict): + return data + except (OSError, yaml.YAMLError): + continue + return {} + + +def save(data: dict[str, Any]) -> str | None: + 
path = _config_path() + content = yaml.dump(data, default_flow_style=False, sort_keys=False) + try: + subprocess.run( + ["sudo", "mkdir", "-p", os.path.dirname(path)], + capture_output=True, + timeout=5, + ) + proc = subprocess.run( + ["sudo", "tee", path], + input=content.encode(), + capture_output=True, + timeout=5, + ) + if proc.returncode != 0: + return f"Failed to write {path}: {proc.stderr.decode().strip()}" + subprocess.run( + ["sudo", "chmod", "0644", path], + capture_output=True, + timeout=5, + ) + return None + except (subprocess.TimeoutExpired, FileNotFoundError) as e: + return f"Failed to write {path}: {e}" + + +def get(key: str, default: Any = None) -> Any: + data = load() + keys = key.split(".") + for k in keys: + if isinstance(data, dict): + data = data.get(k, default) + else: + return default + return data + + +def set_key(key: str, value: Any) -> str | None: + data = load() + keys = key.split(".") + target = data + for k in keys[:-1]: + if k not in target or not isinstance(target[k], dict): + target[k] = {} + target = target[k] + target[keys[-1]] = value + return save(data) + + +def wizard_complete() -> bool: + return get("wizard_complete", False) is True diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py index e6949c4..e1f3918 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py @@ -147,3 +147,95 @@ def get_service_status(service: str) -> str: "neuraldrive-gpu-monitor", "neuraldrive-system-api", ] + + +def get_boot_device() -> str | None: + try: + with open("/proc/cmdline") as f: + cmdline = f.read() + for part in cmdline.split(): + if part.startswith("boot=live") or part.startswith("root="): + pass + if part.startswith("live-media="): + return part.split("=", 1)[1] + res = subprocess.run( + ["findmnt", "-n", "-o", "SOURCE", "/run/live/medium"], + 
capture_output=True, + text=True, + timeout=5, + ) + if res.returncode == 0 and res.stdout.strip(): + part_dev = res.stdout.strip() + import re + + match = re.match(r"(/dev/[a-z]+)", part_dev) + if match: + return match.group(1) + except (OSError, subprocess.TimeoutExpired, FileNotFoundError): + pass + return None + + +def get_disk_partitions(device: str) -> list[dict]: + try: + res = subprocess.run( + ["lsblk", "-J", "-b", "-o", "NAME,SIZE,FSTYPE,LABEL,MOUNTPOINT", device], + capture_output=True, + text=True, + timeout=5, + ) + if res.returncode != 0: + return [] + import json + + data = json.loads(res.stdout) + partitions = [] + for bd in data.get("blockdevices", []): + for child in bd.get("children", []): + partitions.append( + { + "name": child.get("name", ""), + "size_bytes": int(child.get("size", 0)), + "fstype": child.get("fstype", ""), + "label": child.get("label", ""), + "mountpoint": child.get("mountpoint", ""), + } + ) + if not bd.get("children"): + partitions.append( + { + "name": bd.get("name", ""), + "size_bytes": int(bd.get("size", 0)), + "fstype": bd.get("fstype", ""), + "label": bd.get("label", ""), + "mountpoint": bd.get("mountpoint", ""), + } + ) + return partitions + except (subprocess.TimeoutExpired, FileNotFoundError, ValueError): + return [] + + +def get_device_size(device: str) -> int: + try: + res = subprocess.run( + ["lsblk", "-b", "-d", "-n", "-o", "SIZE", device], + capture_output=True, + text=True, + timeout=5, + ) + if res.returncode == 0: + return int(res.stdout.strip()) + except (subprocess.TimeoutExpired, FileNotFoundError, ValueError): + pass + return 0 + + +def get_unpartitioned_space(device: str) -> int: + total = get_device_size(device) + if not total: + return 0 + parts = get_disk_partitions(device) + used = sum(p["size_bytes"] for p in parts) + free = total - used + return max(0, free) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py 
b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py index 1df4451..fb6e2d6 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py @@ -2,19 +2,28 @@ from textual.app import ComposeResult from textual.containers import Horizontal -from textual.widgets import Static +from textual.widgets import Button, Static class ModelItem(Horizontal): def __init__(self, name: str, size: str, loaded: bool = False) -> None: - super().__init__(classes="model-item") - self._name = name - self._size = size + super().__init__(name=name, classes="model-item") + self._model_name = name + self._model_size = size self._loaded = loaded def compose(self) -> ComposeResult: - yield Static(self._name, classes="model-name") - yield Static(self._size, classes="model-size") - status_cls = "model-status-loaded" if self._loaded else "model-status-cached" - status_txt = "● loaded" if self._loaded else "○ cached" - yield Static(status_txt, classes=status_cls) + yield Static(self._model_name, classes="model-name") + yield Static(self._model_size, classes="model-size") + if self._loaded: + yield Static("● VRAM", classes="model-status-loaded") + else: + yield Static("○ ready", classes="model-status-cached") + load_btn = Button("Load", name=self._model_name, classes="model-load") + unload_btn = Button("Unload", name=self._model_name, classes="model-unload") + if self._loaded: + load_btn.disabled = True + else: + unload_btn.disabled = True + yield load_btn + yield unload_btn From 334ef93860dcda00a4a6db0eeb289c8792c94f09 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Thu, 23 Apr 2026 22:42:41 -0400 Subject: [PATCH 02/32] Show model metadata and fix button visibility in model list Display parameter count, quantization level, disk size, and VRAM usage for each installed model. 
VRAM is cached to persistent config on first load so it remains visible after unloading. Fix model-item height (3->5) so Load/Unload buttons render inside the bordered container instead of being clipped. Show both buttons per model with the irrelevant one disabled. Add disabled button styles. --- .../usr/lib/neuraldrive/tui/screens/models.py | 35 +++++++++++++++--- .../usr/lib/neuraldrive/tui/styles.tcss | 36 +++++++++++++++++-- .../lib/neuraldrive/tui/widgets/model_item.py | 18 ++++++++-- 3 files changed, 81 insertions(+), 8 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index 195bef3..4e8951b 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -11,7 +11,7 @@ from textual.binding import Binding -from utils import api_client +from utils import api_client, config from widgets.model_item import ModelItem CURATED_MODELS = [ @@ -270,7 +270,19 @@ def action_refresh(self) -> None: async def _load_models(self) -> None: all_models = await api_client.list_models() running = await api_client.list_running_models() - running_names = {m.get("name", "") for m in running} + running_map = {m.get("name", ""): m for m in running} + + vram_cache = config.get("vram_cache", {}) + if not isinstance(vram_cache, dict): + vram_cache = {} + cache_changed = False + for name, info in running_map.items(): + vram_bytes = info.get("size_vram", 0) + if vram_bytes and vram_cache.get(name) != vram_bytes: + vram_cache[name] = vram_bytes + cache_changed = True + if cache_changed: + config.set_key("vram_cache", vram_cache) container = self.query_one("#model-list", Vertical) container.remove_children() @@ -282,8 +294,23 @@ async def _load_models(self) -> None: name = m.get("name", "unknown") size_bytes = m.get("size", 0) size_str = f"{size_bytes / (1024**3):.1f} GB" if size_bytes else "—" - 
loaded = name in running_names - container.mount(ModelItem(name, size_str, loaded)) + details = m.get("details", {}) + params = details.get("parameter_size", "") + quant = details.get("quantization_level", "") + loaded = name in running_map + + if name in running_map: + vb = running_map[name].get("size_vram", 0) + vram_str = f"{vb / (1024**3):.1f} GB" if vb else "—" + elif name in vram_cache: + vb = vram_cache[name] + vram_str = f"~{vb / (1024**3):.1f} GB" if vb else "—" + else: + vram_str = "—" + + container.mount( + ModelItem(name, size_str, params, quant, vram_str, loaded) + ) async def on_button_pressed(self, event: Button.Pressed) -> None: btn = event.button diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss index 0737277..c9d6e41 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss @@ -72,7 +72,7 @@ Static.muted { .model-item { layout: horizontal; - height: 3; + height: 5; padding: 0 2; border: solid #2E2E2E; margin: 0 0 1 0; @@ -83,14 +83,34 @@ Static.muted { background: #1F1F1F; } +.model-item Static { + height: 100%; + content-align: left middle; +} + .model-item Static.model-name { color: #FFFFFF; text-style: bold; width: 1fr; } -.model-item Static.model-size { +.model-item Static.model-params { + color: #A1A1AA; + width: 8; +} + +.model-item Static.model-quant { + color: #71717A; + width: 10; +} + +.model-item Static.model-disk { color: #A1A1AA; + width: 10; +} + +.model-item Static.model-vram { + color: #F59E0B; width: 12; } @@ -130,6 +150,18 @@ Button.model-unload:hover { background: #1F0A0A; } +Button.model-load:disabled { + background: #0A0A0A; + color: #52525B; + border: solid #1A1A1A; +} + +Button.model-unload:disabled { + background: #0A0A0A; + color: #52525B; + border: solid #1A1A1A; +} + .svc-row { height: 3; padding: 0 2; diff --git 
a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py index fb6e2d6..c5eb2f4 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py @@ -6,15 +6,29 @@ class ModelItem(Horizontal): - def __init__(self, name: str, size: str, loaded: bool = False) -> None: + def __init__( + self, + name: str, + size: str, + params: str = "", + quant: str = "", + vram_str: str = "", + loaded: bool = False, + ) -> None: super().__init__(name=name, classes="model-item") self._model_name = name self._model_size = size + self._params = params + self._quant = quant + self._vram_str = vram_str self._loaded = loaded def compose(self) -> ComposeResult: yield Static(self._model_name, classes="model-name") - yield Static(self._model_size, classes="model-size") + yield Static(self._params, classes="model-params") + yield Static(self._quant, classes="model-quant") + yield Static(self._model_size, classes="model-disk") + yield Static(self._vram_str, classes="model-vram") if self._loaded: yield Static("● VRAM", classes="model-status-loaded") else: From 927df1ab80dd18c83456967e12c958929fa541f5 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Thu, 23 Apr 2026 22:53:18 -0400 Subject: [PATCH 03/32] Save API key to persistent disk alongside overlay Write api.key and credentials.conf to both /etc/neuraldrive/ (overlay) and /var/lib/neuraldrive/config/ (persistent disk) when available. Update wizard completion text to show where the key is stored instead of telling the user to save it manually. 
--- .../usr/lib/neuraldrive/tui/screens/wizard.py | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py index 3766333..b2176b4 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py @@ -14,6 +14,8 @@ SENTINEL = "/etc/neuraldrive/first-boot-complete" CREDENTIALS_PATH = "/etc/neuraldrive/credentials.conf" API_KEY_PATH = "/etc/neuraldrive/api.key" +PERSISTENT_CREDENTIALS_PATH = "/var/lib/neuraldrive/config/credentials.conf" +PERSISTENT_API_KEY_PATH = "/var/lib/neuraldrive/config/api.key" SUDOERS_PATH = "/etc/sudoers.d/neuraldrive-admin" PERSISTENCE_MOUNT = "/var/lib/neuraldrive" PERSISTENCE_CONF_CONTENT = "/var/lib/neuraldrive union\n/etc/neuraldrive union\n/var/log/neuraldrive union\n/home union\n" @@ -121,7 +123,8 @@ def _show_step(self) -> None: body.update( "NeuralDrive is ready.\n\n" f"API Key: {self._generated_api_key}\n\n" - "Save this key — it is required for API access.\n" + "This key is stored at /etc/neuraldrive/api.key\n" + "and on persistent storage when available.\n" "Press Finish to start using NeuralDrive." 
) next_btn.label = "Finish ✓" @@ -517,18 +520,27 @@ def _finalize(self) -> None: errors.append(f"Password change failed: {e}") if self._generated_api_key: - err = self._sudo_write(API_KEY_PATH, self._generated_api_key + "\n", "0600") + key_content = self._generated_api_key + "\n" + cred_content = f"api_key={self._generated_api_key}\n" + + err = self._sudo_write(API_KEY_PATH, key_content, "0600") if err: errors.append(err) - - err = self._sudo_write( - CREDENTIALS_PATH, - f"api_key={self._generated_api_key}\n", - "0600", - ) + err = self._sudo_write(CREDENTIALS_PATH, cred_content, "0600") if err: errors.append(err) + persist_dir = os.path.dirname(PERSISTENT_API_KEY_PATH) + if os.path.isdir(persist_dir): + err = self._sudo_write(PERSISTENT_API_KEY_PATH, key_content, "0600") + if err: + errors.append(err) + err = self._sudo_write( + PERSISTENT_CREDENTIALS_PATH, cred_content, "0600" + ) + if err: + errors.append(err) + cfg_data = config.load() cfg_data["wizard_complete"] = True if self._admin_password: From a1e82c4de34a657e9d9b4bb66ec24c7b703d3646 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Thu, 23 Apr 2026 23:16:58 -0400 Subject: [PATCH 04/32] Add live clock to dashboard top-right corner Updates every 2 seconds alongside the system stats refresh. Shows HH:MM:SS so the user can tell at a glance the dashboard is live. 
--- .../usr/lib/neuraldrive/tui/screens/dashboard.py | 8 +++++++- .../usr/lib/neuraldrive/tui/styles.tcss | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py index 0f7c32e..f33e49c 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py @@ -1,5 +1,7 @@ from __future__ import annotations +from datetime import datetime + from textual.app import ComposeResult from textual.containers import Horizontal, Vertical, VerticalScroll from textual.screen import Screen @@ -15,7 +17,9 @@ class DashboardScreen(Screen): def compose(self) -> ComposeResult: yield Header() with VerticalScroll(): - yield Static("", id="dash-hostname") + with Horizontal(id="dash-topbar"): + yield Static("", id="dash-hostname") + yield Static("", id="dash-clock") with Horizontal(id="stats-panel"): yield StatsBox("CPU", [("Usage", "…")], id="box-cpu") yield StatsBox("Memory", [("Used", "…"), ("Total", "…")], id="box-mem") @@ -44,6 +48,8 @@ def _refresh_system(self) -> None: self.query_one("#dash-hostname", Static).update( f" {hostname} • {ip} • up {uptime}" ) + now = datetime.now().strftime("%H:%M:%S") + self.query_one("#dash-clock", Static).update(now) cpu = hardware.get_cpu_percent() self.query_one("#box-cpu", StatsBox).update_row("Usage", f"{cpu:.0f}%") diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss index c9d6e41..141255d 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss @@ -56,6 +56,22 @@ Static.muted { padding: 1; } +#dash-topbar { + height: 1; + layout: horizontal; +} + +#dash-hostname { + width: 1fr; +} + +#dash-clock { + width: auto; + color: #A1A1AA; + padding: 
0 2; + text-align: right; +} + .stats-box { border: solid #2E2E2E; padding: 1 2; From bdfd6946a1eaed83e6017ea15e6aa78f02d3a98f Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Thu, 23 Apr 2026 23:50:37 -0400 Subject: [PATCH 05/32] Fix chat screen layout and text wrapping - Compact model selector into horizontal row with inline label - Remove clipping on Select widget (border removed, height auto) - Enable text wrapping in chat log (wrap=True on RichLog) - Remove dock:bottom on input row to prevent footer collision - Center Send button label vertically --- .../usr/lib/neuraldrive/tui/screens/chat.py | 7 ++--- .../usr/lib/neuraldrive/tui/styles.tcss | 27 ++++++++++++++----- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py index 6f4c88c..9699c64 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py @@ -20,10 +20,11 @@ def __init__(self) -> None: def compose(self) -> ComposeResult: yield Header() - yield Static(" Model", classes="heading") - yield Select([], id="chat-model-select", prompt="Choose a model…") + with Horizontal(id="chat-model-row"): + yield Static(" Model ", id="chat-model-label") + yield Select([], id="chat-model-select", prompt="Choose a model…") yield Static("", id="chat-notice") - yield RichLog(highlight=True, markup=False, id="chat-log") + yield RichLog(highlight=True, markup=False, wrap=True, id="chat-log") with Horizontal(id="chat-input-row"): yield Input(placeholder="Type a message…", id="chat-input") yield Button("Send", id="chat-send", classes="primary") diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss index 141255d..49c00d5 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss 
+++ b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss @@ -266,12 +266,26 @@ Select { color: #FFFFFF; } -#chat-model-select { - width: 100%; - margin: 0 2; - border: solid #F59E0B; - background: #141414; +#chat-model-row { + layout: horizontal; + height: auto; + padding: 0 1; +} + +#chat-model-label { + color: #F59E0B; + text-style: bold; + width: auto; height: 3; + content-align: left middle; + padding: 0 1; +} + +#chat-model-select { + width: 1fr; + background: #1F1F1F; + color: #FFFFFF; + height: auto; } #wizard-container { @@ -291,7 +305,7 @@ Select { #chat-input-row { layout: horizontal; height: 3; - dock: bottom; + margin: 0 0 0 0; } #chat-input { @@ -300,6 +314,7 @@ Select { #chat-send { width: 10; + content-align: center middle; } #chat-log { From b9edad12a80072b84e8b299e7f2cc8e88f2c14f7 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Thu, 23 Apr 2026 23:55:29 -0400 Subject: [PATCH 06/32] Preserve selected model when returning to chat screen Save Select value before refreshing options list, restore it if the model is still available. Falls back to first model only when previous selection is no longer present. 
--- .../includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py index 9699c64..621bbdd 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py @@ -52,6 +52,7 @@ async def _load_model_options(self) -> None: models = await api_client.list_models() options = [(m.get("name", "?"), m.get("name", "?")) for m in models] + previous = select.value select.set_options(options) if not options: @@ -67,7 +68,10 @@ async def _load_model_options(self) -> None: notice.remove_class("error", "warn") send_btn.disabled = False chat_input.disabled = False - if select.value is Select.BLANK: + option_values = [v for _, v in options] + if previous is not Select.BLANK and previous in option_values: + select.value = previous + elif select.value is Select.BLANK: select.value = options[0][1] async def on_button_pressed(self, event: Button.Pressed) -> None: From d0f1ca80e8b1b6c2cc00a5d3c83b6428f0461f94 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Thu, 23 Apr 2026 23:59:54 -0400 Subject: [PATCH 07/32] Add model delete and fix chat model persistence - Add red Delete button to each installed model item - Auto-unload from VRAM before deleting if model is loaded - Fix httpx DELETE with json body (use client.request instead) - Preserve selected chat model when returning to chat screen --- .../usr/lib/neuraldrive/tui/screens/models.py | 18 ++++++++++++++++++ .../usr/lib/neuraldrive/tui/styles.tcss | 13 +++++++++++++ .../lib/neuraldrive/tui/utils/api_client.py | 4 +++- .../lib/neuraldrive/tui/widgets/model_item.py | 2 ++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py 
b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index 4e8951b..a80800b 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -328,6 +328,8 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: self._load_to_vram(btn.name or "") elif btn.has_class("model-unload"): self._unload_from_vram(btn.name or "") + elif btn.has_class("model-delete"): + self._delete_model(btn.name or "") def _cancel_pull(self) -> None: self._pull_queue.clear() @@ -431,3 +433,19 @@ async def _unload_from_vram(self, model_name: str) -> None: else: status.update(f" \u2717 Failed to unload {model_name}") await self._load_models() + + @work() + async def _delete_model(self, model_name: str) -> None: + status = self.query_one("#model-status", Static) + running = await api_client.list_running_models() + running_names = {m.get("name", "") for m in running} + if model_name in running_names: + status.update(f"Unloading {model_name} from VRAM before delete...") + await api_client.unload_model(model_name) + status.update(f"Deleting {model_name}...") + success = await api_client.delete_model(model_name) + if success: + status.update(f" \u2713 {model_name} deleted") + else: + status.update(f" \u2717 Failed to delete {model_name}") + await self._load_models() diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss index 49c00d5..daa6da0 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss @@ -178,6 +178,19 @@ Button.model-unload:disabled { border: solid #1A1A1A; } +Button.model-delete { + background: #1F1F1F; + color: #EF4444; + border: solid #EF4444; + min-width: 10; + width: 10; + height: 3; +} + +Button.model-delete:hover { + background: #1F0A0A; +} + .svc-row { height: 3; padding: 0 2; diff --git 
a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py index e862a87..de61a27 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py @@ -48,7 +48,9 @@ async def pull_model(name: str): async def delete_model(name: str) -> bool: try: async with httpx.AsyncClient(timeout=TIMEOUT) as client: - resp = await client.delete(f"{OLLAMA_URL}/api/delete", json={"name": name}) + resp = await client.request( + "DELETE", f"{OLLAMA_URL}/api/delete", json={"name": name} + ) return resp.status_code == 200 except (httpx.ConnectError, httpx.TimeoutException, httpx.HTTPError): return False diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py index c5eb2f4..ccb5a0d 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py @@ -35,9 +35,11 @@ def compose(self) -> ComposeResult: yield Static("○ ready", classes="model-status-cached") load_btn = Button("Load", name=self._model_name, classes="model-load") unload_btn = Button("Unload", name=self._model_name, classes="model-unload") + delete_btn = Button("Delete", name=self._model_name, classes="model-delete") if self._loaded: load_btn.disabled = True else: unload_btn.disabled = True yield load_btn yield unload_btn + yield delete_btn From ff34cab7fd56c66e075fdbec41205f1df401b4d7 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:29:43 -0400 Subject: [PATCH 08/32] Harden wizard finalization, add --wizard flag, and Enter-to-pull - Gate sentinel write behind errors check: sentinel is only written after config.save() and all prior writes succeed, preventing the wizard from being silently skipped after partial failures - 
Guard partition detection: reject if lsblk returns base device instead of new partition, preventing accidental whole-disk format - Add --wizard CLI flag to force wizard rerun on demand - Add on_input_submitted to ModelsScreen so Enter in the pull-input field triggers model download --- .../usr/lib/neuraldrive/tui/main.py | 15 +++++++++++-- .../usr/lib/neuraldrive/tui/screens/models.py | 6 +++++ .../usr/lib/neuraldrive/tui/screens/wizard.py | 22 +++++++++++++------ 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/main.py b/config/includes.chroot/usr/lib/neuraldrive/tui/main.py index 663595a..ba4b475 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/main.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/main.py @@ -9,6 +9,7 @@ from screens.chat import ChatScreen from screens.wizard import FirstBootWizard +import argparse import os import sys import traceback @@ -88,10 +89,14 @@ class NeuralDriveTUI(App): "chat": ChatScreen, } + def __init__(self, force_wizard: bool = False) -> None: + super().__init__() + self._force_wizard = force_wizard + def on_mount(self) -> None: self.push_screen(DashboardScreen()) sentinel_exists = os.path.exists("/etc/neuraldrive/first-boot-complete") - if not sentinel_exists and not config.wizard_complete(): + if self._force_wizard or (not sentinel_exists and not config.wizard_complete()): self.push_screen(FirstBootWizard()) def _handle_exception(self, error: Exception) -> None: @@ -112,10 +117,16 @@ def action_switch_screen(self, screen_name: str) -> None: if __name__ == "__main__": + parser = argparse.ArgumentParser(description="NeuralDrive TUI") + parser.add_argument( + "--wizard", action="store_true", help="Force the first-boot wizard to run" + ) + args = parser.parse_args() + screenshot_dir = _screenshot_dir() os.environ["TEXTUAL_SCREENSHOT_LOCATION"] = screenshot_dir try: - app = NeuralDriveTUI() + app = NeuralDriveTUI(force_wizard=args.wizard) 
app.run(mouse=False) except Exception as exc: dump_path = _write_crash_dump(exc) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index a80800b..6d54142 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -331,6 +331,12 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: elif btn.has_class("model-delete"): self._delete_model(btn.name or "") + def on_input_submitted(self, event: Input.Submitted) -> None: + if event.input.id == "pull-input": + name = event.input.value.strip() + if name: + self._start_pull(name) + def _cancel_pull(self) -> None: self._pull_queue.clear() self.workers.cancel_group(self, "default") diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py index b2176b4..1a8d704 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py @@ -346,6 +346,9 @@ def _create_persistence_partition(self) -> str | None: return "No partitions found after creation" new_part = f"/dev/{parts[-1].strip()}" + if new_part == self._boot_device: + return "Could not identify new partition (got base device)" + proc = subprocess.run( [ "sudo", @@ -555,9 +558,17 @@ def _finalize(self) -> None: if cfg_err: errors.append(cfg_err) + if errors: + error_widget = self.query_one("#wiz-error", Static) + error_widget.update("\n".join(errors)) + return + + # All config writes succeeded — now write sentinel and strip NOPASSWD err = self._sudo_write(SENTINEL, "") if err: - errors.append(err) + error_widget = self.query_one("#wiz-error", Static) + error_widget.update(f"Failed to write sentinel: {err}") + return # Remove NOPASSWD LAST — after all other sudo operations are done, # since removing it makes 
subsequent sudo calls require a TTY password prompt @@ -573,15 +584,12 @@ def _finalize(self) -> None: new_content = result.stdout.replace("NOPASSWD:", "") err = self._sudo_write(SUDOERS_PATH, new_content, "0440") if err: - errors.append(err) + # Sudoers strip failed but sentinel+config are written — + # wizard is complete, just warn + pass except (subprocess.TimeoutExpired, FileNotFoundError): pass - if errors: - error_widget = self.query_one("#wiz-error", Static) - error_widget.update("\n".join(errors)) - return - self.app.pop_screen() def action_cancel_wizard(self) -> None: From fbac7b67d9c0b2944ac6ef8919262f4a89df0ef3 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:42:31 -0400 Subject: [PATCH 09/32] Harden partition detection, wizard source of truth, and subprocess error checking - Launcher now forwards "$@" so neuraldrive-tui --wizard works - Partition detection uses before/after diff instead of fragile last-line - Wizard completion uses sentinel file as single source of truth - config.save() and wizard._sudo_write() check all subprocess return codes --- .../hooks/live/04-install-python-apps.chroot | 2 +- .../usr/lib/neuraldrive/tui/main.py | 4 +- .../usr/lib/neuraldrive/tui/screens/wizard.py | 52 +++++++++++++++---- .../usr/lib/neuraldrive/tui/utils/config.py | 8 ++- 4 files changed, 50 insertions(+), 16 deletions(-) diff --git a/config/hooks/live/04-install-python-apps.chroot b/config/hooks/live/04-install-python-apps.chroot index 80605e4..853fd27 100755 --- a/config/hooks/live/04-install-python-apps.chroot +++ b/config/hooks/live/04-install-python-apps.chroot @@ -122,7 +122,7 @@ python3 -m venv /usr/lib/neuraldrive/tui/venv cat > /usr/local/bin/neuraldrive-tui << 'LAUNCHER' #!/bin/sh -exec /usr/lib/neuraldrive/tui/venv/bin/python /usr/lib/neuraldrive/tui/main.py +exec /usr/lib/neuraldrive/tui/venv/bin/python /usr/lib/neuraldrive/tui/main.py "$@" LAUNCHER chmod +x /usr/local/bin/neuraldrive-tui diff 
--git a/config/includes.chroot/usr/lib/neuraldrive/tui/main.py b/config/includes.chroot/usr/lib/neuraldrive/tui/main.py index ba4b475..27ea2c1 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/main.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/main.py @@ -15,8 +15,6 @@ import traceback from datetime import datetime -from utils import config - PERSIST_DIR = "/var/lib/neuraldrive" OVERLAY_LOG_DIR = "/var/log/neuraldrive" @@ -96,7 +94,7 @@ def __init__(self, force_wizard: bool = False) -> None: def on_mount(self) -> None: self.push_screen(DashboardScreen()) sentinel_exists = os.path.exists("/etc/neuraldrive/first-boot-complete") - if self._force_wizard or (not sentinel_exists and not config.wizard_complete()): + if self._force_wizard or not sentinel_exists: self.push_screen(FirstBootWizard()) def _handle_exception(self, error: Exception) -> None: diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py index 1a8d704..3d580ce 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py @@ -323,6 +323,21 @@ def _create_persistence_partition(self) -> str | None: if proc.returncode != 0: return proc.stderr.strip() + # Snapshot partition list BEFORE partprobe to detect the new one + pre_res = subprocess.run( + ["lsblk", "-ln", "-o", "NAME", self._boot_device], + capture_output=True, + text=True, + timeout=5, + ) + before_parts = set() + if pre_res.returncode == 0: + before_parts = { + line.strip() + for line in pre_res.stdout.strip().splitlines() + if line.strip() + } + subprocess.run( ["sudo", "partprobe", self._boot_device], capture_output=True, @@ -333,21 +348,34 @@ def _create_persistence_partition(self) -> str | None: time.sleep(2) - res = subprocess.run( + # Snapshot partition list AFTER partprobe + post_res = subprocess.run( ["lsblk", "-ln", "-o", "NAME", 
self._boot_device], capture_output=True, text=True, timeout=5, ) - if res.returncode != 0: + if post_res.returncode != 0: return "Could not determine new partition device" - parts = res.stdout.strip().splitlines() - if not parts: - return "No partitions found after creation" - new_part = f"/dev/{parts[-1].strip()}" - if new_part == self._boot_device: - return "Could not identify new partition (got base device)" + after_parts = { + line.strip() + for line in post_res.stdout.strip().splitlines() + if line.strip() + } + + new_parts = after_parts - before_parts + # Filter out the base device name itself + base_name = os.path.basename(self._boot_device) + new_parts.discard(base_name) + + if len(new_parts) != 1: + return ( + f"Expected exactly 1 new partition, found {len(new_parts)}: " + f"{new_parts or 'none'}" + ) + + new_part = f"/dev/{new_parts.pop()}" proc = subprocess.run( [ @@ -482,11 +510,13 @@ def _configure_wifi(self) -> None: def _sudo_write(self, path: str, content: str, mode: str = "0644") -> str | None: try: - subprocess.run( + mkdir_proc = subprocess.run( ["sudo", "mkdir", "-p", os.path.dirname(path)], capture_output=True, timeout=5, ) + if mkdir_proc.returncode != 0: + return f"Failed to create dir for {path}: {mkdir_proc.stderr.decode().strip()}" proc = subprocess.run( ["sudo", "tee", path], input=content.encode(), @@ -495,11 +525,13 @@ def _sudo_write(self, path: str, content: str, mode: str = "0644") -> str | None ) if proc.returncode != 0: return f"Failed to write {path}: {proc.stderr.decode().strip()}" - subprocess.run( + chmod_proc = subprocess.run( ["sudo", "chmod", mode, path], capture_output=True, timeout=5, ) + if chmod_proc.returncode != 0: + return f"Failed to chmod {path}: {chmod_proc.stderr.decode().strip()}" return None except (subprocess.TimeoutExpired, FileNotFoundError) as e: return f"Failed to write {path}: {e}" diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py 
b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py index 4e7cd8e..1a6e6ea 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py @@ -34,11 +34,13 @@ def save(data: dict[str, Any]) -> str | None: path = _config_path() content = yaml.dump(data, default_flow_style=False, sort_keys=False) try: - subprocess.run( + mkdir_proc = subprocess.run( ["sudo", "mkdir", "-p", os.path.dirname(path)], capture_output=True, timeout=5, ) + if mkdir_proc.returncode != 0: + return f"Failed to create dir for {path}: {mkdir_proc.stderr.decode().strip()}" proc = subprocess.run( ["sudo", "tee", path], input=content.encode(), @@ -47,11 +49,13 @@ def save(data: dict[str, Any]) -> str | None: ) if proc.returncode != 0: return f"Failed to write {path}: {proc.stderr.decode().strip()}" - subprocess.run( + chmod_proc = subprocess.run( ["sudo", "chmod", "0644", path], capture_output=True, timeout=5, ) + if chmod_proc.returncode != 0: + return f"Failed to chmod {path}: {chmod_proc.stderr.decode().strip()}" return None except (subprocess.TimeoutExpired, FileNotFoundError) as e: return f"Failed to write {path}: {e}" From 6efe83a1733b0c4e9ec8a550a5ea5e42b9525f28 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:44:42 -0400 Subject: [PATCH 10/32] Move partition snapshot before mkpart to prevent race condition lsblk before-snapshot was taken after mkpart, which could show the new partition if the kernel auto-detected the table change. Snapshot now taken before mkpart so the diff is always reliable. 
--- .../usr/lib/neuraldrive/tui/screens/wizard.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py index 3d580ce..66ed5f2 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py @@ -303,6 +303,21 @@ def _create_persistence_partition(self) -> str | None: if not free_start or not free_end: return "No free space block large enough found" + # Snapshot partition list BEFORE mkpart so the diff is reliable + pre_res = subprocess.run( + ["lsblk", "-ln", "-o", "NAME", self._boot_device], + capture_output=True, + text=True, + timeout=5, + ) + before_parts = set() + if pre_res.returncode == 0: + before_parts = { + line.strip() + for line in pre_res.stdout.strip().splitlines() + if line.strip() + } + proc = subprocess.run( [ "sudo", @@ -323,21 +338,6 @@ def _create_persistence_partition(self) -> str | None: if proc.returncode != 0: return proc.stderr.strip() - # Snapshot partition list BEFORE partprobe to detect the new one - pre_res = subprocess.run( - ["lsblk", "-ln", "-o", "NAME", self._boot_device], - capture_output=True, - text=True, - timeout=5, - ) - before_parts = set() - if pre_res.returncode == 0: - before_parts = { - line.strip() - for line in pre_res.stdout.strip().splitlines() - if line.strip() - } - subprocess.run( ["sudo", "partprobe", self._boot_device], capture_output=True, From 5534f54cb56e8830dad8cb08b4d64b1798e1b944 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:52:37 -0400 Subject: [PATCH 11/32] Harden partition creation safety and boot device detection - Abort before mkpart if pre-lsblk snapshot fails (no disk mutation without a valid baseline) - Check partprobe return code; poll lsblk with bounded retry loop instead of fixed sleep(2) - 
Replace fragile regex in get_boot_device() with lsblk PKNAME (supports NVMe, MMC, and sd devices) - Guard Enter-to-pull against re-submission during active download --- .../usr/lib/neuraldrive/tui/screens/models.py | 2 +- .../usr/lib/neuraldrive/tui/screens/wizard.py | 69 ++++++++++--------- .../usr/lib/neuraldrive/tui/utils/hardware.py | 13 ++-- 3 files changed, 44 insertions(+), 40 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index 6d54142..bdfd37f 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -332,7 +332,7 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: self._delete_model(btn.name or "") def on_input_submitted(self, event: Input.Submitted) -> None: - if event.input.id == "pull-input": + if event.input.id == "pull-input" and not self._pulling: name = event.input.value.strip() if name: self._start_pull(name) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py index 66ed5f2..b169556 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py @@ -310,13 +310,13 @@ def _create_persistence_partition(self) -> str | None: text=True, timeout=5, ) - before_parts = set() - if pre_res.returncode == 0: - before_parts = { - line.strip() - for line in pre_res.stdout.strip().splitlines() - if line.strip() - } + if pre_res.returncode != 0: + return "Cannot list partitions — aborting to avoid unsafe disk changes" + before_parts = { + line.strip() + for line in pre_res.stdout.strip().splitlines() + if line.strip() + } proc = subprocess.run( [ @@ -338,44 +338,45 @@ def _create_persistence_partition(self) -> str | None: if proc.returncode != 0: return 
proc.stderr.strip() - subprocess.run( + partprobe_proc = subprocess.run( ["sudo", "partprobe", self._boot_device], capture_output=True, + text=True, timeout=10, ) + if partprobe_proc.returncode != 0: + return f"partprobe failed: {partprobe_proc.stderr.strip()}" import time - time.sleep(2) - - # Snapshot partition list AFTER partprobe - post_res = subprocess.run( - ["lsblk", "-ln", "-o", "NAME", self._boot_device], - capture_output=True, - text=True, - timeout=5, - ) - if post_res.returncode != 0: - return "Could not determine new partition device" + new_part = None + for _attempt in range(6): + time.sleep(1) + post_res = subprocess.run( + ["lsblk", "-ln", "-o", "NAME", self._boot_device], + capture_output=True, + text=True, + timeout=5, + ) + if post_res.returncode != 0: + continue - after_parts = { - line.strip() - for line in post_res.stdout.strip().splitlines() - if line.strip() - } + after_parts = { + line.strip() + for line in post_res.stdout.strip().splitlines() + if line.strip() + } - new_parts = after_parts - before_parts - # Filter out the base device name itself - base_name = os.path.basename(self._boot_device) - new_parts.discard(base_name) + new_parts = after_parts - before_parts + base_name = os.path.basename(self._boot_device) + new_parts.discard(base_name) - if len(new_parts) != 1: - return ( - f"Expected exactly 1 new partition, found {len(new_parts)}: " - f"{new_parts or 'none'}" - ) + if len(new_parts) == 1: + new_part = f"/dev/{new_parts.pop()}" + break - new_part = f"/dev/{new_parts.pop()}" + if not new_part: + return "New partition did not appear after partprobe (timed out)" proc = subprocess.run( [ diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py index e1f3918..b3e599f 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py @@ -166,11 +166,14 @@ def 
get_boot_device() -> str | None: ) if res.returncode == 0 and res.stdout.strip(): part_dev = res.stdout.strip() - import re - - match = re.match(r"(/dev/[a-z]+)", part_dev) - if match: - return match.group(1) + pkname_res = subprocess.run( + ["lsblk", "-no", "PKNAME", part_dev], + capture_output=True, + text=True, + timeout=5, + ) + if pkname_res.returncode == 0 and pkname_res.stdout.strip(): + return f"/dev/{pkname_res.stdout.strip()}" except (OSError, subprocess.TimeoutExpired, FileNotFoundError): pass return None From c0e802c167258ac487c953c698f5545fae736278 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:56:17 -0400 Subject: [PATCH 12/32] Guard pull button and Enter against concurrent submissions Set _pulling=True immediately in both user-facing entry points before scheduling the @work worker, closing the race window. Pull button handler now mirrors the Enter-to-pull guard. --- .../includes.chroot/usr/lib/neuraldrive/tui/screens/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index bdfd37f..c7c61a6 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -317,7 +317,8 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: btn_id = btn.id or "" if btn_id == "pull-btn": name = self.query_one("#pull-input", Input).value.strip() - if name: + if name and not self._pulling: + self._pulling = True self._start_pull(name) elif btn_id == "open-catalog": installed = {m.get("name", "") for m in await api_client.list_models()} @@ -335,6 +336,7 @@ def on_input_submitted(self, event: Input.Submitted) -> None: if event.input.id == "pull-input" and not self._pulling: name = event.input.value.strip() if name: + self._pulling = True self._start_pull(name) 
def _cancel_pull(self) -> None: From b0d8a88bd7a80645af171daa1883ab6648dfd19b Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:10:57 -0400 Subject: [PATCH 13/32] Remove dual wizard marker, check all subprocess returns, normalize live-media path, guard _pull_next - Remove wizard_complete config key write from wizard finalize; sentinel file is now the single source of truth for wizard completion - Remove unused wizard_complete() function from config.py - Check return codes for all subprocess calls in partition creation: mkdir, chown, umount, systemctl (warning-only for restart) - Normalize live-media= cmdline path through lsblk PKNAME for NVMe/MMC - Set _pulling=True in _pull_next() before _start_pull() to prevent concurrent pull submissions from all entry points --- .../usr/lib/neuraldrive/tui/screens/models.py | 1 + .../usr/lib/neuraldrive/tui/screens/wizard.py | 33 +++++++++++++++---- .../usr/lib/neuraldrive/tui/utils/config.py | 4 --- .../usr/lib/neuraldrive/tui/utils/hardware.py | 11 ++++++- 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index c7c61a6..83c8bce 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -361,6 +361,7 @@ def _pull_next(self) -> None: self.app.call_later(self._load_models) return model_name = self._pull_queue.pop(0) + self._pulling = True self._start_pull(model_name) @work(exclusive=True) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py index b169556..3812e4b 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py @@ -395,11 +395,14 @@ def 
_create_persistence_partition(self) -> str | None: if proc.returncode != 0: return f"mkfs.ext4 failed: {proc.stderr.strip()}" - subprocess.run( + proc = subprocess.run( ["sudo", "mkdir", "-p", "/mnt/persistence"], capture_output=True, + text=True, timeout=5, ) + if proc.returncode != 0: + return f"mkdir /mnt/persistence failed: {proc.stderr.strip()}" proc = subprocess.run( ["sudo", "mount", new_part, "/mnt/persistence"], capture_output=True, @@ -426,13 +429,16 @@ def _create_persistence_partition(self) -> str | None: "/mnt/persistence/etc/neuraldrive", "/mnt/persistence/home", ]: - subprocess.run( + proc = subprocess.run( ["sudo", "mkdir", "-p", d], capture_output=True, + text=True, timeout=5, ) + if proc.returncode != 0: + return f"mkdir {d} failed: {proc.stderr.strip()}" - subprocess.run( + proc = subprocess.run( [ "sudo", "chown", @@ -441,20 +447,29 @@ def _create_persistence_partition(self) -> str | None: "/mnt/persistence/var/lib/neuraldrive/ollama", ], capture_output=True, + text=True, timeout=5, ) + if proc.returncode != 0: + return f"chown failed: {proc.stderr.strip()}" - subprocess.run( + proc = subprocess.run( ["sudo", "umount", "/mnt/persistence"], capture_output=True, + text=True, timeout=10, ) + if proc.returncode != 0: + return f"umount /mnt/persistence failed: {proc.stderr.strip()}" - subprocess.run( + proc = subprocess.run( ["sudo", "mkdir", "-p", PERSISTENCE_MOUNT], capture_output=True, + text=True, timeout=5, ) + if proc.returncode != 0: + return f"mkdir {PERSISTENCE_MOUNT} failed: {proc.stderr.strip()}" proc = subprocess.run( ["sudo", "mount", new_part, PERSISTENCE_MOUNT], capture_output=True, @@ -464,11 +479,16 @@ def _create_persistence_partition(self) -> str | None: if proc.returncode != 0: return f"Mount at {PERSISTENCE_MOUNT} failed: {proc.stderr.strip()}" - subprocess.run( + proc = subprocess.run( ["sudo", "systemctl", "restart", "neuraldrive-ollama"], capture_output=True, + text=True, timeout=30, ) + if proc.returncode != 0: + 
self.query_one("#wiz-error", Static).update( + f"Warning: Ollama restart failed: {proc.stderr.strip()}" + ) self._has_persistence = True return None @@ -578,7 +598,6 @@ def _finalize(self) -> None: errors.append(err) cfg_data = config.load() - cfg_data["wizard_complete"] = True if self._admin_password: cfg_data["security"] = {"password_set": True} if self._wifi_ssid: diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py index 1a6e6ea..b6a5496 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py @@ -82,7 +82,3 @@ def set_key(key: str, value: Any) -> str | None: target = target[k] target[keys[-1]] = value return save(data) - - -def wizard_complete() -> bool: - return get("wizard_complete", False) is True diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py index b3e599f..2acceb1 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py @@ -157,7 +157,16 @@ def get_boot_device() -> str | None: if part.startswith("boot=live") or part.startswith("root="): pass if part.startswith("live-media="): - return part.split("=", 1)[1] + media_dev = part.split("=", 1)[1] + pkname_res = subprocess.run( + ["lsblk", "-no", "PKNAME", media_dev], + capture_output=True, + text=True, + timeout=5, + ) + if pkname_res.returncode == 0 and pkname_res.stdout.strip(): + return f"/dev/{pkname_res.stdout.strip()}" + return media_dev res = subprocess.run( ["findmnt", "-n", "-o", "SOURCE", "/run/live/medium"], capture_output=True, From 64a95148525d9eaec1a60d35c86a6e2082352b3f Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:15:39 -0400 Subject: [PATCH 14/32] Fall through to findmnt when 
live-media PKNAME fails Instead of returning the raw live-media= partition path when lsblk PKNAME resolution fails, fall through to the findmnt detection path. This prevents handing an unvalidated partition/symlink path to the storage wizard for partition creation. --- .../includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py index 2acceb1..5556e86 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py @@ -166,7 +166,8 @@ def get_boot_device() -> str | None: ) if pkname_res.returncode == 0 and pkname_res.stdout.strip(): return f"/dev/{pkname_res.stdout.strip()}" - return media_dev + # PKNAME failed — fall through to findmnt instead of + # returning an unvalidated partition/symlink path. res = subprocess.run( ["findmnt", "-n", "-o", "SOURCE", "/run/live/medium"], capture_output=True, From b1003b1d44745907523d293bd5026962d14cc897 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 08:29:49 -0400 Subject: [PATCH 15/32] Fix Header crash on screen transitions and simplify --wizard flag Replace Textual's Header with SafeHeader subclass that catches NoMatches during title watcher updates. Textual 8.2.4 only catches NoScreen in the set_title watcher but not NoMatches, causing crashes when screens are pushed/popped and HeaderTitle hasn't recomposed yet. This is a known upstream bug (Textualize/textual#4258, PR #4817). Simplify --wizard: instead of a separate force_wizard constructor flag, --wizard now removes the sentinel file before launch so the existing on_mount check triggers the wizard naturally. 
--- .../usr/lib/neuraldrive/tui/main.py | 15 +++++++------ .../usr/lib/neuraldrive/tui/screens/chat.py | 6 +++-- .../lib/neuraldrive/tui/screens/dashboard.py | 6 +++-- .../usr/lib/neuraldrive/tui/screens/logs.py | 6 +++-- .../usr/lib/neuraldrive/tui/screens/models.py | 8 ++++--- .../lib/neuraldrive/tui/screens/network.py | 6 +++-- .../lib/neuraldrive/tui/screens/services.py | 6 +++-- .../lib/neuraldrive/tui/widgets/__init__.py | 3 ++- .../neuraldrive/tui/widgets/safe_header.py | 22 +++++++++++++++++++ 9 files changed, 57 insertions(+), 21 deletions(-) create mode 100644 config/includes.chroot/usr/lib/neuraldrive/tui/widgets/safe_header.py diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/main.py b/config/includes.chroot/usr/lib/neuraldrive/tui/main.py index 27ea2c1..ede4187 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/main.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/main.py @@ -11,6 +11,7 @@ import argparse import os +import subprocess import sys import traceback from datetime import datetime @@ -87,14 +88,9 @@ class NeuralDriveTUI(App): "chat": ChatScreen, } - def __init__(self, force_wizard: bool = False) -> None: - super().__init__() - self._force_wizard = force_wizard - def on_mount(self) -> None: self.push_screen(DashboardScreen()) - sentinel_exists = os.path.exists("/etc/neuraldrive/first-boot-complete") - if self._force_wizard or not sentinel_exists: + if not os.path.exists("/etc/neuraldrive/first-boot-complete"): self.push_screen(FirstBootWizard()) def _handle_exception(self, error: Exception) -> None: @@ -121,10 +117,15 @@ def action_switch_screen(self, screen_name: str) -> None: ) args = parser.parse_args() + if args.wizard: + sentinel = "/etc/neuraldrive/first-boot-complete" + if os.path.exists(sentinel): + subprocess.run(["sudo", "rm", "-f", sentinel], timeout=5) + screenshot_dir = _screenshot_dir() os.environ["TEXTUAL_SCREENSHOT_LOCATION"] = screenshot_dir try: - app = NeuralDriveTUI(force_wizard=args.wizard) + app 
= NeuralDriveTUI() app.run(mouse=False) except Exception as exc: dump_path = _write_crash_dump(exc) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py index 621bbdd..62cb60d 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py @@ -6,7 +6,9 @@ from textual.app import ComposeResult from textual.containers import Horizontal from textual.screen import Screen -from textual.widgets import Button, Footer, Header, Input, RichLog, Select, Static +from textual.widgets import Button, Footer, Input, RichLog, Select, Static + +from widgets.safe_header import SafeHeader from utils import api_client @@ -19,7 +21,7 @@ def __init__(self) -> None: self._messages: list[dict] = [] def compose(self) -> ComposeResult: - yield Header() + yield SafeHeader() with Horizontal(id="chat-model-row"): yield Static(" Model ", id="chat-model-label") yield Select([], id="chat-model-select", prompt="Choose a model…") diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py index f33e49c..0bbcaec 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py @@ -5,7 +5,9 @@ from textual.app import ComposeResult from textual.containers import Horizontal, Vertical, VerticalScroll from textual.screen import Screen -from textual.widgets import Footer, Header, Static +from textual.widgets import Footer, Static + +from widgets.safe_header import SafeHeader from utils import api_client, hardware from widgets.stats_box import StatsBox @@ -15,7 +17,7 @@ class DashboardScreen(Screen): BINDINGS = [("r", "refresh", "Refresh")] def compose(self) -> ComposeResult: - yield Header() + yield SafeHeader() with VerticalScroll(): with 
Horizontal(id="dash-topbar"): yield Static("", id="dash-hostname") diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/logs.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/logs.py index 7be5812..fdf368d 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/logs.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/logs.py @@ -5,7 +5,9 @@ from textual.app import ComposeResult from textual.containers import Horizontal from textual.screen import Screen -from textual.widgets import Footer, Header, RichLog, Select, Static +from textual.widgets import Footer, RichLog, Select, Static + +from widgets.safe_header import SafeHeader from utils import hardware @@ -19,7 +21,7 @@ class LogsScreen(Screen): BINDINGS = [("r", "refresh", "Refresh")] def compose(self) -> ComposeResult: - yield Header() + yield SafeHeader() with Horizontal(): yield Static(" Service: ", classes="label") yield Select( diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index 83c8bce..d8c6772 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -7,7 +7,9 @@ from textual.app import ComposeResult from textual.containers import Horizontal, Vertical, VerticalScroll from textual.screen import Screen -from textual.widgets import Button, Footer, Header, Input, ProgressBar, Static +from textual.widgets import Button, Footer, Input, ProgressBar, Static + +from widgets.safe_header import SafeHeader from textual.binding import Binding @@ -80,7 +82,7 @@ def __init__(self, installed_names: set[str]) -> None: self._zone = "list" def compose(self) -> ComposeResult: - yield Header() + yield SafeHeader() yield Static( " ↑↓ Navigate Enter Select Tab Actions Esc Back", classes="muted" ) @@ -236,7 +238,7 @@ class ModelsScreen(Screen): BINDINGS = [("r", "refresh", "Refresh")] def 
compose(self) -> ComposeResult: - yield Header() + yield SafeHeader() with VerticalScroll(): yield Static("Installed Models", classes="heading") yield Vertical(id="model-list") diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/network.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/network.py index c12a8a2..bf39f04 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/network.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/network.py @@ -5,7 +5,9 @@ from textual.app import ComposeResult from textual.containers import Vertical, VerticalScroll from textual.screen import Screen -from textual.widgets import Footer, Header, Static +from textual.widgets import Footer, Static + +from widgets.safe_header import SafeHeader from utils import hardware @@ -14,7 +16,7 @@ class NetworkScreen(Screen): BINDINGS = [("r", "refresh", "Refresh")] def compose(self) -> ComposeResult: - yield Header() + yield SafeHeader() with VerticalScroll(): yield Static("Network Configuration", classes="heading") yield Static("", id="net-hostname") diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py index ddca6d3..d4cda6c 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py @@ -6,7 +6,9 @@ from textual.app import ComposeResult from textual.containers import Horizontal, Vertical, VerticalScroll from textual.screen import Screen -from textual.widgets import Button, Footer, Header, Static +from textual.widgets import Button, Footer, Static + +from widgets.safe_header import SafeHeader from textual.binding import Binding @@ -21,7 +23,7 @@ class ServicesScreen(Screen): ] def compose(self) -> ComposeResult: - yield Header() + yield SafeHeader() with VerticalScroll(): yield Static("NeuralDrive Services", classes="heading") yield 
Vertical(id="service-list") diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/__init__.py b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/__init__.py index 3ae5b2f..10a5e57 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/__init__.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/__init__.py @@ -1,4 +1,5 @@ from widgets.stats_box import StatsBox from widgets.model_item import ModelItem +from widgets.safe_header import SafeHeader -__all__ = ["StatsBox", "ModelItem"] +__all__ = ["StatsBox", "ModelItem", "SafeHeader"] diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/safe_header.py b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/safe_header.py new file mode 100644 index 0000000..e04ecc8 --- /dev/null +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/safe_header.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from textual.css.query import NoMatches +from textual.widgets import Header +from textual.widgets._header import HeaderTitle + + +class SafeHeader(Header): + + def _on_mount(self, event) -> None: + original_set_title = None + + async def safe_set_title() -> None: + try: + self.query_one(HeaderTitle).update(self.format_title()) + except (NoMatches, Exception): + pass + + self.watch(self.app, "title", safe_set_title) + self.watch(self.app, "sub_title", safe_set_title) + self.watch(self.screen, "title", safe_set_title) + self.watch(self.screen, "sub_title", safe_set_title) From 493fe4ea59a83cc397545fd9064da7be70eaff01 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 08:42:34 -0400 Subject: [PATCH 16/32] Fix GPU acceleration: load nvidia-uvm at boot and remove cgroup device filter - Add ExecStartPre to load nvidia-current-uvm module and create /dev/nvidia-uvm device nodes before Ollama starts (with - prefix for non-fatal failure on non-NVIDIA systems) - Remove DeviceAllow lines that blocked CUDA access under 
cgroup v2 - Add nvidia-modprobe to NVIDIA package list for device node creation - Add /etc/modules-load.d/nvidia-uvm.conf for early boot module load - Show [GPU]/[CPU] tags with VRAM usage per model on dashboard --- .../etc/modules-load.d/nvidia-uvm.conf | 4 ++++ .../etc/systemd/system/neuraldrive-ollama.service | 4 ++-- .../usr/lib/neuraldrive/tui/screens/dashboard.py | 12 +++++++++--- config/package-lists/gpu-nvidia.list.chroot | 1 + 4 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 config/includes.chroot/etc/modules-load.d/nvidia-uvm.conf diff --git a/config/includes.chroot/etc/modules-load.d/nvidia-uvm.conf b/config/includes.chroot/etc/modules-load.d/nvidia-uvm.conf new file mode 100644 index 0000000..1a5cb35 --- /dev/null +++ b/config/includes.chroot/etc/modules-load.d/nvidia-uvm.conf @@ -0,0 +1,4 @@ +# Load NVIDIA Unified Virtual Memory module at boot. +# Required for CUDA GPU memory allocation (Ollama inference). +# Harmless on systems without NVIDIA GPUs (modprobe fails silently). 
+nvidia-current-uvm diff --git a/config/includes.chroot/etc/systemd/system/neuraldrive-ollama.service b/config/includes.chroot/etc/systemd/system/neuraldrive-ollama.service index c7558bf..2029529 100644 --- a/config/includes.chroot/etc/systemd/system/neuraldrive-ollama.service +++ b/config/includes.chroot/etc/systemd/system/neuraldrive-ollama.service @@ -7,6 +7,8 @@ Requires=neuraldrive-gpu-detect.service Environment=HOME=/var/lib/neuraldrive/ollama EnvironmentFile=/etc/neuraldrive/ollama.conf ExecStartPre=/usr/bin/mkdir -p /var/lib/neuraldrive/models +ExecStartPre=-/sbin/modprobe nvidia-current-uvm +ExecStartPre=-/usr/bin/nvidia-modprobe -u ExecStart=/usr/local/bin/ollama serve User=neuraldrive-ollama Group=neuraldrive-ollama @@ -26,8 +28,6 @@ PrivateTmp=yes PrivateDevices=no ProtectKernelTunables=yes ProtectControlGroups=yes -DeviceAllow=/dev/nvidia* rw -DeviceAllow=/dev/dri/* rw ReadWritePaths=/var/lib/neuraldrive /var/log/neuraldrive /etc/neuraldrive /run/neuraldrive [Install] diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py index 0bbcaec..66c4c5e 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py @@ -31,7 +31,7 @@ def compose(self) -> ComposeResult: [("Device", "…"), ("VRAM", "…"), ("Temp", "…"), ("Util", "…")], id="box-gpu", ) - yield Static("Active Models (VRAM)", classes="heading") + yield Static("Active Models", classes="heading") yield Vertical(id="loaded-models") yield Static("Services", classes="heading") yield Vertical(id="service-badges") @@ -101,9 +101,15 @@ async def _refresh_models_async(self) -> None: else: for m in running: name = m.get("name", "unknown") + size_vram = m.get("size_vram", 0) size_bytes = m.get("size", 0) - size_gb = f"{size_bytes / (1024**3):.1f} GB" if size_bytes else "" - container.mount(Static(f" ● {name} {size_gb}", 
classes="ok")) + if size_vram and size_vram > 0: + vram_gb = f"{size_vram / (1024**3):.1f} GB" + tag = f"[GPU] {vram_gb}" + else: + ram_gb = f"{size_bytes / (1024**3):.1f} GB" if size_bytes else "" + tag = f"[CPU] {ram_gb}" + container.mount(Static(f" ● {name} {tag}", classes="ok")) def action_refresh(self) -> None: self._refresh_system() diff --git a/config/package-lists/gpu-nvidia.list.chroot b/config/package-lists/gpu-nvidia.list.chroot index 2276e64..5e76837 100644 --- a/config/package-lists/gpu-nvidia.list.chroot +++ b/config/package-lists/gpu-nvidia.list.chroot @@ -11,3 +11,4 @@ nvidia-persistenced firmware-nvidia-gsp libcuda1 libnvidia-ml1 +nvidia-modprobe From 5e1d3762dcc888d6261cad82054a7390e852d8af Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 09:00:05 -0400 Subject: [PATCH 17/32] Escape Rich markup in [GPU]/[CPU] tags so they render visibly Rich interprets [GPU] and [CPU] as style tags and silently drops them. Escape with backslash-bracket on dashboard. Also change model_item status from 'VRAM' to 'GPU' for consistency. 
--- .../usr/lib/neuraldrive/tui/screens/dashboard.py | 4 ++-- .../usr/lib/neuraldrive/tui/widgets/model_item.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py index 66c4c5e..28113c4 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py @@ -105,10 +105,10 @@ async def _refresh_models_async(self) -> None: size_bytes = m.get("size", 0) if size_vram and size_vram > 0: vram_gb = f"{size_vram / (1024**3):.1f} GB" - tag = f"[GPU] {vram_gb}" + tag = f"\\[GPU] {vram_gb}" else: ram_gb = f"{size_bytes / (1024**3):.1f} GB" if size_bytes else "" - tag = f"[CPU] {ram_gb}" + tag = f"\\[CPU] {ram_gb}" container.mount(Static(f" ● {name} {tag}", classes="ok")) def action_refresh(self) -> None: diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py index ccb5a0d..ee38bd9 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py @@ -30,7 +30,7 @@ def compose(self) -> ComposeResult: yield Static(self._model_size, classes="model-disk") yield Static(self._vram_str, classes="model-vram") if self._loaded: - yield Static("● VRAM", classes="model-status-loaded") + yield Static("● GPU", classes="model-status-loaded") else: yield Static("○ ready", classes="model-status-cached") load_btn = Button("Load", name=self._model_name, classes="model-load") From 5f8908e33ff771268bba5cc76d775d113ea6a8b8 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 09:04:47 -0400 Subject: [PATCH 18/32] Add arrow-key navigation with scroll-follow to installed models list Up/Down/PgUp/PgDn navigate between model items 
with a yellow highlight border. The scroll container follows the highlighted item via scroll_visible(), matching the catalog popup behavior. --- .../usr/lib/neuraldrive/tui/screens/models.py | 60 +++++++++++++++++-- .../usr/lib/neuraldrive/tui/styles.tcss | 11 ++++ 2 files changed, 66 insertions(+), 5 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index d8c6772..e968587 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -235,11 +235,17 @@ def on_button_pressed(self, event: Button.Pressed) -> None: class ModelsScreen(Screen): - BINDINGS = [("r", "refresh", "Refresh")] + BINDINGS = [ + ("r", "refresh", "Refresh"), + Binding("up", "nav_up", show=False, priority=True), + Binding("down", "nav_down", show=False, priority=True), + Binding("pageup", "page_up", show=False, priority=True), + Binding("pagedown", "page_down", show=False, priority=True), + ] def compose(self) -> ComposeResult: yield SafeHeader() - with VerticalScroll(): + with VerticalScroll(id="models-scroll"): yield Static("Installed Models", classes="heading") yield Vertical(id="model-list") yield Static("", classes="heading") @@ -264,8 +270,45 @@ def on_mount(self) -> None: self.query_one("#cancel-pull", Button).display = False self._pull_queue: list[str] = [] self._pulling = False + self._model_items: list[ModelItem] = [] + self._highlight_index = 0 self.action_refresh() + def _apply_highlight(self) -> None: + for i, item in enumerate(self._model_items): + if i == self._highlight_index: + item.add_class("model-highlighted") + item.scroll_visible() + else: + item.remove_class("model-highlighted") + + def action_nav_up(self) -> None: + if self._model_items and self._highlight_index > 0: + self._highlight_index -= 1 + self._apply_highlight() + + def action_nav_down(self) -> None: + if 
self._model_items and self._highlight_index < len(self._model_items) - 1: + self._highlight_index += 1 + self._apply_highlight() + + def action_page_up(self) -> None: + if not self._model_items: + return + scroll = self.query_one("#models-scroll", VerticalScroll) + page_size = max(1, scroll.size.height // 6) + self._highlight_index = max(0, self._highlight_index - page_size) + self._apply_highlight() + + def action_page_down(self) -> None: + if not self._model_items: + return + scroll = self.query_one("#models-scroll", VerticalScroll) + page_size = max(1, scroll.size.height // 6) + last = len(self._model_items) - 1 + self._highlight_index = min(last, self._highlight_index + page_size) + self._apply_highlight() + def action_refresh(self) -> None: self.app.call_later(self._load_models) @@ -288,6 +331,7 @@ async def _load_models(self) -> None: container = self.query_one("#model-list", Vertical) container.remove_children() + self._model_items = [] if not all_models: container.mount(Static(" No models installed", classes="muted")) @@ -310,9 +354,15 @@ async def _load_models(self) -> None: else: vram_str = "—" - container.mount( - ModelItem(name, size_str, params, quant, vram_str, loaded) - ) + item = ModelItem(name, size_str, params, quant, vram_str, loaded) + container.mount(item) + self._model_items.append(item) + + if self._model_items: + self._highlight_index = min( + self._highlight_index, len(self._model_items) - 1 + ) + self._apply_highlight() async def on_button_pressed(self, event: Button.Pressed) -> None: btn = event.button diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss index daa6da0..49f1c0f 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss @@ -99,6 +99,11 @@ Static.muted { background: #1F1F1F; } +.model-highlighted { + background: #1F1F1F; + border: solid #F59E0B; +} + .model-item Static 
{ height: 100%; content-align: left middle; @@ -454,3 +459,9 @@ Button.catalog-installed { #catalog-buttons Button { margin: 0 2; } + +#models-scroll { + height: 1fr; + scrollbar-background: #141414; + scrollbar-color: #2E2E2E; +} From b555d1f486bee7047353bee50e8cdb9e4443f38e Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 09:26:59 -0400 Subject: [PATCH 19/32] Unify models screen focus: zone-based Tab, arrow-key list+button nav MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tab cycles between zones: model list, Browse button, Pull input, Pull button. Within the model list zone, Up/Down navigates models with scroll-follow, Left/Right selects Load/Unload/Delete per model, Enter activates the selected button. All ModelItem buttons are non-focusable — navigation is fully managed by the screen. --- .../usr/lib/neuraldrive/tui/screens/models.py | 154 +++++++++++++++--- .../usr/lib/neuraldrive/tui/styles.tcss | 20 ++- .../lib/neuraldrive/tui/widgets/model_item.py | 9 + 3 files changed, 159 insertions(+), 24 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index e968587..927451a 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -5,7 +5,7 @@ from textual import work from textual.app import ComposeResult -from textual.containers import Horizontal, Vertical, VerticalScroll +from textual.containers import Horizontal, VerticalScroll from textual.screen import Screen from textual.widgets import Button, Footer, Input, ProgressBar, Static @@ -239,30 +239,35 @@ class ModelsScreen(Screen): ("r", "refresh", "Refresh"), Binding("up", "nav_up", show=False, priority=True), Binding("down", "nav_down", show=False, priority=True), + Binding("left", "nav_left", show=False, 
priority=True), + Binding("right", "nav_right", show=False, priority=True), Binding("pageup", "page_up", show=False, priority=True), Binding("pagedown", "page_down", show=False, priority=True), + Binding("enter", "activate", show=False, priority=True), + Binding("tab", "next_zone", show=False, priority=True), + Binding("shift+tab", "prev_zone", show=False, priority=True), ] + ZONES = ["models", "browse", "pull-input", "pull-btn"] + def compose(self) -> ComposeResult: yield SafeHeader() - with VerticalScroll(id="models-scroll"): - yield Static("Installed Models", classes="heading") - yield Vertical(id="model-list") - yield Static("", classes="heading") - yield Button( - "Browse Available Models", - id="open-catalog", - variant="primary", - classes="primary", - ) - yield Static("", classes="heading") - yield Static("Pull by Name", classes="heading") + yield Static("Installed Models", classes="heading") + yield VerticalScroll(id="model-list") + yield Button( + "Browse Available Models", + id="open-catalog", + variant="primary", + classes="primary", + ) + yield Static("Pull by Name", classes="heading") + with Horizontal(id="pull-input-row"): yield Input(placeholder="e.g. 
llama3:8b", id="pull-input") yield Button("Pull", id="pull-btn") - yield Static("", id="model-status") - with Horizontal(id="pull-row"): - yield ProgressBar(total=100, show_eta=True, id="pull-progress") - yield Button("Cancel", id="cancel-pull", variant="error") + yield Static("", id="model-status") + with Horizontal(id="pull-row"): + yield ProgressBar(total=100, show_eta=True, id="pull-progress") + yield Button("Cancel", id="cancel-pull", variant="error") yield Footer() def on_mount(self) -> None: @@ -272,43 +277,144 @@ def on_mount(self) -> None: self._pulling = False self._model_items: list[ModelItem] = [] self._highlight_index = 0 + self._btn_index = 0 + self._zone = "models" self.action_refresh() + # ── Zone management ────────────────────────────────────── + + def _enter_zone(self, zone: str) -> None: + self._zone = zone + if zone == "models": + self.set_focus(None) + self._apply_highlight() + elif zone == "browse": + self._clear_highlight() + self.query_one("#open-catalog", Button).focus() + elif zone == "pull-input": + self._clear_highlight() + self.query_one("#pull-input", Input).focus() + elif zone == "pull-btn": + self._clear_highlight() + self.query_one("#pull-btn", Button).focus() + + def action_next_zone(self) -> None: + idx = self.ZONES.index(self._zone) if self._zone in self.ZONES else 0 + idx = (idx + 1) % len(self.ZONES) + self._enter_zone(self.ZONES[idx]) + + def action_prev_zone(self) -> None: + idx = self.ZONES.index(self._zone) if self._zone in self.ZONES else 0 + idx = (idx - 1) % len(self.ZONES) + self._enter_zone(self.ZONES[idx]) + + # ── Model list highlight ───────────────────────────────── + def _apply_highlight(self) -> None: + self._clear_btn_highlight() for i, item in enumerate(self._model_items): if i == self._highlight_index: item.add_class("model-highlighted") item.scroll_visible() + self._apply_btn_highlight() else: item.remove_class("model-highlighted") + def _clear_highlight(self) -> None: + self._clear_btn_highlight() + for 
item in self._model_items: + item.remove_class("model-highlighted") + + # ── Per-model button highlight ─────────────────────────── + + def _get_active_buttons(self) -> list[Button]: + if not self._model_items: + return [] + item = self._model_items[self._highlight_index] + return item.get_action_buttons() + + def _apply_btn_highlight(self) -> None: + buttons = self._get_active_buttons() + if not buttons: + return + self._btn_index = max(0, min(self._btn_index, len(buttons) - 1)) + for i, btn in enumerate(buttons): + if i == self._btn_index: + btn.add_class("model-btn-active") + else: + btn.remove_class("model-btn-active") + + def _clear_btn_highlight(self) -> None: + for item in self._model_items: + for btn in item.get_action_buttons(): + btn.remove_class("model-btn-active") + + # ── Navigation actions ─────────────────────────────────── + def action_nav_up(self) -> None: + if self._zone != "models": + return if self._model_items and self._highlight_index > 0: self._highlight_index -= 1 self._apply_highlight() def action_nav_down(self) -> None: + if self._zone != "models": + return if self._model_items and self._highlight_index < len(self._model_items) - 1: self._highlight_index += 1 self._apply_highlight() + def action_nav_left(self) -> None: + if self._zone != "models": + return + if self._btn_index > 0: + self._btn_index -= 1 + self._apply_btn_highlight() + + def action_nav_right(self) -> None: + if self._zone != "models": + return + buttons = self._get_active_buttons() + if self._btn_index < len(buttons) - 1: + self._btn_index += 1 + self._apply_btn_highlight() + def action_page_up(self) -> None: - if not self._model_items: + if self._zone != "models" or not self._model_items: return - scroll = self.query_one("#models-scroll", VerticalScroll) + scroll = self.query_one("#model-list", VerticalScroll) page_size = max(1, scroll.size.height // 6) self._highlight_index = max(0, self._highlight_index - page_size) self._apply_highlight() def action_page_down(self) -> 
None: - if not self._model_items: + if self._zone != "models" or not self._model_items: return - scroll = self.query_one("#models-scroll", VerticalScroll) + scroll = self.query_one("#model-list", VerticalScroll) page_size = max(1, scroll.size.height // 6) last = len(self._model_items) - 1 self._highlight_index = min(last, self._highlight_index + page_size) self._apply_highlight() + def action_activate(self) -> None: + if self._zone == "models": + buttons = self._get_active_buttons() + if buttons and 0 <= self._btn_index < len(buttons): + btn = buttons[self._btn_index] + if not btn.disabled: + btn.press() + elif self._zone == "browse": + self.query_one("#open-catalog", Button).press() + elif self._zone == "pull-input": + inp = self.query_one("#pull-input", Input) + name = inp.value.strip() + if name and not self._pulling: + self._pulling = True + self._start_pull(name) + elif self._zone == "pull-btn": + self.query_one("#pull-btn", Button).press() + def action_refresh(self) -> None: self.app.call_later(self._load_models) @@ -329,7 +435,7 @@ async def _load_models(self) -> None: if cache_changed: config.set_key("vram_cache", vram_cache) - container = self.query_one("#model-list", Vertical) + container = self.query_one("#model-list", VerticalScroll) container.remove_children() self._model_items = [] @@ -362,7 +468,9 @@ async def _load_models(self) -> None: self._highlight_index = min( self._highlight_index, len(self._model_items) - 1 ) - self._apply_highlight() + self._btn_index = 0 + if self._zone == "models": + self._apply_highlight() async def on_button_pressed(self, event: Button.Pressed) -> None: btn = event.button diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss index 49f1c0f..70c6ef6 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss @@ -196,6 +196,11 @@ Button.model-delete:hover { background: 
#1F0A0A; } +Button.model-btn-active { + border: tall #F59E0B; + text-style: bold reverse; +} + .svc-row { height: 3; padding: 0 2; @@ -460,8 +465,21 @@ Button.catalog-installed { margin: 0 2; } -#models-scroll { +#model-list { height: 1fr; scrollbar-background: #141414; scrollbar-color: #2E2E2E; } + +#pull-input-row { + height: auto; + layout: horizontal; +} + +#pull-input-row Input { + width: 1fr; +} + +#pull-input-row Button { + width: 12; +} diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py index ee38bd9..71e363c 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py @@ -6,6 +6,8 @@ class ModelItem(Horizontal): + can_focus = False + def __init__( self, name: str, @@ -36,6 +38,9 @@ def compose(self) -> ComposeResult: load_btn = Button("Load", name=self._model_name, classes="model-load") unload_btn = Button("Unload", name=self._model_name, classes="model-unload") delete_btn = Button("Delete", name=self._model_name, classes="model-delete") + load_btn.can_focus = False + unload_btn.can_focus = False + delete_btn.can_focus = False if self._loaded: load_btn.disabled = True else: @@ -43,3 +48,7 @@ def compose(self) -> ComposeResult: yield load_btn yield unload_btn yield delete_btn + + def get_action_buttons(self) -> list[Button]: + """Return the action buttons in left-to-right order.""" + return list(self.query("Button")) From 3c12f7df7054096ace51584f4f42879c26ffa5f9 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 09:57:55 -0400 Subject: [PATCH 20/32] Models screen: skip disabled buttons, Loading... feedback, column legend Left/Right nav now skips disabled buttons (Unload when not loaded, Load when already loaded). Load button shows 'Loading...' and disables during VRAM load. 
Added column header row (Params, Quant, Disk, VRAM, Status) aligned with model item columns. --- .../usr/lib/neuraldrive/tui/screens/models.py | 40 ++++++++++++++++--- .../usr/lib/neuraldrive/tui/styles.tcss | 35 ++++++++++++++++ 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index 927451a..e32c44f 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -253,6 +253,13 @@ class ModelsScreen(Screen): def compose(self) -> ComposeResult: yield SafeHeader() yield Static("Installed Models", classes="heading") + with Horizontal(id="model-legend"): + yield Static("Model", classes="legend-name") + yield Static("Params", classes="legend-col legend-params") + yield Static("Quant", classes="legend-col legend-quant") + yield Static("Disk", classes="legend-col legend-disk") + yield Static("VRAM", classes="legend-col legend-vram") + yield Static("Status", classes="legend-col legend-status") yield VerticalScroll(id="model-list") yield Button( "Browse Available Models", @@ -331,7 +338,7 @@ def _get_active_buttons(self) -> list[Button]: if not self._model_items: return [] item = self._model_items[self._highlight_index] - return item.get_action_buttons() + return [b for b in item.get_action_buttons() if not b.disabled] def _apply_btn_highlight(self) -> None: buttons = self._get_active_buttons() @@ -585,6 +592,15 @@ async def _start_pull(self, model_name: str) -> None: async def _load_to_vram(self, model_name: str) -> None: status = self.query_one("#model-status", Static) status.update(f"Loading {model_name} into VRAM...") + load_btn = None + try: + load_btn = self.query_one( + f"Button.model-load[name='{model_name}']", Button + ) + load_btn.label = "Loading…" + load_btn.disabled = True + except Exception: + pass success = await 
api_client.load_model(model_name) if success: status.update(f" \u2713 {model_name} loaded into VRAM") @@ -593,16 +609,28 @@ async def _load_to_vram(self, model_name: str) -> None: await self._load_models() @work() - async def _unload_from_vram(self, model_name: str) -> None: + async def _load_to_vram(self, model_name: str) -> None: status = self.query_one("#model-status", Static) - status.update(f"Unloading {model_name}...") - success = await api_client.unload_model(model_name) + status.update(f"Loading {model_name} into VRAM...") + load_btn = self._find_model_button(model_name, "model-load") + if load_btn: + load_btn.label = "Loading\u2026" + load_btn.disabled = True + success = await api_client.load_model(model_name) if success: - status.update(f" \u2713 {model_name} unloaded from VRAM") + status.update(f" \u2713 {model_name} loaded into VRAM") else: - status.update(f" \u2717 Failed to unload {model_name}") + status.update(f" \u2717 Failed to load {model_name}") await self._load_models() + def _find_model_button(self, model_name: str, btn_class: str) -> Button | None: + for item in self._model_items: + if item.name == model_name: + for btn in item.query("Button"): + if btn.has_class(btn_class): + return btn + return None + @work() async def _delete_model(self, model_name: str) -> None: status = self.query_one("#model-status", Static) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss index 70c6ef6..ed3daf3 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss @@ -471,6 +471,41 @@ Button.catalog-installed { scrollbar-color: #2E2E2E; } +#model-legend { + height: 1; + padding: 0 2; + color: #71717A; +} + +#model-legend Static.legend-name { + width: 1fr; + color: #71717A; +} + +#model-legend Static.legend-col { + color: #71717A; +} + +Static.legend-params { + width: 8; +} + +Static.legend-quant { + width: 
10; +} + +Static.legend-disk { + width: 10; +} + +Static.legend-vram { + width: 12; +} + +Static.legend-status { + width: 10; +} + #pull-input-row { height: auto; layout: horizontal; From 8064d81143ca9f2b09c463ddd96e8b53a9e9b20c Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:10:35 -0400 Subject: [PATCH 21/32] Restore _unload_from_vram and add legend column separators --- .../usr/lib/neuraldrive/tui/screens/models.py | 29 ++++++++----------- .../usr/lib/neuraldrive/tui/styles.tcss | 5 ++++ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index e32c44f..ac18702 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -256,9 +256,13 @@ def compose(self) -> ComposeResult: with Horizontal(id="model-legend"): yield Static("Model", classes="legend-name") yield Static("Params", classes="legend-col legend-params") + yield Static("/", classes="legend-sep") yield Static("Quant", classes="legend-col legend-quant") + yield Static("/", classes="legend-sep") yield Static("Disk", classes="legend-col legend-disk") + yield Static("/", classes="legend-sep") yield Static("VRAM", classes="legend-col legend-vram") + yield Static("/", classes="legend-sep") yield Static("Status", classes="legend-col legend-status") yield VerticalScroll(id="model-list") yield Button( @@ -592,15 +596,10 @@ async def _start_pull(self, model_name: str) -> None: async def _load_to_vram(self, model_name: str) -> None: status = self.query_one("#model-status", Static) status.update(f"Loading {model_name} into VRAM...") - load_btn = None - try: - load_btn = self.query_one( - f"Button.model-load[name='{model_name}']", Button - ) - load_btn.label = "Loading…" + load_btn = self._find_model_button(model_name, "model-load") 
+ if load_btn: + load_btn.label = "Loading\u2026" load_btn.disabled = True - except Exception: - pass success = await api_client.load_model(model_name) if success: status.update(f" \u2713 {model_name} loaded into VRAM") @@ -609,18 +608,14 @@ async def _load_to_vram(self, model_name: str) -> None: await self._load_models() @work() - async def _load_to_vram(self, model_name: str) -> None: + async def _unload_from_vram(self, model_name: str) -> None: status = self.query_one("#model-status", Static) - status.update(f"Loading {model_name} into VRAM...") - load_btn = self._find_model_button(model_name, "model-load") - if load_btn: - load_btn.label = "Loading\u2026" - load_btn.disabled = True - success = await api_client.load_model(model_name) + status.update(f"Unloading {model_name}...") + success = await api_client.unload_model(model_name) if success: - status.update(f" \u2713 {model_name} loaded into VRAM") + status.update(f" \u2713 {model_name} unloaded from VRAM") else: - status.update(f" \u2717 Failed to load {model_name}") + status.update(f" \u2717 Failed to unload {model_name}") await self._load_models() def _find_model_button(self, model_name: str, btn_class: str) -> Button | None: diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss index ed3daf3..29cba6c 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss @@ -486,6 +486,11 @@ Button.catalog-installed { color: #71717A; } +#model-legend Static.legend-sep { + width: 1; + color: #52525B; +} + Static.legend-params { width: 8; } From fe0a28fbb8fc543740a9d383606513b7d496f0c3 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:15:31 -0400 Subject: [PATCH 22/32] Fix unload race condition and keep manually loaded models in VRAM Poll /api/ps after unload until model is actually evicted (Ollama returns 200 
before eviction completes). Await remove_children() to prevent stale widgets. Use keep_alive=-1 for manual loads so models stay loaded until explicitly unloaded. --- .../usr/lib/neuraldrive/tui/screens/models.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index ac18702..0e3252f 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -447,7 +447,7 @@ async def _load_models(self) -> None: config.set_key("vram_cache", vram_cache) container = self.query_one("#model-list", VerticalScroll) - container.remove_children() + await container.remove_children() self._model_items = [] if not all_models: @@ -600,7 +600,7 @@ async def _load_to_vram(self, model_name: str) -> None: if load_btn: load_btn.label = "Loading\u2026" load_btn.disabled = True - success = await api_client.load_model(model_name) + success = await api_client.load_model(model_name, keep_alive="-1") if success: status.update(f" \u2713 {model_name} loaded into VRAM") else: @@ -613,6 +613,14 @@ async def _unload_from_vram(self, model_name: str) -> None: status.update(f"Unloading {model_name}...") success = await api_client.unload_model(model_name) if success: + # Ollama returns 200 before the model is fully evicted from /api/ps. + # Poll until it disappears so the UI refresh sees the real state. 
+ for _ in range(10): + await asyncio.sleep(0.5) + running = await api_client.list_running_models() + running_names = {m.get("name", "") for m in running} + if model_name not in running_names: + break status.update(f" \u2713 {model_name} unloaded from VRAM") else: status.update(f" \u2717 Failed to unload {model_name}") From 4bc2f32123765064f9b7f6a0e2a5c71cdb6682a1 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:21:36 -0400 Subject: [PATCH 23/32] Fix keep_alive: pass integer -1 instead of string Ollama rejects "-1" with 'missing unit in duration', but accepts the integer -1 for infinite keep-alive. --- .../includes.chroot/usr/lib/neuraldrive/tui/screens/models.py | 2 +- .../includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index 0e3252f..4a73153 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -600,7 +600,7 @@ async def _load_to_vram(self, model_name: str) -> None: if load_btn: load_btn.label = "Loading\u2026" load_btn.disabled = True - success = await api_client.load_model(model_name, keep_alive="-1") + success = await api_client.load_model(model_name, keep_alive=-1) if success: status.update(f" \u2713 {model_name} loaded into VRAM") else: diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py index de61a27..93de154 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py @@ -56,7 +56,7 @@ async def delete_model(name: str) -> bool: return False -async def load_model(name: str, keep_alive: str = "5m") -> bool: 
+async def load_model(name: str, keep_alive: str | int = "5m") -> bool: try: async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, read=300.0)) as client: resp = await client.post( From 78fbc0df166706d0e47356fb539e3c21e1a59104 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:30:29 -0400 Subject: [PATCH 24/32] Redesign services screen to match models screen UX Each service gets its own row with inline Start/Stop/Restart buttons. Arrow keys navigate services (Up/Down) and buttons (Left/Right). Disabled buttons are skipped. Enter activates the highlighted button. Service status auto-polls every 5 seconds and updates in place. --- .../lib/neuraldrive/tui/screens/services.py | 180 +++++++++++------- .../usr/lib/neuraldrive/tui/styles.tcss | 108 +++++++++-- .../lib/neuraldrive/tui/widgets/__init__.py | 3 +- .../neuraldrive/tui/widgets/service_item.py | 56 ++++++ 4 files changed, 263 insertions(+), 84 deletions(-) create mode 100644 config/includes.chroot/usr/lib/neuraldrive/tui/widgets/service_item.py diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py index d4cda6c..f7a7daf 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py @@ -4,119 +4,169 @@ from textual import work from textual.app import ComposeResult -from textual.containers import Horizontal, Vertical, VerticalScroll +from textual.containers import VerticalScroll from textual.screen import Screen from textual.widgets import Button, Footer, Static +from textual.binding import Binding from widgets.safe_header import SafeHeader +from widgets.service_item import ServiceItem +from utils import hardware -from textual.binding import Binding -from utils import hardware +POLL_INTERVAL = 5 class ServicesScreen(Screen): BINDINGS = [ ("r", "refresh", "Refresh"), - 
Binding("up", "move_up", "Up", show=False), - Binding("down", "move_down", "Down", show=False), + Binding("up", "nav_up", show=False, priority=True), + Binding("down", "nav_down", show=False, priority=True), + Binding("left", "nav_left", show=False, priority=True), + Binding("right", "nav_right", show=False, priority=True), + Binding("enter", "activate", show=False, priority=True), ] def compose(self) -> ComposeResult: yield SafeHeader() - with VerticalScroll(): - yield Static("NeuralDrive Services", classes="heading") - yield Vertical(id="service-list") + yield Static("NeuralDrive Services", classes="heading") + yield VerticalScroll(id="svc-list") yield Static("", id="svc-status") - with Horizontal(id="svc-actions"): - yield Button("Start", id="svc-start", variant="primary") - yield Button("Stop", id="svc-stop", variant="error") - yield Button("Restart", id="svc-restart") yield Footer() def on_mount(self) -> None: - self._selected_index = 0 - self._services: list[tuple[str, str]] = [] + self._svc_items: list[ServiceItem] = [] + self._highlight_index = 0 + self._btn_index = 0 + self._poll_timer = self.set_interval(POLL_INTERVAL, self._poll_services) self.app.call_later(self._load_services) def on_screen_resume(self) -> None: self.app.call_later(self._load_services) + def on_screen_suspend(self) -> None: + pass + async def _load_services(self) -> None: - container = self.query_one("#service-list", Vertical) + container = self.query_one("#svc-list", VerticalScroll) await container.remove_children() - self._services = [] + self._svc_items = [] for svc in hardware.NEURALDRIVE_SERVICES: status = hardware.get_service_status(svc) - self._services.append((svc, status)) - - for i, (svc, status) in enumerate(self._services): short = svc.replace("neuraldrive-", "") - if status == "active": - indicator = "●" - cls = "svc-row svc-active" - else: - indicator = "○" - cls = "svc-row svc-inactive" - if i == self._selected_index: - cls += " svc-selected" - row = Static( - f" 
{indicator} {short:<20} {status}", classes=cls, id=f"svc-{i}" + item = ServiceItem(svc, short, status) + container.mount(item) + self._svc_items.append(item) + if self._svc_items: + self._highlight_index = min( + self._highlight_index, len(self._svc_items) - 1 ) - await container.mount(row) + self._btn_index = 0 + self._apply_highlight() + + async def _poll_services(self) -> None: + for item in self._svc_items: + status = hardware.get_service_status(item.name) + item.update_status(status) + if self._svc_items: + self._apply_btn_highlight() + + def _get_active_buttons(self) -> list[Button]: + if not self._svc_items: + return [] + item = self._svc_items[self._highlight_index] + return [b for b in item.get_action_buttons() if not b.disabled] + + def _apply_highlight(self) -> None: + self._clear_btn_highlight() + for i, item in enumerate(self._svc_items): + if i == self._highlight_index: + item.add_class("svc-highlighted") + item.scroll_visible() + self._apply_btn_highlight() + else: + item.remove_class("svc-highlighted") - self._update_action_buttons() + def _clear_highlight(self) -> None: + self._clear_btn_highlight() + for item in self._svc_items: + item.remove_class("svc-highlighted") - def _update_action_buttons(self) -> None: - if not self._services: + def _apply_btn_highlight(self) -> None: + buttons = self._get_active_buttons() + if not buttons: return - _, status = self._services[self._selected_index] - self.query_one("#svc-start", Button).disabled = status == "active" - self.query_one("#svc-stop", Button).disabled = status != "active" - - def action_move_up(self) -> None: - if self._selected_index > 0: - self._selected_index -= 1 - self.app.call_later(self._load_services) - - def action_move_down(self) -> None: - if self._selected_index < len(self._services) - 1: - self._selected_index += 1 - self.app.call_later(self._load_services) + self._btn_index = max(0, min(self._btn_index, len(buttons) - 1)) + for item in self._svc_items: + for btn in 
item.get_action_buttons(): + btn.remove_class("svc-btn-active") + for i, btn in enumerate(buttons): + if i == self._btn_index: + btn.add_class("svc-btn-active") + else: + btn.remove_class("svc-btn-active") + + def _clear_btn_highlight(self) -> None: + for item in self._svc_items: + for btn in item.get_action_buttons(): + btn.remove_class("svc-btn-active") + + def action_nav_up(self) -> None: + if self._svc_items and self._highlight_index > 0: + self._highlight_index -= 1 + self._btn_index = 0 + self._apply_highlight() + + def action_nav_down(self) -> None: + if self._svc_items and self._highlight_index < len(self._svc_items) - 1: + self._highlight_index += 1 + self._btn_index = 0 + self._apply_highlight() + + def action_nav_left(self) -> None: + if self._btn_index > 0: + self._btn_index -= 1 + self._apply_btn_highlight() + + def action_nav_right(self) -> None: + buttons = self._get_active_buttons() + if self._btn_index < len(buttons) - 1: + self._btn_index += 1 + self._apply_btn_highlight() + + def action_activate(self) -> None: + buttons = self._get_active_buttons() + if buttons and 0 <= self._btn_index < len(buttons): + buttons[self._btn_index].press() def on_button_pressed(self, event: Button.Pressed) -> None: - btn_id = event.button.id or "" - if btn_id == "svc-start": - self._run_action("start") - elif btn_id == "svc-stop": - self._run_action("stop") - elif btn_id == "svc-restart": - self._run_action("restart") + btn = event.button + if btn.has_class("svc-start"): + self._run_action(btn.name or "", "start") + elif btn.has_class("svc-stop"): + self._run_action(btn.name or "", "stop") + elif btn.has_class("svc-restart"): + self._run_action(btn.name or "", "restart") @work(exclusive=True) - async def _run_action(self, action: str) -> None: - if not self._services: - return - svc, _ = self._services[self._selected_index] - short = svc.replace("neuraldrive-", "") + async def _run_action(self, service: str, action: str) -> None: + short = 
service.replace("neuraldrive-", "") status_widget = self.query_one("#svc-status", Static) status_widget.update(f" {action.title()}ing {short}...") - try: res = subprocess.run( - ["sudo", "systemctl", action, svc], + ["sudo", "systemctl", action, service], capture_output=True, text=True, timeout=15, ) if res.returncode == 0: - status_widget.update(f" ✓ {short} {action}ed") + status_widget.update(f" \u2713 {short} {action}ed") else: - status_widget.update(f" ✗ {short}: {res.stderr.strip()}") + status_widget.update(f" \u2717 {short}: {res.stderr.strip()}") except subprocess.TimeoutExpired: - status_widget.update(f" ✗ {short}: timeout") - - self.app.call_later(self._load_services) + status_widget.update(f" \u2717 {short}: timeout") + await self._poll_services() def action_refresh(self) -> None: self.app.call_later(self._load_services) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss index 29cba6c..8ac54d0 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss @@ -201,44 +201,116 @@ Button.model-btn-active { text-style: bold reverse; } -.svc-row { - height: 3; +.svc-item { + layout: horizontal; + height: 5; padding: 0 2; border: solid #2E2E2E; - margin: 0 0 0 0; + margin: 0 0 1 0; background: #141414; +} + +.svc-item:hover { + background: #1F1F1F; +} + +.svc-highlighted { + background: #1F1F1F; + border: solid #F59E0B; +} + +.svc-item Static { + height: 100%; content-align: left middle; } -.svc-active { +.svc-item Static.svc-name { + color: #FFFFFF; + text-style: bold; + width: 1fr; +} + +.svc-item Static.svc-state { + width: 16; +} + +.svc-status-active { + color: #10B981; +} + +.svc-status-inactive { + color: #EF4444; +} + +Button.svc-start { + background: #1F1F1F; color: #10B981; + border: solid #10B981; + min-width: 10; + width: 10; + height: 3; +} + +Button.svc-start:hover { + background: #0A1F0A; +} + 
+Button.svc-start:disabled { + background: #0A0A0A; + color: #52525B; + border: solid #1A1A1A; } -.svc-inactive { +Button.svc-stop { + background: #1F1F1F; color: #EF4444; + border: solid #EF4444; + min-width: 10; + width: 10; + height: 3; +} + +Button.svc-stop:hover { + background: #1F0A0A; } -.svc-selected { +Button.svc-stop:disabled { + background: #0A0A0A; + color: #52525B; + border: solid #1A1A1A; +} + +Button.svc-restart { background: #1F1F1F; + color: #F59E0B; border: solid #F59E0B; + min-width: 10; + width: 10; + height: 3; } -#svc-status { - height: 1; - padding: 0 2; - dock: bottom; - offset: 0 -4; +Button.svc-restart:hover { + background: #1F1A0A; } -#svc-actions { - height: auto; - padding: 0 1; - dock: bottom; - align: center middle; +Button.svc-restart:disabled { + background: #0A0A0A; + color: #52525B; + border: solid #1A1A1A; } -#svc-actions Button { - margin: 0 1; +Button.svc-btn-active { + border: tall #F59E0B; + text-style: bold reverse; +} + +#svc-list { + height: 1fr; +} + +#svc-status { + height: 1; + padding: 0 2; } Button { diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/__init__.py b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/__init__.py index 10a5e57..4e1d144 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/__init__.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/__init__.py @@ -1,5 +1,6 @@ from widgets.stats_box import StatsBox from widgets.model_item import ModelItem from widgets.safe_header import SafeHeader +from widgets.service_item import ServiceItem -__all__ = ["StatsBox", "ModelItem", "SafeHeader"] +__all__ = ["StatsBox", "ModelItem", "SafeHeader", "ServiceItem"] diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/service_item.py b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/service_item.py new file mode 100644 index 0000000..74f7e82 --- /dev/null +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/service_item.py @@ -0,0 +1,56 @@ 
+from __future__ import annotations + +from textual.app import ComposeResult +from textual.containers import Horizontal +from textual.widgets import Button, Static + + +class ServiceItem(Horizontal): + can_focus = False + + def __init__(self, service: str, display_name: str, status: str) -> None: + super().__init__(name=service, classes="svc-item") + self._service = service + self._display_name = display_name + self._status = status + + def compose(self) -> ComposeResult: + active = self._status == "active" + indicator = "\u25cf" if active else "\u25cb" + status_cls = "svc-status-active" if active else "svc-status-inactive" + yield Static(self._display_name, classes="svc-name") + yield Static(f"{indicator} {self._status}", classes=f"svc-state {status_cls}") + start_btn = Button("Start", name=self._service, classes="svc-start") + stop_btn = Button("Stop", name=self._service, classes="svc-stop") + restart_btn = Button("Restart", name=self._service, classes="svc-restart") + start_btn.can_focus = False + stop_btn.can_focus = False + restart_btn.can_focus = False + if active: + start_btn.disabled = True + else: + stop_btn.disabled = True + restart_btn.disabled = True + yield start_btn + yield stop_btn + yield restart_btn + + def get_action_buttons(self) -> list[Button]: + return list(self.query("Button")) + + def update_status(self, status: str) -> None: + self._status = status + active = status == "active" + indicator = "\u25cf" if active else "\u25cb" + status_cls = "svc-status-active" if active else "svc-status-inactive" + state_widget = self.query_one(".svc-state", Static) + state_widget.update(f"{indicator} {status}") + state_widget.remove_class("svc-status-active", "svc-status-inactive") + state_widget.add_class(status_cls) + for btn in self.query("Button"): + if btn.has_class("svc-start"): + btn.disabled = active + elif btn.has_class("svc-stop"): + btn.disabled = not active + elif btn.has_class("svc-restart"): + btn.disabled = not active From 
c8e3a710e2d0a9132e12b4ca266e8d4f1e8fee2f Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:34:03 -0400 Subject: [PATCH 25/32] Remap screen hotkeys to F1-F5: Dash, Models, Svc, Logs, Chat --- config/includes.chroot/usr/lib/neuraldrive/tui/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/main.py b/config/includes.chroot/usr/lib/neuraldrive/tui/main.py index ede4187..f563843 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/main.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/main.py @@ -69,11 +69,11 @@ class NeuralDriveTUI(App): ENABLE_COMMAND_PALETTE = False BINDINGS = [ - Binding("f2", "switch_screen('dashboard')", "F2 Dash", priority=True), - Binding("f3", "switch_screen('models')", "F3 Models", priority=True), - Binding("f4", "switch_screen('services')", "F4 Svc", priority=True), + Binding("f1", "switch_screen('dashboard')", "F1 Dash", priority=True), + Binding("f2", "switch_screen('models')", "F2 Models", priority=True), + Binding("f3", "switch_screen('services')", "F3 Svc", priority=True), + Binding("f4", "switch_screen('logs')", "F4 Logs", priority=True), Binding("f5", "switch_screen('chat')", "F5 Chat", priority=True), - Binding("f6", "switch_screen('logs')", "F6 Logs", priority=True), Binding("q", "quit", "Quit"), Binding("up", "focus_previous", "Previous", show=False), Binding("down", "focus_next", "Next", show=False), From ea9fcfcc1476b2c9173a2c4f91455796d01c8ba8 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:38:28 -0400 Subject: [PATCH 26/32] Guard service poll timer against widget rebuild race Poll fires every 5s but _load_services clears and remounts items. Skip poll while _loading flag is set to avoid NoMatches on .svc-state. 
--- .../usr/lib/neuraldrive/tui/screens/services.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py index f7a7daf..d8e669d 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py @@ -38,6 +38,7 @@ def on_mount(self) -> None: self._svc_items: list[ServiceItem] = [] self._highlight_index = 0 self._btn_index = 0 + self._loading = False self._poll_timer = self.set_interval(POLL_INTERVAL, self._poll_services) self.app.call_later(self._load_services) @@ -48,6 +49,7 @@ def on_screen_suspend(self) -> None: pass async def _load_services(self) -> None: + self._loading = True container = self.query_one("#svc-list", VerticalScroll) await container.remove_children() self._svc_items = [] @@ -63,13 +65,15 @@ async def _load_services(self) -> None: ) self._btn_index = 0 self._apply_highlight() + self._loading = False async def _poll_services(self) -> None: + if self._loading or not self._svc_items: + return for item in self._svc_items: status = hardware.get_service_status(item.name) item.update_status(status) - if self._svc_items: - self._apply_btn_highlight() + self._apply_btn_highlight() def _get_active_buttons(self) -> list[Button]: if not self._svc_items: From 2a704c6d2262a273a6b81aad857a1e11fcf7c5e3 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:46:41 -0400 Subject: [PATCH 27/32] Allow concurrent model loading and persist Ollama config Set OLLAMA_MAX_LOADED_MODELS=0 (auto) so Ollama manages concurrency based on available VRAM. Add persistent EnvironmentFile override so config on /var/lib/neuraldrive/config/ollama.conf survives reboots, falling back to baked-in defaults when persistent disk is unavailable. 
--- config/hooks/live/05-generate-configs.chroot | 2 +- config/includes.chroot/etc/neuraldrive/ollama.conf | 2 +- .../etc/systemd/system/neuraldrive-ollama.service | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/config/hooks/live/05-generate-configs.chroot b/config/hooks/live/05-generate-configs.chroot index 94557b2..d78e2f9 100755 --- a/config/hooks/live/05-generate-configs.chroot +++ b/config/hooks/live/05-generate-configs.chroot @@ -7,7 +7,7 @@ cat > /etc/neuraldrive/ollama.conf << 'EOF' OLLAMA_HOST=127.0.0.1:11434 OLLAMA_MODELS=/var/lib/neuraldrive/models/ OLLAMA_KEEP_ALIVE=5m -OLLAMA_MAX_LOADED_MODELS=1 +OLLAMA_MAX_LOADED_MODELS=0 OLLAMA_NUM_PARALLEL=1 EOF diff --git a/config/includes.chroot/etc/neuraldrive/ollama.conf b/config/includes.chroot/etc/neuraldrive/ollama.conf index 868447a..1c0ff05 100644 --- a/config/includes.chroot/etc/neuraldrive/ollama.conf +++ b/config/includes.chroot/etc/neuraldrive/ollama.conf @@ -1,5 +1,5 @@ OLLAMA_HOST=127.0.0.1:11434 OLLAMA_MODELS=/var/lib/neuraldrive/models/ OLLAMA_KEEP_ALIVE=5m -OLLAMA_MAX_LOADED_MODELS=1 +OLLAMA_MAX_LOADED_MODELS=0 OLLAMA_NUM_PARALLEL=1 diff --git a/config/includes.chroot/etc/systemd/system/neuraldrive-ollama.service b/config/includes.chroot/etc/systemd/system/neuraldrive-ollama.service index 2029529..27c2ffd 100644 --- a/config/includes.chroot/etc/systemd/system/neuraldrive-ollama.service +++ b/config/includes.chroot/etc/systemd/system/neuraldrive-ollama.service @@ -6,6 +6,7 @@ Requires=neuraldrive-gpu-detect.service [Service] Environment=HOME=/var/lib/neuraldrive/ollama EnvironmentFile=/etc/neuraldrive/ollama.conf +EnvironmentFile=-/var/lib/neuraldrive/config/ollama.conf ExecStartPre=/usr/bin/mkdir -p /var/lib/neuraldrive/models ExecStartPre=-/sbin/modprobe nvidia-current-uvm ExecStartPre=-/usr/bin/nvidia-modprobe -u From 37d0330d7b8730582613ef3b9e4a428d085ad4f7 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:48:59 -0400 
Subject: [PATCH 28/32] Widen services Restart button to fit label --- config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss index 8ac54d0..a2480d3 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss @@ -284,8 +284,8 @@ Button.svc-restart { background: #1F1F1F; color: #F59E0B; border: solid #F59E0B; - min-width: 10; - width: 10; + min-width: 11; + width: 11; height: 3; } From 2160fa8f327a45222b6611eefc2517a92cfa40eb Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:52:22 -0400 Subject: [PATCH 29/32] Create webui data directory on persistence partition Wizard was missing /var/lib/neuraldrive/webui from the directory list, causing systemd NAMESPACE failure (status=226) when ReadWritePaths referenced the missing path. 
--- .../usr/lib/neuraldrive/tui/screens/wizard.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py index 3812e4b..fa28378 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py @@ -425,6 +425,7 @@ def _create_persistence_partition(self) -> str | None: "/mnt/persistence/var/lib/neuraldrive/ollama/.ollama", "/mnt/persistence/var/lib/neuraldrive/models", "/mnt/persistence/var/lib/neuraldrive/config", + "/mnt/persistence/var/lib/neuraldrive/webui", "/mnt/persistence/var/log/neuraldrive", "/mnt/persistence/etc/neuraldrive", "/mnt/persistence/home", @@ -453,6 +454,21 @@ def _create_persistence_partition(self) -> str | None: if proc.returncode != 0: return f"chown failed: {proc.stderr.strip()}" + proc = subprocess.run( + [ + "sudo", + "chown", + "-R", + "neuraldrive-webui:neuraldrive-webui", + "/mnt/persistence/var/lib/neuraldrive/webui", + ], + capture_output=True, + text=True, + timeout=5, + ) + if proc.returncode != 0: + return f"chown webui failed: {proc.stderr.strip()}" + proc = subprocess.run( ["sudo", "umount", "/mnt/persistence"], capture_output=True, From 7692b40738dc2878089c498168b58718edf36917 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:07:34 -0400 Subject: [PATCH 30/32] fix git urls --- README.md | 2 +- docs/dev-guide/book.toml | 4 ++-- docs/landing/index.html | 2 +- docs/user-guide/book.toml | 4 ++-- docs/user-guide/src/advanced/custom-images.md | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5c95fd5..010e4ae 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,7 @@ NeuralDrive images are built using Debian's `live-build` toolchain inside a Dock ```bash # Clone and build -git clone 
https://github.com/NeuralDrive/NeuralDrive.git +git clone https://github.com/Rightbracket/NeuralDrive.git cd NeuralDrive docker compose run --rm builder diff --git a/docs/dev-guide/book.toml b/docs/dev-guide/book.toml index 48d9143..e0f3eb9 100644 --- a/docs/dev-guide/book.toml +++ b/docs/dev-guide/book.toml @@ -12,8 +12,8 @@ build-dir = "book" default-theme = "coal" preferred-dark-theme = "coal" site-url = "/NeuralDrive/dev-guide/" -git-repository-url = "https://github.com/NeuralDrive/NeuralDrive" -edit-url-template = "https://github.com/NeuralDrive/NeuralDrive/edit/main/docs/dev-guide/src/{path}" +git-repository-url = "https://github.com/Rightbracket/NeuralDrive" +edit-url-template = "https://github.com/Rightbracket/NeuralDrive/edit/main/docs/dev-guide/src/{path}" additional-css = ["custom.css"] [output.html.search] diff --git a/docs/landing/index.html b/docs/landing/index.html index 65dddd5..a37872b 100644 --- a/docs/landing/index.html +++ b/docs/landing/index.html @@ -97,7 +97,7 @@

Developer Guide

diff --git a/docs/user-guide/book.toml b/docs/user-guide/book.toml index 719e8cd..faeb892 100644 --- a/docs/user-guide/book.toml +++ b/docs/user-guide/book.toml @@ -12,8 +12,8 @@ build-dir = "book" default-theme = "coal" preferred-dark-theme = "coal" site-url = "/NeuralDrive/user-guide/" -git-repository-url = "https://github.com/NeuralDrive/NeuralDrive" -edit-url-template = "https://github.com/NeuralDrive/NeuralDrive/edit/main/docs/user-guide/src/{path}" +git-repository-url = "https://github.com/Rightbracket/NeuralDrive" +edit-url-template = "https://github.com/Rightbracket/NeuralDrive/edit/main/docs/user-guide/src/{path}" additional-css = ["custom.css"] [output.html.search] diff --git a/docs/user-guide/src/advanced/custom-images.md b/docs/user-guide/src/advanced/custom-images.md index f19a883..16b8a71 100644 --- a/docs/user-guide/src/advanced/custom-images.md +++ b/docs/user-guide/src/advanced/custom-images.md @@ -20,7 +20,7 @@ Follow these steps to generate a custom NeuralDrive ISO. Clone the NeuralDrive repository and navigate to the builder directory. ```bash -git clone https://github.com/NeuralDrive/NeuralDrive.git +git clone https://github.com/Rightbracket/NeuralDrive.git cd NeuralDrive/builder ``` From 5dec79c58bc50a7c7d4fe6abd99f6d0829ef80da Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:18:49 -0400 Subject: [PATCH 31/32] Update documentation to reflect TUI redesign, GPU fixes, and config changes Rewrite 17 docs files across user-guide and dev-guide to match the current implementation after the TUI UX overhaul, GPU/VRAM fixes, and Ollama configuration changes. 
Key updates: - Replace old single-letter hotkeys with F1-F5 function key nav - Rewrite models and services screen docs for zone-based navigation - Correct first-boot wizard steps, sentinel file path, and --wizard flag - Update OLLAMA_MAX_LOADED_MODELS from 1 to 0 (auto/LRU eviction) - Document DeviceAllow removal (cgroup v2 eBPF incompatibility) - Document nvidia-current-uvm module naming and boot-time loading - Add nvidia-uvm and cgroup v2 GPU troubleshooting sections - Add persistent config override (EnvironmentFile) documentation - Document crash dump logging, VRAM cache, and chat model selector --- docs/dev-guide/src/architecture/security.md | 3 +- .../src/components/first-boot-wizard.md | 39 ++++++----- .../dev-guide/src/components/gpu-detection.md | 17 ++++- docs/dev-guide/src/components/ollama.md | 25 ++++++- docs/dev-guide/src/components/tui.md | 54 +++++++++++++-- docs/user-guide/src/advanced/performance.md | 4 +- .../src/getting-started/first-boot.md | 24 ++++--- docs/user-guide/src/models/recommendations.md | 8 +-- docs/user-guide/src/reference/config-files.md | 20 ++++-- docs/user-guide/src/reference/services.md | 5 +- docs/user-guide/src/troubleshooting/gpu.md | 27 ++++++++ docs/user-guide/src/troubleshooting/models.md | 8 ++- docs/user-guide/src/using/local-chat.md | 8 +-- docs/user-guide/src/using/models-tui.md | 67 +++++++++++-------- docs/user-guide/src/using/tui-dashboard.md | 15 ++--- docs/user-guide/src/using/tui-services.md | 47 ++++++------- docs/user-guide/src/using/tui.md | 55 ++++++++++----- 17 files changed, 288 insertions(+), 138 deletions(-) diff --git a/docs/dev-guide/src/architecture/security.md b/docs/dev-guide/src/architecture/security.md index 618c659..db81a90 100644 --- a/docs/dev-guide/src/architecture/security.md +++ b/docs/dev-guide/src/architecture/security.md @@ -31,7 +31,8 @@ Every service unit employs advanced systemd hardening directives: - `ProtectHome=yes`: Access to `/home` is denied. 
- `PrivateTmp=yes`: A private `/tmp` directory is created. - `NoNewPrivileges=yes`: Prevents the service and its children from gaining new privileges via `setuid` binaries. -- `DeviceAllow`: Only the necessary GPU devices (`/dev/nvidia*`, `/dev/dri/*`) are permitted for the Ollama service. +- `PrivateDevices=no`: Explicitly disabled for the Ollama service to allow access to GPU device nodes (`/dev/nvidia*`, `/dev/dri/*`) required for accelerated inference. +- **DeviceAllow removal**: All `DeviceAllow` lines were removed from the Ollama service unit. On cgroup v2 systems, `DeviceAllow` uses eBPF device filters that blocked CUDA access even with explicit allow rules for GPU devices. Removing these rules was necessary to enable reliable GPU acceleration. ### 3. Authentication and Authorization NeuralDrive uses a dual-key system for authentication: diff --git a/docs/dev-guide/src/components/first-boot-wizard.md b/docs/dev-guide/src/components/first-boot-wizard.md index 9369ca2..c547c81 100644 --- a/docs/dev-guide/src/components/first-boot-wizard.md +++ b/docs/dev-guide/src/components/first-boot-wizard.md @@ -6,37 +6,44 @@ The First-Boot Wizard is a specialized mode of the TUI that guides the user thro ## Execution Trigger -The wizard is triggered by the `neuraldrive-setup.service`. This service checks for the existence of `/etc/neuraldrive/.setup-complete`. If this file is missing, the service: -1. Blocks the standard TTY login. -2. Launches the TUI in "Setup Mode". -3. Prevents any other NeuralDrive application services (Ollama, WebUI, Caddy) from starting until setup is finished. +The wizard is not a separate service. It is an integrated component of the TUI application (`main.py`). Upon startup, the TUI checks for the existence of the sentinel file `/etc/neuraldrive/first-boot-complete`. If this file is missing, the TUI presents the wizard interface before allowing access to the main dashboard. 
## Wizard Flow -The wizard consists of seven mandatory steps: +The wizard consists of the following steps: 1. **Welcome**: Introduction and hardware verification. -2. **Network**: Configuration of Ethernet or Wi-Fi. -3. **Persistence**: Detection and optional encryption (LUKS2) of the persistent partition. -4. **Credentials**: Setting the `neuraldrive-admin` password and generating the initial API key. -5. **Branding**: Setting the system hostname and mDNS name. -6. **Model Selection**: Choosing a "Small", "Medium", or "Large" model to pre-download. -7. **Finalization**: Writing configuration files, generating TLS certs, and creating the sentinel file. +2. **Storage/Persistence**: Detects the boot device, creates the persistence partition, and initializes the directory structure: + - `/var/lib/neuraldrive/ollama` + - `/var/lib/neuraldrive/models` + - `/var/lib/neuraldrive/config` + - `/var/lib/neuraldrive/webui` + - `/var/lib/neuraldrive/logs` +3. **Security**: Prompts for the `neuraldrive-admin` password and generates initial credentials. +4. **Network**: Configuration of Ethernet or Wi-Fi. +5. **Models**: Selection of initial models for download. +6. **Done**: Finalizes configuration and generates the sentinel file. ## Credential Generation -- **Admin Password**: The user is prompted to enter a password for the `neuraldrive-admin` account. -- **API Key**: The system automatically generates a 32-character random string, prefixed with `nd-`. This key is displayed to the user once and then stored in `/etc/neuraldrive/api.key`. +- **Admin Password**: The user is prompted to set the password for the `neuraldrive-admin` account. +- **API Key**: The system automatically generates a 32-character random string, prefixed with `nd-`. This key is displayed once and then stored in the persistence layer. ## Sentinel File -Once the user completes the wizard, the script runs `touch /etc/neuraldrive/.setup-complete`. 
This ensures that subsequent reboots proceed directly to the standard dashboard. +Completion of the wizard creates the sentinel file at `/etc/neuraldrive/first-boot-complete`. This ensures that subsequent reboots bypass the wizard and proceed directly to the standard dashboard. + +## CLI Re-run + +To re-run the wizard on a configured system, use the following command: +`neuraldrive-tui --wizard` +This command removes the sentinel file, forcing the wizard to launch on the next application start. ## Customizing the Wizard The wizard logic is integrated into the TUI application. To add a new step: -1. Create a new `Screen` class in `usr/lib/neuraldrive/tui/screens/wizard.py` or in a new screen file within `screens/`. +1. Create a new `Screen` class in the `screens/` directory. 2. Add the screen to the wizard orchestration loop in `main.py`. -> **Note**: For development, you can re-trigger the wizard on a running system by deleting the sentinel file and restarting the `neuraldrive-setup` service. **Warning**: This may overwrite existing credentials and configuration. +> **Note**: For development, you can re-trigger the wizard by using the `--wizard` flag. **Warning**: This may overwrite existing credentials and configuration. diff --git a/docs/dev-guide/src/components/gpu-detection.md b/docs/dev-guide/src/components/gpu-detection.md index 989174f..5f74d64 100644 --- a/docs/dev-guide/src/components/gpu-detection.md +++ b/docs/dev-guide/src/components/gpu-detection.md @@ -17,12 +17,25 @@ The script runs during the `neuraldrive-gpu-detect.service` phase. It performs t ### NVIDIA If an NVIDIA card is detected (PCI vendor ID `10de`), the script: -- Loads the `nvidia`, `nvidia-uvm`, and `nvidia-drm` modules via `modprobe`. +- Loads the `nvidia`, `nvidia-current-uvm`, and `nvidia-drm` modules via `modprobe`. Note that on Debian systems, the CUDA Unified Video Memory module is named `nvidia-current-uvm`, not `nvidia-uvm`. 
+- Executes `nvidia-modprobe -u` to create the `/dev/nvidia-uvm` and `/dev/nvidia-uvm-tools` device nodes. Without these nodes, CUDA memory allocation fails silently, and Ollama falls back to CPU. - Enables persistence mode with `nvidia-smi -pm 1`. - Sets `VENDOR=NVIDIA` in the config file. - If module loading fails, records `NVIDIA_MODULE_MISSING=true`. -### AMD +## Boot-Time Module Loading + +In addition to the detection script, the system includes `/etc/modules-load.d/nvidia-uvm.conf`. This file contains `nvidia-current-uvm` to ensure the module is automatically loaded at boot. + +## Ollama Service Integration + +As a safety net, the Ollama systemd unit also includes `ExecStartPre` commands for both `modprobe nvidia-current-uvm` and `nvidia-modprobe -u`. This ensures the necessary drivers and device nodes are present even if the primary detection service is delayed. + +## cgroup v2 and Device Access + +On systems using cgroup v2, standard `DeviceAllow` rules in systemd units utilize eBPF filters that can inadvertently block CUDA access, even when explicit allow rules are defined. NeuralDrive avoids this by removing all `DeviceAllow` directives from the Ollama service and relying on `PrivateDevices=no` instead. + +## AMD If an AMD card is detected (PCI vendor ID `1002`), the script: - Loads the `amdgpu` module. - Sets `VENDOR=AMD`. diff --git a/docs/dev-guide/src/components/ollama.md b/docs/dev-guide/src/components/ollama.md index 9b22cb9..58f16b3 100644 --- a/docs/dev-guide/src/components/ollama.md +++ b/docs/dev-guide/src/components/ollama.md @@ -15,20 +15,39 @@ The `neuraldrive-ollama.service` manages the lifecycle of the inference engine. ### Service Unit Highlights - **User**: Runs as `neuraldrive-ollama` (UID 901). - **Dependencies**: `Requires=neuraldrive-gpu-detect.service`. -- **Security**: Uses `DeviceAllow` to restrict access to only relevant GPU device nodes. +- **Security**: The service uses `PrivateDevices=no` to allow GPU access. 
Note that all `DeviceAllow` directives were removed because cgroup v2's eBPF device filter blocked CUDA access even with explicit allow rules. - **Resource Limits**: - `MemoryHigh=90%`: Triggers aggressive swapping/GC when system memory is nearly full. - `MemoryMax=95%`: The hard limit before the OOM killer intervenes. +- **GPU Initialization**: The unit includes `ExecStartPre` commands to ensure CUDA is ready: + - `ExecStartPre=-/sbin/modprobe nvidia-current-uvm`: Loads the CUDA Unified Video Memory module (named `nvidia-current-uvm` in the Debian package). + - `ExecStartPre=-/usr/bin/nvidia-modprobe -u`: Creates the `/dev/nvidia-uvm` and `/dev/nvidia-uvm-tools` device nodes. + +### Persistent Config Overrides +The service unit includes two `EnvironmentFile` directives to manage configuration: +1. `EnvironmentFile=/etc/neuraldrive/ollama.conf`: Contains baked-in system defaults. +2. `EnvironmentFile=-/var/lib/neuraldrive/config/ollama.conf`: Allows persistent user-defined overrides. The `-` prefix ensures the service starts even if this file is missing. ## Configuration (ollama.conf) -System-wide settings are stored in `/etc/neuraldrive/ollama.conf`: +System-wide settings are defined in the environment files: - `OLLAMA_HOST=127.0.0.1:11434`: Ensures the API is only accessible locally (proxied by Caddy). - `OLLAMA_MODELS=/var/lib/neuraldrive/models/`: Directs model weights to the persistence layer. - `OLLAMA_KEEP_ALIVE=5m`: Models are unloaded from VRAM after 5 minutes of inactivity. -- `OLLAMA_MAX_LOADED_MODELS=1`: Limits concurrent model loading to prevent VRAM exhaustion on smaller cards. +- `OLLAMA_MAX_LOADED_MODELS=0`: Set to `0` for auto mode. Ollama manages multiple models based on available VRAM using LRU (Least Recently Used) eviction. - `OLLAMA_NUM_PARALLEL=1`: Processes one request at a time to maintain deterministic performance. 
+## API Usage Details + +### Loading Models +To load a model, send a `POST` request to `/api/generate` with `keep_alive` set to `-1`. Note that `keep_alive` must be an integer; passing it as a string ("-1") will result in a rejection. + +### Unloading Models +To unload a model, send a `POST` request to `/api/generate` with `keep_alive` set to `0`. To verify the eviction, poll `/api/ps` until the model no longer appears. A race condition exists where the 200 OK response may return before the eviction process is fully complete. + +### Monitoring +`GET /api/ps` returns a list of running models, including the `size_vram` utilized by each. + ## GPU Support Ollama automatically detects the compute provider based on the drivers loaded by `gpu-detect.sh`. diff --git a/docs/dev-guide/src/components/tui.md b/docs/dev-guide/src/components/tui.md index 7f7cd19..692e8a5 100644 --- a/docs/dev-guide/src/components/tui.md +++ b/docs/dev-guide/src/components/tui.md @@ -19,15 +19,54 @@ The default screen showing: - mDNS address (`neuraldrive.local`). - CPU, Memory, and Disk usage gauges. - GPU status overview. +Manual refresh is available via the **R** key, alongside a live clock. + +### Models +Lists all LLM models currently stored in the persistence layer. Shows model name and metadata columns (params, quantization, disk size, VRAM usage, and status). Users can Load, Unload, or Delete models. This screen refreshes automatically on user action. ### Services -Provides a list of all NeuralDrive systemd units with their current status (active, inactive, failed). Users can select a service to view its recent logs or trigger a restart. +Provides a list of all NeuralDrive systemd units with their current status (active, inactive, failed). Users can select a service to view its recent logs or trigger a restart. This screen auto-polls every 5 seconds. -### Models -Lists all LLM models currently stored in the persistence layer. 
Shows model size and allows users to delete unused models to free up disk space. +### Logs +System-wide log viewer for NeuralDrive services and kernel messages. + +### Chat +A lightweight chat interface allowing users to test models locally. It includes a model selector dropdown and supports streaming responses via `@work(exclusive=True)`. Model selection persists across screen switches. + +## Hotkeys + +- **F1**: Dashboard +- **F2**: Models +- **F3**: Services +- **F4**: Logs +- **F5**: Chat +- **Q**: Quit + +## Navigation Model + +The TUI uses a zone-based focus system. +- **Tab / Shift+Tab**: Cycle focus between different zones within a screen. +- **Arrow Keys**: Navigate within the currently focused zone. +- **Enter**: Activate the selected item or button. + +## Custom Widgets + +Several custom composite widgets are used to build the interface: +- `SafeHeader`: A subclass of Textual's `Header` that catches `NoMatches` exceptions during `_on_mount`, working around Textual bug #4258. +- `ServiceItem`: Displays service name, status label, and control buttons (Start, Stop, Restart). +- `ModelItem`: Displays model name, metadata, and action buttons (Load, Unload, Delete). + +## Crash Dump Logging + +The TUI overrides `App._handle_exception` to write crash dumps to `/var/lib/neuraldrive/logs/tui-crash-*.log` with a full traceback. The entire `main()` function is also wrapped in a try/except block to catch crashes occurring outside the Textual event loop. Screenshots are saved to `/var/lib/neuraldrive/screenshots/`. + +## CLI Flags + +- `--wizard`: Removes the sentinel file (`/etc/neuraldrive/first-boot-complete`) and forces the first-boot wizard to re-run on the next launch. + +## Command Palette -### Networking -Allows basic network configuration, such as switching between DHCP and static IP, or configuring a Wi-Fi connection. +The Textual command palette is explicitly disabled (`ENABLE_COMMAND_PALETTE = False`). 
## Auto-Login and Startup @@ -43,9 +82,10 @@ The source code for the TUI is located at `/usr/lib/neuraldrive/tui/`. ## Refresh Intervals +- **Dashboard**: Manual refresh (R key) with live clock. +- **Services**: Auto-polls every 5 seconds. +- **Models**: Refreshes on user action. - **System Metrics**: Refreshed every 2 seconds. -- **Service Status**: Refreshed every 5 seconds. -- **Network Info**: Refreshed only on request or after a configuration change. ## Modifying the TUI diff --git a/docs/user-guide/src/advanced/performance.md b/docs/user-guide/src/advanced/performance.md index 245d6d7..cf2165a 100644 --- a/docs/user-guide/src/advanced/performance.md +++ b/docs/user-guide/src/advanced/performance.md @@ -12,7 +12,7 @@ The primary backend service, Ollama, is controlled via `/etc/neuraldrive/ollama. OLLAMA_HOST=127.0.0.1:11434 OLLAMA_MODELS=/var/lib/neuraldrive/models/ OLLAMA_KEEP_ALIVE=5m -OLLAMA_MAX_LOADED_MODELS=1 +OLLAMA_MAX_LOADED_MODELS=0 OLLAMA_NUM_PARALLEL=1 ``` @@ -20,7 +20,7 @@ OLLAMA_NUM_PARALLEL=1 * **OLLAMA_NUM_PARALLEL**: (Integer) The number of concurrent requests the server will handle. Increase this for multi-user environments, though this will increase VRAM usage. * **OLLAMA_KEEP_ALIVE**: (Duration) How long a model remains loaded in memory after the last request. Setting this to a higher value (e.g., `30m`) avoids the latency of reloading models. -* **OLLAMA_MAX_LOADED_MODELS**: (Integer) The maximum number of models to keep in VRAM simultaneously. +* **OLLAMA_MAX_LOADED_MODELS**: (Integer) The maximum number of models to keep in VRAM simultaneously. The default is `0` (auto), which allows Ollama to manage concurrent loading based on available VRAM. When memory is full, Least Recently Used (LRU) models are evicted automatically. Manual overrides can be set in `/var/lib/neuraldrive/config/ollama.conf`. * **OLLAMA_NUM_THREADS**: (Integer) Specifies the number of CPU threads to use for inference. 
By default, this auto-detects based on your hardware. * **OLLAMA_FLASH_ATTENTION**: (Boolean) Enabling Flash Attention can significantly improve speed on supported GPUs (e.g., NVIDIA Ampere and newer). diff --git a/docs/user-guide/src/getting-started/first-boot.md b/docs/user-guide/src/getting-started/first-boot.md index d0d0b0d..d8840cd 100644 --- a/docs/user-guide/src/getting-started/first-boot.md +++ b/docs/user-guide/src/getting-started/first-boot.md @@ -28,15 +28,18 @@ Once the boot process is complete, the console will display your system's IP add ## First-Boot Wizard -If the system has not been initialized, a Text User Interface (TUI) wizard will start automatically. You must complete these seven steps to prepare your server: +If the system has not been initialized, a Text User Interface (TUI) wizard will start automatically. The wizard runs as part of the TUI application, checking for a sentinel file on startup. You must complete these six steps to prepare your server: -1. **Welcome:** Displays a hardware summary and runs a brief system health check to ensure your GPU is detected correctly. -2. **Security:** Generates a random administrator password and API key. You can choose to keep these or set a custom password. -3. **Wi-Fi:** If no Ethernet connection is detected, the wizard provides an SSID selector to configure your wireless network. -4. **Network:** Choose between DHCP (default) or a static IP address. -5. **Storage:** Select the drive for your persistent data. You can also enable LUKS encryption here. **Warning: This step is destructive to data on the selected drive.** -6. **Models:** Recommends specific LLM starter models based on your hardware's VRAM and capabilities. -7. **Finish:** The system writes your configurations, provisions the web administrator account, and removes insecure default permissions (like NOPASSWD sudo). +1. **Welcome:** Introductory screen with hardware summary and system health check. +2. 
**Storage/Persistence:** Detects your USB boot device and creates an ext4 persistence partition on unused space. This step also creates required directories under `/var/lib/neuraldrive/` (ollama, models, config, webui, logs). +3. **Security:** Sets the administrator password and configures system credentials. +4. **Network:** Configure your network connection, including Wi-Fi (if applicable) and IP assignment (DHCP or static). +5. **Models:** Select initial LLM models to download based on your hardware capabilities. +6. **Done:** Final completion summary and display of system credentials. + +### Re-running the Wizard + +If you need to reset your configuration, run `neuraldrive-tui --wizard` from the console. This command removes the sentinel file and forces the wizard to run again on the next TUI launch. ### Write Down Your Credentials @@ -44,10 +47,9 @@ At the end of the wizard, your final credentials and the dashboard URL will be d ## System Initialization Files -NeuralDrive uses two sentinel files to track its state: -- `/etc/neuraldrive/initialized`: Indicates that the core system initialization has occurred. +NeuralDrive uses a sentinel file to track its state: - `/etc/neuraldrive/first-boot-complete`: Confirms the user setup wizard has been finished. -Once these files are present, the system will boot directly to the ready state in the future. +Once this file is present, the system will boot directly to the ready state. Next step: [Web Dashboard](../using/web-dashboard.md) diff --git a/docs/user-guide/src/models/recommendations.md b/docs/user-guide/src/models/recommendations.md index 8ac1bfc..ebc0606 100644 --- a/docs/user-guide/src/models/recommendations.md +++ b/docs/user-guide/src/models/recommendations.md @@ -25,13 +25,13 @@ If your system lacks a compatible GPU, NeuralDrive can run models on the CPU. Wh ## Concurrent Models -NeuralDrive allows multiple models to be loaded into memory simultaneously, provided there is enough VRAM. 
This is managed via several environment variables in the Ollama service: +NeuralDrive allows multiple models to be loaded into memory simultaneously, provided there is enough VRAM. This is managed by Ollama using several environment variables: -- `OLLAMA_MAX_LOADED_MODELS`: Defines the maximum number of models kept in memory. -- `OLLAMA_NUM_PARALLEL`: Determines how many concurrent requests can be handled by a single model. +- `OLLAMA_MAX_LOADED_MODELS`: Defines the maximum number of models kept in memory. The default is `0` (auto), which allows Ollama to manage loading based on available VRAM. +- `OLLAMA_NUM_PARALLEL`: Determines how many concurrent requests can be handled. - `OLLAMA_KEEP_ALIVE`: Sets how long a model stays in memory after the last request before being evicted. -NeuralDrive uses a Least Recently Used (LRU) eviction policy. If you attempt to load a new model and VRAM is full, the model that hasn't been used for the longest time will be unloaded to make room. +NeuralDrive uses a Least Recently Used (LRU) eviction policy. If you attempt to load a new model and VRAM is full, Ollama handles eviction automatically to make room for the new request. 
## Model Catalog diff --git a/docs/user-guide/src/reference/config-files.md b/docs/user-guide/src/reference/config-files.md index f2d8d74..3d0a8ed 100644 --- a/docs/user-guide/src/reference/config-files.md +++ b/docs/user-guide/src/reference/config-files.md @@ -8,11 +8,16 @@ This document provides a complete reference for all critical configuration and s | File | Purpose | Format | Owner | | :--- | :--- | :--- | :--- | -| `/etc/neuraldrive/ollama.conf` | Ollama environment variables | KEY=VALUE | root:neuraldrive-admin | +| `/etc/neuraldrive/ollama.conf` | Ollama baked-in defaults | KEY=VALUE | root:neuraldrive-admin | +| `/var/lib/neuraldrive/config/ollama.conf` | Persistent Ollama overrides | KEY=VALUE | root:neuraldrive-admin | +| `/etc/neuraldrive/config.yaml` | TUI overlay fallback config | YAML | root:neuraldrive-admin | +| `/var/lib/neuraldrive/config/config.yaml` | Persistent TUI configuration | YAML | root:neuraldrive-admin | +| `/var/lib/neuraldrive/config/api.key` | Persistent API key | plaintext | root:root (600) | +| `/etc/neuraldrive/api.key` | System API key (synced) | plaintext | root:root (600) | +| `/var/lib/neuraldrive/config/credentials.conf` | Persistent credentials | KEY=VALUE | root:root (600) | | `/etc/neuraldrive/webui.env` | Open WebUI configuration | KEY=VALUE | root:neuraldrive-admin | | `/etc/neuraldrive/caddy.env` | Caddy API key environment | KEY=VALUE | root:neuraldrive-admin | | `/etc/neuraldrive/api.env` | System API environment | KEY=VALUE | root:neuraldrive-admin | -| `/etc/neuraldrive/api.key` | API authentication key | plaintext | root:root (600) | | `/etc/neuraldrive/Caddyfile` | Caddy reverse proxy configuration | Caddyfile | root:neuraldrive-caddy | | `/etc/neuraldrive/nftables.conf` | Global firewall rules | nftables | root:root | | `/etc/neuraldrive/neuraldrive-models.yaml` | Model catalog definitions | YAML | root:neuraldrive-admin | @@ -23,20 +28,27 @@ This document provides a complete reference for all critical 
configuration and s | `/etc/neuraldrive/firewall-custom.conf` | User-defined firewall rules | nftables | root:root | | `/run/neuraldrive/gpu.conf` | GPU detection results (at boot) | KEY=VALUE | root:root (runtime) | + ## Key Configuration Reference ### `ollama.conf` -Defines the behavior of the underlying LLM inference engine. +Defines the behavior of the underlying LLM inference engine. The Ollama service uses two configuration sources: +1. `/etc/neuraldrive/ollama.conf` — baked-in system defaults. +2. `/var/lib/neuraldrive/config/ollama.conf` — persistent user overrides. + +Values in the persistent file override the system defaults. ```ini OLLAMA_HOST=127.0.0.1:11434 OLLAMA_MODELS=/var/lib/neuraldrive/models/ OLLAMA_KEEP_ALIVE=5m -OLLAMA_MAX_LOADED_MODELS=1 +OLLAMA_MAX_LOADED_MODELS=0 OLLAMA_NUM_PARALLEL=1 ``` +**OLLAMA_MAX_LOADED_MODELS**: Set to `0` for "auto" mode. Ollama automatically manages how many models stay loaded based on available VRAM, using Least Recently Used (LRU) eviction when memory is required for a new request. + ### `webui.env` Configures the Open WebUI chat interface and authentication. diff --git a/docs/user-guide/src/reference/services.md b/docs/user-guide/src/reference/services.md index 5d71fba..14dc728 100644 --- a/docs/user-guide/src/reference/services.md +++ b/docs/user-guide/src/reference/services.md @@ -13,7 +13,7 @@ This document provides a detailed overview of the systemd services that power th | `neuraldrive-certs` | oneshot | root | — | network-online, local-fs | Generates self-signed TLS certificates (skips if they already exist). | | `neuraldrive-zram` | oneshot | root | — | local-fs | Sets up compressed RAM-based swap space. | | `neuraldrive-show-ip` | oneshot | root | — | network-online | Displays the current IP address on the physical console. | -| `neuraldrive-ollama` | long-running | neuraldrive-ollama | 11434 | gpu-detect | The underlying LLM inference and model management engine. 
| +| `neuraldrive-ollama` | long-running | neuraldrive-ollama | 11434 | gpu-detect | The GPU-accelerated LLM inference and model management engine. | | `neuraldrive-webui` | long-running | neuraldrive-webui | 3000 | ollama | The Open WebUI dashboard and chat interface. | | `neuraldrive-caddy` | long-running | neuraldrive-caddy | 443, 8443 | certs | The TLS reverse proxy and API gateway. | | `neuraldrive-gpu-monitor` | long-running | neuraldrive-monitor | 1312 | gpu-detect | Monitors GPU temperature, VRAM usage, and health. | @@ -24,7 +24,8 @@ This document provides a detailed overview of the systemd services that power th All NeuralDrive services are configured with systemd-native security hardening to minimize the system attack surface: -- **PrivateDevices**: Most services are denied access to `/dev/` nodes, except for the GPU-specific services. +- **PrivateDevices**: Most services are denied access to `/dev/` nodes. The Ollama service specifically uses `PrivateDevices=no` to allow access to GPU device nodes required for hardware acceleration. +- **DeviceAllow**: This directive was removed from the Ollama service because cgroup v2 eBPF filters can block CUDA access even when devices are explicitly allowed. - **ProtectSystem**: The root filesystem is mounted read-only for service processes. - **ProtectHome**: Services have no access to the `/home/` directory. - **NoNewPrivileges**: Prevents processes from gaining elevated permissions via `setuid` or `setgid`. diff --git a/docs/user-guide/src/troubleshooting/gpu.md b/docs/user-guide/src/troubleshooting/gpu.md index 90fa570..cb794fc 100644 --- a/docs/user-guide/src/troubleshooting/gpu.md +++ b/docs/user-guide/src/troubleshooting/gpu.md @@ -28,6 +28,33 @@ lsmod | grep nouveau If the command returns any output, the blacklist failed. Check `/etc/modprobe.d/neuraldrive-blacklist.conf`. 
+## Driver and Hardware Support + +### missing nvidia-uvm module + +If Ollama reports CPU-only inference despite having an NVIDIA GPU, the `nvidia-uvm` (Unified Virtual Memory) kernel module may not be loaded. This module is essential for CUDA memory allocation. + +1. **Verify Module**: Check if the module is loaded: + ```bash + lsmod | grep nvidia_uvm + ``` +2. **Naming Convention**: On NeuralDrive (based on Debian), the module is named `nvidia-current-uvm`. +3. **Manual Load**: If missing, load it manually: + ```bash + sudo modprobe nvidia-current-uvm && nvidia-modprobe -u + ``` +4. **Automatic Loading**: NeuralDrive should load this automatically at boot via `/etc/modules-load.d/nvidia-uvm.conf`. If it fails, check the `journalctl -u neuraldrive-ollama` logs for `ExecStartPre` failures. + +Without this module, `/dev/nvidia-uvm` device nodes will be missing, causing CUDA calls to fail silently and Ollama to fall back to CPU. + +### cgroup v2 / DeviceAllow blocking + +If the GPU is detected by the system but Ollama still falls back to CPU inference, systemd `DeviceAllow` rules might be blocking access. + +1. **cgroup v2 Behavior**: On systems using cgroup v2, `DeviceAllow` uses eBPF device filters. These filters can block CUDA access even when explicit allow rules for `/dev/nvidia*` and `/dev/dri/*` are present. +2. **NeuralDrive Default**: The default NeuralDrive Ollama service has all `DeviceAllow` lines removed to prevent this. +3. **Custom Units**: If you have modified the service unit and re-added `DeviceAllow` rules, remove them and ensure `PrivateDevices=no` is set to restore GPU access. 
+ ## Diagnostic Tools NeuralDrive provides several utilities to inspect GPU state: diff --git a/docs/user-guide/src/troubleshooting/models.md b/docs/user-guide/src/troubleshooting/models.md index 34f48ca..f7448b1 100644 --- a/docs/user-guide/src/troubleshooting/models.md +++ b/docs/user-guide/src/troubleshooting/models.md @@ -37,7 +37,13 @@ If the model is generating text very slowly (less than 1 token per second): ```bash cat /run/neuraldrive/gpu.conf ``` -2. **Mixed Models**: Ensure you are not running multiple models simultaneously, which may compete for limited hardware resources. +### Concurrent Model Loading + +Multiple models can be loaded simultaneously if your hardware supports it. + +1. **Automatic Management**: NeuralDrive uses `OLLAMA_MAX_LOADED_MODELS=0` (auto) by default. Ollama manages concurrent loading based on your available VRAM. +2. **Model Eviction**: When VRAM is exhausted, least-recently-used models are automatically evicted from memory to make room for new ones. +3. **Manual Control**: You can manually load or unload models via the TUI Models screen (accessible with the **F2** key). ## Management and Corruption diff --git a/docs/user-guide/src/using/local-chat.md b/docs/user-guide/src/using/local-chat.md index 4f680e3..6ce7175 100644 --- a/docs/user-guide/src/using/local-chat.md +++ b/docs/user-guide/src/using/local-chat.md @@ -6,20 +6,20 @@ For quick testing and offline interaction, NeuralDrive includes a lightweight, t ## Launching the Chat -Access the local chat by pressing **C** from the main TUI dashboard. +Access the local chat by pressing **F5** from any screen. ## Using the Chat Interface -1. **Model Selection:** Upon entering the chat screen, you will be prompted to select one of the models currently available on your system. Use the arrow keys to highlight a model and press Enter. +1. **Model Selection:** A model selector dropdown at the top of the screen lets you choose which installed model to chat with. 
The selected model persists even when switching away and returning to the chat screen. 2. **Messaging:** Type your message into the input field at the bottom of the screen. Press Enter to send. 3. **Streaming Responses:** The model's response will stream directly into the terminal window in real-time. 4. **Keyboard Shortcuts:** - - **Esc or B:** Return to the model selection or main dashboard. + - **F1-F4:** Switch to another TUI screen (Dashboard, Models, Services, or Logs). - **Ctrl+C:** Interrupt the current response generation. ## Features and Limitations -The TUI chat is designed for simplicity and speed. +The TUI chat is designed for simplicity and speed. You must have at least one model downloaded and loaded to use the chat interface. - **Fast & Lightweight:** Minimal resource overhead compared to the full web UI. - **Persistent Context:** The chat maintains a basic conversation history within the current session, allowing for follow-up questions. diff --git a/docs/user-guide/src/using/models-tui.md b/docs/user-guide/src/using/models-tui.md index 5f6cf0b..4ad6b38 100644 --- a/docs/user-guide/src/using/models-tui.md +++ b/docs/user-guide/src/using/models-tui.md @@ -2,45 +2,54 @@ # Model Management via TUI -The Model Management screen allows you to download, unload, and delete LLMs directly from the terminal. Access this screen by pressing **M** from the main dashboard. +The Model Management screen allows you to download, load, unload, and delete LLMs directly from the terminal. -## Models Screen Interface +## Access +Press **F2** from any screen to access Model Management. 
-```text -┌──────────────── Model Management ────────────────────────────┐ -│ NAME SIZE STATUS ACTION │ -│ llama3.1:8b 4.7GB LOADED [U]nload [D]elete │ -│ codestral:latest 8.2GB LOADED [U]nload [D]elete │ -│ mistral:7b 4.1GB CACHED [L]oad [D]elete │ -├──────────────────────────────────────────────────────────────┤ -│ [P]ull Model [I]mport GGUF [B]ack │ -└──────────────────────────────────────────────────────────────┘ -``` +## Layout +The screen is organized into three zones that you can navigate between using **Tab** or **Shift+Tab**: -## Available Actions +1. **Installed Models list** (top zone): A scrollable list of models currently on your system. +2. **Browse Catalog** button (middle zone): Opens a popup to browse the Ollama library. +3. **Pull by name** (bottom zone): A text input field and a **Pull** button for direct model downloads. -Each model in the list supports specific actions based on its current state: +### Installed Models List +Each model in the list displays its details in a columnar format. A legend header with `|` separators appears above the list: +`Model name | Params | Quant | Disk | VRAM | Status` -- **[L]oad:** If a model is **CACHED** (on disk but not in memory), pressing **L** will trigger a load into VRAM. -- **[U]nload:** If a model is **LOADED**, pressing **U** will eject it from VRAM. This is useful if you want to free up space for a different model manually. -- **[D]elete:** Pressing **D** will prompt for confirmation and then remove the model weights and metadata from the persistent storage. +- **Model name**: The name of the model (e.g., `llama3:8b`). +- **Params**: Parameter count of the model. +- **Quant**: Quantization level. +- **Disk**: Space occupied on disk. +- **VRAM**: Measured or cached VRAM usage (e.g., "6.2 GB" or "~6.2 GB"). +- **Status**: Current state of the model ("loaded (GPU)", "loaded (CPU)", or "ready"). 
-## Pulling New Models +## Navigation +- **Tab / Shift+Tab**: Cycle focus between the three zones (models → browse → pull-input → pull-btn). +- **Up / Down arrows**: Navigate through the installed model list. The view scrolls automatically to follow your focus. +- **Left / Right arrows**: Navigate between the action buttons (Load/Unload/Delete) for the currently selected model. The cursor automatically skips disabled buttons. +- **Enter**: Activate the focused button or zone. +- **PageUp / PageDown**: Fast scroll through the model list. -To download a new model from the Ollama registry: +## Model Actions +Each model has specific action buttons: -1. Press **P** (Pull Model). -2. Enter the full model string (e.g., `llama3.1:8b`). -3. Press Enter to start the download. -4. A progress bar will appear in the action column. You can press **Esc** or **Q** to cancel the download at any time. +- **Load**: Loads the model into VRAM for inference. The status will show "Loading..." while in progress. Loaded models use a `keep_alive: -1` setting for infinite retention. +- **Unload**: Removes the model from VRAM. The system polls the engine until the unload is confirmed. +- **Delete**: Permanently removes the model from disk. A confirmation prompt will appear before deletion. -## Importing GGUF Files +## Downloading Models -If you have a GGUF file on an external device or elsewhere in the filesystem, you can import it by pressing **I** (Import GGUF). +### Browse Catalog +Selecting the **Browse Catalog** button opens a scrollable popup listing popular models from the Ollama library. Select a model from the list and confirm to start the download. -1. Provide the absolute path to the `.gguf` file. -2. NeuralDrive will create a local manifest and copy the file into the internal model storage area. -3. Once imported, the model will appear in your list with a default name derived from the filename. 
+### Pull by Name +To download a specific model, type its name (e.g., `llama3:8b`) into the text input field in the bottom zone and press **Enter** or click the **Pull** button. -Press **B** or **Back** to return to the main dashboard. +A progress bar will show the download status. You can press **Escape** or the **Cancel** button to abort an active download. +## VRAM Management +VRAM usage values are measured during operation or retrieved from a cache stored in `/var/lib/neuraldrive/config/`. + +Multiple models can be installed and loaded simultaneously. The underlying engine manages VRAM using an LRU (Least Recently Used) eviction policy when the `OLLAMA_MAX_LOADED_MODELS` setting is set to auto. diff --git a/docs/user-guide/src/using/tui-dashboard.md b/docs/user-guide/src/using/tui-dashboard.md index 2dd09c1..6098d36 100644 --- a/docs/user-guide/src/using/tui-dashboard.md +++ b/docs/user-guide/src/using/tui-dashboard.md @@ -9,32 +9,29 @@ The Dashboard is the central monitoring hub of NeuralDrive. It is designed to pr The dashboard is divided into three functional areas: ### 1. Header and System Info -Located at the very top, this section displays the version of NeuralDrive, the current hostname, system uptime, and the primary IP address. - -- **Refresh Rate:** Static upon load, updates if network state changes. +Located at the very top, this section displays the version of NeuralDrive, the current hostname, system uptime, and the primary IP address. A live system clock is displayed in the upper-right corner, showing the exact time of the last data refresh. ### 2. Hardware Resource Monitor This section provides real-time metrics for your hardware. -- **GPU:** Displays the detected GPU model (e.g., NVIDIA RTX 4090). +- **GPU:** Displays the detected GPU model name, total VRAM capacity, driver version, and CUDA compute capability. - **VRAM:** Shows the current VRAM usage (e.g., 12.4/24.0 GB) and a percentage bar. - **Temp:** Current GPU temperature in Celsius. 
- **CPU:** Real-time CPU utilization percentage. - **RAM:** System memory usage (e.g., 18.2/64.0 GB). - **Disk:** Total disk space used on the persistence partition (e.g., 45.2 GB). -- **Refresh Rate:** Hardware metrics refresh every **2 seconds**. +- **Refresh Rate:** Hardware metrics refresh every **2 seconds**. You can press **R** at any time to trigger a manual refresh of all dashboard data. ### 3. Loaded Models List This list displays the models currently residing in memory and ready for immediate inference. - **Status Indicator:** A solid circle (●) indicates the model is currently loaded in memory. An open circle (○) indicates the model is cached on disk but not currently loaded. -- **Backend:** Shows if the model is running on the **[GPU]** or CPU. +- **Backend:** Shows if the model is running on the **[GPU]** or **[CPU]**. - **VRAM Footprint:** The amount of memory the model is currently occupying. -- **Activity:** Real-time request volume, measured in requests per minute (req/min). -- **Refresh Rate:** The model list and their activity metrics refresh every **10 seconds**. +- **Refresh Rate:** The model list metrics refresh every **10 seconds**. ## Interaction -While the Dashboard is primarily for monitoring, pressing any of the navigation keys at the bottom will transition you to a specific management screen. You can return to the dashboard at any time by pressing the **Back** or **B** key from most sub-screens. +The Dashboard is accessed via **F1** from any other screen. While it is primarily for monitoring, you can transition to other management screens using the function keys (F2-F5) shown at the bottom. Use the **R** key to manually refresh the displayed information. 
diff --git a/docs/user-guide/src/using/tui-services.md b/docs/user-guide/src/using/tui-services.md index ff6476b..d866744 100644 --- a/docs/user-guide/src/using/tui-services.md +++ b/docs/user-guide/src/using/tui-services.md @@ -2,36 +2,33 @@ # Service Control -The Services screen provides a centralized interface for managing the background system processes that power NeuralDrive. Access this screen by pressing **S** from the main dashboard. +The Services screen provides a centralized interface for managing the background system processes that power NeuralDrive. -## Service List +## Access +Press **F3** from any screen to access Service Control. -This screen displays all `neuraldrive-*` services and their current operational status: +## Layout +The screen displays a scrollable list of services. Each service is represented by a `ServiceItem` widget showing the service name, its current status, and a set of action buttons. -- **Active:** The service is running normally. -- **Inactive:** The service is stopped. -- **Failed:** The service encountered an error and crashed. +### Services Managed +The TUI allows you to manage critical NeuralDrive components, including: +- `neuraldrive-ollama`: The core model execution engine. +- `neuraldrive-webui`: The browser-based user interface. +- Any other configured system services specific to the NeuralDrive distribution. -The primary services you will see include: +## Navigation +- **Up / Down arrows**: Navigate between the different services in the list. +- **Left / Right arrows**: Navigate between the action buttons (Start/Stop/Restart) for the currently selected service. The focus will automatically skip buttons that are disabled based on the service's current state. +- **Enter**: Activate the focused action button. -- `neuraldrive-ollama`: The model execution engine. -- `neuraldrive-webui`: The browser-based interface. -- `neuraldrive-caddy`: The reverse proxy handling TLS and routing. 
-- `neuraldrive-system-api`: The system management API. +## Action Buttons +Each service has three colored action buttons that enable or disable dynamically: -## Managing Services +- **Start** (green): Starts a service that is currently stopped or inactive. +- **Stop** (red): Gracefully shuts down a running service. +- **Restart** (amber): Stops and immediately restarts a running service. This is often the quickest way to resolve minor connectivity or performance issues. -You can control individual services using the following keybindings after selecting a service from the list: - -- **R (Restart):** Stops and immediately restarts the selected service. This is the first step you should take if a component becomes unresponsive. -- **S (Start):** Manages starting a service that is currently inactive or failed. -- **T (Stop):** Gracefully shuts down the selected service. - -## Recovery and Troubleshooting - -If the Dashboard shows an "Ollama Offline" badge, navigate to the Services screen and check the status of `neuraldrive-ollama`. If it is in a **Failed** or **Inactive** state, use the **S** key to start it or **R** to restart it. - -Monitoring service status is critical for maintaining system uptime. If a service repeatedly fails, you should examine the system logs for more detailed error information. - -Press **B** or **Back** to return to the main dashboard. +## Auto-Refresh and Monitoring +The status of all services is automatically polled every 5 seconds to ensure the interface reflects the actual state of the system. +If a service like `neuraldrive-ollama` shows a failed or inactive status, use the action buttons to restore it. Continuous monitoring and manual control through this screen help maintain the overall health of your NeuralDrive instance. 
diff --git a/docs/user-guide/src/using/tui.md b/docs/user-guide/src/using/tui.md index 4c5d159..75df3a3 100644 --- a/docs/user-guide/src/using/tui.md +++ b/docs/user-guide/src/using/tui.md @@ -29,32 +29,51 @@ This launcher script (installed at `/usr/local/bin/neuraldrive-tui`) activates t The main dashboard provides a high-level overview of system health and active models. ```text -┌──────────────── NeuralDrive v1.0.0 ──────────────────────────┐ +┌──────────────── NeuralDrive v1.0.0 ───────────────── 10:45:22 ─┐ │ Host: neuraldrive.local │ Uptime: 2h 15m │ IP: 192.168.1.50 │ -├──────────────────────────────────────────────────────────────┤ -│ GPU: NVIDIA RTX 4090 │ VRAM: 12.4/24.0 GB │ Temp: 65°C │ 85% │ -│ CPU: 12% │ RAM: 18.2/64.0 GB │ Disk: 45.2 GB │ -├──────────────────────────────────────────────────────────────┤ -│ LOADED MODELS │ -│ ● llama3.1:8b [GPU] 4.7 GB 85 req/min │ -│ ● codestral:latest [GPU] 8.2 GB 12 req/min │ -│ ○ phi3:mini --- (not loaded) │ -├──────────────────────────────────────────────────────────────┤ -│ [M]odels [S]ervices [N]etwork [L]ogs [C]hat [Q]uit │ -└──────────────────────────────────────────────────────────────┘ +├────────────────────────────────────────────────────────────────┤ +│ GPU: NVIDIA RTX 4090 │ VRAM: 12.4/24.0 GB │ Temp: 65°C │ 85% │ +│ CPU: 12% │ RAM: 18.2/64.0 GB │ Disk: 45.2 GB │ +├────────────────────────────────────────────────────────────────┤ +│ LOADED MODELS │ +│ ● llama3.1:8b [GPU] 4.7 GB │ +│ ● codestral:latest [GPU] 8.2 GB │ +│ ○ phi3:mini --- (not loaded) │ +├────────────────────────────────────────────────────────────────┤ +│ F1 Dashboard F2 Models F3 Services F4 Logs F5 Chat Q Quit │ +└────────────────────────────────────────────────────────────────┘ ``` ## Navigation Keybindings -Navigation is performed using single-letter hotkeys shown at the bottom of the screen: +Navigation is performed using function keys: -- **M:** Models screen for managing downloads and loading state. 
-- **S:** Services screen for restarting or stopping system components. -- **N:** Network screen for hostname and IP configuration. -- **L:** Logs screen for real-time system and service logs. -- **C:** Chat screen for a lightweight, terminal-based LLM chat. +- **F1:** Dashboard overview. +- **F2:** Models screen for managing downloads and loading state. +- **F3:** Services screen for restarting or stopping system components. +- **F4:** Logs screen for real-time system and service logs. +- **F5:** Chat screen for a lightweight, terminal-based LLM chat. - **Q:** Quit the TUI and return to the shell. +Within each screen, the following navigation model is used: +- **Tab / Shift+Tab:** Cycle focus between different screen zones. +- **Arrow Keys:** Navigate within a focused zone (e.g., scrolling a list). +- **Enter:** Activate the currently focused element or button. + +## Troubleshooting and Debugging + +If the TUI encounters a critical error, it will write a crash dump to `/var/lib/neuraldrive/logs/tui-crash-*.log`. + +Screenshots captured within the TUI are saved to `/var/lib/neuraldrive/screenshots/`. + +### Re-running the First-Boot Wizard + +If you need to force the first-boot wizard to run again, launch the TUI with the `--wizard` flag: +```bash +neuraldrive-tui --wizard +``` +This removes the sentinel file and initiates the guided setup process. + ## Resilience The TUI is designed to be resilient. If the underlying Ollama service is unavailable, an "Ollama Offline" badge will appear on the dashboard, and certain model management features will be disabled until the service is restored via the **Services** screen. 
From c6218270e0705458bcf35e4511502dfe5915de17 Mon Sep 17 00:00:00 2001 From: eshork <1829176+eshork@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:29:36 -0400 Subject: [PATCH 32/32] Mark VRAM-loaded models with * in chat selector and retain input focus Chat model dropdown now prefixes loaded models with * so users can see which models are ready without loading delay. A 10-second poll timer keeps the indicators current as models load/unload. Input focus is restored after each response completes so users can type follow-up messages without re-clicking the input box. --- .../usr/lib/neuraldrive/tui/screens/chat.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py index 62cb60d..41e0e2d 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py @@ -34,10 +34,14 @@ def compose(self) -> ComposeResult: def on_mount(self) -> None: self.app.call_later(self._load_model_options) + self._refresh_timer = self.set_interval(10, self._poll_model_options) def on_screen_resume(self) -> None: self.app.call_later(self._load_model_options) + async def _poll_model_options(self) -> None: + await self._load_model_options() + async def _load_model_options(self) -> None: notice = self.query_one("#chat-notice", Static) select = self.query_one("#chat-model-select", Select) @@ -53,13 +57,19 @@ async def _load_model_options(self) -> None: return models = await api_client.list_models() - options = [(m.get("name", "?"), m.get("name", "?")) for m in models] + running = await api_client.list_running_models() + running_names = {m.get("name", "") for m in running} + options = [] + for m in models: + name = m.get("name", "?") + label = f"* {name}" if name in running_names else name + options.append((label, name)) previous = select.value 
select.set_options(options) if not options: notice.update( - " No models installed. Pull a model from the Models screen (press M)." + " No models installed. Pull a model from the Models screen (F2)." ) notice.add_class("warn") send_btn.disabled = True @@ -141,3 +151,4 @@ async def _stream_response(self, model: str) -> None: finally: send_btn.disabled = False chat_input.disabled = False + chat_input.focus()