diff --git a/README.md b/README.md index 5c95fd5..010e4ae 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,7 @@ NeuralDrive images are built using Debian's `live-build` toolchain inside a Dock ```bash # Clone and build -git clone https://github.com/NeuralDrive/NeuralDrive.git +git clone https://github.com/Rightbracket/NeuralDrive.git cd NeuralDrive docker compose run --rm builder diff --git a/config/hooks/live/01-setup-system.chroot b/config/hooks/live/01-setup-system.chroot index 8f1a8aa..b278e7a 100755 --- a/config/hooks/live/01-setup-system.chroot +++ b/config/hooks/live/01-setup-system.chroot @@ -15,6 +15,12 @@ echo "neuraldrive-admin:neuraldrive" | chpasswd echo "neuraldrive-admin ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/neuraldrive-admin chmod 440 /etc/sudoers.d/neuraldrive-admin +# Scoped NOPASSWD rules for TUI — survives wizard _finalize() which only +# modifies neuraldrive-admin. File is baked in via includes.chroot; just +# ensure correct ownership and permissions here. +chmod 440 /etc/sudoers.d/neuraldrive-tui +chown root:root /etc/sudoers.d/neuraldrive-tui + mkdir -p /etc/neuraldrive/tls \ /var/lib/neuraldrive/models/{manifests,blobs} \ /var/lib/neuraldrive/ollama \ diff --git a/config/hooks/live/04-install-python-apps.chroot b/config/hooks/live/04-install-python-apps.chroot index 6efc76f..853fd27 100755 --- a/config/hooks/live/04-install-python-apps.chroot +++ b/config/hooks/live/04-install-python-apps.chroot @@ -118,11 +118,11 @@ git clone --depth 1 https://github.com/psalias2006/gpu-hot.git /usr/lib/neuraldr # --- TUI (terminal interface) --- python3 -m venv /usr/lib/neuraldrive/tui/venv /usr/lib/neuraldrive/tui/venv/bin/pip install --no-cache-dir --upgrade pip -/usr/lib/neuraldrive/tui/venv/bin/pip install --no-cache-dir textual psutil httpx rich +/usr/lib/neuraldrive/tui/venv/bin/pip install --no-cache-dir textual psutil httpx rich pyyaml cat > /usr/local/bin/neuraldrive-tui << 'LAUNCHER' #!/bin/sh -exec 
/usr/lib/neuraldrive/tui/venv/bin/python /usr/lib/neuraldrive/tui/main.py +exec /usr/lib/neuraldrive/tui/venv/bin/python /usr/lib/neuraldrive/tui/main.py "$@" LAUNCHER chmod +x /usr/local/bin/neuraldrive-tui diff --git a/config/hooks/live/05-generate-configs.chroot b/config/hooks/live/05-generate-configs.chroot index 94557b2..d78e2f9 100755 --- a/config/hooks/live/05-generate-configs.chroot +++ b/config/hooks/live/05-generate-configs.chroot @@ -7,7 +7,7 @@ cat > /etc/neuraldrive/ollama.conf << 'EOF' OLLAMA_HOST=127.0.0.1:11434 OLLAMA_MODELS=/var/lib/neuraldrive/models/ OLLAMA_KEEP_ALIVE=5m -OLLAMA_MAX_LOADED_MODELS=1 +OLLAMA_MAX_LOADED_MODELS=0 OLLAMA_NUM_PARALLEL=1 EOF diff --git a/config/includes.chroot/etc/modules-load.d/nvidia-uvm.conf b/config/includes.chroot/etc/modules-load.d/nvidia-uvm.conf new file mode 100644 index 0000000..1a5cb35 --- /dev/null +++ b/config/includes.chroot/etc/modules-load.d/nvidia-uvm.conf @@ -0,0 +1,4 @@ +# Load NVIDIA Unified Virtual Memory module at boot. +# Required for CUDA GPU memory allocation (Ollama inference). +# Harmless on systems without NVIDIA GPUs (modprobe fails silently). +nvidia-current-uvm diff --git a/config/includes.chroot/etc/neuraldrive/ollama.conf b/config/includes.chroot/etc/neuraldrive/ollama.conf index 868447a..1c0ff05 100644 --- a/config/includes.chroot/etc/neuraldrive/ollama.conf +++ b/config/includes.chroot/etc/neuraldrive/ollama.conf @@ -1,5 +1,5 @@ OLLAMA_HOST=127.0.0.1:11434 OLLAMA_MODELS=/var/lib/neuraldrive/models/ OLLAMA_KEEP_ALIVE=5m -OLLAMA_MAX_LOADED_MODELS=1 +OLLAMA_MAX_LOADED_MODELS=0 OLLAMA_NUM_PARALLEL=1 diff --git a/config/includes.chroot/etc/sudoers.d/neuraldrive-tui b/config/includes.chroot/etc/sudoers.d/neuraldrive-tui new file mode 100644 index 0000000..cc2fbf7 --- /dev/null +++ b/config/includes.chroot/etc/sudoers.d/neuraldrive-tui @@ -0,0 +1,30 @@ +# Scoped NOPASSWD rules for NeuralDrive TUI operations. 
+# This file is NOT modified by the first-boot wizard's _finalize() +# (which only touches /etc/sudoers.d/neuraldrive-admin). +# Processed AFTER neuraldrive-admin (alphabetical), so these NOPASSWD +# rules override the password-required ALL rule for matched commands. + +# Service management +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/systemctl start neuraldrive-* +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/systemctl stop neuraldrive-* +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/systemctl restart neuraldrive-* +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/systemctl is-active neuraldrive-* + +# Partition creation and storage management +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/sbin/parted * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/sbin/mkfs.ext4 * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/mount * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/umount * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/sbin/partprobe * + +# File operations (wizard config writing, directory setup) +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/tee * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/mkdir * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/chmod * +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/chown * + +# Password management (wizard security step) +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/sbin/chpasswd + +# Sudoers self-read (wizard _finalize reads neuraldrive-admin to strip NOPASSWD) +neuraldrive-admin ALL=(ALL) NOPASSWD: /usr/bin/cat /etc/sudoers.d/neuraldrive-admin diff --git a/config/includes.chroot/etc/systemd/system/neuraldrive-ollama.service b/config/includes.chroot/etc/systemd/system/neuraldrive-ollama.service index c7558bf..27c2ffd 100644 --- a/config/includes.chroot/etc/systemd/system/neuraldrive-ollama.service +++ b/config/includes.chroot/etc/systemd/system/neuraldrive-ollama.service @@ -6,7 +6,10 @@ Requires=neuraldrive-gpu-detect.service [Service] Environment=HOME=/var/lib/neuraldrive/ollama EnvironmentFile=/etc/neuraldrive/ollama.conf 
+EnvironmentFile=-/var/lib/neuraldrive/config/ollama.conf ExecStartPre=/usr/bin/mkdir -p /var/lib/neuraldrive/models +ExecStartPre=-/sbin/modprobe nvidia-current-uvm +ExecStartPre=-/usr/bin/nvidia-modprobe -u ExecStart=/usr/local/bin/ollama serve User=neuraldrive-ollama Group=neuraldrive-ollama @@ -26,8 +29,6 @@ PrivateTmp=yes PrivateDevices=no ProtectKernelTunables=yes ProtectControlGroups=yes -DeviceAllow=/dev/nvidia* rw -DeviceAllow=/dev/dri/* rw ReadWritePaths=/var/lib/neuraldrive /var/log/neuraldrive /etc/neuraldrive /run/neuraldrive [Install] diff --git a/config/includes.chroot/usr/lib/neuraldrive/dev-reset.sh b/config/includes.chroot/usr/lib/neuraldrive/dev-reset.sh new file mode 100755 index 0000000..0b1bd96 --- /dev/null +++ b/config/includes.chroot/usr/lib/neuraldrive/dev-reset.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# /usr/lib/neuraldrive/dev-reset.sh +# Development reset script — restores a post-wizard system to a +# development-friendly state. Included in builds for convenience. +# +# Usage: sudo /usr/lib/neuraldrive/dev-reset.sh + +set -e + +echo "=== NeuralDrive Development Reset ===" +echo "" + +# 1. Reset admin password to the build default +echo "neuraldrive-admin:neuraldrive" | chpasswd +echo "[ok] Admin password reset to 'neuraldrive'" + +# 2. Restore blanket NOPASSWD for development +echo "neuraldrive-admin ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/neuraldrive-admin +chmod 440 /etc/sudoers.d/neuraldrive-admin +echo "[ok] Blanket NOPASSWD sudo restored" + +# 3. Remove wizard sentinel so it runs again on next TUI start +rm -f /etc/neuraldrive/first-boot-complete +echo "[ok] Wizard sentinel removed" + +# 4. Clear config files so wizard starts fresh +rm -f /var/lib/neuraldrive/config/config.yaml +rm -f /etc/neuraldrive/config.yaml +echo "[ok] Config files cleared" + +# 5. 
Clear generated credentials +rm -f /etc/neuraldrive/api.key +rm -f /etc/neuraldrive/credentials.conf +echo "[ok] API key and credentials cleared" + +echo "" +echo "Development reset complete." +echo "Restart the TUI to re-run the first-boot wizard." diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/main.py b/config/includes.chroot/usr/lib/neuraldrive/tui/main.py index 4eac789..f563843 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/main.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/main.py @@ -9,21 +9,74 @@ from screens.chat import ChatScreen from screens.wizard import FirstBootWizard +import argparse import os +import subprocess +import sys +import traceback +from datetime import datetime + +PERSIST_DIR = "/var/lib/neuraldrive" +OVERLAY_LOG_DIR = "/var/log/neuraldrive" + + +def _persistent_available() -> bool: + return os.path.ismount(PERSIST_DIR) + + +def _log_dir() -> str: + if _persistent_available(): + p = os.path.join(PERSIST_DIR, "logs") + try: + os.makedirs(p, exist_ok=True) + return p + except PermissionError: + pass + os.makedirs(OVERLAY_LOG_DIR, exist_ok=True) + return OVERLAY_LOG_DIR + + +def _screenshot_dir() -> str: + if _persistent_available(): + p = os.path.join(PERSIST_DIR, "screenshots") + try: + os.makedirs(p, exist_ok=True) + return p + except PermissionError: + pass + os.makedirs(OVERLAY_LOG_DIR, exist_ok=True) + return OVERLAY_LOG_DIR + + +def _write_crash_dump(error: BaseException) -> str | None: + try: + crash_dir = _log_dir() + ts = datetime.now().strftime("%Y%m%d-%H%M%S") + dump_path = os.path.join(crash_dir, f"tui-crash-{ts}.log") + with open(dump_path, "w") as f: + f.write(f"NeuralDrive TUI crash at {ts}\n") + f.write(f"Python: {sys.version}\n") + f.write(f"Args: {sys.argv}\n\n") + traceback.print_exception(type(error), error, error.__traceback__, file=f) + return dump_path + except Exception: + return None class NeuralDriveTUI(App): CSS_PATH = "styles.tcss" TITLE = "NeuralDrive" + ENABLE_COMMAND_PALETTE = 
False BINDINGS = [ - Binding("m", "switch_screen('models')", "Models"), - Binding("s", "switch_screen('services')", "Services"), - Binding("n", "switch_screen('network')", "Network"), - Binding("l", "switch_screen('logs')", "Logs"), - Binding("c", "switch_screen('chat')", "Chat"), - Binding("d", "switch_screen('dashboard')", "Dashboard"), + Binding("f1", "switch_screen('dashboard')", "F1 Dash", priority=True), + Binding("f2", "switch_screen('models')", "F2 Models", priority=True), + Binding("f3", "switch_screen('services')", "F3 Svc", priority=True), + Binding("f4", "switch_screen('logs')", "F4 Logs", priority=True), + Binding("f5", "switch_screen('chat')", "F5 Chat", priority=True), Binding("q", "quit", "Quit"), + Binding("up", "focus_previous", "Previous", show=False), + Binding("down", "focus_next", "Next", show=False), ] SCREENS = { @@ -40,11 +93,43 @@ def on_mount(self) -> None: if not os.path.exists("/etc/neuraldrive/first-boot-complete"): self.push_screen(FirstBootWizard()) + def _handle_exception(self, error: Exception) -> None: + dump_path = _write_crash_dump(error) + if dump_path: + self.log(f"Crash dump saved to {dump_path}") + super()._handle_exception(error) + + def action_focus_next(self) -> None: + self.screen.focus_next() + + def action_focus_previous(self) -> None: + self.screen.focus_previous() + def action_switch_screen(self, screen_name: str) -> None: if screen_name in self.SCREENS: self.switch_screen(screen_name) if __name__ == "__main__": - app = NeuralDriveTUI() - app.run(mouse=False) + parser = argparse.ArgumentParser(description="NeuralDrive TUI") + parser.add_argument( + "--wizard", action="store_true", help="Force the first-boot wizard to run" + ) + args = parser.parse_args() + + if args.wizard: + sentinel = "/etc/neuraldrive/first-boot-complete" + if os.path.exists(sentinel): + subprocess.run(["sudo", "rm", "-f", sentinel], timeout=5) + + screenshot_dir = _screenshot_dir() + os.environ["TEXTUAL_SCREENSHOT_LOCATION"] = screenshot_dir + 
try: + app = NeuralDriveTUI() + app.run(mouse=False) + except Exception as exc: + dump_path = _write_crash_dump(exc) + traceback.print_exc() + if dump_path: + print(f"\nCrash dump saved to {dump_path}") + sys.exit(1) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py index d5716f4..41e0e2d 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/chat.py @@ -2,10 +2,13 @@ import json +from textual import work from textual.app import ComposeResult from textual.containers import Horizontal from textual.screen import Screen -from textual.widgets import Button, Footer, Header, Input, RichLog, Select, Static +from textual.widgets import Button, Footer, Input, RichLog, Select, Static + +from widgets.safe_header import SafeHeader from utils import api_client @@ -18,11 +21,12 @@ def __init__(self) -> None: self._messages: list[dict] = [] def compose(self) -> ComposeResult: - yield Header() - with Horizontal(): - yield Static(" Model: ", classes="label") - yield Select([], id="chat-model-select") - yield RichLog(highlight=True, markup=False, id="chat-log") + yield SafeHeader() + with Horizontal(id="chat-model-row"): + yield Static(" Model ", id="chat-model-label") + yield Select([], id="chat-model-select", prompt="Choose a model…") + yield Static("", id="chat-notice") + yield RichLog(highlight=True, markup=False, wrap=True, id="chat-log") with Horizontal(id="chat-input-row"): yield Input(placeholder="Type a message…", id="chat-input") yield Button("Send", id="chat-send", classes="primary") @@ -30,24 +34,67 @@ def compose(self) -> ComposeResult: def on_mount(self) -> None: self.app.call_later(self._load_model_options) + self._refresh_timer = self.set_interval(10, self._poll_model_options) + + def on_screen_resume(self) -> None: + self.app.call_later(self._load_model_options) + + async def 
_poll_model_options(self) -> None: + await self._load_model_options() async def _load_model_options(self) -> None: - models = await api_client.list_models() + notice = self.query_one("#chat-notice", Static) select = self.query_one("#chat-model-select", Select) - options = [(m.get("name", "?"), m.get("name", "?")) for m in models] + send_btn = self.query_one("#chat-send", Button) + chat_input = self.query_one("#chat-input", Input) + + available = await api_client.ollama_available() + if not available: + notice.update(" Ollama is not running. Start it from the Services screen.") + notice.add_class("error") + send_btn.disabled = True + chat_input.disabled = True + return + + models = await api_client.list_models() + running = await api_client.list_running_models() + running_names = {m.get("name", "") for m in running} + options = [] + for m in models: + name = m.get("name", "?") + label = f"* {name}" if name in running_names else name + options.append((label, name)) + previous = select.value select.set_options(options) - if options: + + if not options: + notice.update( + " No models installed. Pull a model from the Models screen (F2)." 
+ ) + notice.add_class("warn") + send_btn.disabled = True + chat_input.disabled = True + return + + notice.update("") + notice.remove_class("error", "warn") + send_btn.disabled = False + chat_input.disabled = False + option_values = [v for _, v in options] + if previous is not Select.BLANK and previous in option_values: + select.value = previous + elif select.value is Select.BLANK: select.value = options[0][1] async def on_button_pressed(self, event: Button.Pressed) -> None: if event.button.id == "chat-send": - await self._send_message() + self._do_send() async def on_input_submitted(self, event: Input.Submitted) -> None: if event.input.id == "chat-input": - await self._send_message() + self._do_send() - async def _send_message(self) -> None: + def _do_send(self) -> None: input_widget = self.query_one("#chat-input", Input) text = input_widget.value.strip() if not text: @@ -56,6 +103,8 @@ async def _send_message(self) -> None: select = self.query_one("#chat-model-select", Select) model = str(select.value) if select.value is not Select.BLANK else "" if not model: + log = self.query_one("#chat-log", RichLog) + log.write("[error] No model selected. 
Choose a model from the dropdown.") return log = self.query_one("#chat-log", RichLog) @@ -63,7 +112,17 @@ async def _send_message(self) -> None: input_widget.value = "" self._messages.append({"role": "user", "content": text}) - log.write(f"\n[{model}] ", end="") + self._stream_response(model) + + @work(exclusive=True) + async def _stream_response(self, model: str) -> None: + log = self.query_one("#chat-log", RichLog) + send_btn = self.query_one("#chat-send", Button) + chat_input = self.query_one("#chat-input", Input) + + send_btn.disabled = True + chat_input.disabled = True + log.write(f"[{model}] ...") assistant_text = "" try: @@ -73,11 +132,23 @@ async def _send_message(self) -> None: chunk = data.get("message", {}).get("content", "") if chunk: assistant_text += chunk - log.write(chunk, end="") except json.JSONDecodeError: pass - log.write("") + if assistant_text: + log.clear() + for msg in self._messages: + role = "You" if msg["role"] == "user" else model + log.write(f"[{role}] {msg['content']}") + log.write(f"[{model}] {assistant_text}") self._messages.append({"role": "assistant", "content": assistant_text}) + else: + log.write(f"[{model}] (no response)") except Exception as exc: log.write(f"\n[error] {exc}") + if self._messages and self._messages[-1]["role"] == "user": + self._messages.pop() + finally: + send_btn.disabled = False + chat_input.disabled = False + chat_input.focus() diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py index fcf0a5c..28113c4 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/dashboard.py @@ -1,9 +1,13 @@ from __future__ import annotations +from datetime import datetime + from textual.app import ComposeResult from textual.containers import Horizontal, Vertical, VerticalScroll from textual.screen import Screen -from textual.widgets import Footer, 
Header, Static +from textual.widgets import Footer, Static + +from widgets.safe_header import SafeHeader from utils import api_client, hardware from widgets.stats_box import StatsBox @@ -13,15 +17,21 @@ class DashboardScreen(Screen): BINDINGS = [("r", "refresh", "Refresh")] def compose(self) -> ComposeResult: - yield Header() + yield SafeHeader() with VerticalScroll(): - yield Static("", id="dash-hostname") + with Horizontal(id="dash-topbar"): + yield Static("", id="dash-hostname") + yield Static("", id="dash-clock") with Horizontal(id="stats-panel"): yield StatsBox("CPU", [("Usage", "…")], id="box-cpu") yield StatsBox("Memory", [("Used", "…"), ("Total", "…")], id="box-mem") yield StatsBox("Disk", [("Used", "…"), ("Free", "…")], id="box-disk") - yield StatsBox("GPU", [("Vendor", "…")], id="box-gpu") - yield Static("Loaded Models", classes="heading") + yield StatsBox( + "GPU", + [("Device", "…"), ("VRAM", "…"), ("Temp", "…"), ("Util", "…")], + id="box-gpu", + ) + yield Static("Active Models", classes="heading") yield Vertical(id="loaded-models") yield Static("Services", classes="heading") yield Vertical(id="service-badges") @@ -40,6 +50,8 @@ def _refresh_system(self) -> None: self.query_one("#dash-hostname", Static).update( f" {hostname} • {ip} • up {uptime}" ) + now = datetime.now().strftime("%H:%M:%S") + self.query_one("#dash-clock", Static).update(now) cpu = hardware.get_cpu_percent() self.query_one("#box-cpu", StatsBox).update_row("Usage", f"{cpu:.0f}%") @@ -56,12 +68,16 @@ def _refresh_system(self) -> None: gpu = hardware.get_gpu_info() box_gpu = self.query_one("#box-gpu", StatsBox) - box_gpu.update_row("Vendor", gpu["vendor"]) if gpu["devices"]: dev = gpu["devices"][0] - box_gpu.update_row( - "Vendor", f"{dev['name']} {dev['temp_c']}°C {dev['util_percent']}%" - ) + box_gpu.update_row("Device", dev["name"]) + vram_total = dev["vram_total_mb"] + vram_used = dev["vram_used_mb"] + box_gpu.update_row("VRAM", f"{vram_used} / {vram_total} MB") + 
box_gpu.update_row("Temp", f"{dev['temp_c']}\u00b0C") + box_gpu.update_row("Util", f"{dev['util_percent']}%") + else: + box_gpu.update_row("Device", gpu["vendor"]) container = self.query_one("#service-badges", Vertical) container.remove_children() @@ -85,9 +101,15 @@ async def _refresh_models_async(self) -> None: else: for m in running: name = m.get("name", "unknown") + size_vram = m.get("size_vram", 0) size_bytes = m.get("size", 0) - size_gb = f"{size_bytes / (1024**3):.1f} GB" if size_bytes else "" - container.mount(Static(f" ● {name} {size_gb}", classes="ok")) + if size_vram and size_vram > 0: + vram_gb = f"{size_vram / (1024**3):.1f} GB" + tag = f"\\[GPU] {vram_gb}" + else: + ram_gb = f"{size_bytes / (1024**3):.1f} GB" if size_bytes else "" + tag = f"\\[CPU] {ram_gb}" + container.mount(Static(f" ● {name} {tag}", classes="ok")) def action_refresh(self) -> None: self._refresh_system() diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/logs.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/logs.py index 7be5812..fdf368d 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/logs.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/logs.py @@ -5,7 +5,9 @@ from textual.app import ComposeResult from textual.containers import Horizontal from textual.screen import Screen -from textual.widgets import Footer, Header, RichLog, Select, Static +from textual.widgets import Footer, RichLog, Select, Static + +from widgets.safe_header import SafeHeader from utils import hardware @@ -19,7 +21,7 @@ class LogsScreen(Screen): BINDINGS = [("r", "refresh", "Refresh")] def compose(self) -> ComposeResult: - yield Header() + yield SafeHeader() with Horizontal(): yield Static(" Service: ", classes="label") yield Select( diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py index df73348..4a73153 100644 --- 
a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/models.py @@ -1,67 +1,558 @@ from __future__ import annotations +import asyncio import json +from textual import work from textual.app import ComposeResult -from textual.containers import Vertical, VerticalScroll +from textual.containers import Horizontal, VerticalScroll from textual.screen import Screen -from textual.widgets import Button, Footer, Header, Input, Static +from textual.widgets import Button, Footer, Input, ProgressBar, Static -from utils import api_client +from widgets.safe_header import SafeHeader + +from textual.binding import Binding + +from utils import api_client, config from widgets.model_item import ModelItem +CURATED_MODELS = [ + ( + "CPU / ≤4 GB VRAM", + [ + ("qwen2.5:3b", "1.9 GB", "Fast general-purpose"), + ("phi3:mini", "2.3 GB", "Microsoft reasoning model"), + ("gemma2:2b", "1.6 GB", "Google lightweight"), + ], + ), + ( + "6 GB VRAM", + [ + ("llama3.2:3b", "2.0 GB", "Meta compact model"), + ("mistral:7b", "4.1 GB", "Mistral AI flagship"), + ("qwen2.5:7b", "4.7 GB", "Strong multilingual"), + ], + ), + ( + "8 GB VRAM", + [ + ("llama3.1:8b", "4.7 GB", "Meta general-purpose"), + ("gemma2:9b", "5.4 GB", "Google mid-range"), + ("deepseek-coder-v2:lite", "5.0 GB", "Code-focused"), + ], + ), + ( + "12 GB VRAM", + [ + ("codestral:latest", "12 GB", "Mistral code generation"), + ("llama3.1:8b-instruct-q8_0", "8.5 GB", "High-quality quantization"), + ("qwen2.5:14b", "9.0 GB", "Strong reasoning"), + ], + ), + ( + "24 GB+ VRAM", + [ + ("llama3.1:70b", "40 GB", "Meta flagship (Q4)"), + ("qwen2.5:32b", "20 GB", "Top-tier multilingual"), + ("deepseek-coder-v2:16b", "8.9 GB", "Full code model"), + ], + ), +] + + +class ModelCatalog(Screen): + BINDINGS = [ + ("escape", "cancel", "Back"), + Binding("up", "nav_up", show=False, priority=True), + Binding("down", "nav_down", show=False, priority=True), + Binding("pageup", "page_up", 
show=False, priority=True), + Binding("pagedown", "page_down", show=False, priority=True), + Binding("enter", "activate", show=False, priority=True), + Binding("space", "activate", show=False, priority=True), + Binding("tab", "next_zone", show=False, priority=True), + Binding("shift+tab", "prev_zone", show=False, priority=True), + ] + + def __init__(self, installed_names: set[str]) -> None: + super().__init__() + self._installed = installed_names + self._selected: set[str] = set() + self._catalog_buttons: list[Button] = [] + self._highlight_index = 0 + self._zone = "list" + + def compose(self) -> ComposeResult: + yield SafeHeader() + yield Static( + " ↑↓ Navigate Enter Select Tab Actions Esc Back", classes="muted" + ) + with VerticalScroll(id="catalog-scroll"): + for tier_label, models in CURATED_MODELS: + yield Static(f" {tier_label}", classes="tier-heading") + for model_name, size, desc in models: + installed = any( + model_name == n or model_name == n.split(":")[0] + for n in self._installed + ) + if installed: + label = f" ✓ {model_name} ({size}) — {desc} [installed]" + btn = Button( + label, + id=f"cat-{model_name.replace(':', '--').replace('.', '-')}", + classes="catalog-item catalog-installed", + disabled=True, + ) + else: + label = f" ○ {model_name} ({size}) — {desc}" + btn = Button( + label, + id=f"cat-{model_name.replace(':', '--').replace('.', '-')}", + classes="catalog-item", + ) + btn.tooltip = model_name + btn.can_focus = False + yield btn + with Horizontal(id="catalog-buttons"): + yield Button("Download Selected", id="download-selected", variant="primary") + yield Button("Cancel", id="catalog-cancel") + yield Footer() + + def on_mount(self) -> None: + self._catalog_buttons = list(self.query("Button.catalog-item")) + self._zone = "list" + self._highlight_index = 0 + self.set_focus(None) + if self._catalog_buttons: + self._apply_highlight() + + def _apply_highlight(self) -> None: + for i, btn in enumerate(self._catalog_buttons): + if i == 
self._highlight_index: + btn.add_class("catalog-highlighted") + btn.scroll_visible() + else: + btn.remove_class("catalog-highlighted") + + def _clear_highlight(self) -> None: + for btn in self._catalog_buttons: + btn.remove_class("catalog-highlighted") + + def _toggle_highlighted(self) -> None: + if not self._catalog_buttons: + return + btn = self._catalog_buttons[self._highlight_index] + if btn.disabled: + return + model_name = btn.tooltip or "" + if not model_name: + return + if model_name in self._selected: + self._selected.discard(model_name) + btn.label = str(btn.label).replace(" ✓ ", " ○ ") + btn.remove_class("catalog-checked") + else: + self._selected.add(model_name) + btn.label = str(btn.label).replace(" ○ ", " ✓ ") + btn.add_class("catalog-checked") + + def action_nav_up(self) -> None: + if self._zone == "buttons": + self._zone = "list" + self.set_focus(None) + self._apply_highlight() + return + if self._catalog_buttons and self._highlight_index > 0: + self._highlight_index -= 1 + self._apply_highlight() + + def action_nav_down(self) -> None: + if self._zone == "list" and self._catalog_buttons: + if self._highlight_index < len(self._catalog_buttons) - 1: + self._highlight_index += 1 + self._apply_highlight() + + def action_page_up(self) -> None: + if self._zone == "buttons": + self._zone = "list" + self.set_focus(None) + self._apply_highlight() + return + if not self._catalog_buttons: + return + scroll = self.query_one("#catalog-scroll", VerticalScroll) + page_size = max(1, scroll.size.height // 3) + self._highlight_index = max(0, self._highlight_index - page_size) + self._apply_highlight() + + def action_page_down(self) -> None: + if not self._catalog_buttons: + return + if self._zone == "buttons": + return + scroll = self.query_one("#catalog-scroll", VerticalScroll) + page_size = max(1, scroll.size.height // 3) + last = len(self._catalog_buttons) - 1 + self._highlight_index = min(last, self._highlight_index + page_size) + self._apply_highlight() + + def 
action_activate(self) -> None: + if self._zone == "list": + self._toggle_highlighted() + else: + focused = self.focused + if focused and focused.id == "download-selected": + self.dismiss(list(self._selected)) + elif focused and focused.id == "catalog-cancel": + self.dismiss([]) + + def action_next_zone(self) -> None: + if self._zone == "list": + self._zone = "buttons" + self._clear_highlight() + self.query_one("#download-selected", Button).focus() + else: + focused = self.focused + if focused and focused.id == "download-selected": + self.query_one("#catalog-cancel", Button).focus() + else: + self.query_one("#download-selected", Button).focus() + + def action_prev_zone(self) -> None: + if self._zone == "buttons": + self._zone = "list" + self.set_focus(None) + self._apply_highlight() + + def action_cancel(self) -> None: + self.dismiss([]) + + def on_button_pressed(self, event: Button.Pressed) -> None: + btn_id = event.button.id or "" + if btn_id == "download-selected": + self.dismiss(list(self._selected)) + elif btn_id == "catalog-cancel": + self.dismiss([]) + class ModelsScreen(Screen): - BINDINGS = [("r", "refresh", "Refresh")] + BINDINGS = [ + ("r", "refresh", "Refresh"), + Binding("up", "nav_up", show=False, priority=True), + Binding("down", "nav_down", show=False, priority=True), + Binding("left", "nav_left", show=False, priority=True), + Binding("right", "nav_right", show=False, priority=True), + Binding("pageup", "page_up", show=False, priority=True), + Binding("pagedown", "page_down", show=False, priority=True), + Binding("enter", "activate", show=False, priority=True), + Binding("tab", "next_zone", show=False, priority=True), + Binding("shift+tab", "prev_zone", show=False, priority=True), + ] + + ZONES = ["models", "browse", "pull-input", "pull-btn"] def compose(self) -> ComposeResult: - yield Header() - with VerticalScroll(): - yield Static("Installed Models", classes="heading") - yield Vertical(id="model-list") - yield Static("", id="model-status") - yield 
Static("Pull Model", classes="heading") + yield SafeHeader() + yield Static("Installed Models", classes="heading") + with Horizontal(id="model-legend"): + yield Static("Model", classes="legend-name") + yield Static("Params", classes="legend-col legend-params") + yield Static("/", classes="legend-sep") + yield Static("Quant", classes="legend-col legend-quant") + yield Static("/", classes="legend-sep") + yield Static("Disk", classes="legend-col legend-disk") + yield Static("/", classes="legend-sep") + yield Static("VRAM", classes="legend-col legend-vram") + yield Static("/", classes="legend-sep") + yield Static("Status", classes="legend-col legend-status") + yield VerticalScroll(id="model-list") + yield Button( + "Browse Available Models", + id="open-catalog", + variant="primary", + classes="primary", + ) + yield Static("Pull by Name", classes="heading") + with Horizontal(id="pull-input-row"): yield Input(placeholder="e.g. llama3:8b", id="pull-input") - yield Button("Pull", variant="primary", id="pull-btn", classes="primary") + yield Button("Pull", id="pull-btn") + yield Static("", id="model-status") + with Horizontal(id="pull-row"): + yield ProgressBar(total=100, show_eta=True, id="pull-progress") + yield Button("Cancel", id="cancel-pull", variant="error") yield Footer() def on_mount(self) -> None: + self.query_one("#pull-progress", ProgressBar).display = False + self.query_one("#cancel-pull", Button).display = False + self._pull_queue: list[str] = [] + self._pulling = False + self._model_items: list[ModelItem] = [] + self._highlight_index = 0 + self._btn_index = 0 + self._zone = "models" self.action_refresh() + # ── Zone management ────────────────────────────────────── + + def _enter_zone(self, zone: str) -> None: + self._zone = zone + if zone == "models": + self.set_focus(None) + self._apply_highlight() + elif zone == "browse": + self._clear_highlight() + self.query_one("#open-catalog", Button).focus() + elif zone == "pull-input": + self._clear_highlight() + 
self.query_one("#pull-input", Input).focus() + elif zone == "pull-btn": + self._clear_highlight() + self.query_one("#pull-btn", Button).focus() + + def action_next_zone(self) -> None: + idx = self.ZONES.index(self._zone) if self._zone in self.ZONES else 0 + idx = (idx + 1) % len(self.ZONES) + self._enter_zone(self.ZONES[idx]) + + def action_prev_zone(self) -> None: + idx = self.ZONES.index(self._zone) if self._zone in self.ZONES else 0 + idx = (idx - 1) % len(self.ZONES) + self._enter_zone(self.ZONES[idx]) + + # ── Model list highlight ───────────────────────────────── + + def _apply_highlight(self) -> None: + self._clear_btn_highlight() + for i, item in enumerate(self._model_items): + if i == self._highlight_index: + item.add_class("model-highlighted") + item.scroll_visible() + self._apply_btn_highlight() + else: + item.remove_class("model-highlighted") + + def _clear_highlight(self) -> None: + self._clear_btn_highlight() + for item in self._model_items: + item.remove_class("model-highlighted") + + # ── Per-model button highlight ─────────────────────────── + + def _get_active_buttons(self) -> list[Button]: + if not self._model_items: + return [] + item = self._model_items[self._highlight_index] + return [b for b in item.get_action_buttons() if not b.disabled] + + def _apply_btn_highlight(self) -> None: + buttons = self._get_active_buttons() + if not buttons: + return + self._btn_index = max(0, min(self._btn_index, len(buttons) - 1)) + for i, btn in enumerate(buttons): + if i == self._btn_index: + btn.add_class("model-btn-active") + else: + btn.remove_class("model-btn-active") + + def _clear_btn_highlight(self) -> None: + for item in self._model_items: + for btn in item.get_action_buttons(): + btn.remove_class("model-btn-active") + + # ── Navigation actions ─────────────────────────────────── + + def action_nav_up(self) -> None: + if self._zone != "models": + return + if self._model_items and self._highlight_index > 0: + self._highlight_index -= 1 + 
self._apply_highlight() + + def action_nav_down(self) -> None: + if self._zone != "models": + return + if self._model_items and self._highlight_index < len(self._model_items) - 1: + self._highlight_index += 1 + self._apply_highlight() + + def action_nav_left(self) -> None: + if self._zone != "models": + return + if self._btn_index > 0: + self._btn_index -= 1 + self._apply_btn_highlight() + + def action_nav_right(self) -> None: + if self._zone != "models": + return + buttons = self._get_active_buttons() + if self._btn_index < len(buttons) - 1: + self._btn_index += 1 + self._apply_btn_highlight() + + def action_page_up(self) -> None: + if self._zone != "models" or not self._model_items: + return + scroll = self.query_one("#model-list", VerticalScroll) + page_size = max(1, scroll.size.height // 6) + self._highlight_index = max(0, self._highlight_index - page_size) + self._apply_highlight() + + def action_page_down(self) -> None: + if self._zone != "models" or not self._model_items: + return + scroll = self.query_one("#model-list", VerticalScroll) + page_size = max(1, scroll.size.height // 6) + last = len(self._model_items) - 1 + self._highlight_index = min(last, self._highlight_index + page_size) + self._apply_highlight() + + def action_activate(self) -> None: + if self._zone == "models": + buttons = self._get_active_buttons() + if buttons and 0 <= self._btn_index < len(buttons): + btn = buttons[self._btn_index] + if not btn.disabled: + btn.press() + elif self._zone == "browse": + self.query_one("#open-catalog", Button).press() + elif self._zone == "pull-input": + inp = self.query_one("#pull-input", Input) + name = inp.value.strip() + if name and not self._pulling: + self._pulling = True + self._start_pull(name) + elif self._zone == "pull-btn": + self.query_one("#pull-btn", Button).press() + def action_refresh(self) -> None: self.app.call_later(self._load_models) async def _load_models(self) -> None: all_models = await api_client.list_models() running = await 
api_client.list_running_models() - running_names = {m.get("name", "") for m in running} + running_map = {m.get("name", ""): m for m in running} + + vram_cache = config.get("vram_cache", {}) + if not isinstance(vram_cache, dict): + vram_cache = {} + cache_changed = False + for name, info in running_map.items(): + vram_bytes = info.get("size_vram", 0) + if vram_bytes and vram_cache.get(name) != vram_bytes: + vram_cache[name] = vram_bytes + cache_changed = True + if cache_changed: + config.set_key("vram_cache", vram_cache) - container = self.query_one("#model-list", Vertical) - container.remove_children() + container = self.query_one("#model-list", VerticalScroll) + await container.remove_children() + self._model_items = [] if not all_models: container.mount(Static(" No models installed", classes="muted")) - return + else: + for m in all_models: + name = m.get("name", "unknown") + size_bytes = m.get("size", 0) + size_str = f"{size_bytes / (1024**3):.1f} GB" if size_bytes else "—" + details = m.get("details", {}) + params = details.get("parameter_size", "") + quant = details.get("quantization_level", "") + loaded = name in running_map + + if name in running_map: + vb = running_map[name].get("size_vram", 0) + vram_str = f"{vb / (1024**3):.1f} GB" if vb else "—" + elif name in vram_cache: + vb = vram_cache[name] + vram_str = f"~{vb / (1024**3):.1f} GB" if vb else "—" + else: + vram_str = "—" + + item = ModelItem(name, size_str, params, quant, vram_str, loaded) + container.mount(item) + self._model_items.append(item) - for m in all_models: - name = m.get("name", "unknown") - size_bytes = m.get("size", 0) - size_str = f"{size_bytes / (1024**3):.1f} GB" if size_bytes else "—" - loaded = name in running_names - container.mount(ModelItem(name, size_str, loaded)) + if self._model_items: + self._highlight_index = min( + self._highlight_index, len(self._model_items) - 1 + ) + self._btn_index = 0 + if self._zone == "models": + self._apply_highlight() async def 
on_button_pressed(self, event: Button.Pressed) -> None: - if event.button.id == "pull-btn": - await self._pull_model() + btn = event.button + btn_id = btn.id or "" + if btn_id == "pull-btn": + name = self.query_one("#pull-input", Input).value.strip() + if name and not self._pulling: + self._pulling = True + self._start_pull(name) + elif btn_id == "open-catalog": + installed = {m.get("name", "") for m in await api_client.list_models()} + self.app.push_screen(ModelCatalog(installed), self._on_catalog_result) + elif btn_id == "cancel-pull": + self._cancel_pull() + elif btn.has_class("model-load"): + self._load_to_vram(btn.name or "") + elif btn.has_class("model-unload"): + self._unload_from_vram(btn.name or "") + elif btn.has_class("model-delete"): + self._delete_model(btn.name or "") - async def _pull_model(self) -> None: - name_input = self.query_one("#pull-input", Input) - model_name = name_input.value.strip() - if not model_name: + def on_input_submitted(self, event: Input.Submitted) -> None: + if event.input.id == "pull-input" and not self._pulling: + name = event.input.value.strip() + if name: + self._pulling = True + self._start_pull(name) + + def _cancel_pull(self) -> None: + self._pull_queue.clear() + self.workers.cancel_group(self, "default") + self._pulling = False + status = self.query_one("#model-status", Static) + status.update(" Download cancelled") + self.query_one("#pull-progress", ProgressBar).display = False + self.query_one("#cancel-pull", Button).display = False + self.query_one("#pull-btn", Button).disabled = False + self.query_one("#open-catalog", Button).disabled = False + + def _on_catalog_result(self, selected: list[str]) -> None: + if not selected: + return + self._pull_queue = list(selected) + self._pull_next() + + def _pull_next(self) -> None: + if not self._pull_queue: + self.app.call_later(self._load_models) return + model_name = self._pull_queue.pop(0) + self._pulling = True + self._start_pull(model_name) + @work(exclusive=True) + async 
def _start_pull(self, model_name: str) -> None: status = self.query_one("#model-status", Static) - status.update(f"Pulling {model_name}...") + progress = self.query_one("#pull-progress", ProgressBar) + cancel_btn = self.query_one("#cancel-pull", Button) + pull_btn = self.query_one("#pull-btn", Button) + catalog_btn = self.query_one("#open-catalog", Button) + + pull_btn.disabled = True + catalog_btn.disabled = True + progress.display = True + cancel_btn.display = True + self._pulling = True + progress.update(total=100, progress=0) + + remaining = len(self._pull_queue) + queue_msg = f" (+{remaining} queued)" if remaining else "" + status.update(f"Pulling {model_name}...{queue_msg}") try: async for line in api_client.pull_model(model_name): @@ -72,13 +563,89 @@ async def _pull_model(self) -> None: completed = data.get("completed", 0) if total: pct = int(completed / total * 100) - status.update(f"{msg} {pct}%") + progress.update(total=100, progress=pct) + size_mb = total / (1024 * 1024) + done_mb = completed / (1024 * 1024) + status.update( + f"{msg} {done_mb:.0f}/{size_mb:.0f} MB ({pct}%){queue_msg}" + ) else: - status.update(msg) + status.update(f"{msg}{queue_msg}") except json.JSONDecodeError: pass status.update(f"✓ {model_name} pulled successfully") - name_input.value = "" - await self._load_models() + self.query_one("#pull-input", Input).value = "" + except asyncio.CancelledError: + status.update(f" Download of {model_name} cancelled") + return except Exception as exc: status.update(f"✗ Pull failed: {exc}") + finally: + self._pulling = False + pull_btn.disabled = False + catalog_btn.disabled = False + progress.display = False + cancel_btn.display = False + + if self._pull_queue: + self._pull_next() + else: + await self._load_models() + + @work() + async def _load_to_vram(self, model_name: str) -> None: + status = self.query_one("#model-status", Static) + status.update(f"Loading {model_name} into VRAM...") + load_btn = self._find_model_button(model_name, 
"model-load") + if load_btn: + load_btn.label = "Loading\u2026" + load_btn.disabled = True + success = await api_client.load_model(model_name, keep_alive=-1) + if success: + status.update(f" \u2713 {model_name} loaded into VRAM") + else: + status.update(f" \u2717 Failed to load {model_name}") + await self._load_models() + + @work() + async def _unload_from_vram(self, model_name: str) -> None: + status = self.query_one("#model-status", Static) + status.update(f"Unloading {model_name}...") + success = await api_client.unload_model(model_name) + if success: + # Ollama returns 200 before the model is fully evicted from /api/ps. + # Poll until it disappears so the UI refresh sees the real state. + for _ in range(10): + await asyncio.sleep(0.5) + running = await api_client.list_running_models() + running_names = {m.get("name", "") for m in running} + if model_name not in running_names: + break + status.update(f" \u2713 {model_name} unloaded from VRAM") + else: + status.update(f" \u2717 Failed to unload {model_name}") + await self._load_models() + + def _find_model_button(self, model_name: str, btn_class: str) -> Button | None: + for item in self._model_items: + if item.name == model_name: + for btn in item.query("Button"): + if btn.has_class(btn_class): + return btn + return None + + @work() + async def _delete_model(self, model_name: str) -> None: + status = self.query_one("#model-status", Static) + running = await api_client.list_running_models() + running_names = {m.get("name", "") for m in running} + if model_name in running_names: + status.update(f"Unloading {model_name} from VRAM before delete...") + await api_client.unload_model(model_name) + status.update(f"Deleting {model_name}...") + success = await api_client.delete_model(model_name) + if success: + status.update(f" \u2713 {model_name} deleted") + else: + status.update(f" \u2717 Failed to delete {model_name}") + await self._load_models() diff --git 
a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/network.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/network.py index c12a8a2..bf39f04 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/network.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/network.py @@ -5,7 +5,9 @@ from textual.app import ComposeResult from textual.containers import Vertical, VerticalScroll from textual.screen import Screen -from textual.widgets import Footer, Header, Static +from textual.widgets import Footer, Static + +from widgets.safe_header import SafeHeader from utils import hardware @@ -14,7 +16,7 @@ class NetworkScreen(Screen): BINDINGS = [("r", "refresh", "Refresh")] def compose(self) -> ComposeResult: - yield Header() + yield SafeHeader() with VerticalScroll(): yield Static("Network Configuration", classes="heading") yield Static("", id="net-hostname") diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py index 61fe215..d8e669d 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/services.py @@ -2,78 +2,175 @@ import subprocess +from textual import work from textual.app import ComposeResult -from textual.containers import Horizontal, Vertical, VerticalScroll +from textual.containers import VerticalScroll from textual.screen import Screen -from textual.widgets import Button, Footer, Header, Static +from textual.widgets import Button, Footer, Static +from textual.binding import Binding +from widgets.safe_header import SafeHeader +from widgets.service_item import ServiceItem from utils import hardware -class ServiceRow(Horizontal): - def __init__(self, service: str, status: str) -> None: - super().__init__(classes="service-row") - self.service_name = service - self.service_status = status - - def compose(self) -> ComposeResult: - short = 
self.service_name.replace("neuraldrive-", "") - cls = "ok" if self.service_status == "active" else "error" - yield Static( - f"{'●' if self.service_status == 'active' else '○'} {short}", classes=cls - ) - yield Static("", classes="value") - yield Button("Start", id=f"start-{self.service_name}") - yield Button("Stop", id=f"stop-{self.service_name}") - yield Button("Restart", id=f"restart-{self.service_name}") +POLL_INTERVAL = 5 class ServicesScreen(Screen): - BINDINGS = [("r", "refresh", "Refresh")] + BINDINGS = [ + ("r", "refresh", "Refresh"), + Binding("up", "nav_up", show=False, priority=True), + Binding("down", "nav_down", show=False, priority=True), + Binding("left", "nav_left", show=False, priority=True), + Binding("right", "nav_right", show=False, priority=True), + Binding("enter", "activate", show=False, priority=True), + ] def compose(self) -> ComposeResult: - yield Header() - with VerticalScroll(): - yield Static("NeuralDrive Services", classes="heading") - yield Vertical(id="service-list") - yield Static("", id="svc-status") + yield SafeHeader() + yield Static("NeuralDrive Services", classes="heading") + yield VerticalScroll(id="svc-list") + yield Static("", id="svc-status") yield Footer() def on_mount(self) -> None: - self._load_services() + self._svc_items: list[ServiceItem] = [] + self._highlight_index = 0 + self._btn_index = 0 + self._loading = False + self._poll_timer = self.set_interval(POLL_INTERVAL, self._poll_services) + self.app.call_later(self._load_services) + + def on_screen_resume(self) -> None: + self.app.call_later(self._load_services) - def _load_services(self) -> None: - container = self.query_one("#service-list", Vertical) - container.remove_children() + def on_screen_suspend(self) -> None: + pass + + async def _load_services(self) -> None: + self._loading = True + container = self.query_one("#svc-list", VerticalScroll) + await container.remove_children() + self._svc_items = [] for svc in hardware.NEURALDRIVE_SERVICES: status = 
hardware.get_service_status(svc) - container.mount(ServiceRow(svc, status)) + short = svc.replace("neuraldrive-", "") + item = ServiceItem(svc, short, status) + container.mount(item) + self._svc_items.append(item) + if self._svc_items: + self._highlight_index = min( + self._highlight_index, len(self._svc_items) - 1 + ) + self._btn_index = 0 + self._apply_highlight() + self._loading = False + + async def _poll_services(self) -> None: + if self._loading or not self._svc_items: + return + for item in self._svc_items: + status = hardware.get_service_status(item.name) + item.update_status(status) + self._apply_btn_highlight() + + def _get_active_buttons(self) -> list[Button]: + if not self._svc_items: + return [] + item = self._svc_items[self._highlight_index] + return [b for b in item.get_action_buttons() if not b.disabled] + + def _apply_highlight(self) -> None: + self._clear_btn_highlight() + for i, item in enumerate(self._svc_items): + if i == self._highlight_index: + item.add_class("svc-highlighted") + item.scroll_visible() + self._apply_btn_highlight() + else: + item.remove_class("svc-highlighted") + + def _clear_highlight(self) -> None: + self._clear_btn_highlight() + for item in self._svc_items: + item.remove_class("svc-highlighted") + + def _apply_btn_highlight(self) -> None: + buttons = self._get_active_buttons() + if not buttons: + return + self._btn_index = max(0, min(self._btn_index, len(buttons) - 1)) + for item in self._svc_items: + for btn in item.get_action_buttons(): + btn.remove_class("svc-btn-active") + for i, btn in enumerate(buttons): + if i == self._btn_index: + btn.add_class("svc-btn-active") + else: + btn.remove_class("svc-btn-active") + + def _clear_btn_highlight(self) -> None: + for item in self._svc_items: + for btn in item.get_action_buttons(): + btn.remove_class("svc-btn-active") + + def action_nav_up(self) -> None: + if self._svc_items and self._highlight_index > 0: + self._highlight_index -= 1 + self._btn_index = 0 + 
self._apply_highlight() + + def action_nav_down(self) -> None: + if self._svc_items and self._highlight_index < len(self._svc_items) - 1: + self._highlight_index += 1 + self._btn_index = 0 + self._apply_highlight() + + def action_nav_left(self) -> None: + if self._btn_index > 0: + self._btn_index -= 1 + self._apply_btn_highlight() + + def action_nav_right(self) -> None: + buttons = self._get_active_buttons() + if self._btn_index < len(buttons) - 1: + self._btn_index += 1 + self._apply_btn_highlight() + + def action_activate(self) -> None: + buttons = self._get_active_buttons() + if buttons and 0 <= self._btn_index < len(buttons): + buttons[self._btn_index].press() def on_button_pressed(self, event: Button.Pressed) -> None: - btn_id = event.button.id or "" - for action in ("start", "stop", "restart"): - prefix = f"{action}-" - if btn_id.startswith(prefix): - svc = btn_id[len(prefix) :] - self._run_systemctl(action, svc) - return - - def _run_systemctl(self, action: str, service: str) -> None: + btn = event.button + if btn.has_class("svc-start"): + self._run_action(btn.name or "", "start") + elif btn.has_class("svc-stop"): + self._run_action(btn.name or "", "stop") + elif btn.has_class("svc-restart"): + self._run_action(btn.name or "", "restart") + + @work(exclusive=True) + async def _run_action(self, service: str, action: str) -> None: + short = service.replace("neuraldrive-", "") status_widget = self.query_one("#svc-status", Static) + status_widget.update(f" {action.title()}ing {short}...") try: res = subprocess.run( - ["systemctl", action, service], + ["sudo", "systemctl", action, service], capture_output=True, text=True, timeout=15, ) if res.returncode == 0: - status_widget.update(f"✓ {action} {service}") + status_widget.update(f" \u2713 {short} {action}ed") else: - status_widget.update(f"✗ {action} {service}: {res.stderr.strip()}") + status_widget.update(f" \u2717 {short}: {res.stderr.strip()}") except subprocess.TimeoutExpired: - status_widget.update(f"✗ 
{action} {service}: timeout") - self._load_services() + status_widget.update(f" \u2717 {short}: timeout") + await self._poll_services() def action_refresh(self) -> None: - self._load_services() + self.app.call_later(self._load_services) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py index d2eed88..fa28378 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/screens/wizard.py @@ -9,13 +9,21 @@ from textual.screen import Screen from textual.widgets import Button, Input, Static +from utils import config + SENTINEL = "/etc/neuraldrive/first-boot-complete" CREDENTIALS_PATH = "/etc/neuraldrive/credentials.conf" API_KEY_PATH = "/etc/neuraldrive/api.key" +PERSISTENT_CREDENTIALS_PATH = "/var/lib/neuraldrive/config/credentials.conf" +PERSISTENT_API_KEY_PATH = "/var/lib/neuraldrive/config/api.key" SUDOERS_PATH = "/etc/sudoers.d/neuraldrive-admin" +PERSISTENCE_MOUNT = "/var/lib/neuraldrive" +PERSISTENCE_CONF_CONTENT = "/var/lib/neuraldrive union\n/etc/neuraldrive union\n/var/log/neuraldrive union\n/home union\n" class FirstBootWizard(Screen): + """Step order: Welcome → Storage → Security → Network → Models → Done""" + BINDINGS = [("escape", "cancel_wizard", "Skip")] def __init__(self) -> None: @@ -25,6 +33,10 @@ def __init__(self) -> None: self._wifi_ssid = "" self._wifi_psk = "" self._generated_api_key = "" + self._boot_device: str | None = None + self._unpartitioned_bytes = 0 + self._has_persistence = False + self._awaiting_confirm = False def compose(self) -> ComposeResult: with Center(id="wizard-container"): @@ -37,6 +49,12 @@ def compose(self) -> ComposeResult: yield Button("Next →", id="wiz-next", classes="primary") yield Button("Skip", id="wiz-skip") + def on_input_submitted(self, event: Input.Submitted) -> None: + if self._awaiting_confirm: + self._handle_storage_confirm() + else: + 
self.focus_next() + def on_mount(self) -> None: self._show_step() @@ -55,18 +73,22 @@ def _show_step(self) -> None: error.update("") inp.value = "" inp2.value = "" + self._awaiting_confirm = False if self._step == 0: title.update("Welcome to NeuralDrive") body.update( "This wizard will configure your system.\n\n" - "Steps: Security → WiFi → Network → Storage → Models → Done" + "Steps: Storage → Security → Network → Models → Done" ) next_btn.label = "Begin →" elif self._step == 1: - title.update("Step 1: Security") - body.update("Set an admin password for the 'neuraldrive' user.") + self._show_storage_step(title, body, inp, next_btn, skip_btn) + + elif self._step == 2: + title.update("Step 2: Security") + body.update("Set an admin password for the 'neuraldrive-admin' user.") inp.display = True inp.placeholder = "New password" inp.password = True @@ -74,8 +96,8 @@ def _show_step(self) -> None: inp2.placeholder = "Confirm password" next_btn.label = "Set Password →" - elif self._step == 2: - title.update("Step 2: WiFi (Optional)") + elif self._step == 3: + title.update("Step 3: Network (Optional)") body.update("Enter WiFi credentials, or skip for wired-only.") inp.display = True inp.placeholder = "SSID" @@ -85,34 +107,8 @@ def _show_step(self) -> None: skip_btn.display = True next_btn.label = "Connect →" - elif self._step == 3: - title.update("Step 3: Network") - from utils import hardware - - ip = hardware.get_ip_address() - hostname = hardware.get_hostname() - body.update( - f"Current configuration:\n" - f" Hostname: {hostname}\n" - f" IP: {ip}\n\n" - "DHCP is active. Static IP can be configured later." - ) - next_btn.label = "Next →" - elif self._step == 4: - title.update("Step 4: Storage") - from utils import hardware - - disk = hardware.get_disk_info() - body.update( - f"Storage: {disk['free_gb']} GB free of {disk['total_gb']} GB\n" - f"Path: {disk['path']}\n\n" - "Models will be stored at /var/lib/neuraldrive/models." 
- ) - next_btn.label = "Next →" - - elif self._step == 5: - title.update("Step 5: Models") + title.update("Step 4: Models") body.update( "Models can be pulled after setup from:\n" " • This TUI (press M for Models)\n" @@ -121,39 +117,403 @@ def _show_step(self) -> None: ) next_btn.label = "Next →" - elif self._step == 6: + elif self._step == 5: self._generated_api_key = secrets.token_urlsafe(32) title.update("Setup Complete") body.update( "NeuralDrive is ready.\n\n" f"API Key: {self._generated_api_key}\n\n" - "Save this key — it is required for API access.\n" + "This key is stored at /etc/neuraldrive/api.key\n" + "and on persistent storage when available.\n" "Press Finish to start using NeuralDrive." ) next_btn.label = "Finish ✓" + if inp.display: + inp.focus() + else: + next_btn.focus() + + def _show_storage_step( + self, + title: Static, + body: Static, + inp: Input, + next_btn: Button, + skip_btn: Button, + ) -> None: + title.update("Step 1: Storage & Persistence") + + from utils import hardware + + self._boot_device = hardware.get_boot_device() + if not self._boot_device: + body.update( + "Could not detect boot device.\n\n" + "Persistence partition cannot be created automatically.\n" + "Data will be stored on the ephemeral overlay (lost on reboot).\n\n" + "You can create a persistence partition manually later\n" + "using: sudo /usr/lib/neuraldrive/prepare-usb.sh /dev/sdX" + ) + next_btn.label = "Next →" + return + + partitions = hardware.get_disk_partitions(self._boot_device) + self._has_persistence = any(p.get("label") == "persistence" for p in partitions) + total_bytes = hardware.get_device_size(self._boot_device) + total_gb = total_bytes / (1024**3) if total_bytes else 0 + + if self._has_persistence: + pers = next(p for p in partitions if p.get("label") == "persistence") + pers_gb = pers["size_bytes"] / (1024**3) + body.update( + f"Boot device: {self._boot_device} ({total_gb:.0f} GB)\n\n" + f"✓ Persistence partition found: {pers_gb:.1f} GB\n" + f" Models, 
config, and logs will survive reboots.\n\n" + "No action needed." + ) + next_btn.label = "Next →" + return + + self._unpartitioned_bytes = hardware.get_unpartitioned_space(self._boot_device) + free_gb = self._unpartitioned_bytes / (1024**3) + + if self._unpartitioned_bytes < 1024 * 1024 * 1024: + body.update( + f"Boot device: {self._boot_device} ({total_gb:.0f} GB)\n\n" + "No persistence partition found.\n" + f"Only {free_gb:.1f} GB unpartitioned space available\n" + "(minimum 1 GB required).\n\n" + "Data will be stored on the ephemeral overlay (lost on reboot)." + ) + next_btn.label = "Next →" + return + + body.update( + f"Boot device: {self._boot_device} ({total_gb:.0f} GB)\n\n" + "No persistence partition found.\n" + f"Available space: {free_gb:.1f} GB\n\n" + "A persistence partition stores your models, config,\n" + "and logs so they survive reboots.\n\n" + "Type 'yes' to create it, or skip to use\n" + "ephemeral overlay storage." + ) + inp.display = True + inp.placeholder = "Type 'yes' to create persistence partition" + inp.password = False + self._awaiting_confirm = True + skip_btn.display = True + next_btn.label = "Create Partition →" + def on_button_pressed(self, event: Button.Pressed) -> None: if event.button.id == "wiz-skip": + self._awaiting_confirm = False self._step += 1 self._show_step() return if event.button.id == "wiz-next": - if self._step == 1: + if self._step == 1 and self._awaiting_confirm: + self._handle_storage_confirm() + return + if self._step == 2: if not self._validate_password(): return - elif self._step == 2: + elif self._step == 3: self._configure_wifi() - elif self._step == 6: + elif self._step == 5: self._finalize() return self._step += 1 - if self._step > 6: + if self._step > 5: self._finalize() else: self._show_step() + def _handle_storage_confirm(self) -> None: + inp = self.query_one("#wiz-input", Input) + error = self.query_one("#wiz-error", Static) + + if inp.value.strip().lower() != "yes": + error.update("Type 'yes' to confirm, 
or press Skip.") + return + + self._awaiting_confirm = False + body = self.query_one("#wiz-body", Static) + body.update("Creating persistence partition...\nThis may take a moment.") + self.query_one("#wiz-next", Button).disabled = True + self.query_one("#wiz-skip", Button).display = False + inp.display = False + + err = self._create_persistence_partition() + self.query_one("#wiz-next", Button).disabled = False + + if err: + error.update(f"Partition creation failed: {err}") + body.update( + "Partition creation failed.\n" + "Data will use the ephemeral overlay.\n" + "You can retry manually later." + ) + self.query_one("#wiz-next", Button).label = "Next →" + else: + body.update( + "✓ Persistence partition created and mounted.\n\n" + "Models, config, and logs will now survive reboots." + ) + self.query_one("#wiz-next", Button).label = "Next →" + + def _create_persistence_partition(self) -> str | None: + if not self._boot_device: + return "No boot device detected" + + try: + res = subprocess.run( + [ + "sudo", + "parted", + "-m", + self._boot_device, + "unit", + "B", + "print", + "free", + ], + capture_output=True, + text=True, + timeout=10, + ) + if res.returncode != 0: + return f"parted print failed: {res.stderr.strip()}" + + free_start = None + free_end = None + for line in res.stdout.strip().splitlines(): + if ":free;" in line: + parts = line.split(":") + if len(parts) >= 3: + start_b = int(parts[1].rstrip("B")) + end_b = int(parts[2].rstrip("B")) + size_b = end_b - start_b + if size_b > 1024 * 1024 * 1024: + free_start = parts[1] + free_end = parts[2] + + if not free_start or not free_end: + return "No free space block large enough found" + + # Snapshot partition list BEFORE mkpart so the diff is reliable + pre_res = subprocess.run( + ["lsblk", "-ln", "-o", "NAME", self._boot_device], + capture_output=True, + text=True, + timeout=5, + ) + if pre_res.returncode != 0: + return "Cannot list partitions — aborting to avoid unsafe disk changes" + before_parts = { + 
line.strip() + for line in pre_res.stdout.strip().splitlines() + if line.strip() + } + + proc = subprocess.run( + [ + "sudo", + "parted", + self._boot_device, + "--script", + "--", + "mkpart", + "primary", + "ext4", + free_start, + free_end, + ], + capture_output=True, + text=True, + timeout=30, + ) + if proc.returncode != 0: + return proc.stderr.strip() + + partprobe_proc = subprocess.run( + ["sudo", "partprobe", self._boot_device], + capture_output=True, + text=True, + timeout=10, + ) + if partprobe_proc.returncode != 0: + return f"partprobe failed: {partprobe_proc.stderr.strip()}" + + import time + + new_part = None + for _attempt in range(6): + time.sleep(1) + post_res = subprocess.run( + ["lsblk", "-ln", "-o", "NAME", self._boot_device], + capture_output=True, + text=True, + timeout=5, + ) + if post_res.returncode != 0: + continue + + after_parts = { + line.strip() + for line in post_res.stdout.strip().splitlines() + if line.strip() + } + + new_parts = after_parts - before_parts + base_name = os.path.basename(self._boot_device) + new_parts.discard(base_name) + + if len(new_parts) == 1: + new_part = f"/dev/{new_parts.pop()}" + break + + if not new_part: + return "New partition did not appear after partprobe (timed out)" + + proc = subprocess.run( + [ + "sudo", + "mkfs.ext4", + "-L", + "persistence", + "-m", + "1", + new_part, + ], + capture_output=True, + text=True, + timeout=120, + ) + if proc.returncode != 0: + return f"mkfs.ext4 failed: {proc.stderr.strip()}" + + proc = subprocess.run( + ["sudo", "mkdir", "-p", "/mnt/persistence"], + capture_output=True, + text=True, + timeout=5, + ) + if proc.returncode != 0: + return f"mkdir /mnt/persistence failed: {proc.stderr.strip()}" + proc = subprocess.run( + ["sudo", "mount", new_part, "/mnt/persistence"], + capture_output=True, + text=True, + timeout=10, + ) + if proc.returncode != 0: + return f"Mount failed: {proc.stderr.strip()}" + + proc = subprocess.run( + ["sudo", "tee", "/mnt/persistence/persistence.conf"], + 
input=PERSISTENCE_CONF_CONTENT.encode(), + capture_output=True, + timeout=5, + ) + if proc.returncode != 0: + return "Failed to write persistence.conf" + + for d in [ + "/mnt/persistence/var/lib/neuraldrive/ollama/.ollama", + "/mnt/persistence/var/lib/neuraldrive/models", + "/mnt/persistence/var/lib/neuraldrive/config", + "/mnt/persistence/var/lib/neuraldrive/webui", + "/mnt/persistence/var/log/neuraldrive", + "/mnt/persistence/etc/neuraldrive", + "/mnt/persistence/home", + ]: + proc = subprocess.run( + ["sudo", "mkdir", "-p", d], + capture_output=True, + text=True, + timeout=5, + ) + if proc.returncode != 0: + return f"mkdir {d} failed: {proc.stderr.strip()}" + + proc = subprocess.run( + [ + "sudo", + "chown", + "-R", + "neuraldrive-ollama:neuraldrive-ollama", + "/mnt/persistence/var/lib/neuraldrive/ollama", + ], + capture_output=True, + text=True, + timeout=5, + ) + if proc.returncode != 0: + return f"chown failed: {proc.stderr.strip()}" + + proc = subprocess.run( + [ + "sudo", + "chown", + "-R", + "neuraldrive-webui:neuraldrive-webui", + "/mnt/persistence/var/lib/neuraldrive/webui", + ], + capture_output=True, + text=True, + timeout=5, + ) + if proc.returncode != 0: + return f"chown webui failed: {proc.stderr.strip()}" + + proc = subprocess.run( + ["sudo", "umount", "/mnt/persistence"], + capture_output=True, + text=True, + timeout=10, + ) + if proc.returncode != 0: + return f"umount /mnt/persistence failed: {proc.stderr.strip()}" + + proc = subprocess.run( + ["sudo", "mkdir", "-p", PERSISTENCE_MOUNT], + capture_output=True, + text=True, + timeout=5, + ) + if proc.returncode != 0: + return f"mkdir {PERSISTENCE_MOUNT} failed: {proc.stderr.strip()}" + proc = subprocess.run( + ["sudo", "mount", new_part, PERSISTENCE_MOUNT], + capture_output=True, + text=True, + timeout=10, + ) + if proc.returncode != 0: + return f"Mount at {PERSISTENCE_MOUNT} failed: {proc.stderr.strip()}" + + proc = subprocess.run( + ["sudo", "systemctl", "restart", "neuraldrive-ollama"], + 
capture_output=True, + text=True, + timeout=30, + ) + if proc.returncode != 0: + self.query_one("#wiz-error", Static).update( + f"Warning: Ollama restart failed: {proc.stderr.strip()}" + ) + + self._has_persistence = True + return None + + except subprocess.TimeoutExpired: + return "Operation timed out" + except FileNotFoundError as e: + return f"Required tool not found: {e}" + def _validate_password(self) -> bool: error = self.query_one("#wiz-error", Static) pw = self.query_one("#wiz-input", Input).value @@ -185,44 +545,118 @@ def _configure_wifi(self) -> None: except (subprocess.TimeoutExpired, FileNotFoundError): pass - def _finalize(self) -> None: + def _sudo_write(self, path: str, content: str, mode: str = "0644") -> str | None: try: - if self._admin_password: - proc = subprocess.Popen( - ["chpasswd"], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - proc.communicate( - input=f"neuraldrive:{self._admin_password}\n".encode(), + mkdir_proc = subprocess.run( + ["sudo", "mkdir", "-p", os.path.dirname(path)], + capture_output=True, + timeout=5, + ) + if mkdir_proc.returncode != 0: + return f"Failed to create dir for {path}: {mkdir_proc.stderr.decode().strip()}" + proc = subprocess.run( + ["sudo", "tee", path], + input=content.encode(), + capture_output=True, + timeout=5, + ) + if proc.returncode != 0: + return f"Failed to write {path}: {proc.stderr.decode().strip()}" + chmod_proc = subprocess.run( + ["sudo", "chmod", mode, path], + capture_output=True, + timeout=5, + ) + if chmod_proc.returncode != 0: + return f"Failed to chmod {path}: {chmod_proc.stderr.decode().strip()}" + return None + except (subprocess.TimeoutExpired, FileNotFoundError) as e: + return f"Failed to write {path}: {e}" + + def _finalize(self) -> None: + errors: list[str] = [] + + if self._admin_password: + try: + proc = subprocess.run( + ["sudo", "chpasswd"], + input=f"neuraldrive-admin:{self._admin_password}\n".encode(), + capture_output=True, timeout=10, ) + if 
proc.returncode != 0: + errors.append( + f"Password change failed: {proc.stderr.decode().strip()}" + ) + except (subprocess.TimeoutExpired, FileNotFoundError) as e: + errors.append(f"Password change failed: {e}") + + if self._generated_api_key: + key_content = self._generated_api_key + "\n" + cred_content = f"api_key={self._generated_api_key}\n" + + err = self._sudo_write(API_KEY_PATH, key_content, "0600") + if err: + errors.append(err) + err = self._sudo_write(CREDENTIALS_PATH, cred_content, "0600") + if err: + errors.append(err) + + persist_dir = os.path.dirname(PERSISTENT_API_KEY_PATH) + if os.path.isdir(persist_dir): + err = self._sudo_write(PERSISTENT_API_KEY_PATH, key_content, "0600") + if err: + errors.append(err) + err = self._sudo_write( + PERSISTENT_CREDENTIALS_PATH, cred_content, "0600" + ) + if err: + errors.append(err) + + cfg_data = config.load() + if self._admin_password: + cfg_data["security"] = {"password_set": True} + if self._wifi_ssid: + cfg_data["network"] = {"wifi_ssid": self._wifi_ssid} + if self._generated_api_key: + cfg_data["api"] = {"key_generated": True} + if self._has_persistence: + cfg_data["storage"] = {"persistence": True} + cfg_err = config.save(cfg_data) + if cfg_err: + errors.append(cfg_err) + + if errors: + error_widget = self.query_one("#wiz-error", Static) + error_widget.update("\n".join(errors)) + return - if os.path.exists(SUDOERS_PATH): - with open(SUDOERS_PATH, "r") as f: - content = f.read() - content = content.replace("NOPASSWD:", "") - with open(SUDOERS_PATH, "w") as f: - f.write(content) - - if self._generated_api_key: - os.makedirs(os.path.dirname(API_KEY_PATH), exist_ok=True) - with open(API_KEY_PATH, "w") as f: - f.write(self._generated_api_key + "\n") - os.chmod(API_KEY_PATH, 0o600) - - os.makedirs(os.path.dirname(CREDENTIALS_PATH), exist_ok=True) - with open(CREDENTIALS_PATH, "w") as f: - f.write(f"api_key={self._generated_api_key}\n") - os.chmod(CREDENTIALS_PATH, 0o600) - - os.makedirs(os.path.dirname(SENTINEL), 
exist_ok=True) - with open(SENTINEL, "w") as f: - f.write("") - - except Exception: - pass + # All config writes succeeded — now write sentinel and strip NOPASSWD + err = self._sudo_write(SENTINEL, "") + if err: + error_widget = self.query_one("#wiz-error", Static) + error_widget.update(f"Failed to write sentinel: {err}") + return + + # Remove NOPASSWD LAST — after all other sudo operations are done, + # since removing it makes subsequent sudo calls require a TTY password prompt + if self._admin_password: + try: + result = subprocess.run( + ["sudo", "cat", SUDOERS_PATH], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0 and "NOPASSWD:" in result.stdout: + new_content = result.stdout.replace("NOPASSWD:", "") + err = self._sudo_write(SUDOERS_PATH, new_content, "0440") + if err: + # Sudoers strip failed but sentinel+config are written — + # wizard is complete, just warn + pass + except (subprocess.TimeoutExpired, FileNotFoundError): + pass self.app.pop_screen() diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss index 258ccbf..a2480d3 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/styles.tcss @@ -56,6 +56,22 @@ Static.muted { padding: 1; } +#dash-topbar { + height: 1; + layout: horizontal; +} + +#dash-hostname { + width: 1fr; +} + +#dash-clock { + width: auto; + color: #A1A1AA; + padding: 0 2; + text-align: right; +} + .stats-box { border: solid #2E2E2E; padding: 1 2; @@ -72,7 +88,7 @@ Static.muted { .model-item { layout: horizontal; - height: 3; + height: 5; padding: 0 2; border: solid #2E2E2E; margin: 0 0 1 0; @@ -83,14 +99,39 @@ Static.muted { background: #1F1F1F; } +.model-highlighted { + background: #1F1F1F; + border: solid #F59E0B; +} + +.model-item Static { + height: 100%; + content-align: left middle; +} + .model-item Static.model-name { color: #FFFFFF; text-style: bold; 
width: 1fr; } -.model-item Static.model-size { +.model-item Static.model-params { color: #A1A1AA; + width: 8; +} + +.model-item Static.model-quant { + color: #71717A; + width: 10; +} + +.model-item Static.model-disk { + color: #A1A1AA; + width: 10; +} + +.model-item Static.model-vram { + color: #F59E0B; width: 12; } @@ -104,17 +145,172 @@ Static.muted { width: 10; } -.service-row { - layout: horizontal; +Button.model-load { + background: #1F1F1F; + color: #10B981; + border: solid #10B981; + min-width: 10; + width: 10; height: 3; +} + +Button.model-load:hover { + background: #0A1F0A; +} + +Button.model-unload { + background: #1F1F1F; + color: #F97316; + border: solid #F97316; + min-width: 10; + width: 10; + height: 3; +} + +Button.model-unload:hover { + background: #1F0A0A; +} + +Button.model-load:disabled { + background: #0A0A0A; + color: #52525B; + border: solid #1A1A1A; +} + +Button.model-unload:disabled { + background: #0A0A0A; + color: #52525B; + border: solid #1A1A1A; +} + +Button.model-delete { + background: #1F1F1F; + color: #EF4444; + border: solid #EF4444; + min-width: 10; + width: 10; + height: 3; +} + +Button.model-delete:hover { + background: #1F0A0A; +} + +Button.model-btn-active { + border: tall #F59E0B; + text-style: bold reverse; +} + +.svc-item { + layout: horizontal; + height: 5; padding: 0 2; border: solid #2E2E2E; margin: 0 0 1 0; background: #141414; } -.service-row:hover { +.svc-item:hover { + background: #1F1F1F; +} + +.svc-highlighted { background: #1F1F1F; + border: solid #F59E0B; +} + +.svc-item Static { + height: 100%; + content-align: left middle; +} + +.svc-item Static.svc-name { + color: #FFFFFF; + text-style: bold; + width: 1fr; +} + +.svc-item Static.svc-state { + width: 16; +} + +.svc-status-active { + color: #10B981; +} + +.svc-status-inactive { + color: #EF4444; +} + +Button.svc-start { + background: #1F1F1F; + color: #10B981; + border: solid #10B981; + min-width: 10; + width: 10; + height: 3; +} + +Button.svc-start:hover { + 
background: #0A1F0A; +} + +Button.svc-start:disabled { + background: #0A0A0A; + color: #52525B; + border: solid #1A1A1A; +} + +Button.svc-stop { + background: #1F1F1F; + color: #EF4444; + border: solid #EF4444; + min-width: 10; + width: 10; + height: 3; +} + +Button.svc-stop:hover { + background: #1F0A0A; +} + +Button.svc-stop:disabled { + background: #0A0A0A; + color: #52525B; + border: solid #1A1A1A; +} + +Button.svc-restart { + background: #1F1F1F; + color: #F59E0B; + border: solid #F59E0B; + min-width: 11; + width: 11; + height: 3; +} + +Button.svc-restart:hover { + background: #1F1A0A; +} + +Button.svc-restart:disabled { + background: #0A0A0A; + color: #52525B; + border: solid #1A1A1A; +} + +Button.svc-btn-active { + border: tall #F59E0B; + text-style: bold reverse; +} + +#svc-list { + height: 1fr; +} + +#svc-status { + height: 1; + padding: 0 2; } Button { @@ -165,6 +361,28 @@ Select { color: #FFFFFF; } +#chat-model-row { + layout: horizontal; + height: auto; + padding: 0 1; +} + +#chat-model-label { + color: #F59E0B; + text-style: bold; + width: auto; + height: 3; + content-align: left middle; + padding: 0 1; +} + +#chat-model-select { + width: 1fr; + background: #1F1F1F; + color: #FFFFFF; + height: auto; +} + #wizard-container { align: center middle; padding: 2 4; @@ -182,7 +400,7 @@ Select { #chat-input-row { layout: horizontal; height: 3; - dock: bottom; + margin: 0 0 0 0; } #chat-input { @@ -191,6 +409,7 @@ Select { #chat-send { width: 10; + content-align: center middle; } #chat-log { @@ -204,3 +423,175 @@ Select { .badge-offline { color: #EF4444; } + +#pull-progress { + margin: 1 0; + width: 1fr; +} + +#pull-row { + height: auto; + layout: horizontal; +} + +#cancel-pull { + width: 12; + margin: 1 0 1 1; +} + +ProgressBar Bar { + color: #F59E0B; + background: #2E2E2E; +} + +ProgressBar PercentageStatus { + color: #A1A1AA; +} + +#chat-notice { + padding: 0 2; + height: auto; +} + +Static.tier-heading { + color: #F59E0B; + text-style: bold; +} + 
+Button.model-pick { + background: #141414; + color: #FFFFFF; + border: solid #2E2E2E; + width: 100%; + height: 3; + content-align: left middle; + text-align: left; +} + +Button.model-pick:hover { + background: #1F1F1F; + border: solid #F59E0B; +} + +Button.model-pick:disabled { + background: #0A0A0A; + color: #52525B; + border: solid #1A1A1A; +} + +#catalog-scroll { + height: 1fr; + border: solid #2E2E2E; + scrollbar-background: #141414; + scrollbar-color: #2E2E2E; +} + +Button.catalog-item { + background: #141414; + color: #A1A1AA; + border: solid #2E2E2E; + width: 100%; + height: 3; + content-align: left middle; + text-align: left; +} + +Button.catalog-item:hover { + background: #1F1F1F; + border: solid #F59E0B; +} + +Button.catalog-item:focus { + border: solid #F59E0B; +} + +Button.catalog-highlighted { + background: #1F1F1F; + border: solid #F59E0B; +} + +Button.catalog-highlighted.catalog-installed { + background: #0A0A0A; + border: solid #52525B; +} + +Button.catalog-checked { + color: #10B981; + background: #0A1F0A; + border: solid #10B981; +} + +Button.catalog-installed { + color: #52525B; + background: #0A0A0A; + border: solid #1A1A1A; +} + +#catalog-buttons { + height: auto; + padding: 1 0; + align: center middle; +} + +#catalog-buttons Button { + margin: 0 2; +} + +#model-list { + height: 1fr; + scrollbar-background: #141414; + scrollbar-color: #2E2E2E; +} + +#model-legend { + height: 1; + padding: 0 2; + color: #71717A; +} + +#model-legend Static.legend-name { + width: 1fr; + color: #71717A; +} + +#model-legend Static.legend-col { + color: #71717A; +} + +#model-legend Static.legend-sep { + width: 1; + color: #52525B; +} + +Static.legend-params { + width: 8; +} + +Static.legend-quant { + width: 10; +} + +Static.legend-disk { + width: 10; +} + +Static.legend-vram { + width: 12; +} + +Static.legend-status { + width: 10; +} + +#pull-input-row { + height: auto; + layout: horizontal; +} + +#pull-input-row Input { + width: 1fr; +} + +#pull-input-row Button { 
+ width: 12; +} diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py index 11a4fd6..93de154 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/api_client.py @@ -48,7 +48,33 @@ async def pull_model(name: str): async def delete_model(name: str) -> bool: try: async with httpx.AsyncClient(timeout=TIMEOUT) as client: - resp = await client.delete(f"{OLLAMA_URL}/api/delete", json={"name": name}) + resp = await client.request( + "DELETE", f"{OLLAMA_URL}/api/delete", json={"name": name} + ) + return resp.status_code == 200 + except (httpx.ConnectError, httpx.TimeoutException, httpx.HTTPError): + return False + + +async def load_model(name: str, keep_alive: str | int = "5m") -> bool: + try: + async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, read=300.0)) as client: + resp = await client.post( + f"{OLLAMA_URL}/api/generate", + json={"model": name, "prompt": "", "keep_alive": keep_alive}, + ) + return resp.status_code == 200 + except (httpx.ConnectError, httpx.TimeoutException, httpx.HTTPError): + return False + + +async def unload_model(name: str) -> bool: + try: + async with httpx.AsyncClient(timeout=TIMEOUT) as client: + resp = await client.post( + f"{OLLAMA_URL}/api/generate", + json={"model": name, "prompt": "", "keep_alive": 0}, + ) return resp.status_code == 200 except (httpx.ConnectError, httpx.TimeoutException, httpx.HTTPError): return False diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py new file mode 100644 index 0000000..b6a5496 --- /dev/null +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/config.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +import os +import subprocess +from typing import Any + +import yaml + +PERSISTENT_CONFIG = 
"/var/lib/neuraldrive/config/config.yaml" +OVERLAY_CONFIG = "/etc/neuraldrive/config.yaml" + + +def _config_path() -> str: + persistent_dir = os.path.dirname(PERSISTENT_CONFIG) + if os.path.isdir(persistent_dir) and os.access(persistent_dir, os.W_OK): + return PERSISTENT_CONFIG + return OVERLAY_CONFIG + + +def load() -> dict[str, Any]: + for path in (PERSISTENT_CONFIG, OVERLAY_CONFIG): + if os.path.exists(path): + try: + with open(path) as f: + data = yaml.safe_load(f) + if isinstance(data, dict): + return data + except (OSError, yaml.YAMLError): + continue + return {} + + +def save(data: dict[str, Any]) -> str | None: + path = _config_path() + content = yaml.dump(data, default_flow_style=False, sort_keys=False) + try: + mkdir_proc = subprocess.run( + ["sudo", "mkdir", "-p", os.path.dirname(path)], + capture_output=True, + timeout=5, + ) + if mkdir_proc.returncode != 0: + return f"Failed to create dir for {path}: {mkdir_proc.stderr.decode().strip()}" + proc = subprocess.run( + ["sudo", "tee", path], + input=content.encode(), + capture_output=True, + timeout=5, + ) + if proc.returncode != 0: + return f"Failed to write {path}: {proc.stderr.decode().strip()}" + chmod_proc = subprocess.run( + ["sudo", "chmod", "0644", path], + capture_output=True, + timeout=5, + ) + if chmod_proc.returncode != 0: + return f"Failed to chmod {path}: {chmod_proc.stderr.decode().strip()}" + return None + except (subprocess.TimeoutExpired, FileNotFoundError) as e: + return f"Failed to write {path}: {e}" + + +def get(key: str, default: Any = None) -> Any: + data = load() + keys = key.split(".") + for k in keys: + if isinstance(data, dict): + data = data.get(k, default) + else: + return default + return data + + +def set_key(key: str, value: Any) -> str | None: + data = load() + keys = key.split(".") + target = data + for k in keys[:-1]: + if k not in target or not isinstance(target[k], dict): + target[k] = {} + target = target[k] + target[keys[-1]] = value + return save(data) diff --git 
a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py index e6949c4..5556e86 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/utils/hardware.py @@ -147,3 +147,108 @@ def get_service_status(service: str) -> str: "neuraldrive-gpu-monitor", "neuraldrive-system-api", ] + + +def get_boot_device() -> str | None: + try: + with open("/proc/cmdline") as f: + cmdline = f.read() + for part in cmdline.split(): + if part.startswith("boot=live") or part.startswith("root="): + pass + if part.startswith("live-media="): + media_dev = part.split("=", 1)[1] + pkname_res = subprocess.run( + ["lsblk", "-no", "PKNAME", media_dev], + capture_output=True, + text=True, + timeout=5, + ) + if pkname_res.returncode == 0 and pkname_res.stdout.strip(): + return f"/dev/{pkname_res.stdout.strip()}" + # PKNAME failed — fall through to findmnt instead of + # returning an unvalidated partition/symlink path. 
+ res = subprocess.run( + ["findmnt", "-n", "-o", "SOURCE", "/run/live/medium"], + capture_output=True, + text=True, + timeout=5, + ) + if res.returncode == 0 and res.stdout.strip(): + part_dev = res.stdout.strip() + pkname_res = subprocess.run( + ["lsblk", "-no", "PKNAME", part_dev], + capture_output=True, + text=True, + timeout=5, + ) + if pkname_res.returncode == 0 and pkname_res.stdout.strip(): + return f"/dev/{pkname_res.stdout.strip()}" + except (OSError, subprocess.TimeoutExpired, FileNotFoundError): + pass + return None + + +def get_disk_partitions(device: str) -> list[dict]: + try: + res = subprocess.run( + ["lsblk", "-J", "-b", "-o", "NAME,SIZE,FSTYPE,LABEL,MOUNTPOINT", device], + capture_output=True, + text=True, + timeout=5, + ) + if res.returncode != 0: + return [] + import json + + data = json.loads(res.stdout) + partitions = [] + for bd in data.get("blockdevices", []): + for child in bd.get("children", []): + partitions.append( + { + "name": child.get("name", ""), + "size_bytes": int(child.get("size", 0)), + "fstype": child.get("fstype", ""), + "label": child.get("label", ""), + "mountpoint": child.get("mountpoint", ""), + } + ) + if not bd.get("children"): + partitions.append( + { + "name": bd.get("name", ""), + "size_bytes": int(bd.get("size", 0)), + "fstype": bd.get("fstype", ""), + "label": bd.get("label", ""), + "mountpoint": bd.get("mountpoint", ""), + } + ) + return partitions + except (subprocess.TimeoutExpired, FileNotFoundError, ValueError): + return [] + + +def get_device_size(device: str) -> int: + try: + res = subprocess.run( + ["lsblk", "-b", "-d", "-n", "-o", "SIZE", device], + capture_output=True, + text=True, + timeout=5, + ) + if res.returncode == 0: + return int(res.stdout.strip()) + except (subprocess.TimeoutExpired, FileNotFoundError, ValueError): + pass + return 0 + + +def get_unpartitioned_space(device: str) -> int: + total = get_device_size(device) + if not total: + return 0 + parts = get_disk_partitions(device) + used = 
sum(p["size_bytes"] for p in parts) + free = total - used + return max(0, free) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/__init__.py b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/__init__.py index 3ae5b2f..4e1d144 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/__init__.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/__init__.py @@ -1,4 +1,6 @@ from widgets.stats_box import StatsBox from widgets.model_item import ModelItem +from widgets.safe_header import SafeHeader +from widgets.service_item import ServiceItem -__all__ = ["StatsBox", "ModelItem"] +__all__ = ["StatsBox", "ModelItem", "SafeHeader", "ServiceItem"] diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py index 1df4451..71e363c 100644 --- a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/model_item.py @@ -2,19 +2,53 @@ from textual.app import ComposeResult from textual.containers import Horizontal -from textual.widgets import Static +from textual.widgets import Button, Static class ModelItem(Horizontal): - def __init__(self, name: str, size: str, loaded: bool = False) -> None: - super().__init__(classes="model-item") - self._name = name - self._size = size + can_focus = False + + def __init__( + self, + name: str, + size: str, + params: str = "", + quant: str = "", + vram_str: str = "", + loaded: bool = False, + ) -> None: + super().__init__(name=name, classes="model-item") + self._model_name = name + self._model_size = size + self._params = params + self._quant = quant + self._vram_str = vram_str self._loaded = loaded def compose(self) -> ComposeResult: - yield Static(self._name, classes="model-name") - yield Static(self._size, classes="model-size") - status_cls = "model-status-loaded" if self._loaded else "model-status-cached" - status_txt = "● loaded" 
if self._loaded else "○ cached" - yield Static(status_txt, classes=status_cls) + yield Static(self._model_name, classes="model-name") + yield Static(self._params, classes="model-params") + yield Static(self._quant, classes="model-quant") + yield Static(self._model_size, classes="model-disk") + yield Static(self._vram_str, classes="model-vram") + if self._loaded: + yield Static("● GPU", classes="model-status-loaded") + else: + yield Static("○ ready", classes="model-status-cached") + load_btn = Button("Load", name=self._model_name, classes="model-load") + unload_btn = Button("Unload", name=self._model_name, classes="model-unload") + delete_btn = Button("Delete", name=self._model_name, classes="model-delete") + load_btn.can_focus = False + unload_btn.can_focus = False + delete_btn.can_focus = False + if self._loaded: + load_btn.disabled = True + else: + unload_btn.disabled = True + yield load_btn + yield unload_btn + yield delete_btn + + def get_action_buttons(self) -> list[Button]: + """Return the action buttons in left-to-right order.""" + return list(self.query("Button")) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/safe_header.py b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/safe_header.py new file mode 100644 index 0000000..e04ecc8 --- /dev/null +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/safe_header.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from textual.css.query import NoMatches +from textual.widgets import Header +from textual.widgets._header import HeaderTitle + + +class SafeHeader(Header): + + def _on_mount(self, event) -> None: + original_set_title = None + + async def safe_set_title() -> None: + try: + self.query_one(HeaderTitle).update(self.format_title()) + except (NoMatches, Exception): + pass + + self.watch(self.app, "title", safe_set_title) + self.watch(self.app, "sub_title", safe_set_title) + self.watch(self.screen, "title", safe_set_title) + self.watch(self.screen, "sub_title", 
safe_set_title) diff --git a/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/service_item.py b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/service_item.py new file mode 100644 index 0000000..74f7e82 --- /dev/null +++ b/config/includes.chroot/usr/lib/neuraldrive/tui/widgets/service_item.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from textual.app import ComposeResult +from textual.containers import Horizontal +from textual.widgets import Button, Static + + +class ServiceItem(Horizontal): + can_focus = False + + def __init__(self, service: str, display_name: str, status: str) -> None: + super().__init__(name=service, classes="svc-item") + self._service = service + self._display_name = display_name + self._status = status + + def compose(self) -> ComposeResult: + active = self._status == "active" + indicator = "\u25cf" if active else "\u25cb" + status_cls = "svc-status-active" if active else "svc-status-inactive" + yield Static(self._display_name, classes="svc-name") + yield Static(f"{indicator} {self._status}", classes=f"svc-state {status_cls}") + start_btn = Button("Start", name=self._service, classes="svc-start") + stop_btn = Button("Stop", name=self._service, classes="svc-stop") + restart_btn = Button("Restart", name=self._service, classes="svc-restart") + start_btn.can_focus = False + stop_btn.can_focus = False + restart_btn.can_focus = False + if active: + start_btn.disabled = True + else: + stop_btn.disabled = True + restart_btn.disabled = True + yield start_btn + yield stop_btn + yield restart_btn + + def get_action_buttons(self) -> list[Button]: + return list(self.query("Button")) + + def update_status(self, status: str) -> None: + self._status = status + active = status == "active" + indicator = "\u25cf" if active else "\u25cb" + status_cls = "svc-status-active" if active else "svc-status-inactive" + state_widget = self.query_one(".svc-state", Static) + state_widget.update(f"{indicator} {status}") + 
state_widget.remove_class("svc-status-active", "svc-status-inactive") + state_widget.add_class(status_cls) + for btn in self.query("Button"): + if btn.has_class("svc-start"): + btn.disabled = active + elif btn.has_class("svc-stop"): + btn.disabled = not active + elif btn.has_class("svc-restart"): + btn.disabled = not active diff --git a/config/package-lists/gpu-nvidia.list.chroot b/config/package-lists/gpu-nvidia.list.chroot index 2276e64..5e76837 100644 --- a/config/package-lists/gpu-nvidia.list.chroot +++ b/config/package-lists/gpu-nvidia.list.chroot @@ -11,3 +11,4 @@ nvidia-persistenced firmware-nvidia-gsp libcuda1 libnvidia-ml1 +nvidia-modprobe diff --git a/docs/dev-guide/book.toml b/docs/dev-guide/book.toml index 48d9143..e0f3eb9 100644 --- a/docs/dev-guide/book.toml +++ b/docs/dev-guide/book.toml @@ -12,8 +12,8 @@ build-dir = "book" default-theme = "coal" preferred-dark-theme = "coal" site-url = "/NeuralDrive/dev-guide/" -git-repository-url = "https://github.com/NeuralDrive/NeuralDrive" -edit-url-template = "https://github.com/NeuralDrive/NeuralDrive/edit/main/docs/dev-guide/src/{path}" +git-repository-url = "https://github.com/Rightbracket/NeuralDrive" +edit-url-template = "https://github.com/Rightbracket/NeuralDrive/edit/main/docs/dev-guide/src/{path}" additional-css = ["custom.css"] [output.html.search] diff --git a/docs/dev-guide/src/architecture/security.md b/docs/dev-guide/src/architecture/security.md index 618c659..db81a90 100644 --- a/docs/dev-guide/src/architecture/security.md +++ b/docs/dev-guide/src/architecture/security.md @@ -31,7 +31,8 @@ Every service unit employs advanced systemd hardening directives: - `ProtectHome=yes`: Access to `/home` is denied. - `PrivateTmp=yes`: A private `/tmp` directory is created. - `NoNewPrivileges=yes`: Prevents the service and its children from gaining new privileges via `setuid` binaries. -- `DeviceAllow`: Only the necessary GPU devices (`/dev/nvidia*`, `/dev/dri/*`) are permitted for the Ollama service. 
+- `PrivateDevices=no`: Explicitly disabled for the Ollama service to allow access to GPU device nodes (`/dev/nvidia*`, `/dev/dri/*`) required for accelerated inference. +- **DeviceAllow removal**: All `DeviceAllow` lines were removed from the Ollama service unit. On cgroup v2 systems, `DeviceAllow` uses eBPF device filters that blocked CUDA access even with explicit allow rules for GPU devices. Removing these rules was necessary to enable reliable GPU acceleration. ### 3. Authentication and Authorization NeuralDrive uses a dual-key system for authentication: diff --git a/docs/dev-guide/src/components/first-boot-wizard.md b/docs/dev-guide/src/components/first-boot-wizard.md index 9369ca2..c547c81 100644 --- a/docs/dev-guide/src/components/first-boot-wizard.md +++ b/docs/dev-guide/src/components/first-boot-wizard.md @@ -6,37 +6,44 @@ The First-Boot Wizard is a specialized mode of the TUI that guides the user thro ## Execution Trigger -The wizard is triggered by the `neuraldrive-setup.service`. This service checks for the existence of `/etc/neuraldrive/.setup-complete`. If this file is missing, the service: -1. Blocks the standard TTY login. -2. Launches the TUI in "Setup Mode". -3. Prevents any other NeuralDrive application services (Ollama, WebUI, Caddy) from starting until setup is finished. +The wizard is not a separate service. It is an integrated component of the TUI application (`main.py`). Upon startup, the TUI checks for the existence of the sentinel file `/etc/neuraldrive/first-boot-complete`. If this file is missing, the TUI presents the wizard interface before allowing access to the main dashboard. ## Wizard Flow -The wizard consists of seven mandatory steps: +The wizard consists of the following steps: 1. **Welcome**: Introduction and hardware verification. -2. **Network**: Configuration of Ethernet or Wi-Fi. -3. **Persistence**: Detection and optional encryption (LUKS2) of the persistent partition. -4. 
**Credentials**: Setting the `neuraldrive-admin` password and generating the initial API key. -5. **Branding**: Setting the system hostname and mDNS name. -6. **Model Selection**: Choosing a "Small", "Medium", or "Large" model to pre-download. -7. **Finalization**: Writing configuration files, generating TLS certs, and creating the sentinel file. +2. **Storage/Persistence**: Detects the boot device, creates the persistence partition, and initializes the directory structure: + - `/var/lib/neuraldrive/ollama` + - `/var/lib/neuraldrive/models` + - `/var/lib/neuraldrive/config` + - `/var/lib/neuraldrive/webui` + - `/var/lib/neuraldrive/logs` +3. **Security**: Prompts for the `neuraldrive-admin` password and generates initial credentials. +4. **Network**: Configuration of Ethernet or Wi-Fi. +5. **Models**: Selection of initial models for download. +6. **Done**: Finalizes configuration and generates the sentinel file. ## Credential Generation -- **Admin Password**: The user is prompted to enter a password for the `neuraldrive-admin` account. -- **API Key**: The system automatically generates a 32-character random string, prefixed with `nd-`. This key is displayed to the user once and then stored in `/etc/neuraldrive/api.key`. +- **Admin Password**: The user is prompted to set the password for the `neuraldrive-admin` account. +- **API Key**: The system automatically generates a 32-character random string, prefixed with `nd-`. This key is displayed once and then stored in the persistence layer. ## Sentinel File -Once the user completes the wizard, the script runs `touch /etc/neuraldrive/.setup-complete`. This ensures that subsequent reboots proceed directly to the standard dashboard. +Completion of the wizard creates the sentinel file at `/etc/neuraldrive/first-boot-complete`. This ensures that subsequent reboots bypass the wizard and proceed directly to the standard dashboard. 
+ +## CLI Re-run + +To re-run the wizard on a configured system, use the following command: +`neuraldrive-tui --wizard` +This command removes the sentinel file, forcing the wizard to launch on the next application start. ## Customizing the Wizard The wizard logic is integrated into the TUI application. To add a new step: -1. Create a new `Screen` class in `usr/lib/neuraldrive/tui/screens/wizard.py` or in a new screen file within `screens/`. +1. Create a new `Screen` class in the `screens/` directory. 2. Add the screen to the wizard orchestration loop in `main.py`. -> **Note**: For development, you can re-trigger the wizard on a running system by deleting the sentinel file and restarting the `neuraldrive-setup` service. **Warning**: This may overwrite existing credentials and configuration. +> **Note**: For development, you can re-trigger the wizard by using the `--wizard` flag. **Warning**: This may overwrite existing credentials and configuration. diff --git a/docs/dev-guide/src/components/gpu-detection.md b/docs/dev-guide/src/components/gpu-detection.md index 989174f..5f74d64 100644 --- a/docs/dev-guide/src/components/gpu-detection.md +++ b/docs/dev-guide/src/components/gpu-detection.md @@ -17,12 +17,25 @@ The script runs during the `neuraldrive-gpu-detect.service` phase. It performs t ### NVIDIA If an NVIDIA card is detected (PCI vendor ID `10de`), the script: -- Loads the `nvidia`, `nvidia-uvm`, and `nvidia-drm` modules via `modprobe`. +- Loads the `nvidia`, `nvidia-current-uvm`, and `nvidia-drm` modules via `modprobe`. Note that on Debian systems, the CUDA Unified Video Memory module is named `nvidia-current-uvm`, not `nvidia-uvm`. +- Executes `nvidia-modprobe -u` to create the `/dev/nvidia-uvm` and `/dev/nvidia-uvm-tools` device nodes. Without these nodes, CUDA memory allocation fails silently, and Ollama falls back to CPU. - Enables persistence mode with `nvidia-smi -pm 1`. - Sets `VENDOR=NVIDIA` in the config file. 
- If module loading fails, records `NVIDIA_MODULE_MISSING=true`. -### AMD +## Boot-Time Module Loading + +In addition to the detection script, the system includes `/etc/modules-load.d/nvidia-uvm.conf`. This file contains `nvidia-current-uvm` to ensure the module is automatically loaded at boot. + +## Ollama Service Integration + +As a safety net, the Ollama systemd unit also includes `ExecStartPre` commands for both `modprobe nvidia-current-uvm` and `nvidia-modprobe -u`. This ensures the necessary drivers and device nodes are present even if the primary detection service is delayed. + +## cgroup v2 and Device Access + +On systems using cgroup v2, standard `DeviceAllow` rules in systemd units utilize eBPF filters that can inadvertently block CUDA access, even when explicit allow rules are defined. NeuralDrive avoids this by removing all `DeviceAllow` directives from the Ollama service and relying on `PrivateDevices=no` instead. + +## AMD If an AMD card is detected (PCI vendor ID `1002`), the script: - Loads the `amdgpu` module. - Sets `VENDOR=AMD`. diff --git a/docs/dev-guide/src/components/ollama.md b/docs/dev-guide/src/components/ollama.md index 9b22cb9..58f16b3 100644 --- a/docs/dev-guide/src/components/ollama.md +++ b/docs/dev-guide/src/components/ollama.md @@ -15,20 +15,39 @@ The `neuraldrive-ollama.service` manages the lifecycle of the inference engine. ### Service Unit Highlights - **User**: Runs as `neuraldrive-ollama` (UID 901). - **Dependencies**: `Requires=neuraldrive-gpu-detect.service`. -- **Security**: Uses `DeviceAllow` to restrict access to only relevant GPU device nodes. +- **Security**: The service uses `PrivateDevices=no` to allow GPU access. Note that all `DeviceAllow` directives were removed because cgroup v2's eBPF device filter blocked CUDA access even with explicit allow rules. - **Resource Limits**: - `MemoryHigh=90%`: Triggers aggressive swapping/GC when system memory is nearly full. 
- `MemoryMax=95%`: The hard limit before the OOM killer intervenes. +- **GPU Initialization**: The unit includes `ExecStartPre` commands to ensure CUDA is ready: + - `ExecStartPre=-/sbin/modprobe nvidia-current-uvm`: Loads the CUDA Unified Virtual Memory module (named `nvidia-current-uvm` in the Debian package). + - `ExecStartPre=-/usr/bin/nvidia-modprobe -u`: Creates the `/dev/nvidia-uvm` and `/dev/nvidia-uvm-tools` device nodes. + +### Persistent Config Overrides +The service unit includes two `EnvironmentFile` directives to manage configuration: +1. `EnvironmentFile=/etc/neuraldrive/ollama.conf`: Contains baked-in system defaults. +2. `EnvironmentFile=-/var/lib/neuraldrive/config/ollama.conf`: Allows persistent user-defined overrides. The `-` prefix ensures the service starts even if this file is missing. ## Configuration (ollama.conf) -System-wide settings are stored in `/etc/neuraldrive/ollama.conf`: +System-wide settings are defined in the environment files: - `OLLAMA_HOST=127.0.0.1:11434`: Ensures the API is only accessible locally (proxied by Caddy). - `OLLAMA_MODELS=/var/lib/neuraldrive/models/`: Directs model weights to the persistence layer. - `OLLAMA_KEEP_ALIVE=5m`: Models are unloaded from VRAM after 5 minutes of inactivity. -- `OLLAMA_MAX_LOADED_MODELS=1`: Limits concurrent model loading to prevent VRAM exhaustion on smaller cards. +- `OLLAMA_MAX_LOADED_MODELS=0`: Set to `0` for auto mode. Ollama manages multiple models based on available VRAM using LRU (Least Recently Used) eviction. - `OLLAMA_NUM_PARALLEL=1`: Processes one request at a time to maintain deterministic performance. +## API Usage Details + +### Loading Models +To load a model, send a `POST` request to `/api/generate` with `keep_alive` set to `-1`. Note that `keep_alive` must be an integer; passing it as a string ("-1") will result in a rejection. + +### Unloading Models +To unload a model, send a `POST` request to `/api/generate` with `keep_alive` set to `0`. 
To verify the eviction, poll `/api/ps` until the model no longer appears. A race condition exists where the 200 OK response may return before the eviction process is fully complete. + +### Monitoring +`GET /api/ps` returns a list of running models, including the `size_vram` utilized by each. + ## GPU Support Ollama automatically detects the compute provider based on the drivers loaded by `gpu-detect.sh`. diff --git a/docs/dev-guide/src/components/tui.md b/docs/dev-guide/src/components/tui.md index 7f7cd19..692e8a5 100644 --- a/docs/dev-guide/src/components/tui.md +++ b/docs/dev-guide/src/components/tui.md @@ -19,15 +19,54 @@ The default screen showing: - mDNS address (`neuraldrive.local`). - CPU, Memory, and Disk usage gauges. - GPU status overview. +Manual refresh is available via the **R** key, alongside a live clock. + +### Models +Lists all LLM models currently stored in the persistence layer. Shows model name and metadata columns (params, quantization, disk size, VRAM usage, and status). Users can Load, Unload, or Delete models. This screen refreshes automatically on user action. ### Services -Provides a list of all NeuralDrive systemd units with their current status (active, inactive, failed). Users can select a service to view its recent logs or trigger a restart. +Provides a list of all NeuralDrive systemd units with their current status (active, inactive, failed). Users can select a service to view its recent logs or trigger a restart. This screen auto-polls every 5 seconds. -### Models -Lists all LLM models currently stored in the persistence layer. Shows model size and allows users to delete unused models to free up disk space. +### Logs +System-wide log viewer for NeuralDrive services and kernel messages. + +### Chat +A lightweight chat interface allowing users to test models locally. It includes a model selector dropdown and supports streaming responses via `@work(exclusive=True)`. Model selection persists across screen switches. 
+ +## Hotkeys + +- **F1**: Dashboard +- **F2**: Models +- **F3**: Services +- **F4**: Logs +- **F5**: Chat +- **Q**: Quit + +## Navigation Model + +The TUI uses a zone-based focus system. +- **Tab / Shift+Tab**: Cycle focus between different zones within a screen. +- **Arrow Keys**: Navigate within the currently focused zone. +- **Enter**: Activate the selected item or button. + +## Custom Widgets + +Several custom composite widgets are used to build the interface: +- `SafeHeader`: A subclass of Textual's `Header` that catches `NoMatches` exceptions during `_on_mount`, working around Textual bug #4258. +- `ServiceItem`: Displays service name, status label, and control buttons (Start, Stop, Restart). +- `ModelItem`: Displays model name, metadata, and action buttons (Load, Unload, Delete). + +## Crash Dump Logging + +The TUI overrides `App._handle_exception` to write crash dumps to `/var/lib/neuraldrive/logs/tui-crash-*.log` with a full traceback. The entire `main()` function is also wrapped in a try/except block to catch crashes occurring outside the Textual event loop. Screenshots are saved to `/var/lib/neuraldrive/screenshots/`. + +## CLI Flags + +- `--wizard`: Removes the sentinel file (`/etc/neuraldrive/first-boot-complete`) and forces the first-boot wizard to re-run on the next launch. + +## Command Palette -### Networking -Allows basic network configuration, such as switching between DHCP and static IP, or configuring a Wi-Fi connection. +The Textual command palette is explicitly disabled (`ENABLE_COMMAND_PALETTE = False`). ## Auto-Login and Startup @@ -43,9 +82,10 @@ The source code for the TUI is located at `/usr/lib/neuraldrive/tui/`. ## Refresh Intervals +- **Dashboard**: Manual refresh (R key) with live clock. +- **Services**: Auto-polls every 5 seconds. +- **Models**: Refreshes on user action. - **System Metrics**: Refreshed every 2 seconds. -- **Service Status**: Refreshed every 5 seconds. 
-- **Network Info**: Refreshed only on request or after a configuration change. ## Modifying the TUI diff --git a/docs/landing/index.html b/docs/landing/index.html index 65dddd5..a37872b 100644 --- a/docs/landing/index.html +++ b/docs/landing/index.html @@ -97,7 +97,7 @@

Developer Guide

diff --git a/docs/user-guide/book.toml b/docs/user-guide/book.toml index 719e8cd..faeb892 100644 --- a/docs/user-guide/book.toml +++ b/docs/user-guide/book.toml @@ -12,8 +12,8 @@ build-dir = "book" default-theme = "coal" preferred-dark-theme = "coal" site-url = "/NeuralDrive/user-guide/" -git-repository-url = "https://github.com/NeuralDrive/NeuralDrive" -edit-url-template = "https://github.com/NeuralDrive/NeuralDrive/edit/main/docs/user-guide/src/{path}" +git-repository-url = "https://github.com/Rightbracket/NeuralDrive" +edit-url-template = "https://github.com/Rightbracket/NeuralDrive/edit/main/docs/user-guide/src/{path}" additional-css = ["custom.css"] [output.html.search] diff --git a/docs/user-guide/src/advanced/custom-images.md b/docs/user-guide/src/advanced/custom-images.md index f19a883..16b8a71 100644 --- a/docs/user-guide/src/advanced/custom-images.md +++ b/docs/user-guide/src/advanced/custom-images.md @@ -20,7 +20,7 @@ Follow these steps to generate a custom NeuralDrive ISO. Clone the NeuralDrive repository and navigate to the builder directory. ```bash -git clone https://github.com/NeuralDrive/NeuralDrive.git +git clone https://github.com/Rightbracket/NeuralDrive.git cd NeuralDrive/builder ``` diff --git a/docs/user-guide/src/advanced/performance.md b/docs/user-guide/src/advanced/performance.md index 245d6d7..cf2165a 100644 --- a/docs/user-guide/src/advanced/performance.md +++ b/docs/user-guide/src/advanced/performance.md @@ -12,7 +12,7 @@ The primary backend service, Ollama, is controlled via `/etc/neuraldrive/ollama. OLLAMA_HOST=127.0.0.1:11434 OLLAMA_MODELS=/var/lib/neuraldrive/models/ OLLAMA_KEEP_ALIVE=5m -OLLAMA_MAX_LOADED_MODELS=1 +OLLAMA_MAX_LOADED_MODELS=0 OLLAMA_NUM_PARALLEL=1 ``` @@ -20,7 +20,7 @@ OLLAMA_NUM_PARALLEL=1 * **OLLAMA_NUM_PARALLEL**: (Integer) The number of concurrent requests the server will handle. Increase this for multi-user environments, though this will increase VRAM usage. 
* **OLLAMA_KEEP_ALIVE**: (Duration) How long a model remains loaded in memory after the last request. Setting this to a higher value (e.g., `30m`) avoids the latency of reloading models. -* **OLLAMA_MAX_LOADED_MODELS**: (Integer) The maximum number of models to keep in VRAM simultaneously. +* **OLLAMA_MAX_LOADED_MODELS**: (Integer) The maximum number of models to keep in VRAM simultaneously. The default is `0` (auto), which allows Ollama to manage concurrent loading based on available VRAM. When memory is full, Least Recently Used (LRU) models are evicted automatically. Manual overrides can be set in `/var/lib/neuraldrive/config/ollama.conf`. * **OLLAMA_NUM_THREADS**: (Integer) Specifies the number of CPU threads to use for inference. By default, this auto-detects based on your hardware. * **OLLAMA_FLASH_ATTENTION**: (Boolean) Enabling Flash Attention can significantly improve speed on supported GPUs (e.g., NVIDIA Ampere and newer). diff --git a/docs/user-guide/src/getting-started/first-boot.md b/docs/user-guide/src/getting-started/first-boot.md index d0d0b0d..d8840cd 100644 --- a/docs/user-guide/src/getting-started/first-boot.md +++ b/docs/user-guide/src/getting-started/first-boot.md @@ -28,15 +28,18 @@ Once the boot process is complete, the console will display your system's IP add ## First-Boot Wizard -If the system has not been initialized, a Text User Interface (TUI) wizard will start automatically. You must complete these seven steps to prepare your server: +If the system has not been initialized, a Text User Interface (TUI) wizard will start automatically. The wizard runs as part of the TUI application, checking for a sentinel file on startup. You must complete these six steps to prepare your server: -1. **Welcome:** Displays a hardware summary and runs a brief system health check to ensure your GPU is detected correctly. -2. **Security:** Generates a random administrator password and API key. You can choose to keep these or set a custom password. -3. 
**Wi-Fi:** If no Ethernet connection is detected, the wizard provides an SSID selector to configure your wireless network. -4. **Network:** Choose between DHCP (default) or a static IP address. -5. **Storage:** Select the drive for your persistent data. You can also enable LUKS encryption here. **Warning: This step is destructive to data on the selected drive.** -6. **Models:** Recommends specific LLM starter models based on your hardware's VRAM and capabilities. -7. **Finish:** The system writes your configurations, provisions the web administrator account, and removes insecure default permissions (like NOPASSWD sudo). +1. **Welcome:** Introductory screen with hardware summary and system health check. +2. **Storage/Persistence:** Detects your USB boot device and creates an ext4 persistence partition on unused space. This step also creates required directories under `/var/lib/neuraldrive/` (ollama, models, config, webui, logs). +3. **Security:** Sets the administrator password and configures system credentials. +4. **Network:** Configure your network connection, including Wi-Fi (if applicable) and IP assignment (DHCP or static). +5. **Models:** Select initial LLM models to download based on your hardware capabilities. +6. **Done:** Final completion summary and display of system credentials. + +### Re-running the Wizard + +If you need to reset your configuration, run `neuraldrive-tui --wizard` from the console. This command removes the sentinel file and forces the wizard to run again on the next TUI launch. ### Write Down Your Credentials @@ -44,10 +47,9 @@ At the end of the wizard, your final credentials and the dashboard URL will be d ## System Initialization Files -NeuralDrive uses two sentinel files to track its state: -- `/etc/neuraldrive/initialized`: Indicates that the core system initialization has occurred. +NeuralDrive uses a sentinel file to track its state: - `/etc/neuraldrive/first-boot-complete`: Confirms the user setup wizard has been finished. 
-Once these files are present, the system will boot directly to the ready state in the future. +Once this file is present, the system will boot directly to the ready state. Next step: [Web Dashboard](../using/web-dashboard.md) diff --git a/docs/user-guide/src/models/recommendations.md b/docs/user-guide/src/models/recommendations.md index 8ac1bfc..ebc0606 100644 --- a/docs/user-guide/src/models/recommendations.md +++ b/docs/user-guide/src/models/recommendations.md @@ -25,13 +25,13 @@ If your system lacks a compatible GPU, NeuralDrive can run models on the CPU. Wh ## Concurrent Models -NeuralDrive allows multiple models to be loaded into memory simultaneously, provided there is enough VRAM. This is managed via several environment variables in the Ollama service: +NeuralDrive allows multiple models to be loaded into memory simultaneously, provided there is enough VRAM. This is managed by Ollama using several environment variables: -- `OLLAMA_MAX_LOADED_MODELS`: Defines the maximum number of models kept in memory. -- `OLLAMA_NUM_PARALLEL`: Determines how many concurrent requests can be handled by a single model. +- `OLLAMA_MAX_LOADED_MODELS`: Defines the maximum number of models kept in memory. The default is `0` (auto), which allows Ollama to manage loading based on available VRAM. +- `OLLAMA_NUM_PARALLEL`: Determines how many concurrent requests can be handled. - `OLLAMA_KEEP_ALIVE`: Sets how long a model stays in memory after the last request before being evicted. -NeuralDrive uses a Least Recently Used (LRU) eviction policy. If you attempt to load a new model and VRAM is full, the model that hasn't been used for the longest time will be unloaded to make room. +NeuralDrive uses a Least Recently Used (LRU) eviction policy. If you attempt to load a new model and VRAM is full, Ollama handles eviction automatically to make room for the new request. 
## Model Catalog diff --git a/docs/user-guide/src/reference/config-files.md b/docs/user-guide/src/reference/config-files.md index f2d8d74..3d0a8ed 100644 --- a/docs/user-guide/src/reference/config-files.md +++ b/docs/user-guide/src/reference/config-files.md @@ -8,11 +8,16 @@ This document provides a complete reference for all critical configuration and s | File | Purpose | Format | Owner | | :--- | :--- | :--- | :--- | -| `/etc/neuraldrive/ollama.conf` | Ollama environment variables | KEY=VALUE | root:neuraldrive-admin | +| `/etc/neuraldrive/ollama.conf` | Ollama baked-in defaults | KEY=VALUE | root:neuraldrive-admin | +| `/var/lib/neuraldrive/config/ollama.conf` | Persistent Ollama overrides | KEY=VALUE | root:neuraldrive-admin | +| `/etc/neuraldrive/config.yaml` | TUI overlay fallback config | YAML | root:neuraldrive-admin | +| `/var/lib/neuraldrive/config/config.yaml` | Persistent TUI configuration | YAML | root:neuraldrive-admin | +| `/var/lib/neuraldrive/config/api.key` | Persistent API key | plaintext | root:root (600) | +| `/etc/neuraldrive/api.key` | System API key (synced) | plaintext | root:root (600) | +| `/var/lib/neuraldrive/config/credentials.conf` | Persistent credentials | KEY=VALUE | root:root (600) | | `/etc/neuraldrive/webui.env` | Open WebUI configuration | KEY=VALUE | root:neuraldrive-admin | | `/etc/neuraldrive/caddy.env` | Caddy API key environment | KEY=VALUE | root:neuraldrive-admin | | `/etc/neuraldrive/api.env` | System API environment | KEY=VALUE | root:neuraldrive-admin | -| `/etc/neuraldrive/api.key` | API authentication key | plaintext | root:root (600) | | `/etc/neuraldrive/Caddyfile` | Caddy reverse proxy configuration | Caddyfile | root:neuraldrive-caddy | | `/etc/neuraldrive/nftables.conf` | Global firewall rules | nftables | root:root | | `/etc/neuraldrive/neuraldrive-models.yaml` | Model catalog definitions | YAML | root:neuraldrive-admin | @@ -23,20 +28,27 @@ This document provides a complete reference for all critical 
configuration and s | `/etc/neuraldrive/firewall-custom.conf` | User-defined firewall rules | nftables | root:root | | `/run/neuraldrive/gpu.conf` | GPU detection results (at boot) | KEY=VALUE | root:root (runtime) | + ## Key Configuration Reference ### `ollama.conf` -Defines the behavior of the underlying LLM inference engine. +Defines the behavior of the underlying LLM inference engine. The Ollama service uses two configuration sources: +1. `/etc/neuraldrive/ollama.conf` — baked-in system defaults. +2. `/var/lib/neuraldrive/config/ollama.conf` — persistent user overrides. + +Values in the persistent file override the system defaults. ```ini OLLAMA_HOST=127.0.0.1:11434 OLLAMA_MODELS=/var/lib/neuraldrive/models/ OLLAMA_KEEP_ALIVE=5m -OLLAMA_MAX_LOADED_MODELS=1 +OLLAMA_MAX_LOADED_MODELS=0 OLLAMA_NUM_PARALLEL=1 ``` +**OLLAMA_MAX_LOADED_MODELS**: Set to `0` for "auto" mode. Ollama automatically manages how many models stay loaded based on available VRAM, using Least Recently Used (LRU) eviction when memory is required for a new request. + ### `webui.env` Configures the Open WebUI chat interface and authentication. diff --git a/docs/user-guide/src/reference/services.md b/docs/user-guide/src/reference/services.md index 5d71fba..14dc728 100644 --- a/docs/user-guide/src/reference/services.md +++ b/docs/user-guide/src/reference/services.md @@ -13,7 +13,7 @@ This document provides a detailed overview of the systemd services that power th | `neuraldrive-certs` | oneshot | root | — | network-online, local-fs | Generates self-signed TLS certificates (skips if they already exist). | | `neuraldrive-zram` | oneshot | root | — | local-fs | Sets up compressed RAM-based swap space. | | `neuraldrive-show-ip` | oneshot | root | — | network-online | Displays the current IP address on the physical console. | -| `neuraldrive-ollama` | long-running | neuraldrive-ollama | 11434 | gpu-detect | The underlying LLM inference and model management engine. 
| +| `neuraldrive-ollama` | long-running | neuraldrive-ollama | 11434 | gpu-detect | The GPU-accelerated LLM inference and model management engine. | | `neuraldrive-webui` | long-running | neuraldrive-webui | 3000 | ollama | The Open WebUI dashboard and chat interface. | | `neuraldrive-caddy` | long-running | neuraldrive-caddy | 443, 8443 | certs | The TLS reverse proxy and API gateway. | | `neuraldrive-gpu-monitor` | long-running | neuraldrive-monitor | 1312 | gpu-detect | Monitors GPU temperature, VRAM usage, and health. | @@ -24,7 +24,8 @@ This document provides a detailed overview of the systemd services that power th All NeuralDrive services are configured with systemd-native security hardening to minimize the system attack surface: -- **PrivateDevices**: Most services are denied access to `/dev/` nodes, except for the GPU-specific services. +- **PrivateDevices**: Most services are denied access to `/dev/` nodes. The Ollama service specifically uses `PrivateDevices=no` to allow access to GPU device nodes required for hardware acceleration. +- **DeviceAllow**: This directive was removed from the Ollama service because cgroup v2 eBPF filters can block CUDA access even when devices are explicitly allowed. - **ProtectSystem**: The root filesystem is mounted read-only for service processes. - **ProtectHome**: Services have no access to the `/home/` directory. - **NoNewPrivileges**: Prevents processes from gaining elevated permissions via `setuid` or `setgid`. diff --git a/docs/user-guide/src/troubleshooting/gpu.md b/docs/user-guide/src/troubleshooting/gpu.md index 90fa570..cb794fc 100644 --- a/docs/user-guide/src/troubleshooting/gpu.md +++ b/docs/user-guide/src/troubleshooting/gpu.md @@ -28,6 +28,33 @@ lsmod | grep nouveau If the command returns any output, the blacklist failed. Check `/etc/modprobe.d/neuraldrive-blacklist.conf`. 
+## Driver and Hardware Support + +### Missing nvidia-uvm module + +If Ollama reports CPU-only inference despite having an NVIDIA GPU, the `nvidia-uvm` (Unified Virtual Memory) kernel module may not be loaded. This module is essential for CUDA memory allocation. + +1. **Verify Module**: Check if the module is loaded: + ```bash + lsmod | grep nvidia_uvm + ``` +2. **Naming Convention**: On NeuralDrive (based on Debian), the module is named `nvidia-current-uvm`. +3. **Manual Load**: If missing, load it manually: + ```bash + sudo modprobe nvidia-current-uvm && nvidia-modprobe -u + ``` +4. **Automatic Loading**: NeuralDrive should load this automatically at boot via `/etc/modules-load.d/nvidia-uvm.conf`. If it fails, check the `journalctl -u neuraldrive-ollama` logs for `ExecStartPre` failures. + +Without this module, `/dev/nvidia-uvm` device nodes will be missing, causing CUDA calls to fail silently and Ollama to fall back to CPU. + +### cgroup v2 / DeviceAllow blocking + +If the GPU is detected by the system but Ollama still falls back to CPU inference, systemd `DeviceAllow` rules might be blocking access. + +1. **cgroup v2 Behavior**: On systems using cgroup v2, `DeviceAllow` uses eBPF device filters. These filters can block CUDA access even when explicit allow rules for `/dev/nvidia*` and `/dev/dri/*` are present. +2. **NeuralDrive Default**: The default NeuralDrive Ollama service has all `DeviceAllow` lines removed to prevent this. +3. **Custom Units**: If you have modified the service unit and re-added `DeviceAllow` rules, remove them and ensure `PrivateDevices=no` is set to restore GPU access. 
+ ## Diagnostic Tools NeuralDrive provides several utilities to inspect GPU state: diff --git a/docs/user-guide/src/troubleshooting/models.md b/docs/user-guide/src/troubleshooting/models.md index 34f48ca..f7448b1 100644 --- a/docs/user-guide/src/troubleshooting/models.md +++ b/docs/user-guide/src/troubleshooting/models.md @@ -37,7 +37,13 @@ If the model is generating text very slowly (less than 1 token per second): ```bash cat /run/neuraldrive/gpu.conf ``` -2. **Mixed Models**: Ensure you are not running multiple models simultaneously, which may compete for limited hardware resources. +### Concurrent Model Loading + +Multiple models can be loaded simultaneously if your hardware supports it. + +1. **Automatic Management**: NeuralDrive uses `OLLAMA_MAX_LOADED_MODELS=0` (auto) by default. Ollama manages concurrent loading based on your available VRAM. +2. **Model Eviction**: When VRAM is exhausted, least-recently-used models are automatically evicted from memory to make room for new ones. +3. **Manual Control**: You can manually load or unload models via the TUI Models screen (accessible with the **F2** key). ## Management and Corruption diff --git a/docs/user-guide/src/using/local-chat.md b/docs/user-guide/src/using/local-chat.md index 4f680e3..6ce7175 100644 --- a/docs/user-guide/src/using/local-chat.md +++ b/docs/user-guide/src/using/local-chat.md @@ -6,20 +6,20 @@ For quick testing and offline interaction, NeuralDrive includes a lightweight, t ## Launching the Chat -Access the local chat by pressing **C** from the main TUI dashboard. +Access the local chat by pressing **F5** from any screen. ## Using the Chat Interface -1. **Model Selection:** Upon entering the chat screen, you will be prompted to select one of the models currently available on your system. Use the arrow keys to highlight a model and press Enter. +1. **Model Selection:** A model selector dropdown at the top of the screen lets you choose which installed model to chat with. 
The selected model persists even when switching away and returning to the chat screen. 2. **Messaging:** Type your message into the input field at the bottom of the screen. Press Enter to send. 3. **Streaming Responses:** The model's response will stream directly into the terminal window in real-time. 4. **Keyboard Shortcuts:** - - **Esc or B:** Return to the model selection or main dashboard. + - **F1-F4:** Switch to another TUI screen (Dashboard, Models, Services, or Logs). - **Ctrl+C:** Interrupt the current response generation. ## Features and Limitations -The TUI chat is designed for simplicity and speed. +The TUI chat is designed for simplicity and speed. You must have at least one model downloaded and loaded to use the chat interface. - **Fast & Lightweight:** Minimal resource overhead compared to the full web UI. - **Persistent Context:** The chat maintains a basic conversation history within the current session, allowing for follow-up questions. diff --git a/docs/user-guide/src/using/models-tui.md b/docs/user-guide/src/using/models-tui.md index 5f6cf0b..4ad6b38 100644 --- a/docs/user-guide/src/using/models-tui.md +++ b/docs/user-guide/src/using/models-tui.md @@ -2,45 +2,54 @@ # Model Management via TUI -The Model Management screen allows you to download, unload, and delete LLMs directly from the terminal. Access this screen by pressing **M** from the main dashboard. +The Model Management screen allows you to download, load, unload, and delete LLMs directly from the terminal. -## Models Screen Interface +## Access +Press **F2** from any screen to access Model Management. 
-```text -┌──────────────── Model Management ────────────────────────────┐ -│ NAME SIZE STATUS ACTION │ -│ llama3.1:8b 4.7GB LOADED [U]nload [D]elete │ -│ codestral:latest 8.2GB LOADED [U]nload [D]elete │ -│ mistral:7b 4.1GB CACHED [L]oad [D]elete │ -├──────────────────────────────────────────────────────────────┤ -│ [P]ull Model [I]mport GGUF [B]ack │ -└──────────────────────────────────────────────────────────────┘ -``` +## Layout +The screen is organized into three zones that you can navigate between using **Tab** or **Shift+Tab**: -## Available Actions +1. **Installed Models list** (top zone): A scrollable list of models currently on your system. +2. **Browse Catalog** button (middle zone): Opens a popup to browse the Ollama library. +3. **Pull by name** (bottom zone): A text input field and a **Pull** button for direct model downloads. -Each model in the list supports specific actions based on its current state: +### Installed Models List +Each model in the list displays its details in a columnar format. A legend header with `/` separators appears above the list: +`Model name | Params | Quant | Disk | VRAM | Status` -- **[L]oad:** If a model is **CACHED** (on disk but not in memory), pressing **L** will trigger a load into VRAM. -- **[U]nload:** If a model is **LOADED**, pressing **U** will eject it from VRAM. This is useful if you want to free up space for a different model manually. -- **[D]elete:** Pressing **D** will prompt for confirmation and then remove the model weights and metadata from the persistent storage. +- **Model name**: The name of the model (e.g., `llama3:8b`). +- **Params**: Parameter count of the model. +- **Quant**: Quantization level. +- **Disk**: Space occupied on disk. +- **VRAM**: Measured or cached VRAM usage (e.g., "6.2 GB" or "~6.2 GB"). +- **Status**: Current state of the model ("loaded (GPU)", "loaded (CPU)", or "ready"). 
-## Pulling New Models +## Navigation +- **Tab / Shift+Tab**: Cycle focus between the three zones (models → browse → pull-input → pull-btn). +- **Up / Down arrows**: Navigate through the installed model list. The view scrolls automatically to follow your focus. +- **Left / Right arrows**: Navigate between the action buttons (Load/Unload/Delete) for the currently selected model. The cursor automatically skips disabled buttons. +- **Enter**: Activate the focused button or zone. +- **PageUp / PageDown**: Fast scroll through the model list. -To download a new model from the Ollama registry: +## Model Actions +Each model has specific action buttons: -1. Press **P** (Pull Model). -2. Enter the full model string (e.g., `llama3.1:8b`). -3. Press Enter to start the download. -4. A progress bar will appear in the action column. You can press **Esc** or **Q** to cancel the download at any time. +- **Load**: Loads the model into VRAM for inference. The status will show "Loading..." while in progress. Loaded models use a `keep_alive: -1` setting for infinite retention. +- **Unload**: Removes the model from VRAM. The system polls the engine until the unload is confirmed. +- **Delete**: Permanently removes the model from disk. A confirmation prompt will appear before deletion. -## Importing GGUF Files +## Downloading Models -If you have a GGUF file on an external device or elsewhere in the filesystem, you can import it by pressing **I** (Import GGUF). +### Browse Catalog +Selecting the **Browse Catalog** button opens a scrollable popup listing popular models from the Ollama library. Select a model from the list and confirm to start the download. -1. Provide the absolute path to the `.gguf` file. -2. NeuralDrive will create a local manifest and copy the file into the internal model storage area. -3. Once imported, the model will appear in your list with a default name derived from the filename. 
+### Pull by Name +To download a specific model, type its name (e.g., `llama3:8b`) into the text input field in the bottom zone and press **Enter** or click the **Pull** button. -Press **B** or **Back** to return to the main dashboard. +A progress bar will show the download status. You can press **Escape** or the **Cancel** button to abort an active download. +## VRAM Management +VRAM usage values are measured during operation or retrieved from a cache stored in `/var/lib/neuraldrive/config/`. + +Multiple models can be installed and loaded simultaneously. The underlying engine manages VRAM using an LRU (Least Recently Used) eviction policy when the `OLLAMA_MAX_LOADED_MODELS` setting is set to auto. diff --git a/docs/user-guide/src/using/tui-dashboard.md b/docs/user-guide/src/using/tui-dashboard.md index 2dd09c1..6098d36 100644 --- a/docs/user-guide/src/using/tui-dashboard.md +++ b/docs/user-guide/src/using/tui-dashboard.md @@ -9,32 +9,29 @@ The Dashboard is the central monitoring hub of NeuralDrive. It is designed to pr The dashboard is divided into three functional areas: ### 1. Header and System Info -Located at the very top, this section displays the version of NeuralDrive, the current hostname, system uptime, and the primary IP address. - -- **Refresh Rate:** Static upon load, updates if network state changes. +Located at the very top, this section displays the version of NeuralDrive, the current hostname, system uptime, and the primary IP address. A live system clock is displayed in the upper-right corner, showing the exact time of the last data refresh. ### 2. Hardware Resource Monitor This section provides real-time metrics for your hardware. -- **GPU:** Displays the detected GPU model (e.g., NVIDIA RTX 4090). +- **GPU:** Displays the detected GPU model name, total VRAM capacity, driver version, and CUDA compute capability. - **VRAM:** Shows the current VRAM usage (e.g., 12.4/24.0 GB) and a percentage bar. - **Temp:** Current GPU temperature in Celsius. 
- **CPU:** Real-time CPU utilization percentage. - **RAM:** System memory usage (e.g., 18.2/64.0 GB). - **Disk:** Total disk space used on the persistence partition (e.g., 45.2 GB). -- **Refresh Rate:** Hardware metrics refresh every **2 seconds**. +- **Refresh Rate:** Hardware metrics refresh every **2 seconds**. You can press **R** at any time to trigger a manual refresh of all dashboard data. ### 3. Loaded Models List This list displays the models currently residing in memory and ready for immediate inference. - **Status Indicator:** A solid circle (●) indicates the model is currently loaded in memory. An open circle (○) indicates the model is cached on disk but not currently loaded. -- **Backend:** Shows if the model is running on the **[GPU]** or CPU. +- **Backend:** Shows if the model is running on the **[GPU]** or **[CPU]**. - **VRAM Footprint:** The amount of memory the model is currently occupying. -- **Activity:** Real-time request volume, measured in requests per minute (req/min). -- **Refresh Rate:** The model list and their activity metrics refresh every **10 seconds**. +- **Refresh Rate:** The model list metrics refresh every **10 seconds**. ## Interaction -While the Dashboard is primarily for monitoring, pressing any of the navigation keys at the bottom will transition you to a specific management screen. You can return to the dashboard at any time by pressing the **Back** or **B** key from most sub-screens. +The Dashboard is accessed via **F1** from any other screen. While it is primarily for monitoring, you can transition to other management screens using the function keys (F2-F5) shown at the bottom. Use the **R** key to manually refresh the displayed information. 
diff --git a/docs/user-guide/src/using/tui-services.md b/docs/user-guide/src/using/tui-services.md index ff6476b..d866744 100644 --- a/docs/user-guide/src/using/tui-services.md +++ b/docs/user-guide/src/using/tui-services.md @@ -2,36 +2,33 @@ # Service Control -The Services screen provides a centralized interface for managing the background system processes that power NeuralDrive. Access this screen by pressing **S** from the main dashboard. +The Services screen provides a centralized interface for managing the background system processes that power NeuralDrive. -## Service List +## Access +Press **F3** from any screen to access Service Control. -This screen displays all `neuraldrive-*` services and their current operational status: +## Layout +The screen displays a scrollable list of services. Each service is represented by a `ServiceItem` widget showing the service name, its current status, and a set of action buttons. -- **Active:** The service is running normally. -- **Inactive:** The service is stopped. -- **Failed:** The service encountered an error and crashed. +### Services Managed +The TUI allows you to manage critical NeuralDrive components, including: +- `neuraldrive-ollama`: The core model execution engine. +- `neuraldrive-webui`: The browser-based user interface. +- Any other configured system services specific to the NeuralDrive distribution. -The primary services you will see include: +## Navigation +- **Up / Down arrows**: Navigate between the different services in the list. +- **Left / Right arrows**: Navigate between the action buttons (Start/Stop/Restart) for the currently selected service. The focus will automatically skip buttons that are disabled based on the service's current state. +- **Enter**: Activate the focused action button. -- `neuraldrive-ollama`: The model execution engine. -- `neuraldrive-webui`: The browser-based interface. -- `neuraldrive-caddy`: The reverse proxy handling TLS and routing. 
-- `neuraldrive-system-api`: The system management API. +## Action Buttons +Each service has three colored action buttons that are enabled or disabled dynamically based on the service's current state: -## Managing Services +- **Start** (green): Starts a service that is currently stopped or inactive. +- **Stop** (red): Gracefully shuts down a running service. +- **Restart** (amber): Stops and immediately restarts a running service. This is often the quickest way to resolve minor connectivity or performance issues. -You can control individual services using the following keybindings after selecting a service from the list: - -- **R (Restart):** Stops and immediately restarts the selected service. This is the first step you should take if a component becomes unresponsive. -- **S (Start):** Manages starting a service that is currently inactive or failed. -- **T (Stop):** Gracefully shuts down the selected service. - -## Recovery and Troubleshooting - -If the Dashboard shows an "Ollama Offline" badge, navigate to the Services screen and check the status of `neuraldrive-ollama`. If it is in a **Failed** or **Inactive** state, use the **S** key to start it or **R** to restart it. - -Monitoring service status is critical for maintaining system uptime. If a service repeatedly fails, you should examine the system logs for more detailed error information. - -Press **B** or **Back** to return to the main dashboard. +## Auto-Refresh and Monitoring +The status of all services is automatically polled every 5 seconds to ensure the interface reflects the actual state of the system. +If a service like `neuraldrive-ollama` shows a failed or inactive status, use the action buttons to restore it. Continuous monitoring and manual control through this screen help maintain the overall health of your NeuralDrive instance.
diff --git a/docs/user-guide/src/using/tui.md b/docs/user-guide/src/using/tui.md index 4c5d159..75df3a3 100644 --- a/docs/user-guide/src/using/tui.md +++ b/docs/user-guide/src/using/tui.md @@ -29,32 +29,51 @@ This launcher script (installed at `/usr/local/bin/neuraldrive-tui`) activates t The main dashboard provides a high-level overview of system health and active models. ```text -┌──────────────── NeuralDrive v1.0.0 ──────────────────────────┐ +┌──────────────── NeuralDrive v1.0.0 ───────────────── 10:45:22 ─┐ │ Host: neuraldrive.local │ Uptime: 2h 15m │ IP: 192.168.1.50 │ -├──────────────────────────────────────────────────────────────┤ -│ GPU: NVIDIA RTX 4090 │ VRAM: 12.4/24.0 GB │ Temp: 65°C │ 85% │ -│ CPU: 12% │ RAM: 18.2/64.0 GB │ Disk: 45.2 GB │ -├──────────────────────────────────────────────────────────────┤ -│ LOADED MODELS │ -│ ● llama3.1:8b [GPU] 4.7 GB 85 req/min │ -│ ● codestral:latest [GPU] 8.2 GB 12 req/min │ -│ ○ phi3:mini --- (not loaded) │ -├──────────────────────────────────────────────────────────────┤ -│ [M]odels [S]ervices [N]etwork [L]ogs [C]hat [Q]uit │ -└──────────────────────────────────────────────────────────────┘ +├────────────────────────────────────────────────────────────────┤ +│ GPU: NVIDIA RTX 4090 │ VRAM: 12.4/24.0 GB │ Temp: 65°C │ 85% │ +│ CPU: 12% │ RAM: 18.2/64.0 GB │ Disk: 45.2 GB │ +├────────────────────────────────────────────────────────────────┤ +│ LOADED MODELS │ +│ ● llama3.1:8b [GPU] 4.7 GB │ +│ ● codestral:latest [GPU] 8.2 GB │ +│ ○ phi3:mini --- (not loaded) │ +├────────────────────────────────────────────────────────────────┤ +│ F1 Dashboard F2 Models F3 Services F4 Logs F5 Chat Q Quit │ +└────────────────────────────────────────────────────────────────┘ ``` ## Navigation Keybindings -Navigation is performed using single-letter hotkeys shown at the bottom of the screen: +Navigation is performed using function keys: -- **M:** Models screen for managing downloads and loading state. 
-- **S:** Services screen for restarting or stopping system components. -- **N:** Network screen for hostname and IP configuration. -- **L:** Logs screen for real-time system and service logs. -- **C:** Chat screen for a lightweight, terminal-based LLM chat. +- **F1:** Dashboard overview. +- **F2:** Models screen for managing downloads and loading state. +- **F3:** Services screen for restarting or stopping system components. +- **F4:** Logs screen for real-time system and service logs. +- **F5:** Chat screen for a lightweight, terminal-based LLM chat. - **Q:** Quit the TUI and return to the shell. +Within each screen, the following navigation model is used: +- **Tab / Shift+Tab:** Cycle focus between different screen zones. +- **Arrow Keys:** Navigate within a focused zone (e.g., scrolling a list). +- **Enter:** Activate the currently focused element or button. + +## Troubleshooting and Debugging + +If the TUI encounters a critical error, it will write a crash dump to `/var/lib/neuraldrive/logs/tui-crash-*.log`. + +Screenshots captured within the TUI are saved to `/var/lib/neuraldrive/screenshots/`. + +### Re-running the First-Boot Wizard + +If you need to force the first-boot wizard to run again, launch the TUI with the `--wizard` flag: +```bash +neuraldrive-tui --wizard +``` +This removes the sentinel file and initiates the guided setup process. + ## Resilience The TUI is designed to be resilient. If the underlying Ollama service is unavailable, an "Ollama Offline" badge will appear on the dashboard, and certain model management features will be disabled until the service is restored via the **Services** screen.