diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index c94380b..16e8a87 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -18,31 +18,42 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
+ token: ${{ secrets.ADMIN_TOKEN }}
+
+ - name: Check if should skip
+ id: check_skip
+ run: |
+ if [ "$(git log -1 --pretty=format:'%an')" = "semantic-release" ]; then
+            echo "skip=true" >> "$GITHUB_OUTPUT"
+ fi
- name: Set up Python
+ if: steps.check_skip.outputs.skip != 'true'
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install uv
+ if: steps.check_skip.outputs.skip != 'true'
uses: astral-sh/setup-uv@v4
- name: Python Semantic Release
+ if: steps.check_skip.outputs.skip != 'true'
id: release
uses: python-semantic-release/python-semantic-release@v9.15.2
with:
- github_token: ${{ secrets.GITHUB_TOKEN }}
+ github_token: ${{ secrets.ADMIN_TOKEN }}
- name: Build package
- if: steps.release.outputs.released == 'true'
+ if: steps.check_skip.outputs.skip != 'true' && steps.release.outputs.released == 'true'
run: uv build
- name: Publish to PyPI
- if: steps.release.outputs.released == 'true'
+ if: steps.check_skip.outputs.skip != 'true' && steps.release.outputs.released == 'true'
uses: pypa/gh-action-pypi-publish@release/v1
- name: Publish to GitHub Releases
- if: steps.release.outputs.released == 'true'
+ if: steps.check_skip.outputs.skip != 'true' && steps.release.outputs.released == 'true'
uses: python-semantic-release/publish-action@v9.15.2
with:
- github_token: ${{ secrets.GITHUB_TOKEN }}
+ github_token: ${{ secrets.ADMIN_TOKEN }}
diff --git a/README.md b/README.md
index e41807f..4e65d17 100644
--- a/README.md
+++ b/README.md
@@ -4,10 +4,8 @@
[](https://opensource.org/licenses/MIT)
[](https://www.python.org/downloads/)
-
**OpenAdapt Capture** is the data collection component of the [OpenAdapt](https://github.com/OpenAdaptAI) GUI automation ecosystem.
@@ -43,7 +41,7 @@ Capture platform-agnostic GUI interaction streams with time-aligned screenshots
|-----------|---------|------------|
| **openadapt-capture** | Record human demonstrations | [GitHub](https://github.com/OpenAdaptAI/openadapt-capture) |
| **openadapt-ml** | Train and evaluate GUI automation models | [GitHub](https://github.com/OpenAdaptAI/openadapt-ml) |
-| **openadapt-privacy** | PII scrubbing for recordings | Coming soon |
+| **openadapt-privacy** | PII scrubbing for recordings | [GitHub](https://github.com/OpenAdaptAI/openadapt-privacy) |
---
@@ -208,75 +206,29 @@ The HTML viewer includes:
uv run python scripts/generate_readme_demo.py --duration 10
```
-## Optional Extras
+## Sharing Recordings
-| Extra | Features |
-|-------|----------|
-| `audio` | Audio capture + Whisper transcription |
-| `privacy` | PII scrubbing (openadapt-privacy) |
-| `all` | Everything |
-
----
-
-## Training with OpenAdapt-ML
-
-Captured recordings can be used to train vision-language models with [openadapt-ml](https://github.com/OpenAdaptAI/openadapt-ml).
-
-### End-to-End Workflow
+Share recordings between machines using [Magic Wormhole](https://magic-wormhole.readthedocs.io/):
```bash
-# 1. Capture a workflow demonstration
-uv run python -c "
-from openadapt_capture import Recorder
-
-with Recorder('./my_capture', task_description='Turn off Night Shift') as recorder:
- input('Perform the task, then press Enter to stop...')
-"
-
-# 2. Train a model on the capture (requires openadapt-ml)
-uv pip install openadapt-ml
-uv run python -m openadapt_ml.cloud.local train \
- --capture ./my_capture \
- --open # Opens training dashboard
-
-# 3. Compare human vs model predictions
-uv run python -m openadapt_ml.scripts.compare \
- --capture ./my_capture \
- --checkpoint checkpoints/model \
- --open
-```
+# On the sending machine
+capture share send ./my_capture
+# Shows a code like: 7-guitarist-revenge
-### Cloud GPU Training
-
-For faster training with cloud GPUs:
-
-```bash
-# Train on Lambda Labs A10 (~$0.75/hr)
-uv run python -m openadapt_ml.cloud.lambda_labs train \
- --capture ./my_capture \
- --goal "Turn off Night Shift"
+# On the receiving machine
+capture share receive 7-guitarist-revenge
```
-See the [openadapt-ml documentation](https://github.com/OpenAdaptAI/openadapt-ml#6-cloud-gpu-training) for cloud setup.
-
-### Data Format
-
-OpenAdapt-ML converts captures to its Episode format automatically:
-
-```python
-from openadapt_ml.ingest.capture import capture_to_episode
+The `share` command compresses the recording, sends it via Magic Wormhole, and extracts it on the receiving end. No account or setup required - just share the code.
-episode = capture_to_episode("./my_capture")
-print(f"Loaded {len(episode.steps)} steps")
-print(f"Instruction: {episode.instruction}")
-```
+## Optional Extras
-The conversion maps capture event types to ML action types:
-- `mouse.singleclick` / `mouse.click` -> `CLICK`
-- `mouse.doubleclick` -> `DOUBLE_CLICK`
-- `mouse.drag` -> `DRAG`
-- `mouse.scroll` -> `SCROLL`
-- `key.type` -> `TYPE`
+| Extra | Features |
+|-------|----------|
+| `audio` | Audio capture + Whisper transcription |
+| `privacy` | PII scrubbing ([openadapt-privacy](https://github.com/OpenAdaptAI/openadapt-privacy)) |
+| `share` | Recording sharing via Magic Wormhole |
+| `all` | Everything |
---
@@ -290,6 +242,8 @@ uv run pytest
## Related Projects
- [openadapt-ml](https://github.com/OpenAdaptAI/openadapt-ml) - Train and evaluate GUI automation models
+- [openadapt-privacy](https://github.com/OpenAdaptAI/openadapt-privacy) - PII detection and scrubbing for recordings
+- [openadapt-evals](https://github.com/OpenAdaptAI/openadapt-evals) - Benchmark evaluation for GUI agents
- [Windows Agent Arena](https://github.com/microsoft/WindowsAgentArena) - Benchmark for Windows GUI agents
## License
diff --git a/openadapt_capture/__init__.py b/openadapt_capture/__init__.py
index 87c1d47..217cf8d 100644
--- a/openadapt_capture/__init__.py
+++ b/openadapt_capture/__init__.py
@@ -77,6 +77,12 @@
# Browser events and bridge (optional - requires websockets)
try:
+ from openadapt_capture.browser_bridge import (
+ BrowserBridge,
+ BrowserEventRecord,
+ BrowserMode,
+ run_browser_bridge,
+ )
from openadapt_capture.browser_events import (
BoundingBox,
BrowserClickEvent,
@@ -93,12 +99,6 @@
SemanticElementRef,
VisibleElement,
)
- from openadapt_capture.browser_bridge import (
- BrowserBridge,
- BrowserEventRecord,
- BrowserMode,
- run_browser_bridge,
- )
_BROWSER_BRIDGE_AVAILABLE = True
except ImportError:
_BROWSER_BRIDGE_AVAILABLE = False
diff --git a/openadapt_capture/browser_bridge.py b/openadapt_capture/browser_bridge.py
index c449b5a..6a8dff9 100644
--- a/openadapt_capture/browser_bridge.py
+++ b/openadapt_capture/browser_bridge.py
@@ -381,20 +381,14 @@ async def _handle_dom_event(self, data: dict) -> None:
self._event_count += 1
# Parse into typed event if possible
- typed_event = self._parse_typed_event(event_type, payload, data)
+ self._parse_typed_event(event_type, payload, data)
# Store in CaptureStorage if available
if self.storage is not None:
- # Store as JSON in the events table
- # Note: We store the raw event, not Pydantic model to match storage patterns
- try:
- from openadapt_capture.events import BaseEvent
- # Create a minimal event for storage compatibility
- # Browser events don't fit the standard EventType enum
- # so we store them as raw JSON in a custom way
- pass # Storage integration would go here
- except ImportError:
- pass
+ # Storage integration would go here
+ # Browser events don't fit the standard EventType enum
+ # so we store them as raw JSON in a custom way
+ pass
# Notify callback
if self.on_event is not None:
diff --git a/openadapt_capture/browser_events.py b/openadapt_capture/browser_events.py
index 848458a..39b3a09 100644
--- a/openadapt_capture/browser_events.py
+++ b/openadapt_capture/browser_events.py
@@ -12,7 +12,6 @@
from pydantic import BaseModel, Field
-
# =============================================================================
# Browser Event Types
# =============================================================================
diff --git a/openadapt_capture/cli.py b/openadapt_capture/cli.py
index 5cd4e54..7920852 100644
--- a/openadapt_capture/cli.py
+++ b/openadapt_capture/cli.py
@@ -353,7 +353,7 @@ def share(action: str, path_or_code: str, output_dir: str = ".") -> None:
capture share receive 7-guitarist-revenge
capture share receive 7-guitarist-revenge ./recordings
"""
- from openadapt_capture.share import send, receive
+ from openadapt_capture.share import receive, send
if action == "send":
send(path_or_code)
diff --git a/openadapt_capture/platform/__init__.py b/openadapt_capture/platform/__init__.py
new file mode 100644
index 0000000..9ff1c52
--- /dev/null
+++ b/openadapt_capture/platform/__init__.py
@@ -0,0 +1,132 @@
+"""Platform-specific implementations for GUI event capture.
+
+This module provides platform-specific implementations for:
+- Screen capture
+- Input event capture
+- Display information (resolution, DPI, pixel ratio)
+
+The module automatically selects the appropriate implementation based on
+the current platform (darwin, win32, linux).
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from typing import Protocol
+
+ class PlatformProvider(Protocol):
+ """Protocol for platform-specific providers."""
+
+ @staticmethod
+ def get_screen_dimensions() -> tuple[int, int]:
+ """Get screen dimensions in physical pixels."""
+ ...
+
+ @staticmethod
+ def get_display_pixel_ratio() -> float:
+ """Get display pixel ratio (physical/logical)."""
+ ...
+
+ @staticmethod
+ def is_accessibility_enabled() -> bool:
+ """Check if accessibility permissions are enabled."""
+ ...
+
+
+def get_platform() -> str:
+ """Get the current platform identifier.
+
+ Returns:
+ 'darwin' for macOS, 'win32' for Windows, 'linux' for Linux.
+ """
+ return sys.platform
+
+
+def get_platform_provider() -> "PlatformProvider":
+ """Get the platform-specific provider for the current OS.
+
+ Returns:
+ Platform provider instance for the current operating system.
+
+ Raises:
+ NotImplementedError: If the platform is not supported.
+ """
+ platform = get_platform()
+
+ if platform == "darwin":
+ from openadapt_capture.platform.darwin import DarwinPlatform
+ return DarwinPlatform()
+ elif platform == "win32":
+ from openadapt_capture.platform.windows import WindowsPlatform
+ return WindowsPlatform()
+ elif platform.startswith("linux"):
+ from openadapt_capture.platform.linux import LinuxPlatform
+ return LinuxPlatform()
+ else:
+ raise NotImplementedError(f"Platform not supported: {platform}")
+
+
+def get_screen_dimensions() -> tuple[int, int]:
+ """Get screen dimensions in physical pixels.
+
+ This returns the actual screenshot pixel dimensions, which may be
+ larger than logical dimensions on HiDPI/Retina displays.
+
+ Returns:
+ Tuple of (width, height) in physical pixels.
+ """
+ try:
+ provider = get_platform_provider()
+ return provider.get_screen_dimensions()
+ except (NotImplementedError, ImportError):
+ # Fallback to generic implementation
+ try:
+ from PIL import ImageGrab
+ screenshot = ImageGrab.grab()
+ return screenshot.size
+ except Exception:
+ return (1920, 1080) # Default fallback
+
+
+def get_display_pixel_ratio() -> float:
+ """Get the display pixel ratio (physical/logical).
+
+ This is the ratio of physical pixels to logical pixels.
+ For example, 2.0 for Retina displays on macOS.
+
+ Returns:
+ Pixel ratio (e.g., 1.0 for standard displays, 2.0 for Retina).
+ """
+ try:
+ provider = get_platform_provider()
+ return provider.get_display_pixel_ratio()
+ except (NotImplementedError, ImportError):
+ return 1.0
+
+
+def is_accessibility_enabled() -> bool:
+ """Check if accessibility permissions are enabled.
+
+ On macOS, this checks if the application has accessibility permissions
+ required for keyboard and mouse event capture.
+
+ Returns:
+ True if accessibility is enabled, False otherwise.
+ """
+ try:
+ provider = get_platform_provider()
+ return provider.is_accessibility_enabled()
+ except (NotImplementedError, ImportError):
+ return True # Assume enabled on unknown platforms
+
+
+__all__ = [
+ "get_platform",
+ "get_platform_provider",
+ "get_screen_dimensions",
+ "get_display_pixel_ratio",
+ "is_accessibility_enabled",
+]
diff --git a/openadapt_capture/platform/darwin.py b/openadapt_capture/platform/darwin.py
new file mode 100644
index 0000000..06555f5
--- /dev/null
+++ b/openadapt_capture/platform/darwin.py
@@ -0,0 +1,182 @@
+"""macOS (Darwin) platform-specific implementations.
+
+This module provides macOS-specific functionality for:
+- Screen capture using Quartz
+- Display information (resolution, Retina pixel ratio)
+- Accessibility permission checking
+"""
+
+from __future__ import annotations
+
+import sys
+
+if sys.platform != "darwin":
+ raise ImportError("This module is only available on macOS")
+
+
+class DarwinPlatform:
+ """macOS platform provider.
+
+ Provides macOS-specific implementations for screen capture,
+ display information, and accessibility checking.
+ """
+
+ @staticmethod
+ def get_screen_dimensions() -> tuple[int, int]:
+ """Get screen dimensions in physical pixels.
+
+ On Retina displays, this returns the actual pixel dimensions,
+ not the scaled logical dimensions.
+
+ Returns:
+ Tuple of (width, height) in physical pixels.
+ """
+ try:
+ from PIL import ImageGrab
+ screenshot = ImageGrab.grab()
+ return screenshot.size
+ except Exception:
+ # Fallback using Quartz
+ try:
+ import Quartz
+
+ main_display = Quartz.CGMainDisplayID()
+ width = Quartz.CGDisplayPixelsWide(main_display)
+ height = Quartz.CGDisplayPixelsHigh(main_display)
+ return (width, height)
+ except Exception:
+ return (1920, 1080)
+
+ @staticmethod
+ def get_display_pixel_ratio() -> float:
+ """Get the display pixel ratio for Retina displays.
+
+ Returns 2.0 for Retina displays, 1.0 for standard displays.
+
+ Returns:
+ Pixel ratio (physical pixels / logical pixels).
+ """
+ try:
+ import mss
+ from PIL import ImageGrab
+
+ # Get physical dimensions from screenshot
+ screenshot = ImageGrab.grab()
+ physical_width = screenshot.size[0]
+
+ # Get logical dimensions from mss
+ with mss.mss() as sct:
+ # monitors[1] is typically the primary monitor
+ monitor = sct.monitors[1] if len(sct.monitors) > 1 else sct.monitors[0]
+ logical_width = monitor["width"]
+
+ if logical_width > 0:
+ return physical_width / logical_width
+
+ return 1.0
+ except ImportError:
+ # Try using Quartz directly
+ try:
+ import Quartz
+
+ main_display = Quartz.CGMainDisplayID()
+
+ # Get physical dimensions
+ physical_width = Quartz.CGDisplayPixelsWide(main_display)
+
+ # Get logical dimensions using display mode
+ mode = Quartz.CGDisplayCopyDisplayMode(main_display)
+ if mode:
+ logical_width = Quartz.CGDisplayModeGetWidth(mode)
+ if logical_width > 0:
+ return physical_width / logical_width
+
+ return 1.0
+ except Exception:
+ return 1.0
+ except Exception:
+ return 1.0
+
+ @staticmethod
+ def is_accessibility_enabled() -> bool:
+ """Check if accessibility permissions are enabled.
+
+ macOS requires accessibility permissions for capturing
+ keyboard and mouse events globally.
+
+ Returns:
+ True if accessibility is enabled, False otherwise.
+ """
+ try:
+ import Quartz # noqa: F401 - needed for ApplicationServices
+
+ # Check if we can access accessibility features
+ # This uses the AXIsProcessTrustedWithOptions function
+ from ApplicationServices import (
+ AXIsProcessTrustedWithOptions,
+ kAXTrustedCheckOptionPrompt,
+ )
+
+ # Check without prompting
+ options = {kAXTrustedCheckOptionPrompt: False}
+ return AXIsProcessTrustedWithOptions(options)
+ except ImportError:
+ # If ApplicationServices is not available, try a simpler check
+ try:
+ import subprocess
+
+ result = subprocess.run(
+ [
+ "osascript",
+ "-e",
+ 'tell application "System Events" to get name of first process',
+ ],
+ capture_output=True,
+ timeout=5,
+ )
+ return result.returncode == 0
+ except Exception:
+ return True # Assume enabled if we can't check
+ except Exception:
+ return True # Assume enabled if we can't check
+
+ @staticmethod
+ def get_active_window_info() -> dict | None:
+ """Get information about the currently active window.
+
+ Returns:
+ Dictionary with window info (title, app_name, bounds) or None.
+ """
+ try:
+ import Quartz
+
+ # Get the list of windows
+ options = Quartz.kCGWindowListOptionOnScreenOnly
+ window_list = Quartz.CGWindowListCopyWindowInfo(options, Quartz.kCGNullWindowID)
+
+ if not window_list:
+ return None
+
+ # Find the frontmost window (layer 0 is typically the frontmost)
+ for window in window_list:
+ layer = window.get("kCGWindowLayer", -1)
+ if layer == 0:
+ bounds = window.get("kCGWindowBounds", {})
+ return {
+ "title": window.get("kCGWindowName", ""),
+ "app_name": window.get("kCGWindowOwnerName", ""),
+ "bounds": {
+ "x": bounds.get("X", 0),
+ "y": bounds.get("Y", 0),
+ "width": bounds.get("Width", 0),
+ "height": bounds.get("Height", 0),
+ },
+ "pid": window.get("kCGWindowOwnerPID", 0),
+ }
+
+ return None
+ except Exception:
+ return None
+
+
+__all__ = ["DarwinPlatform"]
diff --git a/openadapt_capture/platform/linux.py b/openadapt_capture/platform/linux.py
new file mode 100644
index 0000000..78f5e09
--- /dev/null
+++ b/openadapt_capture/platform/linux.py
@@ -0,0 +1,309 @@
+"""Linux platform-specific implementations.
+
+This module provides Linux-specific functionality for:
+- Screen capture using X11 or Wayland
+- Display information (resolution, scaling)
+- Accessibility/permission checking
+"""
+
+from __future__ import annotations
+
+import sys
+
+if not sys.platform.startswith("linux"):
+ raise ImportError("This module is only available on Linux")
+
+
+class LinuxPlatform:
+ """Linux platform provider.
+
+ Provides Linux-specific implementations for screen capture,
+ display information, and permission checking.
+ """
+
+ @staticmethod
+ def _is_wayland() -> bool:
+ """Check if running under Wayland.
+
+ Returns:
+ True if Wayland, False if X11.
+ """
+ import os
+
+        return os.environ.get("XDG_SESSION_TYPE") == "wayland" or bool(
+            os.environ.get("WAYLAND_DISPLAY")
+        )
+
+ @staticmethod
+ def get_screen_dimensions() -> tuple[int, int]:
+ """Get screen dimensions in physical pixels.
+
+ Works with both X11 and Wayland (falls back to PIL).
+
+ Returns:
+ Tuple of (width, height) in physical pixels.
+ """
+ try:
+ from PIL import ImageGrab
+ screenshot = ImageGrab.grab()
+ return screenshot.size
+ except Exception:
+ # Fallback for X11
+ try:
+ import subprocess
+
+ result = subprocess.run(
+ ["xdpyinfo"],
+ capture_output=True,
+ text=True,
+ timeout=5,
+ )
+ if result.returncode == 0:
+ for line in result.stdout.split("\n"):
+ if "dimensions:" in line:
+ # Parse "dimensions: 1920x1080 pixels"
+ parts = line.split()
+ for part in parts:
+ if "x" in part and part[0].isdigit():
+ w, h = part.split("x")
+ return (int(w), int(h))
+ except Exception:
+ pass
+
+ # Fallback for Wayland using wlr-randr
+ try:
+ import subprocess
+
+ result = subprocess.run(
+ ["wlr-randr"],
+ capture_output=True,
+ text=True,
+ timeout=5,
+ )
+ if result.returncode == 0:
+ for line in result.stdout.split("\n"):
+ if "current" in line.lower():
+ # Parse resolution from wlr-randr output
+ parts = line.split()
+ for part in parts:
+ if "x" in part and part[0].isdigit():
+ dims = part.split("x")
+ if len(dims) == 2:
+ try:
+ return (int(dims[0]), int(dims[1].split("@")[0]))
+ except ValueError:
+ pass
+ except Exception:
+ pass
+
+ return (1920, 1080)
+
+ @staticmethod
+ def get_display_pixel_ratio() -> float:
+ """Get the display pixel ratio for HiDPI displays.
+
+ Returns the scaling factor set in the desktop environment.
+
+ Returns:
+ Pixel ratio (physical pixels / logical pixels).
+ """
+ import os
+
+ # Check GNOME scaling factor
+ try:
+ import subprocess
+
+ result = subprocess.run(
+ ["gsettings", "get", "org.gnome.desktop.interface", "scaling-factor"],
+ capture_output=True,
+ text=True,
+ timeout=5,
+ )
+ if result.returncode == 0:
+                factor = int(result.stdout.strip().split()[-1])
+ if factor > 0:
+ return float(factor)
+ except Exception:
+ pass
+
+ # Check GDK_SCALE environment variable
+ gdk_scale = os.environ.get("GDK_SCALE")
+ if gdk_scale:
+ try:
+ return float(gdk_scale)
+ except ValueError:
+ pass
+
+ # Check QT_SCALE_FACTOR
+ qt_scale = os.environ.get("QT_SCALE_FACTOR")
+ if qt_scale:
+ try:
+ return float(qt_scale)
+ except ValueError:
+ pass
+
+ # Check for mss-based calculation
+ try:
+ import mss
+ from PIL import ImageGrab
+
+ screenshot = ImageGrab.grab()
+ physical_width = screenshot.size[0]
+
+ with mss.mss() as sct:
+ monitor = sct.monitors[1] if len(sct.monitors) > 1 else sct.monitors[0]
+ logical_width = monitor["width"]
+
+ if logical_width > 0:
+ return physical_width / logical_width
+ except Exception:
+ pass
+
+ return 1.0
+
+ @staticmethod
+ def is_accessibility_enabled() -> bool:
+ """Check if input capture is available.
+
+ On Linux, this typically requires:
+ - X11: xdotool or similar tool access
+ - Wayland: Portal permissions or root access
+
+ Returns:
+ True if input capture is likely available, False otherwise.
+ """
+ import os
+
+ # Check if running as root (always has access)
+ if os.geteuid() == 0:
+ return True
+
+ # Check for X11
+ if not LinuxPlatform._is_wayland():
+ # X11 typically allows input capture
+ display = os.environ.get("DISPLAY")
+ return display is not None
+
+ # Wayland is more restrictive
+ # Check if we have portal access
+ try:
+ import subprocess
+
+ result = subprocess.run(
+ [
+ "dbus-send",
+ "--session",
+ "--dest=org.freedesktop.portal.Desktop",
+ "--type=method_call",
+ "--print-reply",
+ "/org/freedesktop/portal/desktop",
+ "org.freedesktop.DBus.Properties.Get",
+ "string:org.freedesktop.portal.RemoteDesktop",
+ "string:version",
+ ],
+ capture_output=True,
+ timeout=5,
+ )
+ return result.returncode == 0
+ except Exception:
+ pass
+
+ # Assume enabled if we can't determine
+ return True
+
+ @staticmethod
+ def get_active_window_info() -> dict | None:
+ """Get information about the currently active window.
+
+ Returns:
+ Dictionary with window info (title, app_name, bounds) or None.
+ """
+ # Try X11 first
+ if not LinuxPlatform._is_wayland():
+ try:
+ import subprocess
+
+ # Get active window ID
+ result = subprocess.run(
+ ["xdotool", "getactivewindow"],
+ capture_output=True,
+ text=True,
+ timeout=5,
+ )
+ if result.returncode != 0:
+ return None
+
+ window_id = result.stdout.strip()
+
+ # Get window name
+ name_result = subprocess.run(
+ ["xdotool", "getwindowname", window_id],
+ capture_output=True,
+ text=True,
+ timeout=5,
+ )
+ title = name_result.stdout.strip() if name_result.returncode == 0 else ""
+
+ # Get window geometry
+ geo_result = subprocess.run(
+ ["xdotool", "getwindowgeometry", window_id],
+ capture_output=True,
+ text=True,
+ timeout=5,
+ )
+
+ x, y, width, height = 0, 0, 0, 0
+ if geo_result.returncode == 0:
+ for line in geo_result.stdout.split("\n"):
+ if "Position:" in line:
+ # Parse "Position: 100,200 (screen: 0)"
+ pos = line.split(":")[1].split("(")[0].strip()
+ parts = pos.split(",")
+ if len(parts) == 2:
+ x, y = int(parts[0]), int(parts[1])
+ elif "Geometry:" in line:
+ # Parse "Geometry: 800x600"
+ geo = line.split(":")[1].strip()
+ if "x" in geo:
+ parts = geo.split("x")
+ width, height = int(parts[0]), int(parts[1])
+
+ # Get process info
+ pid_result = subprocess.run(
+ ["xdotool", "getwindowpid", window_id],
+ capture_output=True,
+ text=True,
+ timeout=5,
+ )
+ pid = 0
+ app_name = ""
+ if pid_result.returncode == 0:
+ try:
+ pid = int(pid_result.stdout.strip())
+ # Get process name
+ with open(f"/proc/{pid}/comm") as f:
+ app_name = f.read().strip()
+ except Exception:
+ pass
+
+ return {
+ "title": title,
+ "app_name": app_name,
+ "bounds": {
+ "x": x,
+ "y": y,
+ "width": width,
+ "height": height,
+ },
+ "window_id": window_id,
+ "pid": pid,
+ }
+ except Exception:
+ return None
+
+ # Wayland doesn't provide easy access to window info
+ # due to security model
+ return None
+
+
+__all__ = ["LinuxPlatform"]
diff --git a/openadapt_capture/platform/windows.py b/openadapt_capture/platform/windows.py
new file mode 100644
index 0000000..eaa9172
--- /dev/null
+++ b/openadapt_capture/platform/windows.py
@@ -0,0 +1,181 @@
+"""Windows platform-specific implementations.
+
+This module provides Windows-specific functionality for:
+- Screen capture using Win32 API
+- Display information (resolution, DPI scaling)
+- Accessibility/permission checking
+"""
+
+from __future__ import annotations
+
+import sys
+
+if sys.platform != "win32":
+ raise ImportError("This module is only available on Windows")
+
+
+class WindowsPlatform:
+ """Windows platform provider.
+
+ Provides Windows-specific implementations for screen capture,
+ display information, and permission checking.
+ """
+
+ @staticmethod
+ def get_screen_dimensions() -> tuple[int, int]:
+ """Get screen dimensions in physical pixels.
+
+ On high-DPI displays, this returns the actual pixel dimensions,
+ accounting for DPI scaling.
+
+ Returns:
+ Tuple of (width, height) in physical pixels.
+ """
+ try:
+ from PIL import ImageGrab
+ screenshot = ImageGrab.grab()
+ return screenshot.size
+ except Exception:
+ # Fallback using ctypes
+ try:
+ import ctypes
+
+ user32 = ctypes.windll.user32
+ # Make process DPI aware to get correct dimensions
+ try:
+ ctypes.windll.shcore.SetProcessDpiAwareness(2) # Per-monitor DPI aware
+ except Exception:
+ try:
+ user32.SetProcessDPIAware()
+ except Exception:
+ pass
+
+ width = user32.GetSystemMetrics(0) # SM_CXSCREEN
+ height = user32.GetSystemMetrics(1) # SM_CYSCREEN
+ return (width, height)
+ except Exception:
+ return (1920, 1080)
+
+ @staticmethod
+ def get_display_pixel_ratio() -> float:
+ """Get the display pixel ratio for high-DPI displays.
+
+ Returns the DPI scaling factor. For example, 1.5 for 150% scaling.
+
+ Returns:
+ Pixel ratio (physical pixels / logical pixels).
+ """
+ try:
+            import ctypes.wintypes
+
+ # Get DPI for the primary monitor
+ try:
+ # Windows 8.1+ method
+ shcore = ctypes.windll.shcore
+ dpi = ctypes.c_uint()
+ shcore.GetDpiForMonitor(
+ ctypes.windll.user32.MonitorFromPoint(ctypes.wintypes.POINT(0, 0), 1),
+ 0, # MDT_EFFECTIVE_DPI
+ ctypes.byref(dpi),
+ ctypes.byref(ctypes.c_uint()),
+ )
+ return dpi.value / 96.0 # 96 DPI is the baseline (100% scaling)
+ except Exception:
+ pass
+
+ # Fallback: Get DPI from device context
+ try:
+ user32 = ctypes.windll.user32
+ gdi32 = ctypes.windll.gdi32
+
+ hdc = user32.GetDC(0)
+ dpi = gdi32.GetDeviceCaps(hdc, 88) # LOGPIXELSX
+ user32.ReleaseDC(0, hdc)
+
+ return dpi / 96.0
+ except Exception:
+ pass
+
+ return 1.0
+ except Exception:
+ return 1.0
+
+ @staticmethod
+ def is_accessibility_enabled() -> bool:
+ """Check if the application can capture input events.
+
+ On Windows, input capture typically works without special permissions,
+ but we check if we're running with sufficient privileges.
+
+ Returns:
+ True if input capture is available, False otherwise.
+ """
+ try:
+ import ctypes
+
+ # Check if running as administrator
+ try:
+ ctypes.windll.shell32.IsUserAnAdmin()
+ # Even non-admin can typically capture input
+ return True
+ except Exception:
+ return True # Assume enabled
+ except Exception:
+ return True
+
+ @staticmethod
+ def get_active_window_info() -> dict | None:
+ """Get information about the currently active window.
+
+ Returns:
+ Dictionary with window info (title, app_name, bounds) or None.
+ """
+ try:
+ import ctypes
+ from ctypes import wintypes
+
+ user32 = ctypes.windll.user32
+
+ # Get foreground window handle
+ hwnd = user32.GetForegroundWindow()
+ if not hwnd:
+ return None
+
+ # Get window title
+ title_length = user32.GetWindowTextLengthW(hwnd) + 1
+ title_buffer = ctypes.create_unicode_buffer(title_length)
+ user32.GetWindowTextW(hwnd, title_buffer, title_length)
+ title = title_buffer.value
+
+ # Get window rectangle
+ rect = wintypes.RECT()
+ user32.GetWindowRect(hwnd, ctypes.byref(rect))
+
+ # Get process name
+ process_name = ""
+ try:
+ import psutil
+
+ pid = wintypes.DWORD()
+ user32.GetWindowThreadProcessId(hwnd, ctypes.byref(pid))
+ process = psutil.Process(pid.value)
+ process_name = process.name()
+ except Exception:
+ pass
+
+ return {
+ "title": title,
+ "app_name": process_name,
+ "bounds": {
+ "x": rect.left,
+ "y": rect.top,
+ "width": rect.right - rect.left,
+ "height": rect.bottom - rect.top,
+ },
+ "hwnd": hwnd,
+ }
+ except Exception:
+ return None
+
+
+__all__ = ["WindowsPlatform"]
diff --git a/openadapt_capture/samples.py b/openadapt_capture/samples.py
new file mode 100644
index 0000000..5943b13
--- /dev/null
+++ b/openadapt_capture/samples.py
@@ -0,0 +1,272 @@
+"""Example recordings and sample data loading.
+
+This module provides access to bundled example recordings that can be used
+for testing, demos, and as reference implementations.
+
+Example usage:
+ >>> from openadapt_capture.samples import list_examples, load_example
+ >>> print(list_examples())
+ ['turn-off-nightshift']
+ >>> capture = load_example('turn-off-nightshift')
+ >>> for action in capture.actions():
+ ... print(f"{action.type} at ({action.x}, {action.y})")
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import TYPE_CHECKING, Optional
+
+if TYPE_CHECKING:
+ from openadapt_capture.capture import CaptureSession
+
+logger = logging.getLogger(__name__)
+
+# Default example - the most complete bundled recording
+DEFAULT_EXAMPLE = "turn-off-nightshift"
+
+
+def get_examples_dir() -> Path:
+ """Return the path to the bundled examples directory.
+
+ Returns:
+ Path to examples directory (may not exist if no examples bundled)
+ """
+ return Path(__file__).parent.parent / "examples"
+
+
+def get_external_examples_dir() -> Path:
+ """Return the path to external examples (e.g., in openadapt-capture repo root).
+
+ This looks for examples in the repository root, which is useful during
+ development when examples are not bundled in the package.
+
+ Returns:
+ Path to external examples directory
+ """
+ # Walk up to find repo root (contains pyproject.toml)
+ current = Path(__file__).parent.parent
+ for _ in range(5): # Limit search depth
+ if (current / "pyproject.toml").exists():
+ # Check for demo directories at repo level
+ for demo_name in ["turn-off-nightshift", "demo_new", "demo_capture"]:
+ demo_path = current / demo_name
+ if demo_path.exists() and (demo_path / "capture.db").exists():
+ return current
+ current = current.parent
+    return Path()  # NOTE: Path() is '.', which exists -- callers checking .exists() will scan the CWD
+
+
+def list_examples() -> list[str]:
+ """List available example recording names.
+
+ Checks both bundled examples and external examples (repo root).
+
+ Returns:
+ List of example names that can be loaded with load_example()
+ """
+ examples = set()
+
+ # Check bundled examples
+ bundled_dir = get_examples_dir()
+ if bundled_dir.exists():
+ for path in bundled_dir.iterdir():
+ if path.is_dir() and (path / "capture.db").exists():
+ examples.add(path.name)
+
+ # Check external examples (repo root demos)
+ external_dir = get_external_examples_dir()
+ if external_dir.exists():
+ for path in external_dir.iterdir():
+ if path.is_dir() and (path / "capture.db").exists():
+ # Skip non-demo directories
+ if path.name.startswith(("demo_", "turn-off")):
+ examples.add(path.name)
+
+ return sorted(examples)
+
+
+def get_example_path(name: str) -> Path:
+ """Get the path to a specific example recording.
+
+ Args:
+ name: Example name (e.g., 'turn-off-nightshift')
+
+ Returns:
+ Path to the example directory
+
+ Raises:
+ FileNotFoundError: If example not found
+ """
+ # Check bundled examples first
+ bundled_path = get_examples_dir() / name
+ if bundled_path.exists() and (bundled_path / "capture.db").exists():
+ return bundled_path
+
+ # Check external examples
+ external_path = get_external_examples_dir() / name
+ if external_path.exists() and (external_path / "capture.db").exists():
+ return external_path
+
+ # Not found - provide helpful error
+ available = list_examples()
+ if available:
+ raise FileNotFoundError(
+ f"Example '{name}' not found. Available examples: {available}"
+ )
+ else:
+ raise FileNotFoundError(
+ f"Example '{name}' not found. No examples are currently available. "
+ "Install openadapt-capture with examples or point to a capture directory."
+ )
+
+
+def load_example(name: str = DEFAULT_EXAMPLE) -> "CaptureSession":
+ """Load an example recording as a CaptureSession.
+
+ Args:
+ name: Example name (default: 'turn-off-nightshift')
+
+ Returns:
+ CaptureSession object for the recording
+
+ Raises:
+ FileNotFoundError: If example not found
+
+ Example:
+ >>> capture = load_example('turn-off-nightshift')
+ >>> print(f"Duration: {capture.duration:.1f}s")
+ Duration: 59.5s
+ >>> for action in capture.actions():
+ ... print(f"{action.type}: {action.x}, {action.y}")
+ """
+ from openadapt_capture.capture import CaptureSession
+
+ example_path = get_example_path(name)
+ return CaptureSession.load(example_path)
+
+
+def get_example_info(name: str = DEFAULT_EXAMPLE) -> dict:
+ """Get metadata about an example recording without fully loading it.
+
+ Args:
+ name: Example name
+
+ Returns:
+ Dictionary with recording metadata:
+ - name: Recording name
+ - path: Path to recording directory
+ - has_video: Whether video.mp4 exists
+ - has_audio: Whether audio.flac exists
+ - has_transcript: Whether transcript.json exists
+ - has_screenshots: Whether screenshots/ directory exists
+ - screenshot_count: Number of screenshots
+ """
+ example_path = get_example_path(name)
+
+ screenshots_dir = example_path / "screenshots"
+ screenshot_count = 0
+ if screenshots_dir.exists():
+ screenshot_count = len(list(screenshots_dir.glob("*.png")))
+
+ return {
+ "name": name,
+ "path": str(example_path),
+ "has_video": (example_path / "video.mp4").exists(),
+ "has_audio": (example_path / "audio.flac").exists(),
+ "has_transcript": (example_path / "transcript.json").exists(),
+ "has_screenshots": screenshots_dir.exists() and screenshot_count > 0,
+ "screenshot_count": screenshot_count,
+ }
+
+
+def get_example_transcript(name: str = DEFAULT_EXAMPLE) -> Optional[dict]:
+ """Get the transcript for an example recording.
+
+ Args:
+ name: Example name
+
+ Returns:
+ Transcript dict with 'text' and 'segments' keys, or None if not available
+ """
+ import json
+
+ example_path = get_example_path(name)
+ transcript_path = example_path / "transcript.json"
+
+ if not transcript_path.exists():
+ return None
+
+ with open(transcript_path) as f:
+ return json.load(f)
+
+
+def get_example_screenshots(name: str = DEFAULT_EXAMPLE) -> list[Path]:
+ """Get paths to all screenshots for an example recording.
+
+ Args:
+ name: Example name
+
+ Returns:
+ List of paths to screenshot PNG files, sorted by step number
+ """
+ example_path = get_example_path(name)
+ screenshots_dir = example_path / "screenshots"
+
+ if not screenshots_dir.exists():
+ return []
+
+ return sorted(screenshots_dir.glob("*.png"))
+
+
+def load_example_for_retrieval(name: str = DEFAULT_EXAMPLE) -> dict:
+ """Load example in a format suitable for demo retrieval libraries.
+
+ This returns a dict with fields expected by openadapt-retrieval's
+ MultimodalDemoRetriever.add_demo() method.
+
+ Args:
+ name: Example name
+
+ Returns:
+ Dict with demo_id, task, screenshot, platform, app_name, domain
+ """
+ capture = load_example(name)
+ example_path = get_example_path(name)
+
+ # Get first screenshot
+ screenshots = get_example_screenshots(name)
+ first_screenshot = str(screenshots[0]) if screenshots else None
+
+ # Try to get task description from transcript
+ task = capture.task_description
+ if not task:
+ transcript = get_example_transcript(name)
+ if transcript:
+ task = transcript.get("text", f"Demo: {name}")
+ else:
+ task = f"Demo: {name}"
+
+ # Infer app name from task or name
+ app_name = None
+ if "settings" in name.lower() or "settings" in task.lower():
+ app_name = "System Settings"
+ elif "nightshift" in name.lower() or "night shift" in task.lower():
+ app_name = "System Settings"
+ elif "calculator" in task.lower():
+ app_name = "Calculator"
+
+ return {
+ "demo_id": name,
+ "task": task,
+ "screenshot": first_screenshot,
+ "platform": capture.platform,
+ "app_name": app_name,
+ "domain": None, # Desktop demos don't have domains
+ "metadata": {
+ "duration": capture.duration,
+ "step_count": len(get_example_screenshots(name)),
+ "has_audio": (example_path / "audio.flac").exists(),
+ },
+ }
diff --git a/openadapt_capture/share.py b/openadapt_capture/share.py
index 3cf7285..b461f2f 100644
--- a/openadapt_capture/share.py
+++ b/openadapt_capture/share.py
@@ -87,7 +87,7 @@ def send(recording_dir: str) -> str | None:
try:
# Run wormhole send
- result = subprocess.run(
+ subprocess.run(
["wormhole", "send", str(zip_path)],
check=True,
)
@@ -123,7 +123,7 @@ def receive(code: str, output_dir: str = ".") -> Path | None:
try:
# Run wormhole receive
- result = subprocess.run(
+ subprocess.run(
["wormhole", "receive", "--accept-file", "-o", str(tmpdir), code],
check=True,
)
diff --git a/openadapt_capture/shared_ui/__init__.py b/openadapt_capture/shared_ui/__init__.py
new file mode 100644
index 0000000..0dd3a6b
--- /dev/null
+++ b/openadapt_capture/shared_ui/__init__.py
@@ -0,0 +1,11 @@
+"""Shared UI components for OpenAdapt viewers."""
+
+from openadapt_capture.shared_ui.keyboard_shortcuts import (
+ get_keyboard_shortcuts_css,
+ get_keyboard_shortcuts_js,
+)
+
+__all__ = [
+ "get_keyboard_shortcuts_css",
+ "get_keyboard_shortcuts_js",
+]
diff --git a/openadapt_capture/shared_ui/keyboard_shortcuts.py b/openadapt_capture/shared_ui/keyboard_shortcuts.py
new file mode 100644
index 0000000..16a17f3
--- /dev/null
+++ b/openadapt_capture/shared_ui/keyboard_shortcuts.py
@@ -0,0 +1,337 @@
+"""Shared keyboard shortcuts for all OpenAdapt viewers.
+
+This module provides a unified keyboard shortcut system that ensures
+consistent UX across benchmark, training, and capture viewers.
+"""
+
+from __future__ import annotations
+
+
+def get_keyboard_shortcuts_js() -> str:
+ """Get JavaScript code for unified keyboard shortcuts.
+
+ Returns:
+ JavaScript code that implements standardized keyboard shortcuts.
+ """
+ return """
+// Unified OpenAdapt Keyboard Shortcuts
+// Standard shortcuts for consistent UX across all viewers
+
+const KeyboardShortcuts = {
+ // Shortcut definitions
+ shortcuts: {
+ 'Space': { action: 'togglePlay', description: 'Play/Pause' },
+ 'ArrowLeft': { action: 'prevStep', description: 'Previous step' },
+ 'ArrowRight': { action: 'nextStep', description: 'Next step' },
+ 'Home': { action: 'firstStep', description: 'First step' },
+ 'End': { action: 'lastStep', description: 'Last step' },
+ 'Digit1': { action: 'setSpeed', param: 2000, description: 'Speed 0.5x' },
+ 'Digit2': { action: 'setSpeed', param: 1000, description: 'Speed 1x' },
+ 'Digit3': { action: 'setSpeed', param: 500, description: 'Speed 2x' },
+ 'Digit4': { action: 'setSpeed', param: 250, description: 'Speed 4x' },
+ 'Digit5': { action: 'setSpeed', param: 125, description: 'Speed 8x' },
+ 'Escape': { action: 'closeModals', description: 'Close modals/search' },
+ 'Slash': { action: 'showShortcutsOverlay', description: 'Show shortcuts', modifier: 'shift' }, // Shift+/ = ?
+ 'KeyF': { action: 'focusSearch', description: 'Search', modifier: 'ctrl' },
+ },
+
+ // Viewer-specific actions (override these)
+ actions: {
+ togglePlay: null,
+ prevStep: null,
+ nextStep: null,
+ firstStep: null,
+ lastStep: null,
+ setSpeed: null,
+ closeModals: null,
+ showShortcutsOverlay: null,
+ focusSearch: null,
+ },
+
+ // Initialize keyboard shortcuts
+ init: function(actions) {
+ // Register custom actions
+ Object.assign(this.actions, actions);
+
+ // Set up event listener
+ document.addEventListener('keydown', (e) => this.handleKeydown(e));
+
+ // Create shortcuts overlay
+ this.createShortcutsOverlay();
+ },
+
+ // Handle keydown events
+ handleKeydown: function(e) {
+ // Ignore if typing in input/textarea
+ if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA' || e.target.tagName === 'SELECT') {
+ return;
+ }
+
+ const shortcut = this.shortcuts[e.code];
+ if (!shortcut) return;
+
+ // Check modifier requirements
+ if (shortcut.modifier === 'ctrl' && !e.ctrlKey && !e.metaKey) return;
+ if (shortcut.modifier === 'shift' && !e.shiftKey) return;
+ if (shortcut.modifier === 'alt' && !e.altKey) return;
+
+ e.preventDefault();
+
+ const action = this.actions[shortcut.action];
+ if (action) {
+ if (shortcut.param !== undefined) {
+ action(shortcut.param);
+ } else {
+ action();
+ }
+ }
+ },
+
+ // Create shortcuts overlay
+ createShortcutsOverlay: function() {
+ const overlay = document.createElement('div');
+ overlay.id = 'shortcuts-overlay';
+ overlay.className = 'shortcuts-overlay';
+        overlay.innerHTML = `
+            <div class="shortcuts-panel">
+                <div class="shortcuts-header"><h2>Keyboard Shortcuts</h2>
+                    <button class="shortcuts-close" onclick="KeyboardShortcuts.hideShortcutsOverlay()">&times;</button></div>
+                <div class="shortcuts-content">
+                    ${this.renderShortcutsList()}
+                </div>
+            </div>`;
+ document.body.appendChild(overlay);
+
+ // Click outside to close
+ overlay.addEventListener('click', (e) => {
+ if (e.target === overlay) {
+ this.hideShortcutsOverlay();
+ }
+ });
+ },
+
+ // Render shortcuts list
+ renderShortcutsList: function() {
+ const groups = {
+ 'Playback': ['Space', 'ArrowLeft', 'ArrowRight', 'Home', 'End'],
+ 'Speed': ['Digit1', 'Digit2', 'Digit3', 'Digit4', 'Digit5'],
+ 'Navigation': ['Escape', 'Slash', 'KeyF']
+ };
+
+ let html = '';
+ for (const [group, keys] of Object.entries(groups)) {
+            html += `
+                <div class="shortcuts-group">
+                    <h3>${group}</h3>
+                    <table class="shortcuts-table">
+            `;
+
+ for (const key of keys) {
+ const shortcut = this.shortcuts[key];
+ if (!shortcut) continue;
+
+ const keyDisplay = this.formatKey(key, shortcut.modifier);
+                html += `
+                    <tr>
+                        <td class="shortcut-key">${keyDisplay}</td>
+                        <td class="shortcut-description">${shortcut.description}</td>
+                    </tr>
+                `;
+ }
+
+            html += `
+                </table></div>`;
+ }
+
+ return html;
+ },
+
+ // Format key for display
+ formatKey: function(code, modifier) {
+ const keyMap = {
+ 'Space': '␣ Space',
+ 'ArrowLeft': '← Left',
+ 'ArrowRight': '→ Right',
+ 'Home': '⇱ Home',
+ 'End': '⇲ End',
+ 'Escape': 'Esc',
+ 'Slash': '?',
+ 'KeyF': 'F',
+ 'Digit1': '1',
+ 'Digit2': '2',
+ 'Digit3': '3',
+ 'Digit4': '4',
+ 'Digit5': '5',
+ };
+
+ let key = keyMap[code] || code;
+
+ if (modifier === 'ctrl') {
+ key = (navigator.platform.includes('Mac') ? '⌘' : 'Ctrl+') + key;
+ } else if (modifier === 'shift') {
+ key = '⇧ ' + key;
+ } else if (modifier === 'alt') {
+ key = 'Alt+' + key;
+ }
+
+ return key;
+ },
+
+ // Show shortcuts overlay
+ showShortcutsOverlay: function() {
+ const overlay = document.getElementById('shortcuts-overlay');
+ if (overlay) {
+ overlay.classList.add('active');
+ }
+ },
+
+ // Hide shortcuts overlay
+ hideShortcutsOverlay: function() {
+ const overlay = document.getElementById('shortcuts-overlay');
+ if (overlay) {
+ overlay.classList.remove('active');
+ }
+ }
+};
+""".strip()
+
+
+def get_keyboard_shortcuts_css() -> str:
+ """Get CSS for keyboard shortcuts overlay.
+
+ Returns:
+ CSS code for the shortcuts overlay panel.
+ """
+ return """
+/* Keyboard Shortcuts Overlay */
+.shortcuts-overlay {
+ display: none;
+ position: fixed;
+ top: 0;
+ left: 0;
+ right: 0;
+ bottom: 0;
+ background: rgba(0, 0, 0, 0.75);
+ backdrop-filter: blur(4px);
+ z-index: 9999;
+ align-items: center;
+ justify-content: center;
+}
+
+.shortcuts-overlay.active {
+ display: flex;
+}
+
+.shortcuts-panel {
+ background: var(--bg-secondary, #12121a);
+ border: 1px solid var(--border-color, rgba(255, 255, 255, 0.06));
+ border-radius: 12px;
+ max-width: 600px;
+ width: 90%;
+ max-height: 80vh;
+ overflow-y: auto;
+ box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
+}
+
+.shortcuts-header {
+ display: flex;
+ justify-content: space-between;
+ align-items: center;
+ padding: 20px 24px;
+ border-bottom: 1px solid var(--border-color, rgba(255, 255, 255, 0.06));
+}
+
+.shortcuts-header h2 {
+ font-size: 1.2rem;
+ font-weight: 600;
+ color: var(--text-primary, #f0f0f0);
+ margin: 0;
+}
+
+.shortcuts-close {
+ background: var(--bg-tertiary, #1a1a24);
+ border: 1px solid var(--border-color, rgba(255, 255, 255, 0.06));
+ color: var(--text-secondary, #888);
+ width: 32px;
+ height: 32px;
+ border-radius: 50%;
+ cursor: pointer;
+ font-size: 1.5rem;
+ line-height: 1;
+ transition: all 0.15s ease;
+}
+
+.shortcuts-close:hover {
+ background: var(--bg-primary, #0a0a0f);
+ color: var(--text-primary, #f0f0f0);
+ border-color: rgba(255, 255, 255, 0.12);
+}
+
+.shortcuts-content {
+ padding: 24px;
+}
+
+.shortcuts-group {
+ margin-bottom: 24px;
+}
+
+.shortcuts-group:last-child {
+ margin-bottom: 0;
+}
+
+.shortcuts-group h3 {
+ font-size: 0.85rem;
+ font-weight: 600;
+ color: var(--text-muted, #555);
+ text-transform: uppercase;
+ letter-spacing: 0.05em;
+ margin-bottom: 12px;
+}
+
+.shortcuts-table {
+ width: 100%;
+ border-collapse: collapse;
+}
+
+.shortcuts-table tr {
+ border-bottom: 1px solid var(--border-color, rgba(255, 255, 255, 0.04));
+}
+
+.shortcuts-table tr:last-child {
+ border-bottom: none;
+}
+
+.shortcut-key {
+ padding: 10px 12px;
+ font-family: "SF Mono", Monaco, "Cascadia Code", monospace;
+ font-size: 0.85rem;
+ font-weight: 600;
+ color: var(--accent, #00d4aa);
+ white-space: nowrap;
+ width: 140px;
+}
+
+.shortcut-description {
+ padding: 10px 12px;
+ font-size: 0.85rem;
+ color: var(--text-secondary, #888);
+}
+
+/* Keyboard hint in footer */
+.keyboard-hint {
+ text-align: center;
+ padding: 16px;
+ color: var(--text-muted, #555);
+ font-size: 0.75rem;
+ letter-spacing: 0.02em;
+}
+
+.keyboard-hint a {
+ color: var(--accent, #00d4aa);
+ text-decoration: none;
+ cursor: pointer;
+}
+
+.keyboard-hint a:hover {
+ text-decoration: underline;
+}
+""".strip()
diff --git a/openadapt_capture/storage/__init__.py b/openadapt_capture/storage/__init__.py
new file mode 100644
index 0000000..fc6a62d
--- /dev/null
+++ b/openadapt_capture/storage/__init__.py
@@ -0,0 +1,63 @@
+"""Storage implementations for capture data persistence.
+
+This module provides storage backends for persisting captured GUI events.
+The primary implementation uses SQLite for reliable, portable storage.
+
+Usage:
+ from openadapt_capture.storage import SQLiteStorage
+
+ # Create storage
+ storage = SQLiteStorage("./capture/capture.db")
+
+ # Write events
+ storage.write_event(event)
+
+ # Query events
+ events = storage.get_events(start_time=0.0, end_time=100.0)
+
+ storage.close()
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+# Import SQLite-specific implementation
+from openadapt_capture.storage.sqlite import SQLiteStorage
+
+# Re-export from the original storage module for backward compatibility
+from openadapt_capture.storage_impl import (
+ Capture,
+ CaptureStorage,
+ Stream,
+ create_capture,
+ load_capture,
+)
+
+
+def get_storage(db_path: str | Path) -> CaptureStorage:
+ """Get a storage instance for the given database path.
+
+ This is a convenience function that creates a CaptureStorage instance.
+
+ Args:
+ db_path: Path to the SQLite database file.
+
+ Returns:
+ CaptureStorage instance.
+ """
+ return CaptureStorage(db_path)
+
+
+__all__ = [
+ # Storage classes
+ "CaptureStorage",
+ "SQLiteStorage",
+ # Data models
+ "Capture",
+ "Stream",
+ # Convenience functions
+ "create_capture",
+ "load_capture",
+ "get_storage",
+]
diff --git a/openadapt_capture/storage/sqlite.py b/openadapt_capture/storage/sqlite.py
new file mode 100644
index 0000000..ab36af9
--- /dev/null
+++ b/openadapt_capture/storage/sqlite.py
@@ -0,0 +1,534 @@
+"""SQLite storage backend for capture events.
+
+This module provides a SQLite-based storage implementation that wraps the
+existing CaptureStorage class with a more explicit interface.
+"""
+
+from __future__ import annotations
+
+import json
+import sqlite3
+import threading
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Iterator
+
+from openadapt_capture.events import (
+ AudioChunkEvent,
+ Event,
+ EventType,
+ KeyDownEvent,
+ KeyTypeEvent,
+ KeyUpEvent,
+ MouseClickEvent,
+ MouseDoubleClickEvent,
+ MouseDownEvent,
+ MouseDragEvent,
+ MouseMoveEvent,
+ MouseScrollEvent,
+ MouseUpEvent,
+ ScreenFrameEvent,
+)
+
+if TYPE_CHECKING:
+ from openadapt_capture.storage_impl import Capture
+
+
+# Event type to class mapping
+EVENT_TYPE_MAP: dict[str, type[Event]] = {
+ EventType.MOUSE_MOVE.value: MouseMoveEvent,
+ EventType.MOUSE_DOWN.value: MouseDownEvent,
+ EventType.MOUSE_UP.value: MouseUpEvent,
+ EventType.MOUSE_SCROLL.value: MouseScrollEvent,
+ EventType.KEY_DOWN.value: KeyDownEvent,
+ EventType.KEY_UP.value: KeyUpEvent,
+ EventType.SCREEN_FRAME.value: ScreenFrameEvent,
+ EventType.AUDIO_CHUNK.value: AudioChunkEvent,
+ EventType.MOUSE_SINGLECLICK.value: MouseClickEvent,
+ EventType.MOUSE_DOUBLECLICK.value: MouseDoubleClickEvent,
+ EventType.MOUSE_DRAG.value: MouseDragEvent,
+ EventType.KEY_TYPE.value: KeyTypeEvent,
+}
+
+
+class SQLiteStorage:
+ """SQLite-based storage for capture events.
+
+ Provides efficient storage and retrieval of events with support for:
+ - Streaming writes (events written immediately to disk)
+ - Querying by timestamp range and event type
+ - Parent-child event relationships (for merged events)
+ - Thread-safe operations
+
+ This is a standalone implementation that can be used independently
+ of the existing CaptureStorage class.
+
+ Usage:
+ storage = SQLiteStorage("capture.db")
+
+ # Initialize schema
+ storage.init_schema()
+
+ # Write events
+ storage.write_event(event)
+
+ # Query events
+ events = storage.get_events(start_time=0.0, end_time=100.0)
+
+ # Iterate over events efficiently
+ for event in storage.iter_events():
+ process(event)
+
+ storage.close()
+ """
+
+ # SQL schema
+ CREATE_EVENTS_TABLE = """
+ CREATE TABLE IF NOT EXISTS events (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ timestamp REAL NOT NULL,
+ type TEXT NOT NULL,
+ data JSON NOT NULL,
+ parent_id INTEGER,
+ FOREIGN KEY (parent_id) REFERENCES events(id)
+ )
+ """
+
+ CREATE_EVENTS_INDEX = """
+ CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp)
+ """
+
+ CREATE_EVENTS_TYPE_INDEX = """
+ CREATE INDEX IF NOT EXISTS idx_events_type ON events(type)
+ """
+
+ CREATE_CAPTURE_TABLE = """
+ CREATE TABLE IF NOT EXISTS capture (
+ id TEXT PRIMARY KEY,
+ started_at REAL NOT NULL,
+ ended_at REAL,
+ platform TEXT NOT NULL,
+ screen_width INTEGER NOT NULL,
+ screen_height INTEGER NOT NULL,
+ pixel_ratio REAL DEFAULT 1.0,
+ task_description TEXT,
+ double_click_interval_seconds REAL,
+ double_click_distance_pixels REAL,
+ video_start_time REAL,
+ audio_start_time REAL,
+ metadata JSON
+ )
+ """
+
+ def __init__(self, db_path: str | Path, auto_init: bool = True) -> None:
+ """Initialize SQLite storage.
+
+ Args:
+ db_path: Path to SQLite database file. Created if doesn't exist.
+ auto_init: Whether to automatically initialize the schema.
+ """
+ self.db_path = Path(db_path)
+ self._conn: sqlite3.Connection | None = None
+ self._lock = threading.Lock()
+
+ if auto_init:
+ self.init_schema()
+
+ @property
+ def is_open(self) -> bool:
+ """Check if database connection is open."""
+ return self._conn is not None
+
+ @property
+ def conn(self) -> sqlite3.Connection:
+ """Get or create database connection."""
+ if self._conn is None:
+ self._conn = sqlite3.connect(
+ str(self.db_path),
+ check_same_thread=False,
+ )
+ self._conn.row_factory = sqlite3.Row
+ return self._conn
+
+ def init_schema(self) -> None:
+ """Initialize database schema."""
+ cursor = self.conn.cursor()
+ cursor.execute(self.CREATE_CAPTURE_TABLE)
+ cursor.execute(self.CREATE_EVENTS_TABLE)
+ cursor.execute(self.CREATE_EVENTS_INDEX)
+ cursor.execute(self.CREATE_EVENTS_TYPE_INDEX)
+ self.conn.commit()
+
+ def close(self) -> None:
+ """Close database connection."""
+ if self._conn is not None:
+ self._conn.close()
+ self._conn = None
+
+ def __enter__(self) -> "SQLiteStorage":
+ """Context manager entry."""
+ return self
+
+ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+ """Context manager exit."""
+ self.close()
+
+ # -------------------------------------------------------------------------
+ # Capture metadata methods
+ # -------------------------------------------------------------------------
+
+ def save_capture(self, capture: "Capture") -> None:
+ """Save capture metadata.
+
+ Args:
+ capture: Capture metadata to store.
+ """
+ cursor = self.conn.cursor()
+
+ # Check if capture exists
+ cursor.execute("SELECT id FROM capture WHERE id = ?", (capture.id,))
+ exists = cursor.fetchone() is not None
+
+ if exists:
+ # Update existing
+ cursor.execute(
+ """
+ UPDATE capture SET
+ ended_at = ?,
+ task_description = ?,
+ video_start_time = ?,
+ audio_start_time = ?,
+ metadata = ?
+ WHERE id = ?
+ """,
+ (
+ capture.ended_at,
+ capture.task_description,
+ capture.video_start_time,
+ capture.audio_start_time,
+ json.dumps(capture.metadata),
+ capture.id,
+ ),
+ )
+ else:
+ # Insert new
+ cursor.execute(
+ """
+ INSERT INTO capture (
+ id, started_at, ended_at, platform, screen_width, screen_height,
+ pixel_ratio, task_description, double_click_interval_seconds,
+ double_click_distance_pixels, video_start_time, audio_start_time, metadata
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+ """,
+ (
+ capture.id,
+ capture.started_at,
+ capture.ended_at,
+ capture.platform,
+ capture.screen_width,
+ capture.screen_height,
+ capture.pixel_ratio,
+ capture.task_description,
+ capture.double_click_interval_seconds,
+ capture.double_click_distance_pixels,
+ capture.video_start_time,
+ capture.audio_start_time,
+ json.dumps(capture.metadata),
+ ),
+ )
+ self.conn.commit()
+
+ def load_capture(self) -> "Capture | None":
+ """Load capture metadata.
+
+ Returns:
+ Capture object or None if not found.
+ """
+ from openadapt_capture.storage_impl import Capture
+
+ cursor = self.conn.cursor()
+ cursor.execute("SELECT * FROM capture ORDER BY started_at DESC LIMIT 1")
+ row = cursor.fetchone()
+
+ if row is None:
+ return None
+
+ return Capture(
+ id=row["id"],
+ started_at=row["started_at"],
+ ended_at=row["ended_at"],
+ platform=row["platform"],
+ screen_width=row["screen_width"],
+ screen_height=row["screen_height"],
+ pixel_ratio=row["pixel_ratio"] if "pixel_ratio" in row.keys() else 1.0,
+ task_description=row["task_description"],
+ double_click_interval_seconds=row["double_click_interval_seconds"],
+ double_click_distance_pixels=row["double_click_distance_pixels"],
+ video_start_time=row["video_start_time"],
+ audio_start_time=row["audio_start_time"] if "audio_start_time" in row.keys() else None,
+ metadata=json.loads(row["metadata"]) if row["metadata"] else {},
+ )
+
+ # -------------------------------------------------------------------------
+ # Event methods
+ # -------------------------------------------------------------------------
+
+ def write_event(self, event: Event, parent_id: int | None = None) -> int:
+ """Write a single event to storage.
+
+ Thread-safe: uses locking for concurrent access.
+
+ Args:
+ event: Event to write.
+ parent_id: Optional parent event ID for merged events.
+
+ Returns:
+ ID of the inserted event.
+ """
+ with self._lock:
+ cursor = self.conn.cursor()
+ event_dict = event.model_dump(
+ exclude={"children"} if hasattr(event, "children") else None
+ )
+ cursor.execute(
+ "INSERT INTO events (timestamp, type, data, parent_id) VALUES (?, ?, ?, ?)",
+ (
+ event.timestamp,
+ event.type if isinstance(event.type, str) else event.type.value,
+ json.dumps(event_dict),
+ parent_id,
+ ),
+ )
+ event_id = cursor.lastrowid
+ self.conn.commit()
+
+ # Write children if present
+ if hasattr(event, "children") and event.children:
+ for child in event.children:
+ self.write_event(child, parent_id=event_id)
+
+ return event_id
+
+ def write_events(self, events: list[Event]) -> list[int]:
+ """Write multiple events in a single transaction.
+
+ Args:
+ events: List of events to write.
+
+ Returns:
+ List of inserted event IDs.
+ """
+ event_ids = []
+ with self._lock:
+ cursor = self.conn.cursor()
+ for event in events:
+ event_dict = event.model_dump(
+ exclude={"children"} if hasattr(event, "children") else None
+ )
+ cursor.execute(
+ "INSERT INTO events (timestamp, type, data, parent_id) VALUES (?, ?, ?, ?)",
+ (
+ event.timestamp,
+ event.type if isinstance(event.type, str) else event.type.value,
+ json.dumps(event_dict),
+ None,
+ ),
+ )
+ event_id = cursor.lastrowid
+ event_ids.append(event_id)
+
+ # Write children
+ if hasattr(event, "children") and event.children:
+ for child in event.children:
+ child_dict = child.model_dump(
+ exclude={"children"} if hasattr(child, "children") else None
+ )
+ cursor.execute(
+ "INSERT INTO events (timestamp, type, data, parent_id) VALUES (?, ?, ?, ?)",
+ (
+ child.timestamp,
+ child.type if isinstance(child.type, str) else child.type.value,
+ json.dumps(child_dict),
+ event_id,
+ ),
+ )
+ self.conn.commit()
+ return event_ids
+
+ def get_events(
+ self,
+ start_time: float | None = None,
+ end_time: float | None = None,
+ event_types: list[EventType | str] | None = None,
+ include_children: bool = False,
+ limit: int | None = None,
+ ) -> list[Event]:
+ """Query events from storage.
+
+ Args:
+ start_time: Minimum timestamp (inclusive).
+ end_time: Maximum timestamp (inclusive).
+ event_types: Filter by event types.
+ include_children: Whether to include child events.
+ limit: Maximum number of events to return.
+
+ Returns:
+ List of events matching the query.
+ """
+ cursor = self.conn.cursor()
+
+ conditions = []
+ params: list[Any] = []
+
+ if not include_children:
+ conditions.append("parent_id IS NULL")
+
+ if start_time is not None:
+ conditions.append("timestamp >= ?")
+ params.append(start_time)
+
+ if end_time is not None:
+ conditions.append("timestamp <= ?")
+ params.append(end_time)
+
+ if event_types:
+ placeholders = ",".join("?" for _ in event_types)
+ conditions.append(f"type IN ({placeholders})")
+ params.extend(
+ t.value if isinstance(t, EventType) else t for t in event_types
+ )
+
+ where_clause = " AND ".join(conditions) if conditions else "1=1"
+ query = f"SELECT * FROM events WHERE {where_clause} ORDER BY timestamp"
+
+ if limit:
+ query += f" LIMIT {limit}"
+
+ cursor.execute(query, params)
+ rows = cursor.fetchall()
+
+ events = []
+ for row in rows:
+ event = self._deserialize_event(row)
+ if event is not None:
+ events.append(event)
+
+ return events
+
+ def _deserialize_event(self, row: sqlite3.Row) -> Event | None:
+ """Deserialize an event from a database row."""
+ event_type = row["type"]
+ event_data = json.loads(row["data"])
+
+ event_class = EVENT_TYPE_MAP.get(event_type)
+ if event_class is None:
+ return None
+
+ return event_class(**event_data)
+
+ def get_event_count(self, event_type: EventType | str | None = None) -> int:
+ """Get count of events in storage.
+
+ Args:
+ event_type: Optional filter by event type.
+
+ Returns:
+ Number of events.
+ """
+ cursor = self.conn.cursor()
+ if event_type is not None:
+ type_value = event_type.value if isinstance(event_type, EventType) else event_type
+ cursor.execute(
+ "SELECT COUNT(*) FROM events WHERE type = ? AND parent_id IS NULL",
+ (type_value,),
+ )
+ else:
+ cursor.execute("SELECT COUNT(*) FROM events WHERE parent_id IS NULL")
+ return cursor.fetchone()[0]
+
+ def iter_events(
+ self,
+ batch_size: int = 1000,
+ event_types: list[EventType | str] | None = None,
+ ) -> Iterator[Event]:
+ """Iterate over events in batches for memory efficiency.
+
+ Args:
+ batch_size: Number of events per batch.
+ event_types: Filter by event types.
+
+ Yields:
+ Events one at a time.
+ """
+ cursor = self.conn.cursor()
+
+ conditions = ["parent_id IS NULL"]
+ params: list[Any] = []
+
+ if event_types:
+ placeholders = ",".join("?" for _ in event_types)
+ conditions.append(f"type IN ({placeholders})")
+ params.extend(
+ t.value if isinstance(t, EventType) else t for t in event_types
+ )
+
+ where_clause = " AND ".join(conditions)
+ query = f"SELECT * FROM events WHERE {where_clause} ORDER BY timestamp"
+
+ cursor.execute(query, params)
+
+ while True:
+ rows = cursor.fetchmany(batch_size)
+ if not rows:
+ break
+ for row in rows:
+ event = self._deserialize_event(row)
+ if event is not None:
+ yield event
+
+ def delete_events(
+ self,
+ start_time: float | None = None,
+ end_time: float | None = None,
+ event_types: list[EventType | str] | None = None,
+ ) -> int:
+ """Delete events from storage.
+
+ Args:
+ start_time: Minimum timestamp (inclusive).
+ end_time: Maximum timestamp (inclusive).
+ event_types: Filter by event types.
+
+ Returns:
+ Number of deleted events.
+ """
+ cursor = self.conn.cursor()
+
+ conditions = []
+ params: list[Any] = []
+
+ if start_time is not None:
+ conditions.append("timestamp >= ?")
+ params.append(start_time)
+
+ if end_time is not None:
+ conditions.append("timestamp <= ?")
+ params.append(end_time)
+
+ if event_types:
+ placeholders = ",".join("?" for _ in event_types)
+ conditions.append(f"type IN ({placeholders})")
+ params.extend(
+ t.value if isinstance(t, EventType) else t for t in event_types
+ )
+
+ where_clause = " AND ".join(conditions) if conditions else "1=1"
+ query = f"DELETE FROM events WHERE {where_clause}"
+
+ cursor.execute(query, params)
+ deleted = cursor.rowcount
+ self.conn.commit()
+
+ return deleted
+
+
+__all__ = ["SQLiteStorage", "EVENT_TYPE_MAP"]
diff --git a/openadapt_capture/storage_impl.py b/openadapt_capture/storage_impl.py
new file mode 100644
index 0000000..3aa1208
--- /dev/null
+++ b/openadapt_capture/storage_impl.py
@@ -0,0 +1,617 @@
+"""SQLite storage for capture events.
+
+This module provides a simple SQLite-based storage system for capture events,
+following OpenAdapt's approach but using Pydantic for serialization.
+"""
+
+from __future__ import annotations
+
+import json
+import sqlite3
+import time
+from pathlib import Path
+from typing import Any, Iterator
+
+from pydantic import BaseModel, Field
+
+from openadapt_capture.events import (
+ AudioChunkEvent,
+ Event,
+ EventType,
+ KeyDownEvent,
+ KeyTypeEvent,
+ KeyUpEvent,
+ MouseClickEvent,
+ MouseDoubleClickEvent,
+ MouseDownEvent,
+ MouseDragEvent,
+ MouseMoveEvent,
+ MouseScrollEvent,
+ MouseUpEvent,
+ ScreenFrameEvent,
+)
+
+# =============================================================================
+# Capture and Stream Models
+# =============================================================================
+
+
class Stream(BaseModel):
    """A time-ordered sequence of events of a single type.

    Streams organize events by category: action (input), screen, or audio.
    """

    id: str = Field(description="Unique stream identifier")
    # NOTE(review): stream_type is a free-form string, not an enum — values
    # outside 'action' | 'screen' | 'audio' are not rejected by this model.
    stream_type: str = Field(description="Stream type: 'action' | 'screen' | 'audio'")
    events: list[Event] = Field(default_factory=list, description="Time-ordered events")
+
+
class Capture(BaseModel):
    """A complete capture session containing multiple streams.

    The Capture is the top-level container for a recording session,
    containing action events, screen frames, and optionally audio.
    All timestamps are Unix epoch seconds (floats).
    """

    id: str = Field(description="Unique capture identifier")
    started_at: float = Field(description="Unix timestamp when capture started")
    ended_at: float | None = Field(default=None, description="Unix timestamp when capture ended")
    platform: str = Field(description="Platform identifier: 'darwin' | 'win32' | 'linux'")
    screen_width: int = Field(description="Screen width in physical pixels")
    screen_height: int = Field(description="Screen height in physical pixels")
    pixel_ratio: float = Field(
        default=1.0,
        description="Display pixel ratio (physical/logical), e.g., 2.0 for Retina"
    )
    task_description: str | None = Field(default=None, description="User-provided task description")
    double_click_interval_seconds: float = Field(
        default=0.5, description="System double-click interval"
    )
    double_click_distance_pixels: float = Field(
        default=5.0, description="System double-click distance threshold"
    )
    video_start_time: float | None = Field(
        default=None, description="Start timestamp of video recording"
    )
    audio_start_time: float | None = Field(
        default=None, description="Start timestamp of audio recording"
    )
    metadata: dict[str, Any] = Field(default_factory=dict, description="Additional metadata")

    # Serialize enum fields by value so dumps are plain JSON-compatible data.
    model_config = {"use_enum_values": True}
+
+
+# =============================================================================
+# SQLite Storage
+# =============================================================================
+
+# SQL schema for events table
# SQL schema for events table.
# Each row stores one serialized event; `data` holds the full Pydantic dump
# as JSON text (SQLite stores JSON with TEXT affinity).
# NOTE(review): the parent_id foreign key is only enforced if the connection
# runs `PRAGMA foreign_keys = ON`; SQLite leaves FK enforcement off by default.
CREATE_EVENTS_TABLE = """
CREATE TABLE IF NOT EXISTS events (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    timestamp REAL NOT NULL,
    type TEXT NOT NULL,
    data JSON NOT NULL,
    parent_id INTEGER,
    FOREIGN KEY (parent_id) REFERENCES events(id)
)
"""

# Index supporting timestamp-range queries (get_events start/end filters).
CREATE_EVENTS_INDEX = """
CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp)
"""

# Index supporting type-filtered queries and get_event_count.
CREATE_EVENTS_TYPE_INDEX = """
CREATE INDEX IF NOT EXISTS idx_events_type ON events(type)
"""

# SQL schema for capture metadata (one row per capture session).
CREATE_CAPTURE_TABLE = """
CREATE TABLE IF NOT EXISTS capture (
    id TEXT PRIMARY KEY,
    started_at REAL NOT NULL,
    ended_at REAL,
    platform TEXT NOT NULL,
    screen_width INTEGER NOT NULL,
    screen_height INTEGER NOT NULL,
    pixel_ratio REAL DEFAULT 1.0,
    task_description TEXT,
    double_click_interval_seconds REAL,
    double_click_distance_pixels REAL,
    video_start_time REAL,
    audio_start_time REAL,
    metadata JSON
)
"""


# Event type to class mapping: used by _deserialize_event to rebuild the
# concrete event class from the stored `type` column. Types absent from this
# map are silently skipped on read.
EVENT_TYPE_MAP: dict[str, type[Event]] = {
    EventType.MOUSE_MOVE.value: MouseMoveEvent,
    EventType.MOUSE_DOWN.value: MouseDownEvent,
    EventType.MOUSE_UP.value: MouseUpEvent,
    EventType.MOUSE_SCROLL.value: MouseScrollEvent,
    EventType.KEY_DOWN.value: KeyDownEvent,
    EventType.KEY_UP.value: KeyUpEvent,
    EventType.SCREEN_FRAME.value: ScreenFrameEvent,
    EventType.AUDIO_CHUNK.value: AudioChunkEvent,
    EventType.MOUSE_SINGLECLICK.value: MouseClickEvent,
    EventType.MOUSE_DOUBLECLICK.value: MouseDoubleClickEvent,
    EventType.MOUSE_DRAG.value: MouseDragEvent,
    EventType.KEY_TYPE.value: KeyTypeEvent,
}
+
+
class CaptureStorage:
    """SQLite-based storage for capture events.

    Provides efficient storage and retrieval of events with support for:
    - Streaming writes (events written immediately to disk)
    - Querying by timestamp range and event type
    - Parent-child event relationships (for merged events)

    Thread-safety: the connection is opened with check_same_thread=False and
    write_event serializes access with an internal lock; write_events and the
    capture-metadata methods do not take the lock, so concurrent use of those
    from multiple threads relies on SQLite's own serialization.

    Usage:
        storage = CaptureStorage("capture.db")
        storage.init_capture(capture)

        # Write events as they come in
        storage.write_event(event)

        # Query events
        events = storage.get_events(start_time=0.0, end_time=100.0)

        storage.close()
    """

    def __init__(self, db_path: str | Path) -> None:
        """Initialize storage with database path.

        The connection itself is opened lazily on first access to `conn`.

        Args:
            db_path: Path to SQLite database file. Created if doesn't exist.
        """
        import threading
        self.db_path = Path(db_path)
        self._conn: sqlite3.Connection | None = None
        # Non-reentrant lock guarding write_event's insert+commit sequence.
        self._lock = threading.Lock()

    @property
    def is_open(self) -> bool:
        """Check if database connection is open."""
        return self._conn is not None

    @property
    def conn(self) -> sqlite3.Connection:
        """Get or create database connection.

        Lazily connects and initializes the schema on first use. Rows are
        returned as sqlite3.Row so columns can be accessed by name.
        """
        if self._conn is None:
            self._conn = sqlite3.connect(
                str(self.db_path),
                check_same_thread=False,  # Allow multi-threaded access
            )
            self._conn.row_factory = sqlite3.Row
            self._init_schema()
        return self._conn

    def _init_schema(self) -> None:
        """Initialize database schema (idempotent: CREATE ... IF NOT EXISTS)."""
        cursor = self.conn.cursor()
        cursor.execute(CREATE_CAPTURE_TABLE)
        cursor.execute(CREATE_EVENTS_TABLE)
        cursor.execute(CREATE_EVENTS_INDEX)
        cursor.execute(CREATE_EVENTS_TYPE_INDEX)
        self.conn.commit()

    def close(self) -> None:
        """Close database connection.

        Safe to call multiple times; a later `conn` access reopens.
        """
        if self._conn is not None:
            self._conn.close()
            self._conn = None

    def __enter__(self) -> "CaptureStorage":
        """Context manager entry."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Context manager exit."""
        self.close()

    # -------------------------------------------------------------------------
    # Capture methods
    # -------------------------------------------------------------------------

    def init_capture(self, capture: Capture) -> None:
        """Initialize a new capture session.

        Inserts a new row; `metadata` is serialized to JSON text.

        Args:
            capture: Capture metadata to store.
        """
        cursor = self.conn.cursor()
        cursor.execute(
            """
            INSERT INTO capture (
                id, started_at, ended_at, platform, screen_width, screen_height,
                pixel_ratio, task_description, double_click_interval_seconds,
                double_click_distance_pixels, video_start_time, audio_start_time, metadata
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                capture.id,
                capture.started_at,
                capture.ended_at,
                capture.platform,
                capture.screen_width,
                capture.screen_height,
                capture.pixel_ratio,
                capture.task_description,
                capture.double_click_interval_seconds,
                capture.double_click_distance_pixels,
                capture.video_start_time,
                capture.audio_start_time,
                json.dumps(capture.metadata),
            ),
        )
        self.conn.commit()

    def update_capture(self, capture: Capture) -> None:
        """Update capture metadata (e.g., when capture ends).

        Only the mutable fields are updated: ended_at, task_description,
        video_start_time, audio_start_time, and metadata.

        Args:
            capture: Updated capture metadata.
        """
        cursor = self.conn.cursor()
        cursor.execute(
            """
            UPDATE capture SET
                ended_at = ?,
                task_description = ?,
                video_start_time = ?,
                audio_start_time = ?,
                metadata = ?
            WHERE id = ?
            """,
            (
                capture.ended_at,
                capture.task_description,
                capture.video_start_time,
                capture.audio_start_time,
                json.dumps(capture.metadata),
                capture.id,
            ),
        )
        self.conn.commit()

    def get_capture(self) -> Capture | None:
        """Get capture metadata.

        Returns:
            Capture object or None if not initialized.
        """
        cursor = self.conn.cursor()
        # Get most recent capture (by started_at) to handle reused directories
        cursor.execute("SELECT * FROM capture ORDER BY started_at DESC LIMIT 1")
        row = cursor.fetchone()
        if row is None:
            return None
        return Capture(
            id=row["id"],
            started_at=row["started_at"],
            ended_at=row["ended_at"],
            platform=row["platform"],
            screen_width=row["screen_width"],
            screen_height=row["screen_height"],
            # keys() guards tolerate databases created before these columns existed
            pixel_ratio=row["pixel_ratio"] if "pixel_ratio" in row.keys() else 1.0,
            task_description=row["task_description"],
            double_click_interval_seconds=row["double_click_interval_seconds"],
            double_click_distance_pixels=row["double_click_distance_pixels"],
            video_start_time=row["video_start_time"],
            audio_start_time=row["audio_start_time"] if "audio_start_time" in row.keys() else None,
            metadata=json.loads(row["metadata"]) if row["metadata"] else {},
        )

    # -------------------------------------------------------------------------
    # Event methods
    # -------------------------------------------------------------------------

    def write_event(self, event: Event, parent_id: int | None = None) -> int:
        """Write a single event to storage.

        Thread-safe: uses locking for concurrent access from multiple threads.
        Children (if any) are written recursively after the lock is released,
        since the lock is non-reentrant.

        Args:
            event: Event to write.
            parent_id: Optional parent event ID for merged events.

        Returns:
            ID of the inserted event.
        """
        with self._lock:
            cursor = self.conn.cursor()
            # Serialize event to JSON, excluding children (stored separately)
            event_dict = event.model_dump(exclude={"children"} if hasattr(event, "children") else None)
            cursor.execute(
                "INSERT INTO events (timestamp, type, data, parent_id) VALUES (?, ?, ?, ?)",
                (
                    event.timestamp,
                    event.type if isinstance(event.type, str) else event.type.value,
                    json.dumps(event_dict),
                    parent_id,
                ),
            )
            event_id = cursor.lastrowid
            self.conn.commit()

        # Write children if present (outside lock to avoid deadlock with recursive calls)
        if hasattr(event, "children") and event.children:
            for child in event.children:
                self.write_event(child, parent_id=event_id)

        return event_id

    def write_events(self, events: list[Event]) -> None:
        """Write multiple events to storage in a single transaction.

        Children are written inline with their parent's row id; exactly one
        commit happens at the end. NOTE(review): unlike write_event, this does
        not take the internal lock.

        Args:
            events: List of events to write.
        """
        cursor = self.conn.cursor()
        for event in events:
            event_dict = event.model_dump(
                exclude={"children"} if hasattr(event, "children") else None
            )
            cursor.execute(
                "INSERT INTO events (timestamp, type, data, parent_id) VALUES (?, ?, ?, ?)",
                (
                    event.timestamp,
                    event.type if isinstance(event.type, str) else event.type.value,
                    json.dumps(event_dict),
                    None,
                ),
            )
            event_id = cursor.lastrowid

            # Write children if present
            if hasattr(event, "children") and event.children:
                for child in event.children:
                    child_dict = child.model_dump(
                        exclude={"children"} if hasattr(child, "children") else None
                    )
                    cursor.execute(
                        "INSERT INTO events (timestamp, type, data, parent_id) VALUES (?, ?, ?, ?)",
                        (
                            child.timestamp,
                            child.type if isinstance(child.type, str) else child.type.value,
                            json.dumps(child_dict),
                            event_id,
                        ),
                    )
        self.conn.commit()

    def get_events(
        self,
        start_time: float | None = None,
        end_time: float | None = None,
        event_types: list[EventType | str] | None = None,
        include_children: bool = False,
    ) -> list[Event]:
        """Query events from storage.

        The WHERE clause is assembled from fixed fragments; all user-supplied
        values are passed as bound parameters (no SQL injection surface).

        Args:
            start_time: Minimum timestamp (inclusive).
            end_time: Maximum timestamp (inclusive).
            event_types: Filter by event types.
            include_children: Whether to include child events (for merged events).

        Returns:
            List of events matching the query, ordered by timestamp.
        """
        cursor = self.conn.cursor()

        # Build query
        conditions = []
        params: list[Any] = []

        if not include_children:
            conditions.append("parent_id IS NULL")

        if start_time is not None:
            conditions.append("timestamp >= ?")
            params.append(start_time)

        if end_time is not None:
            conditions.append("timestamp <= ?")
            params.append(end_time)

        if event_types:
            placeholders = ",".join("?" for _ in event_types)
            conditions.append(f"type IN ({placeholders})")
            params.extend(
                t.value if isinstance(t, EventType) else t for t in event_types
            )

        where_clause = " AND ".join(conditions) if conditions else "1=1"
        query = f"SELECT * FROM events WHERE {where_clause} ORDER BY timestamp"

        cursor.execute(query, params)
        rows = cursor.fetchall()

        # Deserialize events; rows with unknown types are dropped silently
        events = []
        for row in rows:
            event = self._deserialize_event(row)
            if event is not None:
                events.append(event)

        return events

    def _deserialize_event(self, row: sqlite3.Row) -> Event | None:
        """Deserialize an event from a database row.

        Args:
            row: Database row.

        Returns:
            Deserialized event or None if type unknown.
        """
        event_type = row["type"]
        event_data = json.loads(row["data"])

        event_class = EVENT_TYPE_MAP.get(event_type)
        if event_class is None:
            return None

        return event_class(**event_data)

    def get_event_count(self, event_type: EventType | str | None = None) -> int:
        """Get count of events in storage.

        Only top-level events are counted (parent_id IS NULL).

        Args:
            event_type: Optional filter by event type.

        Returns:
            Number of events.
        """
        cursor = self.conn.cursor()
        if event_type is not None:
            type_value = event_type.value if isinstance(event_type, EventType) else event_type
            cursor.execute(
                "SELECT COUNT(*) FROM events WHERE type = ? AND parent_id IS NULL",
                (type_value,),
            )
        else:
            cursor.execute("SELECT COUNT(*) FROM events WHERE parent_id IS NULL")
        return cursor.fetchone()[0]

    def iter_events(
        self,
        batch_size: int = 1000,
        event_types: list[EventType | str] | None = None,
    ) -> Iterator[Event]:
        """Iterate over events in batches for memory efficiency.

        Only top-level events are yielded; rows with unknown types are skipped.

        Args:
            batch_size: Number of events per batch.
            event_types: Filter by event types.

        Yields:
            Events one at a time, in timestamp order.
        """
        cursor = self.conn.cursor()

        # Build query
        conditions = ["parent_id IS NULL"]
        params: list[Any] = []

        if event_types:
            placeholders = ",".join("?" for _ in event_types)
            conditions.append(f"type IN ({placeholders})")
            params.extend(
                t.value if isinstance(t, EventType) else t for t in event_types
            )

        where_clause = " AND ".join(conditions)
        query = f"SELECT * FROM events WHERE {where_clause} ORDER BY timestamp"

        cursor.execute(query, params)

        # fetchmany keeps at most batch_size rows in memory at once
        while True:
            rows = cursor.fetchmany(batch_size)
            if not rows:
                break
            for row in rows:
                event = self._deserialize_event(row)
                if event is not None:
                    yield event
+
+
+# =============================================================================
+# Convenience functions
+# =============================================================================
+
+
+def _detect_platform() -> str:
+ """Detect the current platform."""
+ import sys
+ return sys.platform
+
+
+def _detect_screen_size() -> tuple[int, int]:
+ """Detect screen dimensions."""
+ try:
+ from PIL import ImageGrab
+ screenshot = ImageGrab.grab()
+ return screenshot.size
+ except Exception:
+ return (1920, 1080) # Fallback default
+
+
def create_capture(
    capture_dir: str | Path,
    task_description: str | None = None,
    platform: str | None = None,
    screen_width: int | None = None,
    screen_height: int | None = None,
) -> tuple[Capture, CaptureStorage]:
    """Create a new capture session backed by a fresh SQLite database.

    Args:
        capture_dir: Directory for capture files.
        task_description: Optional description of the task being recorded.
        platform: Platform identifier (auto-detected if not provided).
        screen_width: Screen width in pixels (auto-detected if not provided).
        screen_height: Screen height in pixels (auto-detected if not provided).

    Returns:
        Tuple of (Capture, CaptureStorage).
    """
    import uuid

    directory = Path(capture_dir)
    directory.mkdir(parents=True, exist_ok=True)

    # Fill in anything the caller left unspecified.
    resolved_platform = platform if platform is not None else _detect_platform()

    width, height = screen_width, screen_height
    if width is None or height is None:
        detected_w, detected_h = _detect_screen_size()
        width = width or detected_w
        height = height or detected_h

    capture = Capture(
        id=str(uuid.uuid4())[:8],  # short random identifier
        started_at=time.time(),
        platform=resolved_platform,
        screen_width=width,
        screen_height=height,
        task_description=task_description,
    )

    storage = CaptureStorage(directory / "capture.db")
    storage.init_capture(capture)
    return capture, storage
+
+
def load_capture(capture_dir: str | Path) -> tuple[Capture | None, CaptureStorage]:
    """Load an existing capture from storage.

    Args:
        capture_dir: Directory containing capture files.

    Returns:
        Tuple of (Capture, CaptureStorage). Capture is None when the database
        exists but contains no capture metadata row.

    Raises:
        FileNotFoundError: If no capture database exists in ``capture_dir``.
    """
    capture_dir = Path(capture_dir)
    db_path = capture_dir / "capture.db"

    # Fail loudly when the directory was never used for a capture; a missing
    # metadata row (partial/interrupted capture) is reported as None instead.
    if not db_path.exists():
        raise FileNotFoundError(f"Capture database not found: {db_path}")

    storage = CaptureStorage(db_path)
    capture = storage.get_capture()

    return capture, storage
diff --git a/pyproject.toml b/pyproject.toml
index 6eec4ec..a57ab27 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "openadapt-capture"
-version = "0.2.0"
+version = "0.3.0"
description = "GUI interaction capture - platform-agnostic event streams with time-aligned media"
readme = "README.md"
requires-python = ">=3.10"
diff --git a/scripts/compare_codecs.py b/scripts/compare_codecs.py
index 869066f..1838cb9 100644
--- a/scripts/compare_codecs.py
+++ b/scripts/compare_codecs.py
@@ -113,8 +113,8 @@ def test_codec(
Returns:
CodecResult with metrics.
"""
- from openadapt_capture.video import VideoWriter, extract_frames
from openadapt_capture.comparison import compute_psnr
+ from openadapt_capture.video import VideoWriter, extract_frames
video_path = output_dir / f"test_{codec}.mp4"
width, height = frames[0].size
@@ -150,7 +150,7 @@ def test_codec(
print(f" Encode time: {encode_time:.2f}s")
# Decode and compare
- print(f" Extracting frames for comparison...")
+ print(" Extracting frames for comparison...")
decode_start = time.time()
# Extract frames at same timestamps
@@ -168,7 +168,7 @@ def test_codec(
print(f" Decode time: {decode_time:.2f}s")
# Compare frames
- print(f" Computing accuracy metrics...")
+ print(" Computing accuracy metrics...")
diffs = []
max_diffs = []
psnrs = []
@@ -230,7 +230,7 @@ def print_comparison(results: list[CodecResult]) -> None:
if h264 and h265:
size_reduction = (1 - h265.file_size_bytes / h264.file_size_bytes) * 100
- print(f"\nH.265 vs H.264:")
+ print("\nH.265 vs H.264:")
print(f" Size reduction: {size_reduction:.1f}%")
print(f" Quality difference (PSNR): {h265.psnr - h264.psnr:+.2f} dB")
print(f" Encode time ratio: {h265.encode_time_seconds / h264.encode_time_seconds:.2f}x")
diff --git a/scripts/generate_readme_plots.py b/scripts/generate_readme_plots.py
index f7f4336..3645e22 100644
--- a/scripts/generate_readme_plots.py
+++ b/scripts/generate_readme_plots.py
@@ -11,10 +11,9 @@
from pathlib import Path
-import numpy as np
-
# Ensure matplotlib uses non-interactive backend
import matplotlib
+import numpy as np
matplotlib.use("Agg")
import matplotlib.pyplot as plt
diff --git a/scripts/generate_real_capture_plot.py b/scripts/generate_real_capture_plot.py
index 89c0b74..3c02c19 100644
--- a/scripts/generate_real_capture_plot.py
+++ b/scripts/generate_real_capture_plot.py
@@ -14,9 +14,14 @@
import tempfile
import time
from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from openadapt_capture.stats import CaptureStats
# Set matplotlib backend before importing pyplot
import matplotlib
+
matplotlib.use("Agg")
import matplotlib.pyplot as plt
@@ -60,7 +65,7 @@ def run_real_capture(duration: float = 5.0) -> "CaptureStats":
print(f"Captured {recorder.event_count} input events, {stat_count} total stats")
return recorder.stats, capture_dir
- except Exception as e:
+ except Exception:
# Clean up on error
shutil.rmtree(capture_dir, ignore_errors=True)
raise
@@ -73,7 +78,6 @@ def generate_performance_plot(stats: "CaptureStats", output_path: Path) -> None:
stats: CaptureStats from a real capture.
output_path: Where to save the plot.
"""
- from openadapt_capture.stats import PerfStat
if not stats.stats:
print("No stats recorded!")
diff --git a/test_viewer.html b/test_viewer.html
new file mode 100644
index 0000000..08edf0e
--- /dev/null
+++ b/test_viewer.html
@@ -0,0 +1,1282 @@
+
+
+
+
+
+ Capture Viewer - 31807990
+
+
+
+
+
+
+
+
+
+
![Frame]()
+
+
0:00.00
+
+
+
+
+
+
+
+
+
+
+ 0:00.00 / 1:00.31
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Keyboard: Space (play/pause) | ← → (prev/next) | Home/End (first/last) | O (toggle overlay)
+
+
+
+
+
+
+
+
diff --git a/tests/test_events.py b/tests/test_events.py
index 2e6eb17..b6d9fdd 100644
--- a/tests/test_events.py
+++ b/tests/test_events.py
@@ -1,6 +1,5 @@
"""Tests for event schemas."""
-import pytest
from openadapt_capture.events import (
AudioChunkEvent,
diff --git a/tests/test_highlevel.py b/tests/test_highlevel.py
index abd6f91..7bd2060 100644
--- a/tests/test_highlevel.py
+++ b/tests/test_highlevel.py
@@ -5,7 +5,7 @@
import pytest
-from openadapt_capture import Capture, CaptureSession, Recorder
+from openadapt_capture import Capture, Recorder
from openadapt_capture.events import MouseButton, MouseDownEvent, MouseUpEvent
from openadapt_capture.storage import CaptureStorage
@@ -95,19 +95,16 @@ def test_capture_properties(self, temp_capture_dir):
def test_capture_actions_iterator(self, temp_capture_dir):
"""Test iterating over actions."""
- import time
-
capture_path = Path(temp_capture_dir) / "capture"
# Create capture and add some events manually
- with Recorder(capture_path) as recorder:
+ with Recorder(capture_path):
pass
# Get the capture's time range and add events within it
storage = CaptureStorage(capture_path / "capture.db")
capture_meta = storage.get_capture()
started_at = capture_meta.started_at
- ended_at = capture_meta.ended_at or time.time()
# Write events with timestamps within the capture window
storage.write_event(
diff --git a/tests/test_processing.py b/tests/test_processing.py
index 962cf8e..bea7ea8 100644
--- a/tests/test_processing.py
+++ b/tests/test_processing.py
@@ -1,6 +1,5 @@
"""Tests for event processing pipeline."""
-import pytest
from openadapt_capture.events import (
KeyDownEvent,
diff --git a/tests/test_processing_comprehensive.py b/tests/test_processing_comprehensive.py
index 125ca00..8812186 100644
--- a/tests/test_processing_comprehensive.py
+++ b/tests/test_processing_comprehensive.py
@@ -28,11 +28,9 @@
merge_consecutive_mouse_move_events,
merge_consecutive_mouse_scroll_events,
process_events,
- remove_invalid_keyboard_events,
remove_redundant_mouse_move_events,
)
-
# =============================================================================
# Test Fixtures and Helpers
# =============================================================================
diff --git a/tests/test_stats.py b/tests/test_stats.py
index dfe59eb..d4502c9 100644
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@@ -203,7 +203,7 @@ def test_plot_capture_with_events(self, tmp_path):
pytest.importorskip("matplotlib")
from PIL import Image
- from openadapt_capture.events import MouseDownEvent, MouseButton
+ from openadapt_capture.events import MouseButton, MouseDownEvent
from openadapt_capture.stats import plot_capture_performance
from openadapt_capture.storage import create_capture
@@ -234,7 +234,7 @@ def test_plot_capture_with_events(self, tmp_path):
def test_plot_capture_saves_to_file(self, tmp_path):
"""Test plotting a capture and saving to file."""
pytest.importorskip("matplotlib")
- from openadapt_capture.events import MouseDownEvent, MouseButton
+ from openadapt_capture.events import MouseButton, MouseDownEvent
from openadapt_capture.stats import plot_capture_performance
from openadapt_capture.storage import create_capture
diff --git a/tests/test_storage.py b/tests/test_storage.py
index a346c2e..295f0e2 100644
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -8,7 +8,6 @@
from openadapt_capture.events import (
EventType,
KeyDownEvent,
- KeyUpEvent,
MouseButton,
MouseDownEvent,
MouseMoveEvent,