From 2615c1690f25531b03d77cf77ca3f6b0df227265 Mon Sep 17 00:00:00 2001
From: robertvava <rob@Robs-MacBook-Pro.local>
Date: Sat, 7 Mar 2026 18:21:54 +0000
Subject: [PATCH 1/9] PCA init

---
 src/hyperview/cli.py                   |   4 +-
 src/hyperview/core/dataset.py          |   9 +-
 src/hyperview/embeddings/pipelines.py  |  20 +-
 src/hyperview/embeddings/projection.py | 256 +++++++++++++++++++++-
 tests/__init__.py                      |   0
 tests/test_pca_projection.py           | 284 +++++++++++++++++++++++++
 6 files changed, 554 insertions(+), 19 deletions(-)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_pca_projection.py

diff --git a/src/hyperview/cli.py b/src/hyperview/cli.py
index d2da15b..8e30cb1 100644
--- a/src/hyperview/cli.py
+++ b/src/hyperview/cli.py
@@ -95,9 +95,9 @@ def _build_parser() -> argparse.ArgumentParser:
     )
     parser.add_argument(
         "--method",
-        choices=["umap"],
+        choices=["umap", "pca"],
         default="umap",
-        help="Projection method (currently only 'umap')",
+        help="Projection method: 'umap' (default) or 'pca'",
     )
     parser.add_argument(
         "--geometry",
diff --git a/src/hyperview/core/dataset.py b/src/hyperview/core/dataset.py
index 09cba36..76cc0e9 100644
--- a/src/hyperview/core/dataset.py
+++ b/src/hyperview/core/dataset.py
@@ -439,11 +439,12 @@ def compute_visualization(
 
         Args:
             space_key: Embedding space to project. If None, uses the first available.
-            method: Projection method ('umap' supported).
+            method: Projection method ('umap' or 'pca'). PCA is faster and
+                deterministic; UMAP captures nonlinear structure better.
             geometry: Output geometry type ('euclidean' or 'poincare').
-            n_neighbors: Number of neighbors for UMAP.
-            min_dist: Minimum distance for UMAP.
-            metric: Distance metric for UMAP.
+            n_neighbors: Number of neighbors for UMAP (ignored for PCA).
+            min_dist: Minimum distance for UMAP (ignored for PCA).
+            metric: Distance metric for UMAP (ignored for PCA).
             force: Force recomputation even if layout exists.
 
         Returns:
diff --git a/src/hyperview/embeddings/pipelines.py b/src/hyperview/embeddings/pipelines.py
index e3aa716..a46e52a 100644
--- a/src/hyperview/embeddings/pipelines.py
+++ b/src/hyperview/embeddings/pipelines.py
@@ -106,7 +106,7 @@ def compute_layout(
     Args:
         storage: Storage backend with embeddings.
         space_key: Embedding space to project. If None, uses the first available.
-        method: Projection method ('umap' supported).
+        method: Projection method ('umap' or 'pca').
         geometry: Output geometry type ('euclidean' or 'poincare').
         n_neighbors: Number of neighbors for UMAP.
         min_dist: Minimum distance for UMAP.
@@ -122,8 +122,8 @@ def compute_layout(
     """
     from hyperview.embeddings.projection import ProjectionEngine
 
-    if method != "umap":
-        raise ValueError(f"Invalid method: {method}. Only 'umap' is supported.")
+    if method not in ("umap", "pca"):
+        raise ValueError(f"Invalid method: {method}. Supported methods: 'umap', 'pca'.")
 
     if geometry not in ("euclidean", "poincare"):
         raise ValueError(f"Invalid geometry: {geometry}. Must be 'euclidean' or 'poincare'.")
@@ -157,11 +157,15 @@ def compute_layout(
     if len(ids) < 3:
         raise ValueError(f"Need at least 3 samples for visualization, have {len(ids)}")
 
-    layout_params = {
-        "n_neighbors": n_neighbors,
-        "min_dist": min_dist,
-        "metric": metric,
-    }
+    if method == "umap":
+        layout_params = {
+            "n_neighbors": n_neighbors,
+            "min_dist": min_dist,
+            "metric": metric,
+        }
+    else:
+        # PCA is deterministic with no tuning parameters
+        layout_params = {}
     layout_key = make_layout_key(space_key, method, geometry, layout_params)
 
     if not force:
diff --git a/src/hyperview/embeddings/projection.py b/src/hyperview/embeddings/projection.py
index 83dff50..68892e5 100644
--- a/src/hyperview/embeddings/projection.py
+++ b/src/hyperview/embeddings/projection.py
@@ -8,10 +8,124 @@
 
 logger = logging.getLogger(__name__)
 
+_MIN_SAMPLES = 2
+_EPS = 1e-7
 
 class ProjectionEngine:
     """Engine for projecting high-dimensional embeddings to 2D."""
 
+    def logmap_0_hyperboloid(
+        self,
+        embeddings: np.ndarray,
+        curvature: float = 1.0,
+    ) -> np.ndarray:
+        """Logarithmic map from the hyperboloid to the tangent space at the origin.
+
+        Maps points on the hyperboloid H^D (Lorentz model) to the tangent space
+        T_o H^D at the origin o = (1, 0, ..., 0).  The tangent space is Euclidean
+        R^D, so standard PCA can be applied afterward.
+
+        Simplification at the origin:
+            <o, x>_L = -x_0  (Lorentz inner product with north pole)
+            d_L(o, x) = arccosh(x_0)
+            spatial = x[1:]
+            tangent = d_L * spatial / ||spatial||   (if ||spatial|| > 0)
+
+        Args:
+            embeddings: (N, D+1) array on the hyperboloid (first column is time-like).
+            curvature: Positive curvature parameter c (sectional curvature is -c).
+
+        Returns:
+            (N, D) array of tangent vectors at the origin.
+        """
+        if embeddings.ndim != 2 or embeddings.shape[1] < 2:
+            raise ValueError(
+                "embeddings must have shape (N, D+1) with D >= 1, "
+                f"got {embeddings.shape}"
+            )
+
+        c = float(curvature)
+        sqrt_c = np.sqrt(c)
+
+        t = embeddings[:, 0]  # time component, shape (N,)
+        x = embeddings[:, 1:]  # spatial components, shape (N, D)
+
+        # Hyperbolic distance from origin: d = (1/sqrt(c)) * arccosh(sqrt(c) * t)
+        # For c=1 this simplifies to arccosh(t).
+        arg = np.clip(sqrt_c * t, 1.0 + _EPS, None)  # ensure > 1 for arccosh
+        theta = np.arccosh(arg) / sqrt_c  # shape (N,)
+
+        norm_x = np.linalg.norm(x, axis=1)  # shape (N,)
+        # For points very close to the origin, norm_x ≈ 0 and theta ≈ 0.
+        # The tangent vector is simply 0 in that case.
+        safe_norm = np.where(norm_x > _EPS, norm_x, 1.0)
+        scale = np.where(norm_x > _EPS, theta / safe_norm, 0.0)
+
+        tangent = x * scale[:, np.newaxis]
+
+        logger.debug(
+            "logmap_0: input %s -> tangent %s, max_dist=%.4f",
+            embeddings.shape,
+            tangent.shape,
+            float(theta.max()) if len(theta) else 0.0,
+        )
+        return tangent.astype(np.float32)
+
+    def expmap_0_hyperboloid(
+        self,
+        tangent_vectors: np.ndarray,
+        curvature: float = 1.0,
+    ) -> np.ndarray:
+        """Exponential map from the tangent space at the origin back to the hyperboloid.
+
+        Inverse of ``logmap_0_hyperboloid``.
+
+        expmap_0(v):
+            norm_v = ||v||
+            t = cosh(sqrt(c) * norm_v) / sqrt(c)   ... but for unit hyperboloid (c=1): t = cosh(norm_v)
+            spatial = sinh(sqrt(c) * norm_v) / (sqrt(c) * norm_v) * v
+
+        For c = 1:
+            t = cosh(norm_v)
+            spatial = sinh(norm_v) * v / norm_v
+
+        Args:
+            tangent_vectors: (N, D) array in the tangent space at the origin.
+            curvature: Positive curvature parameter c.
+
+        Returns:
+            (N, D+1) array of points on the hyperboloid.
+        """
+        if tangent_vectors.ndim != 2:
+            raise ValueError(
+                f"tangent_vectors must be 2-D, got shape {tangent_vectors.shape}"
+            )
+
+        c = float(curvature)
+        sqrt_c = np.sqrt(c)
+
+        norm_v = np.linalg.norm(tangent_vectors, axis=1)  # (N,)
+        scaled_norm = sqrt_c * norm_v
+
+        t = np.cosh(scaled_norm) / sqrt_c  # time component
+        # sinh(x)/x -> 1 as x -> 0
+        safe_scaled = np.where(scaled_norm > _EPS, scaled_norm, 1.0)
+        coeff = np.where(
+            scaled_norm > _EPS,
+            np.sinh(scaled_norm) / safe_scaled,
+            1.0,
+        )
+        spatial = tangent_vectors * coeff[:, np.newaxis]
+
+        result = np.column_stack([t, spatial])
+
+        logger.debug(
+            "expmap_0: tangent %s -> hyperboloid %s",
+            tangent_vectors.shape,
+            result.shape,
+        )
+        return result.astype(np.float32)
+
     def to_poincare_ball(
         self,
         hyperboloid_embeddings: np.ndarray,
@@ -52,7 +166,6 @@ def to_poincare_ball(
         # Rescale to unit ball: u = u_R / R = sqrt(c) * u_R
         u = u_R / R
 
-        # Numerical guard: ensure inside the unit ball
         radii = np.linalg.norm(u, axis=1)
         mask = radii >= clamp_radius
         if np.any(mask):
@@ -77,14 +190,14 @@ def project(
 
         This separates two concerns:
         1) Geometry/model transforms for the *input* embeddings (e.g. hyperboloid -> Poincaré)
-        2) Dimensionality reduction / layout (currently UMAP)
+        2) Dimensionality reduction / layout (UMAP or PCA)
 
         Args:
             embeddings: Input embeddings (N x D) or hyperboloid (N x D+1).
             input_geometry: Geometry/model of the input embeddings (euclidean, hyperboloid).
             output_geometry: Geometry of the output coordinates (euclidean, poincare).
             curvature: Curvature parameter for hyperbolic embeddings (positive c).
-            method: Layout method (currently only 'umap').
+            method: Layout method ('umap' or 'pca').
             n_neighbors: UMAP neighbors.
             min_dist: UMAP min_dist.
             metric: Input metric (used for euclidean inputs).
@@ -93,8 +206,18 @@ def project(
         Returns:
             2D coordinates (N x 2).
         """
-        if method != "umap":
-            raise ValueError(f"Invalid method: {method}. Only 'umap' is supported.")
+        if method not in ("umap", "pca"):
+            raise ValueError(
+                f"Invalid method: {method}. Supported methods: 'umap', 'pca'."
+            )
+
+        if method == "pca":
+            return self._project_pca(
+                embeddings,
+                input_geometry=input_geometry,
+                output_geometry=output_geometry,
+                curvature=curvature or 1.0,
+            )
 
         prepared = embeddings
         prepared_metric: str = metric
@@ -127,6 +250,129 @@ def project(
             f"Invalid output_geometry: {output_geometry}. Must be 'euclidean' or 'poincare'."
         )
 
+   
+    def _project_pca(
+        self,
+        embeddings: np.ndarray,
+        *,
+        input_geometry: str,
+        output_geometry: str,
+        curvature: float = 1.0,
+    ) -> np.ndarray:
+        """Dispatch PCA projection based on input/output geometry.
+
+        Handles four combinations:
+        - euclidean  -> euclidean:  Standard PCA, normalize to [-1, 1]
+        - euclidean  -> poincare:   Standard PCA, tanh disk mapping
+        - hyperboloid -> euclidean: logmap_0, PCA, normalize to [-1, 1]
+        - hyperboloid -> poincare:  logmap_0, PCA to 2D, expmap_0 back, stereographic to disk
+        """
+        n_samples = embeddings.shape[0]
+        if n_samples < _MIN_SAMPLES:
+            raise ValueError(
+                f"PCA requires at least {_MIN_SAMPLES} samples, got {n_samples}."
+            )
+
+        if not np.all(np.isfinite(embeddings)):
+            raise ValueError("Embeddings contain NaN or Inf values.")
+
+        logger.info(
+            "PCA: input_geometry=%s, output_geometry=%s, N=%d, D=%d",
+            input_geometry,
+            output_geometry,
+            n_samples,
+            embeddings.shape[1],
+        )
+
+        if input_geometry == "hyperboloid":
+            data = self.logmap_0_hyperboloid(embeddings, curvature=curvature)
+        else:
+            data = embeddings
+
+        pca_2d, explained = self._pca_svd(data, n_components=2)
+
+        logger.info("PCA: explained variance ratio: [%.4f, %.4f]", *explained)
+
+        if output_geometry == "poincare":
+            if input_geometry == "hyperboloid":
+                # Tangent PCA -> expmap back to H^2 -> stereographic to Poincaré disk
+                hyp_points = self.expmap_0_hyperboloid(pca_2d, curvature=curvature)
+                coords = self.to_poincare_ball(hyp_points, curvature=curvature)
+                coords = self._center_poincare(coords)
+                coords = self._scale_poincare(coords, factor=0.65)
+            else:
+                # Euclidean PCA -> tanh disk mapping
+                coords = self._pca_to_poincare_disk(pca_2d)
+            return coords.astype(np.float32)
+
+        return self._normalize_coords(pca_2d).astype(np.float32)
+
+    def _pca_svd(
+        self,
+        data: np.ndarray,
+        n_components: int = 2,
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """Compute PCA via NumPy SVD. No scikit-learn dependency.
+
+        Args:
+            data: (N, D) array of data points.
+            n_components: Number of principal components to keep.
+
+        Returns:
+            Tuple of:
+            - (N, n_components) projected data
+            - (n_components,) explained variance ratios
+        """
+        data = data.astype(np.float64)
+        mean = data.mean(axis=0)
+        centered = data - mean
+
+        # Economy SVD (full_matrices=False) is O(N * D * min(N,D))
+        _U, S, Vt = np.linalg.svd(centered, full_matrices=False)
+
+        projected = centered @ Vt[:n_components].T
+
+        variance = S**2 / max(len(data) - 1, 1)
+        total_var = variance.sum()
+        if total_var > 0:
+            explained = variance[:n_components] / total_var
+        else:
+            explained = np.zeros(n_components)
+
+        return projected.astype(np.float32), explained
+
+    def _pca_to_poincare_disk(
+        self,
+        coords_2d: np.ndarray,
+        alpha: float = 1.0,
+    ) -> np.ndarray:
+        """Map 2D Euclidean PCA output into the Poincaré disk via tanh scaling.
+
+        tanh(alpha * r) * z / r   for each point z with r = ||z||
+
+        This is a homeomorphism R^2 -> B^2 (open unit ball).
+
+        Args:
+            coords_2d: (N, 2) PCA coordinates.
+            alpha: Scaling factor controlling radial compression (default 1.0).
+
+        Returns:
+            (N, 2) Poincaré disk coordinates.
+        """
+        max_abs = np.abs(coords_2d).max()
+        if max_abs > _EPS:
+            coords_2d = coords_2d / max_abs
+
+        radii = np.linalg.norm(coords_2d, axis=1)
+        safe_radii = np.where(radii > _EPS, radii, 1.0)
+        mapped_radii = np.tanh(alpha * radii)
+        scale = np.where(radii > _EPS, mapped_radii / safe_radii, 0.0)
+        poincare = coords_2d * scale[:, np.newaxis]
+        poincare = self._center_poincare(poincare)
+        poincare = self._scale_poincare(poincare, factor=0.65)
+
+        return poincare.astype(np.float32)
+
     def project_umap(
         self,
         embeddings: np.ndarray,
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_pca_projection.py b/tests/test_pca_projection.py
new file mode 100644
index 0000000..3581810
--- /dev/null
+++ b/tests/test_pca_projection.py
@@ -0,0 +1,284 @@
+import numpy as np
+import pytest
+
+from hyperview.embeddings.projection import ProjectionEngine
+
+
+def _random_euclidean(n: int = 50, d: int = 64, seed: int = 42) -> np.ndarray:
+    """Generate random Euclidean embeddings (N, D)."""
+    rng = np.random.default_rng(seed)
+    return rng.standard_normal((n, d)).astype(np.float32)
+
+
+def _random_hyperboloid(n: int = 50, d: int = 64, seed: int = 42) -> np.ndarray:
+    """Generate random points on the hyperboloid H^d (Lorentz model).
+
+    Points satisfy t^2 - ||x||^2 = 1 (curvature c=1).
+    We sample spatial components from a normal distribution scaled down,
+    then compute t = sqrt(1 + ||x||^2).
+    """
+    rng = np.random.default_rng(seed)
+    x = rng.standard_normal((n, d)).astype(np.float64) * 0.5
+    t = np.sqrt(1.0 + np.sum(x**2, axis=1, keepdims=True))
+    return np.hstack([t, x]).astype(np.float32)
+
+
+class TestPcaEuclideanToEuclidean:
+    """Euclidean input -> Euclidean output (standard PCA)."""
+
+    def test_output_shape(self):
+        engine = ProjectionEngine()
+        data = _random_euclidean(30, 32)
+        coords = engine.project(data, method="pca", input_geometry="euclidean", output_geometry="euclidean")
+        assert coords.shape == (30, 2)
+
+    def test_normalized_range(self):
+        engine = ProjectionEngine()
+        data = _random_euclidean(50, 64)
+        coords = engine.project(data, method="pca", input_geometry="euclidean", output_geometry="euclidean")
+        assert np.all(coords >= -1.0)
+        assert np.all(coords <= 1.0)
+
+    def test_dtype_float32(self):
+        engine = ProjectionEngine()
+        data = _random_euclidean(20, 16)
+        coords = engine.project(data, method="pca", input_geometry="euclidean", output_geometry="euclidean")
+        assert coords.dtype == np.float32
+
+
+class TestPcaEuclideanToPoincare:
+    """Euclidean input -> Poincare disk output."""
+
+    def test_output_shape(self):
+        engine = ProjectionEngine()
+        data = _random_euclidean(30, 32)
+        coords = engine.project(data, method="pca", input_geometry="euclidean", output_geometry="poincare")
+        assert coords.shape == (30, 2)
+
+    def test_inside_unit_disk(self):
+        engine = ProjectionEngine()
+        data = _random_euclidean(50, 64)
+        coords = engine.project(data, method="pca", input_geometry="euclidean", output_geometry="poincare")
+        radii = np.linalg.norm(coords, axis=1)
+        assert np.all(radii < 1.0), f"Max radius: {radii.max()}"
+
+
+class TestPcaHyperboloidToEuclidean:
+    """Hyperboloid input -> Euclidean output (tangent PCA)."""
+
+    def test_output_shape(self):
+        engine = ProjectionEngine()
+        data = _random_hyperboloid(30, 32)
+        coords = engine.project(data, method="pca", input_geometry="hyperboloid", output_geometry="euclidean")
+        assert coords.shape == (30, 2)
+
+    def test_normalized_range(self):
+        engine = ProjectionEngine()
+        data = _random_hyperboloid(50, 64)
+        coords = engine.project(data, method="pca", input_geometry="hyperboloid", output_geometry="euclidean")
+        assert np.all(coords >= -1.0)
+        assert np.all(coords <= 1.0)
+
+
+class TestPcaHyperboloidToPoincare:
+    """Hyperboloid input -> Poincare disk output (tangent PCA + expmap)."""
+
+    def test_output_shape(self):
+        engine = ProjectionEngine()
+        data = _random_hyperboloid(30, 32)
+        coords = engine.project(data, method="pca", input_geometry="hyperboloid", output_geometry="poincare")
+        assert coords.shape == (30, 2)
+
+    def test_inside_unit_disk(self):
+        engine = ProjectionEngine()
+        data = _random_hyperboloid(50, 64)
+        coords = engine.project(data, method="pca", input_geometry="hyperboloid", output_geometry="poincare")
+        radii = np.linalg.norm(coords, axis=1)
+        assert np.all(radii < 1.0), f"Max radius: {radii.max()}"
+
+
+class TestPcaDeterminism:
+    """PCA should produce identical results on identical input."""
+
+    def test_euclidean_deterministic(self):
+        engine = ProjectionEngine()
+        data = _random_euclidean(40, 32)
+        c1 = engine.project(data, method="pca", input_geometry="euclidean", output_geometry="euclidean")
+        c2 = engine.project(data, method="pca", input_geometry="euclidean", output_geometry="euclidean")
+        np.testing.assert_array_equal(c1, c2)
+
+    def test_hyperboloid_deterministic(self):
+        engine = ProjectionEngine()
+        data = _random_hyperboloid(40, 32)
+        c1 = engine.project(data, method="pca", input_geometry="hyperboloid", output_geometry="poincare")
+        c2 = engine.project(data, method="pca", input_geometry="hyperboloid", output_geometry="poincare")
+        np.testing.assert_array_equal(c1, c2)
+
+
+class TestLogmapExpmapRoundtrip:
+    """logmap_0(expmap_0(v)) should approximately recover v."""
+
+    def test_roundtrip(self):
+        engine = ProjectionEngine()
+        rng = np.random.default_rng(123)
+        # Tangent vectors with moderate norms
+        v = rng.standard_normal((20, 8)).astype(np.float32) * 0.5
+        hyp = engine.expmap_0_hyperboloid(v, curvature=1.0)
+        recovered = engine.logmap_0_hyperboloid(hyp, curvature=1.0)
+        np.testing.assert_allclose(recovered, v, atol=1e-4)
+
+    def test_roundtrip_nonunit_curvature(self):
+        engine = ProjectionEngine()
+        rng = np.random.default_rng(456)
+        v = rng.standard_normal((15, 5)).astype(np.float32) * 0.3
+        c = 2.0
+        hyp = engine.expmap_0_hyperboloid(v, curvature=c)
+        recovered = engine.logmap_0_hyperboloid(hyp, curvature=c)
+        np.testing.assert_allclose(recovered, v, atol=1e-4)
+
+
+class TestEdgeCases:
+
+    def test_insufficient_samples(self):
+        engine = ProjectionEngine()
+        data = _random_euclidean(1, 16)
+        with pytest.raises(ValueError, match="at least"):
+            engine.project(data, method="pca", input_geometry="euclidean", output_geometry="euclidean")
+
+    def test_nan_input_raises(self):
+        engine = ProjectionEngine()
+        data = _random_euclidean(10, 16)
+        data[3, 5] = np.nan
+        with pytest.raises(ValueError, match="NaN or Inf"):
+            engine.project(data, method="pca", input_geometry="euclidean", output_geometry="euclidean")
+
+    def test_inf_input_raises(self):
+        engine = ProjectionEngine()
+        data = _random_euclidean(10, 16)
+        data[0, 0] = np.inf
+        with pytest.raises(ValueError, match="NaN or Inf"):
+            engine.project(data, method="pca", input_geometry="euclidean", output_geometry="euclidean")
+
+    def test_identical_embeddings(self):
+        """All-same embeddings -> zero variance -> should not crash."""
+        engine = ProjectionEngine()
+        data = np.ones((10, 16), dtype=np.float32)
+        coords = engine.project(data, method="pca", input_geometry="euclidean", output_geometry="euclidean")
+        assert coords.shape == (10, 2)
+        assert np.allclose(coords, 0.0, atol=1e-6)
+
+    def test_invalid_method(self):
+        engine = ProjectionEngine()
+        data = _random_euclidean(10, 16)
+        with pytest.raises(ValueError, match="Invalid method"):
+            engine.project(data, method="tsne", input_geometry="euclidean", output_geometry="euclidean")
+
+    def test_two_samples_minimal(self):
+        """Exactly 2 samples — the minimum for PCA."""
+        engine = ProjectionEngine()
+        data = _random_euclidean(2, 16)
+        coords = engine.project(data, method="pca", input_geometry="euclidean", output_geometry="euclidean")
+        assert coords.shape == (2, 2)
+
+
+class TestIntegrationDatasetApi:
+
+    def test_compute_visualization_pca_euclidean(self):
+        from hyperview.core.dataset import Dataset
+        from hyperview.core.sample import Sample
+
+        ds = Dataset("test_pca", persist=False)
+
+        for i in range(20):
+            ds.add_sample(Sample(id=f"s{i}", filepath=f"/fake/{i}.png", label=f"c{i % 3}"))
+
+        space_key = "test_space"
+        ds._storage.ensure_space(
+            model_id="test",
+            dim=32,
+            config={"provider": "test", "geometry": "euclidean"},
+            space_key=space_key,
+        )
+        rng = np.random.default_rng(99)
+        ids = [f"s{i}" for i in range(20)]
+        vectors = rng.standard_normal((20, 32)).astype(np.float32)
+        ds._storage.add_embeddings(space_key, ids, vectors)
+
+        layout_key = ds.compute_visualization(
+            space_key=space_key,
+            method="pca",
+            geometry="euclidean",
+        )
+
+        assert layout_key is not None
+        vis_ids, vis_labels, vis_coords = ds.get_visualization_data(layout_key)
+        assert len(vis_ids) == 20
+        assert vis_coords.shape == (20, 2)
+
+    def test_compute_visualization_pca_poincare(self):
+        from hyperview.core.dataset import Dataset
+        from hyperview.core.sample import Sample
+
+        ds = Dataset("test_pca_poinc", persist=False)
+
+        for i in range(20):
+            ds.add_sample(Sample(id=f"s{i}", filepath=f"/fake/{i}.png", label=f"c{i % 3}"))
+
+        space_key = "test_space"
+        ds._storage.ensure_space(
+            model_id="test",
+            dim=32,
+            config={"provider": "test", "geometry": "euclidean"},
+            space_key=space_key,
+        )
+        rng = np.random.default_rng(99)
+        ids = [f"s{i}" for i in range(20)]
+        vectors = rng.standard_normal((20, 32)).astype(np.float32)
+        ds._storage.add_embeddings(space_key, ids, vectors)
+
+        layout_key = ds.compute_visualization(
+            space_key=space_key,
+            method="pca",
+            geometry="poincare",
+        )
+
+        assert layout_key is not None
+        vis_ids, _, vis_coords = ds.get_visualization_data(layout_key)
+        assert len(vis_ids) == 20
+        radii = np.linalg.norm(vis_coords, axis=1)
+        assert np.all(radii < 1.0)
+
+    def test_compute_visualization_pca_hyperboloid(self):
+        """Integration test with hyperboloid input geometry."""
+        from hyperview.core.dataset import Dataset
+        from hyperview.core.sample import Sample
+
+        ds = Dataset("test_pca_hyp", persist=False)
+
+        for i in range(20):
+            ds.add_sample(Sample(id=f"s{i}", filepath=f"/fake/{i}.png", label=f"c{i % 3}"))
+
+        emb_dim = 33  # D+1 for hyperboloid
+        space_key = "test_hyp_space"
+        ds._storage.ensure_space(
+            model_id="test_hyp",
+            dim=emb_dim,
+            config={"provider": "test", "geometry": "hyperboloid", "curvature": 1.0},
+            space_key=space_key,
+        )
+
+        ids = [f"s{i}" for i in range(20)]
+        vectors = _random_hyperboloid(20, emb_dim - 1, seed=77)
+        ds._storage.add_embeddings(space_key, ids, vectors)
+
+        for geom in ("euclidean", "poincare"):
+            layout_key = ds.compute_visualization(
+                space_key=space_key,
+                method="pca",
+                geometry=geom,
+            )
+            assert layout_key is not None
+            vis_ids, _, vis_coords = ds.get_visualization_data(layout_key)
+            assert len(vis_ids) == 20
+            assert vis_coords.shape == (20, 2)
+            assert np.all(np.isfinite(vis_coords))

From 6a090651f03f86f0741a30dd0171901411547abc Mon Sep 17 00:00:00 2001
From: Matin Mahmood <45293386+mnm-matin@users.noreply.github.com>
Date: Sun, 8 Mar 2026 12:59:31 +0100
Subject: [PATCH 2/9] Fix Devin review CI prompt

---
 .github/workflows/devin-review.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/devin-review.yml b/.github/workflows/devin-review.yml
index ea1410b..cb36e41 100644
--- a/.github/workflows/devin-review.yml
+++ b/.github/workflows/devin-review.yml
@@ -2,11 +2,15 @@ name: Devin Review
 
 on:
   pull_request:
-    types: [opened, synchronize, reopened]
+    types: [opened, synchronize, reopened, ready_for_review]
 
 jobs:
   devin-review:
     runs-on: ubuntu-latest
+    timeout-minutes: 30
+    permissions:
+      contents: read
+      pull-requests: write
 
     steps:
       - name: Checkout repository
@@ -26,4 +30,4 @@ jobs:
         env:
           CI: true  # Ensures the tool runs in non-interactive CI mode
         run: |
-          script -q -e -c "npx devin-review ${{ github.event.pull_request.html_url }}" /dev/null
+          script -q -e -c "printf 'y\n' | npx devin-review '${{ github.event.pull_request.html_url }}'" /dev/null

From c3194db31286b110f82d5cd21b0ceca4933dbf72 Mon Sep 17 00:00:00 2001
From: Matin Mahmood <45293386+mnm-matin@users.noreply.github.com>
Date: Sun, 8 Mar 2026 13:48:15 +0100
Subject: [PATCH 3/9] Switch Devin review to API workflow

---
 .github/workflows/devin-review.yml | 144 +++++++++++++++++++++++++----
 1 file changed, 128 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/devin-review.yml b/.github/workflows/devin-review.yml
index cb36e41..5db6ed7 100644
--- a/.github/workflows/devin-review.yml
+++ b/.github/workflows/devin-review.yml
@@ -1,33 +1,145 @@
 name: Devin Review
 
 on:
-  pull_request:
+  pull_request_target:
     types: [opened, synchronize, reopened, ready_for_review]
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: Pull request number to review
+        required: true
+        type: string
 
 jobs:
   devin-review:
+    if: ${{ github.event_name == 'workflow_dispatch' || !github.event.pull_request.draft }}
     runs-on: ubuntu-latest
-    timeout-minutes: 30
+    timeout-minutes: 10
+    env:
+      DEVIN_API_KEY: ${{ secrets.DEVIN_API_KEY }}
     permissions:
       contents: read
-      pull-requests: write
+      pull-requests: read
 
     steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
+      - name: Resolve pull request context
+        id: pr
+        uses: actions/github-script@v8
         with:
-          fetch-depth: 0
+          script: |
+            const prNumber = context.eventName === 'workflow_dispatch'
+              ? Number(core.getInput('pr_number'))
+              : context.payload.pull_request.number;
 
-      - name: Setup Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: '20'
+            if (!Number.isInteger(prNumber) || prNumber <= 0) {
+              core.setFailed(`Invalid pull request number: ${prNumber}`);
+              return;
+            }
+
+            const { owner, repo } = context.repo;
+            const { data: pr } = await github.rest.pulls.get({
+              owner,
+              repo,
+              pull_number: prNumber,
+            });
+            const files = await github.paginate(github.rest.pulls.listFiles, {
+              owner,
+              repo,
+              pull_number: prNumber,
+              per_page: 100,
+            });
+
+            core.setOutput('number', String(prNumber));
+            core.setOutput('url', pr.html_url);
+            core.setOutput('title', pr.title);
+            core.setOutput('author', pr.user.login);
+            core.setOutput('head_sha', pr.head.sha);
+            core.setOutput('head_repo', pr.head.repo.full_name);
+            core.setOutput('base_repo', pr.base.repo.full_name);
+            core.setOutput('files_json', JSON.stringify(files.map((file) => file.filename)));
+
+      - name: Validate Devin API configuration
+        if: ${{ env.DEVIN_API_KEY == '' }}
+        run: |
+          echo "DEVIN_API_KEY is not configured for this repository or organization." >&2
+          echo "Configure DEVIN_API_KEY before using the API-based Devin review workflow." >&2
+          exit 1
 
-      - name: Run Devin Review
-        # Use script to emulate a TTY as devin-review requires terminal features
-        # The -q flag suppresses script output, -e exits with command exit code,
-        # and -c runs the command. /dev/null discards script's own output.
+      - name: Start Devin review session
+        id: devin
         env:
-          CI: true  # Ensures the tool runs in non-interactive CI mode
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+          PR_URL: ${{ steps.pr.outputs.url }}
+          PR_TITLE: ${{ steps.pr.outputs.title }}
+          PR_AUTHOR: ${{ steps.pr.outputs.author }}
+          PR_HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
+          PR_HEAD_REPO: ${{ steps.pr.outputs.head_repo }}
+          PR_BASE_REPO: ${{ steps.pr.outputs.base_repo }}
+          PR_FILES_JSON: ${{ steps.pr.outputs.files_json }}
+          REPOSITORY: ${{ github.repository }}
         run: |
-          script -q -e -c "printf 'y\n' | npx devin-review '${{ github.event.pull_request.html_url }}'" /dev/null
+          PROMPT=$(cat <<EOF
+          You are reviewing pull request #${PR_NUMBER} in ${REPOSITORY}.
+
+          Repository:
+          - Base repository: ${PR_BASE_REPO}
+          - Head repository: ${PR_HEAD_REPO}
+
+          Pull request:
+          - Title: ${PR_TITLE}
+          - Author: ${PR_AUTHOR}
+          - URL: ${PR_URL}
+          - Head SHA: ${PR_HEAD_SHA}
+
+          Changed files JSON:
+          ${PR_FILES_JSON}
+
+          Tasks:
+          1. Review the current pull request diff and related repository context for PR #${PR_NUMBER}.
+          2. Never commit, push, or open a new pull request.
+          3. Never ask the user for confirmation and never wait for user messages.
+          4. Leave at most 3 total review comments.
+          5. Use inline review comments with precise line references when possible.
+          6. Before commenting, check whether the same issue was already raised or already fixed in the current PR discussion.
+          7. If no issues are found, leave a short summary comment saying everything looks good.
+          8. Follow repository instruction files such as AGENTS.md, CLAUDE.md, CONTRIBUTING.md, and REVIEW.md if present.
+
+          Focus on bugs, regressions, missing tests, and clear correctness issues. Avoid speculative nits.
+          EOF
+          )
+
+          PAYLOAD=$(jq -n \
+            --arg prompt "$PROMPT" \
+            --arg title "PR Review #${PR_NUMBER} (${REPOSITORY})" \
+            '{
+              prompt: $prompt,
+              idempotent: true,
+              title: $title,
+              tags: ["github-actions", "devin-review", "pull-request"]
+            }')
+
+          RESPONSE=$(curl --fail --silent --show-error \
+            --request POST \
+            --url https://api.devin.ai/v1/sessions \
+            --header "Authorization: Bearer ${DEVIN_API_KEY}" \
+            --header "Content-Type: application/json" \
+            --data "$PAYLOAD")
+
+          SESSION_ID=$(echo "$RESPONSE" | jq -r '.session_id')
+          SESSION_URL=$(echo "$RESPONSE" | jq -r '.url')
+
+          if [[ -z "$SESSION_ID" || "$SESSION_ID" == "null" || -z "$SESSION_URL" || "$SESSION_URL" == "null" ]]; then
+            echo "Unexpected Devin API response: $RESPONSE" >&2
+            exit 1
+          fi
+
+          echo "session_id=$SESSION_ID" >> "$GITHUB_OUTPUT"
+          echo "session_url=$SESSION_URL" >> "$GITHUB_OUTPUT"
+
+          {
+            echo "Started Devin review session."
+            echo
+            echo "- PR: #${PR_NUMBER}"
+            echo "- Session ID: ${SESSION_ID}"
+            echo "- Session URL: ${SESSION_URL}"
+          } >> "$GITHUB_STEP_SUMMARY"

From 2f65ac19d7d7068ece5150caebc69196b63f2416 Mon Sep 17 00:00:00 2001
From: Matin Mahmood <45293386+mnm-matin@users.noreply.github.com>
Date: Sun, 8 Mar 2026 13:49:41 +0100
Subject: [PATCH 4/9] Fix Devin workflow dispatch input

---
 .github/workflows/devin-review.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/devin-review.yml b/.github/workflows/devin-review.yml
index 5db6ed7..444ce16 100644
--- a/.github/workflows/devin-review.yml
+++ b/.github/workflows/devin-review.yml
@@ -25,10 +25,12 @@ jobs:
       - name: Resolve pull request context
         id: pr
         uses: actions/github-script@v8
+        env:
+          WORKFLOW_PR_NUMBER: ${{ inputs.pr_number }}
         with:
           script: |
             const prNumber = context.eventName === 'workflow_dispatch'
-              ? Number(core.getInput('pr_number'))
+              ? Number(process.env.WORKFLOW_PR_NUMBER)
               : context.payload.pull_request.number;
 
             if (!Number.isInteger(prNumber) || prNumber <= 0) {

From 9bc386b9b5047d66f13c7f502b059addd1ed7fdc Mon Sep 17 00:00:00 2001
From: Matin Mahmood <45293386+mnm-matin@users.noreply.github.com>
Date: Sun, 8 Mar 2026 13:58:54 +0100
Subject: [PATCH 5/9] Use zero-secret Devin review link

---
 .github/workflows/devin-review.yml | 125 +++++------------------------
 1 file changed, 22 insertions(+), 103 deletions(-)

diff --git a/.github/workflows/devin-review.yml b/.github/workflows/devin-review.yml
index 444ce16..66a92f1 100644
--- a/.github/workflows/devin-review.yml
+++ b/.github/workflows/devin-review.yml
@@ -1,7 +1,7 @@
 name: Devin Review
 
 on:
-  pull_request_target:
+  pull_request:
     types: [opened, synchronize, reopened, ready_for_review]
   workflow_dispatch:
     inputs:
@@ -12,18 +12,15 @@ on:
 
 jobs:
   devin-review:
-    if: ${{ github.event_name == 'workflow_dispatch' || !github.event.pull_request.draft }}
     runs-on: ubuntu-latest
-    timeout-minutes: 10
-    env:
-      DEVIN_API_KEY: ${{ secrets.DEVIN_API_KEY }}
+    timeout-minutes: 5
     permissions:
       contents: read
       pull-requests: read
 
     steps:
-      - name: Resolve pull request context
-        id: pr
+      - name: Resolve Devin Review URL
+        id: review
         uses: actions/github-script@v8
         env:
           WORKFLOW_PR_NUMBER: ${{ inputs.pr_number }}
@@ -39,109 +36,31 @@ jobs:
             }
 
             const { owner, repo } = context.repo;
-            const { data: pr } = await github.rest.pulls.get({
-              owner,
-              repo,
-              pull_number: prNumber,
-            });
-            const files = await github.paginate(github.rest.pulls.listFiles, {
-              owner,
-              repo,
-              pull_number: prNumber,
-              per_page: 100,
-            });
+            const reviewUrl = `https://devinreview.com/${owner}/${repo}/pull/${prNumber}`;
+            const prUrl = `https://github.com/${owner}/${repo}/pull/${prNumber}`;
 
             core.setOutput('number', String(prNumber));
-            core.setOutput('url', pr.html_url);
-            core.setOutput('title', pr.title);
-            core.setOutput('author', pr.user.login);
-            core.setOutput('head_sha', pr.head.sha);
-            core.setOutput('head_repo', pr.head.repo.full_name);
-            core.setOutput('base_repo', pr.base.repo.full_name);
-            core.setOutput('files_json', JSON.stringify(files.map((file) => file.filename)));
+            core.setOutput('pr_url', prUrl);
+            core.setOutput('review_url', reviewUrl);
 
-      - name: Validate Devin API configuration
-        if: ${{ env.DEVIN_API_KEY == '' }}
+      - name: Warm Devin Review page
+        env:
+          REVIEW_URL: ${{ steps.review.outputs.review_url }}
         run: |
-          echo "DEVIN_API_KEY is not configured for this repository or organization." >&2
-          echo "Configure DEVIN_API_KEY before using the API-based Devin review workflow." >&2
-          exit 1
+          curl --fail --silent --show-error --location "$REVIEW_URL" --output /dev/null
 
-      - name: Start Devin review session
-        id: devin
+      - name: Publish Devin Review summary
         env:
-          PR_NUMBER: ${{ steps.pr.outputs.number }}
-          PR_URL: ${{ steps.pr.outputs.url }}
-          PR_TITLE: ${{ steps.pr.outputs.title }}
-          PR_AUTHOR: ${{ steps.pr.outputs.author }}
-          PR_HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
-          PR_HEAD_REPO: ${{ steps.pr.outputs.head_repo }}
-          PR_BASE_REPO: ${{ steps.pr.outputs.base_repo }}
-          PR_FILES_JSON: ${{ steps.pr.outputs.files_json }}
-          REPOSITORY: ${{ github.repository }}
+          PR_NUMBER: ${{ steps.review.outputs.number }}
+          PR_URL: ${{ steps.review.outputs.pr_url }}
+          REVIEW_URL: ${{ steps.review.outputs.review_url }}
         run: |
-          PROMPT=$(cat <<EOF
-          You are reviewing pull request #${PR_NUMBER} in ${REPOSITORY}.
-
-          Repository:
-          - Base repository: ${PR_BASE_REPO}
-          - Head repository: ${PR_HEAD_REPO}
-
-          Pull request:
-          - Title: ${PR_TITLE}
-          - Author: ${PR_AUTHOR}
-          - URL: ${PR_URL}
-          - Head SHA: ${PR_HEAD_SHA}
-
-          Changed files JSON:
-          ${PR_FILES_JSON}
-
-          Tasks:
-          1. Review the current pull request diff and related repository context for PR #${PR_NUMBER}.
-          2. Never commit, push, or open a new pull request.
-          3. Never ask the user for confirmation and never wait for user messages.
-          4. Leave at most 3 total review comments.
-          5. Use inline review comments with precise line references when possible.
-          6. Before commenting, check whether the same issue was already raised or already fixed in the current PR discussion.
-          7. If no issues are found, leave a short summary comment saying everything looks good.
-          8. Follow repository instruction files such as AGENTS.md, CLAUDE.md, CONTRIBUTING.md, and REVIEW.md if present.
-
-          Focus on bugs, regressions, missing tests, and clear correctness issues. Avoid speculative nits.
-          EOF
-          )
-
-          PAYLOAD=$(jq -n \
-            --arg prompt "$PROMPT" \
-            --arg title "PR Review #${PR_NUMBER} (${REPOSITORY})" \
-            '{
-              prompt: $prompt,
-              idempotent: true,
-              title: $title,
-              tags: ["github-actions", "devin-review", "pull-request"]
-            }')
-
-          RESPONSE=$(curl --fail --silent --show-error \
-            --request POST \
-            --url https://api.devin.ai/v1/sessions \
-            --header "Authorization: Bearer ${DEVIN_API_KEY}" \
-            --header "Content-Type: application/json" \
-            --data "$PAYLOAD")
-
-          SESSION_ID=$(echo "$RESPONSE" | jq -r '.session_id')
-          SESSION_URL=$(echo "$RESPONSE" | jq -r '.url')
-
-          if [[ -z "$SESSION_ID" || "$SESSION_ID" == "null" || -z "$SESSION_URL" || "$SESSION_URL" == "null" ]]; then
-            echo "Unexpected Devin API response: $RESPONSE" >&2
-            exit 1
-          fi
-
-          echo "session_id=$SESSION_ID" >> "$GITHUB_OUTPUT"
-          echo "session_url=$SESSION_URL" >> "$GITHUB_OUTPUT"
-
           {
-            echo "Started Devin review session."
+            echo "Devin Review is available for PR #${PR_NUMBER}."
+            echo
+            echo "- GitHub PR: ${PR_URL}"
+            echo "- Devin Review: ${REVIEW_URL}"
             echo
-            echo "- PR: #${PR_NUMBER}"
-            echo "- Session ID: ${SESSION_ID}"
-            echo "- Session URL: ${SESSION_URL}"
+            echo "This workflow intentionally does not use DEVIN_API_KEY."
+            echo "For automatic Devin statuses or comments inside GitHub, connect the Devin GitHub integration and enable auto-review in Devin settings."
           } >> "$GITHUB_STEP_SUMMARY"

From 493d90eb6aa2557870540e3185ce679aed9fd281 Mon Sep 17 00:00:00 2001
From: Matin Mahmood <45293386+mnm-matin@users.noreply.github.com>
Date: Sun, 8 Mar 2026 14:20:32 +0100
Subject: [PATCH 6/9] Comment Devin review link on PR

---
 .github/workflows/devin-review.yml | 48 ++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/.github/workflows/devin-review.yml b/.github/workflows/devin-review.yml
index 66a92f1..3c38d5e 100644
--- a/.github/workflows/devin-review.yml
+++ b/.github/workflows/devin-review.yml
@@ -16,6 +16,7 @@ jobs:
     timeout-minutes: 5
     permissions:
       contents: read
+      issues: write
       pull-requests: read
 
     steps:
@@ -64,3 +65,50 @@ jobs:
             echo "This workflow intentionally does not use DEVIN_API_KEY."
             echo "For automatic Devin statuses or comments inside GitHub, connect the Devin GitHub integration and enable auto-review in Devin settings."
           } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upsert PR comment with Devin Review link
+        uses: actions/github-script@v8
+        env:
+          PR_NUMBER: ${{ steps.review.outputs.number }}
+          PR_URL: ${{ steps.review.outputs.pr_url }}
+          REVIEW_URL: ${{ steps.review.outputs.review_url }}
+        with:
+          script: |
+            const marker = '<!-- devin-review-link -->';
+            const issue_number = Number(process.env.PR_NUMBER);
+            const body = [
+              marker,
+              'Devin Review is available for this pull request.',
+              '',
+              `- GitHub PR: ${process.env.PR_URL}`,
+              `- Devin Review: ${process.env.REVIEW_URL}`,
+              '',
+              'This link opens the hosted Devin Review page for the current PR.',
+            ].join('\n');
+
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number,
+              per_page: 100,
+            });
+
+            const existing = comments.find((comment) => comment.body && comment.body.includes(marker));
+
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existing.id,
+                body,
+              });
+              core.info(`Updated existing Devin Review comment: ${existing.html_url}`);
+            } else {
+              const created = await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number,
+                body,
+              });
+              core.info(`Created Devin Review comment: ${created.data.html_url}`);
+            }

From 8be1df11f24e7477691d8286dabe032a0df33c57 Mon Sep 17 00:00:00 2001
From: Matin Mahmood <45293386+mnm-matin@users.noreply.github.com>
Date: Sun, 8 Mar 2026 14:24:41 +0100
Subject: [PATCH 7/9] Handle read-only workflow token

---
 .github/workflows/devin-review.yml | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/.github/workflows/devin-review.yml b/.github/workflows/devin-review.yml
index 3c38d5e..4787e8d 100644
--- a/.github/workflows/devin-review.yml
+++ b/.github/workflows/devin-review.yml
@@ -67,6 +67,8 @@ jobs:
           } >> "$GITHUB_STEP_SUMMARY"
 
       - name: Upsert PR comment with Devin Review link
+        id: comment
+        continue-on-error: true
         uses: actions/github-script@v8
         env:
           PR_NUMBER: ${{ steps.review.outputs.number }}
@@ -112,3 +114,14 @@ jobs:
               });
               core.info(`Created Devin Review comment: ${created.data.html_url}`);
             }
+
+      - name: Report comment permission limitation
+        if: ${{ steps.comment.outcome == 'failure' }}
+        run: |
+          {
+            echo
+            echo "PR comment was not posted automatically."
+            echo
+            echo "This repository currently sets GitHub Actions GITHUB_TOKEN to read-only permissions, so the workflow cannot create issue comments."
+            echo "If repository workflow permissions are changed to read/write, this step will start posting or updating the PR comment automatically."
+          } >> "$GITHUB_STEP_SUMMARY"

From 6e915eb6448a6a869550f495a884edbb941fc327 Mon Sep 17 00:00:00 2001
From: Matin Mahmood <45293386+mnm-matin@users.noreply.github.com>
Date: Sun, 8 Mar 2026 14:26:01 +0100
Subject: [PATCH 8/9] Handle comment permission warning cleanly

---
 .github/workflows/devin-review.yml | 56 ++++++++++++++++++------------
 1 file changed, 33 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/devin-review.yml b/.github/workflows/devin-review.yml
index 4787e8d..5125ed2 100644
--- a/.github/workflows/devin-review.yml
+++ b/.github/workflows/devin-review.yml
@@ -68,7 +68,6 @@ jobs:
 
       - name: Upsert PR comment with Devin Review link
         id: comment
-        continue-on-error: true
         uses: actions/github-script@v8
         env:
           PR_NUMBER: ${{ steps.review.outputs.number }}
@@ -78,6 +77,7 @@ jobs:
           script: |
             const marker = '<!-- devin-review-link -->';
             const issue_number = Number(process.env.PR_NUMBER);
+            core.setOutput('posted', 'false');
             const body = [
               marker,
               'Devin Review is available for this pull request.',
@@ -88,35 +88,45 @@ jobs:
               'This link opens the hosted Devin Review page for the current PR.',
             ].join('\n');
 
-            const comments = await github.paginate(github.rest.issues.listComments, {
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number,
-              per_page: 100,
-            });
-
-            const existing = comments.find((comment) => comment.body && comment.body.includes(marker));
-
-            if (existing) {
-              await github.rest.issues.updateComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                comment_id: existing.id,
-                body,
-              });
-              core.info(`Updated existing Devin Review comment: ${existing.html_url}`);
-            } else {
-              const created = await github.rest.issues.createComment({
+            try {
+              const comments = await github.paginate(github.rest.issues.listComments, {
                 owner: context.repo.owner,
                 repo: context.repo.repo,
                 issue_number,
-                body,
+                per_page: 100,
               });
-              core.info(`Created Devin Review comment: ${created.data.html_url}`);
+
+              const existing = comments.find((comment) => comment.body && comment.body.includes(marker));
+
+              if (existing) {
+                await github.rest.issues.updateComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  comment_id: existing.id,
+                  body,
+                });
+                core.info(`Updated existing Devin Review comment: ${existing.html_url}`);
+              } else {
+                const created = await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number,
+                  body,
+                });
+                core.info(`Created Devin Review comment: ${created.data.html_url}`);
+              }
+
+              core.setOutput('posted', 'true');
+            } catch (error) {
+              if (error && error.status === 403) {
+                core.warning('PR comment was not posted because this repository grants GitHub Actions a read-only GITHUB_TOKEN.');
+              } else {
+                throw error;
+              }
             }
 
       - name: Report comment permission limitation
-        if: ${{ steps.comment.outcome == 'failure' }}
+        if: ${{ steps.comment.outputs.posted != 'true' }}
         run: |
           {
             echo

From e493e130f742745e5d1124a1c47ab2b7489dc62b Mon Sep 17 00:00:00 2001
From: robertvava <rob@Robs-MacBook-Pro.local>
Date: Mon, 9 Mar 2026 11:55:52 +0000
Subject: [PATCH 9/9] addressing devin's comments | safeguards for minimum
 number of features

---
 src/hyperview/embeddings/pipelines.py  |  7 ++--
 src/hyperview/embeddings/projection.py | 10 ++++--
 tests/test_pca_projection.py           | 44 ++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/src/hyperview/embeddings/pipelines.py b/src/hyperview/embeddings/pipelines.py
index a46e52a..2211469 100644
--- a/src/hyperview/embeddings/pipelines.py
+++ b/src/hyperview/embeddings/pipelines.py
@@ -154,8 +154,11 @@ def compute_layout(
     if len(ids) == 0:
         raise ValueError(f"No embeddings in space '{space_key}'. Call compute_embeddings() first.")
 
-    if len(ids) < 3:
-        raise ValueError(f"Need at least 3 samples for visualization, have {len(ids)}")
+    min_samples = 3 if method == "umap" else 2
+    if len(ids) < min_samples:
+        raise ValueError(
+            f"Need at least {min_samples} samples for {method} visualization, have {len(ids)}"
+        )
 
     if method == "umap":
         layout_params = {
diff --git a/src/hyperview/embeddings/projection.py b/src/hyperview/embeddings/projection.py
index 68892e5..39ab108 100644
--- a/src/hyperview/embeddings/projection.py
+++ b/src/hyperview/embeddings/projection.py
@@ -330,12 +330,18 @@ def _pca_svd(
         # Economy SVD (full_matrices=False) is O(N * D * min(N,D))
         _U, S, Vt = np.linalg.svd(centered, full_matrices=False)
 
-        projected = centered @ Vt[:n_components].T
+        # SVD produces min(N, D) components; clamp to what's available
+        k = min(n_components, Vt.shape[0])
+        projected = centered @ Vt[:k].T
+        if k < n_components:
+            pad = np.zeros((projected.shape[0], n_components - k), dtype=projected.dtype)
+            projected = np.hstack([projected, pad])
 
         variance = S**2 / max(len(data) - 1, 1)
         total_var = variance.sum()
         if total_var > 0:
-            explained = variance[:n_components] / total_var
+            explained = np.zeros(n_components)
+            explained[:k] = variance[:k] / total_var
         else:
             explained = np.zeros(n_components)
 
diff --git a/tests/test_pca_projection.py b/tests/test_pca_projection.py
index 3581810..dfc33f4 100644
--- a/tests/test_pca_projection.py
+++ b/tests/test_pca_projection.py
@@ -180,6 +180,17 @@ def test_two_samples_minimal(self):
         coords = engine.project(data, method="pca", input_geometry="euclidean", output_geometry="euclidean")
         assert coords.shape == (2, 2)
 
+    def test_pca_svd_fewer_features_than_components(self):
+        """When D < n_components, _pca_svd should pad with zeros."""
+        engine = ProjectionEngine()
+        data = np.array([[1.0], [2.0], [3.0]])  # (3, 1) — only 1 feature
+        projected, explained = engine._pca_svd(data, n_components=2)
+        assert projected.shape == (3, 2)
+        assert explained.shape == (2,)
+        # Second component should be zero-padded
+        np.testing.assert_array_equal(projected[:, 1], 0.0)
+        assert explained[1] == 0.0
+
 
 class TestIntegrationDatasetApi:
 
@@ -282,3 +293,36 @@ def test_compute_visualization_pca_hyperboloid(self):
             assert len(vis_ids) == 20
             assert vis_coords.shape == (20, 2)
             assert np.all(np.isfinite(vis_coords))
+
+    def test_two_samples_through_pipeline(self):
+        """2-sample PCA should work end-to-end through compute_visualization."""
+        from hyperview.core.dataset import Dataset
+        from hyperview.core.sample import Sample
+
+        ds = Dataset("test_pca_2samp", persist=False)
+
+        for i in range(2):
+            ds.add_sample(Sample(id=f"s{i}", filepath=f"/fake/{i}.png", label=f"c{i}"))
+
+        space_key = "test_space"
+        ds._storage.ensure_space(
+            model_id="test",
+            dim=16,
+            config={"provider": "test", "geometry": "euclidean"},
+            space_key=space_key,
+        )
+        rng = np.random.default_rng(42)
+        ids = [f"s{i}" for i in range(2)]
+        vectors = rng.standard_normal((2, 16)).astype(np.float32)
+        ds._storage.add_embeddings(space_key, ids, vectors)
+
+        layout_key = ds.compute_visualization(
+            space_key=space_key,
+            method="pca",
+            geometry="euclidean",
+        )
+
+        assert layout_key is not None
+        vis_ids, _, vis_coords = ds.get_visualization_data(layout_key)
+        assert len(vis_ids) == 2
+        assert vis_coords.shape == (2, 2)