pymc-devs · jessegrabowski · May 15, 2026 · ricardoV94 · May 15, 2026 · jessegrabowski
diff --git a/pytensor/link/numba/dispatch/linalg/_LAPACK.py b/pytensor/link/numba/dispatch/linalg/_LAPACK.py
@@ -1832,3 +1832,206 @@ def hegvd(
             )
 
         return hegvd
+
+    @classmethod
+    def numba_xgesvd(cls, dtype) -> CPUDispatcher:
+        """
+        Build the numba-jitted caller for LAPACK xGESVD, which computes the singular
+        value decomposition of a general M-by-N matrix with the QR-based algorithm.
+
+        xGESVD is the driver scipy.linalg.svd uses with lapack_driver='gesvd'; note
+        that numpy.linalg.svd always goes through xGESDD, not this routine.
+        """
+        kind = get_blas_kind(dtype)
+        float_ptr = _get_nb_float_from_dtype(kind)
+        is_complex = isinstance(dtype, Complex)
+        real_ptr = nb_f64p if dtype is nb_c128 else nb_f32p  # complex branch only
+        unique_func_name = f"scipy.lapack.{kind}gesvd"
+
+        @numba_basic.numba_njit
+        def get_gesvd_pointer():
+            with numba.objmode(ptr=types.intp):  # pointer lookup runs in object mode
+                ptr = get_lapack_ptr(dtype, "gesvd")
+            return ptr
+
+        if is_complex:  # complex signature: S is real_ptr and RWORK is added
+            gesvd_function_type = types.FunctionType(
+                types.void(
+                    nb_i32p,  # JOBU
+                    nb_i32p,  # JOBVT
+                    nb_i32p,  # M
+                    nb_i32p,  # N
+                    float_ptr,  # A
+                    nb_i32p,  # LDA
+                    real_ptr,  # S
+                    float_ptr,  # U
+                    nb_i32p,  # LDU
+                    float_ptr,  # VT
+                    nb_i32p,  # LDVT
+                    float_ptr,  # WORK
+                    nb_i32p,  # LWORK
+                    real_ptr,  # RWORK
+                    nb_i32p,  # INFO
+                )
+            )
+
+            @numba_basic.numba_njit
+            def gesvd(
+                JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK, INFO
+            ):
+                fn = _call_cached_ptr(  # presumably cached by name; TODO confirm
+                    get_ptr_func=get_gesvd_pointer,
+                    func_type_ref=gesvd_function_type,
+                    unique_func_name_lit=unique_func_name,
+                )
+                fn(
+                    JOBU,
+                    JOBVT,
+                    M,
+                    N,
+                    A,
+                    LDA,
+                    S,
+                    U,
+                    LDU,
+                    VT,
+                    LDVT,
+                    WORK,
+                    LWORK,
+                    RWORK,
+                    INFO,
+                )
+
+        else:  # real signature: S uses float_ptr and there is no RWORK
+            gesvd_function_type = types.FunctionType(
+                types.void(
+                    nb_i32p,  # JOBU
+                    nb_i32p,  # JOBVT
+                    nb_i32p,  # M
+                    nb_i32p,  # N
+                    float_ptr,  # A
+                    nb_i32p,  # LDA
+                    float_ptr,  # S
+                    float_ptr,  # U
+                    nb_i32p,  # LDU
+                    float_ptr,  # VT
+                    nb_i32p,  # LDVT
+                    float_ptr,  # WORK
+                    nb_i32p,  # LWORK
+                    nb_i32p,  # INFO
+                )
+            )
+
+            @numba_basic.numba_njit
+            def gesvd(
+                JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO
+            ):
+                fn = _call_cached_ptr(  # presumably cached by name; TODO confirm
+                    get_ptr_func=get_gesvd_pointer,
+                    func_type_ref=gesvd_function_type,
+                    unique_func_name_lit=unique_func_name,
+                )
+                fn(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO)
+
+        return gesvd
+
+    @classmethod
+    def numba_xgesdd(cls, dtype) -> CPUDispatcher:
+        """
+        Build the numba-jitted caller for LAPACK xGESDD, which computes the singular
+        value decomposition of a general M-by-N matrix by divide and conquer.
+
+        xGESDD is the default driver of scipy.linalg.svd and backs numpy.linalg.svd.
+        """
+        kind = get_blas_kind(dtype)
+        float_ptr = _get_nb_float_from_dtype(kind)
+        is_complex = isinstance(dtype, Complex)
+        real_ptr = nb_f64p if dtype is nb_c128 else nb_f32p  # complex branch only
+        unique_func_name = f"scipy.lapack.{kind}gesdd"
+
+        @numba_basic.numba_njit
+        def get_gesdd_pointer():
+            with numba.objmode(ptr=types.intp):  # pointer lookup runs in object mode
+                ptr = get_lapack_ptr(dtype, "gesdd")
+            return ptr
+
+        if is_complex:  # complex signature: S is real_ptr and RWORK is added
+            gesdd_function_type = types.FunctionType(
+                types.void(
+                    nb_i32p,  # JOBZ
+                    nb_i32p,  # M
+                    nb_i32p,  # N
+                    float_ptr,  # A
+                    nb_i32p,  # LDA
+                    real_ptr,  # S
+                    float_ptr,  # U
+                    nb_i32p,  # LDU
+                    float_ptr,  # VT
+                    nb_i32p,  # LDVT
+                    float_ptr,  # WORK
+                    nb_i32p,  # LWORK
+                    real_ptr,  # RWORK
+                    nb_i32p,  # IWORK
+                    nb_i32p,  # INFO
+                )
+            )
+
+            @numba_basic.numba_njit
+            def gesdd(
+                JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK, IWORK, INFO
+            ):
+                fn = _call_cached_ptr(  # presumably cached by name; TODO confirm
+                    get_ptr_func=get_gesdd_pointer,
+                    func_type_ref=gesdd_function_type,
+                    unique_func_name_lit=unique_func_name,
+                )
+                fn(
+                    JOBZ,
+                    M,
+                    N,
+                    A,
+                    LDA,
+                    S,
+                    U,
+                    LDU,
+                    VT,
+                    LDVT,
+                    WORK,
+                    LWORK,
+                    RWORK,
+                    IWORK,
+                    INFO,
+                )
+
+        else:  # real signature: S uses float_ptr and there is no RWORK
+            gesdd_function_type = types.FunctionType(
+                types.void(
+                    nb_i32p,  # JOBZ
+                    nb_i32p,  # M
+                    nb_i32p,  # N
+                    float_ptr,  # A
+                    nb_i32p,  # LDA
+                    float_ptr,  # S
+                    float_ptr,  # U
+                    nb_i32p,  # LDU
+                    float_ptr,  # VT
+                    nb_i32p,  # LDVT
+                    float_ptr,  # WORK
+                    nb_i32p,  # LWORK
+                    nb_i32p,  # IWORK
+                    nb_i32p,  # INFO
+                )
+            )
+
+            @numba_basic.numba_njit
+            def gesdd(
+                JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, IWORK, INFO
+            ):
+                fn = _call_cached_ptr(  # presumably cached by name; TODO confirm
+                    get_ptr_func=get_gesdd_pointer,
+                    func_type_ref=gesdd_function_type,
+                    unique_func_name_lit=unique_func_name,
+                )
+                fn(JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, IWORK, INFO)
+
+        return gesdd
diff --git a/pytensor/link/numba/dispatch/linalg/decomposition/dispatch.py b/pytensor/link/numba/dispatch/linalg/decomposition/dispatch.py
@@ -43,6 +43,10 @@
     schur_complex,
     schur_real,
 )
+from pytensor.link.numba.dispatch.linalg.decomposition.svd import (
+    _svd_gesdd_full,
+    _svd_gesdd_no_uv,
+)
 from pytensor.tensor.linalg.decomposition.cholesky import Cholesky
 from pytensor.tensor.linalg.decomposition.eigen import Eig, Eigh, Eigvalsh
 from pytensor.tensor.linalg.decomposition.lu import LU, LUFactor, PivotToPermutations
@@ -61,14 +65,14 @@ def numba_funcify_SVD(op, node, **kwargs):
     if discrete_input and config.compiler_verbose:
         print("SVD requires casting discrete input to float")  # noqa: T201
 
-    # np.linalg.svd always returns real-valued singular values, even for complex input.
-    # The Op may declare s as complex (matching input dtype), but numba returns the real
-    # component dtype, so we must match that to avoid type unification errors.
+    # Casting discrete input to float allocates a new buffer, so in-place is moot.
+    effective_overwrite_a = op.overwrite_a and not discrete_input
+
     matrix_dtype = out_dtype
-    if out_dtype.kind == "c":
-        s_dtype = np.dtype(f"f{out_dtype.itemsize // 2}")
-    else:
-        s_dtype = out_dtype
+    # SVD declares S with the real component dtype via linalg_real_output_dtype,
+    # so the s output's own dtype is the right answer for both real and complex
+    # input.
+    s_dtype = np.dtype(node.outputs[1 if compute_uv else 0].dtype)
 
     if not compute_uv:
 
@@ -80,8 +84,7 @@ def svd(x):
                 return np.zeros((k,), dtype=s_dtype)
             if discrete_input:
                 x = x.astype(out_dtype)
-            _, ret, _ = np.linalg.svd(x, full_matrices)
-            return ret
+            return _svd_gesdd_no_uv(x, overwrite_a=effective_overwrite_a)
 
     else:
 
@@ -90,8 +93,8 @@ def svd(x):
             if x.size == 0:
                 m, n = x.shape
                 k = min(m, n)
-                # The LAPACK dispatch returns matrices in fortran order. To match this for the empty cases,
-                # build flip the shape inputs to np.zeros and transpose.
+                # LAPACK returns matrices in fortran order; build the empty
+                # returns with reversed shape + transpose to match.
                 if full_matrices:
                     return (
                         np.zeros((m, m), dtype=matrix_dtype).T,
@@ -106,9 +109,13 @@ def svd(x):
                     )
             if discrete_input:
                 x = x.astype(out_dtype)
-            return np.linalg.svd(x, full_matrices)
+            return _svd_gesdd_full(
+                x,
+                full_matrices=full_matrices,
+                overwrite_a=effective_overwrite_a,
+            )
 
-    cache_version = 1
+    cache_version = 2
     return svd, cache_version