From c936a099fc90fc82b8452f92c44044ee939a8e32 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 30 Mar 2026 10:02:40 -0400
Subject: [PATCH 01/10] User must explicitly enable CUDA support via CMake

---
 include/boost/math/tools/config.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp
index 4829ebe36e..26a3a7f73b 100644
--- a/include/boost/math/tools/config.hpp
+++ b/include/boost/math/tools/config.hpp
@@ -11,7 +11,7 @@
 #pragma once
 #endif
 
-#ifndef __CUDACC_RTC__
+#if !(defined(__CUDACC_RTC__) && defined(BOOST_MATH_ENABLE_NVRTC))
 
 #include <boost/math/tools/is_standalone.hpp>
 
@@ -678,7 +678,7 @@ namespace boost{ namespace math{
 // CUDA support:
 //
 
-#ifdef __CUDACC__
+#if defined(__CUDACC__) && defined(BOOST_MATH_ENABLE_CUDA)
 
 // We have to get our include order correct otherwise you get compilation failures
 #include <cuda.h>

From f6ca42f320e2828e5f1e6afd6a6225a8b8606d1b Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 30 Mar 2026 10:02:52 -0400
Subject: [PATCH 02/10] Fix naked NVRTC macro check

---
 include/boost/math/special_functions/sign.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/boost/math/special_functions/sign.hpp b/include/boost/math/special_functions/sign.hpp
index 4f76522654..5d2dfc2e23 100644
--- a/include/boost/math/special_functions/sign.hpp
+++ b/include/boost/math/special_functions/sign.hpp
@@ -14,7 +14,7 @@
 #pragma once
 #endif
 
-#ifndef __CUDACC_RTC__
+#ifndef BOOST_MATH_HAS_NVRTC
 
 #include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
@@ -234,7 +234,7 @@ BOOST_MATH_GPU_ENABLED T sign(T z)
 } // namespace math
 } // namespace boost
 
-#endif // __CUDACC_RTC__
+#endif // BOOST_MATH_HAS_NVRTC
 
 #endif // BOOST_MATH_TOOLS_SIGN_HPP
 

From d6368ee45ed1c15eb8cdae059aab2a15fb7edc33 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 30 Mar 2026 10:05:19 -0400
Subject: [PATCH 03/10] Replace naked NVCC checks

---
 include/boost/math/special_functions/lanczos.hpp | 2 +-
 include/boost/math/special_functions/next.hpp    | 2 +-
 include/boost/math/tools/config.hpp              | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/boost/math/special_functions/lanczos.hpp b/include/boost/math/special_functions/lanczos.hpp
index 0ec24bddbf..409e14c5e1 100644
--- a/include/boost/math/special_functions/lanczos.hpp
+++ b/include/boost/math/special_functions/lanczos.hpp
@@ -2751,7 +2751,7 @@ struct lanczos
 } // namespace math
 } // namespace boost
 
-#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
+#if !defined(_CRAYC) && !defined(BOOST_MATH_ENABLE_CUDA) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
 #if ((defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)) && !defined(_MANAGED) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
 #include <boost/math/special_functions/detail/lanczos_sse2.hpp>
 #endif
diff --git a/include/boost/math/special_functions/next.hpp b/include/boost/math/special_functions/next.hpp
index 74c34f06ad..f73cb2f6a5 100644
--- a/include/boost/math/special_functions/next.hpp
+++ b/include/boost/math/special_functions/next.hpp
@@ -25,7 +25,7 @@
 #include <cfloat>
 
 
-#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
+#if !defined(_CRAYC) && !defined(BOOST_MATH_ENABLE_CUDA) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__)
 #include "xmmintrin.h"
 #define BOOST_MATH_CHECK_SSE2
diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp
index 26a3a7f73b..c97dbdede2 100644
--- a/include/boost/math/tools/config.hpp
+++ b/include/boost/math/tools/config.hpp
@@ -168,7 +168,7 @@
 #    define BOOST_MATH_NOINLINE __declspec(noinline)
 #  elif defined(__GNUC__) && __GNUC__ > 3
      // Clang also defines __GNUC__ (as 4)
-#    if defined(__CUDACC__)
+#    if defined(__CUDACC__) && defined(BOOST_MATH_ENABLE_CUDA)
        // nvcc doesn't always parse __noinline__,
        // see: https://svn.boost.org/trac/boost/ticket/9392
 #      define BOOST_MATH_NOINLINE __attribute__ ((noinline))

From b0cef13e8e63d83363d1cd4c92740ae76512227d Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 30 Mar 2026 10:09:55 -0400
Subject: [PATCH 04/10] Add test set

---
 test/cuda_jamfile         |   3 ++
 test/github_issue_1383.cu | 111 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 test/github_issue_1383.cu

diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index e4d7505da8..b9ebf8d049 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -9,6 +9,9 @@ project : requirements
     [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
     ;
 
+# Github Issues
+run github_issue_1383.cu ;
+
 # Quad
 run test_exp_sinh_quad_float.cu ;
 run test_exp_sinh_quad_double.cu ;
diff --git a/test/github_issue_1383.cu b/test/github_issue_1383.cu
new file mode 100644
index 0000000000..4ec82e785f
--- /dev/null
+++ b/test/github_issue_1383.cu
@@ -0,0 +1,111 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+#  undef BOOST_MATH_ENABLE_CUDA
+#endif // BOOST_MATH_ENABLE_CUDA
+
+// Purposefully pull in headers that caused errors in the linked issue
+#include <boost/math/special_functions.hpp>
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cmath>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+#  error "We should not be enabling this ourselves"
+#endif // BOOST_MATH_ENABLE_CUDA
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+
+    if (i < numElements)
+    {
+        out[i] = cos(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main()
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(std::cos(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}

From 61e689d98435a8999c09eeabc0fb4358bd2b9530 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 30 Mar 2026 10:15:38 -0400
Subject: [PATCH 05/10] Fix deprecated find package in CML

---
 test/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 4715cf379b..9d673adc7d 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -10,13 +10,13 @@ if(HAVE_BOOST_TEST)
 
         message(STATUS "Building boost.math with CUDA")
 
-        find_package(CUDA REQUIRED)
         enable_language(CUDA)
+        find_package(CUDAToolkit REQUIRED)
         set(CMAKE_CUDA_EXTENSIONS OFF)
 
         enable_testing()
 
-        boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::throw_exception Boost::unit_test_framework ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS}  )
+        boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::throw_exception Boost::unit_test_framework CUDA::cudart COMPILE_DEFINITIONS BOOST_MATH_ENABLE_CUDA=1  )
 
     elseif (BOOST_MATH_ENABLE_NVRTC)
 

From e45257161113be0f0e1fc8a239c5999303702cfa Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 30 Mar 2026 10:17:06 -0400
Subject: [PATCH 06/10] Update CI script

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 578fb5e527..5e91faa944 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -820,7 +820,7 @@ jobs:
         run: |
           cd ../boost-root
           mkdir __build__ && cd __build__
-          cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=86 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 ..
+          cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCMAKE_CUDA_STANDARD=17 ..
       - name: Build tests
         run: |
           cd ../boost-root/__build__

From 55d82b06d479e8f30a50c69ab2a746c2cd56b46c Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 30 Mar 2026 10:20:17 -0400
Subject: [PATCH 07/10] Fix copy-paste error

---
 test/github_issue_1383.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/github_issue_1383.cu b/test/github_issue_1383.cu
index 4ec82e785f..b3544077de 100644
--- a/test/github_issue_1383.cu
+++ b/test/github_issue_1383.cu
@@ -36,6 +36,7 @@ typedef double float_type;
 __global__ void cuda_test(const float_type *in, float_type *out, int numElements)
 {
     using std::cos;
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
 
     if (i < numElements)
     {

From ecf9fc1afb8c4c1c834e1e63878776e8b96427de Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 30 Mar 2026 10:39:46 -0400
Subject: [PATCH 08/10] Use more internal functions

---
 test/github_issue_1383.cu | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/test/github_issue_1383.cu b/test/github_issue_1383.cu
index b3544077de..119f5d1d5e 100644
--- a/test/github_issue_1383.cu
+++ b/test/github_issue_1383.cu
@@ -12,11 +12,13 @@
 #include <boost/math/special_functions.hpp>
 #include <boost/math/tools/config.hpp>
 #include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/constants/constants.hpp>
 
 #include <iostream>
 #include <iomanip>
 #include <vector>
 #include <cmath>
+#include <random>
 #include "cuda_managed_ptr.hpp"
 #include "stopwatch.hpp"
 
@@ -63,9 +65,15 @@ int main()
     cuda_managed_ptr<float_type> output_vector(numElements);
 
     // Initialize the input vectors
+    // Check some of our numeric_limits for viability
+    std::mt19937_64 rng {42};
+    std::uniform_real_distribution<float_type> dist(0, boost::math::constants::pi<float_type>());
+    static_assert(boost::math::numeric_limits<float_type>::is_specialized, "Should be since it's a double");
+    static_assert(boost::math::numeric_limits<float_type>::is_signed, "Should be since it's a double");
+
     for (int i = 0; i < numElements; ++i)
     {
-        input_vector[i] = rand()/(float_type)RAND_MAX;
+        input_vector[i] = dist(rng);
     }
 
     // Launch the Vector Add CUDA Kernel

From 936df78a477e2e72969bac5686420b5811f72646 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 30 Mar 2026 10:42:46 -0400
Subject: [PATCH 09/10] Add additional test this time with math's CUDA support

---
 test/cuda_jamfile              |   1 +
 test/github_issue_1383_pt_2.cu | 116 +++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 test/github_issue_1383_pt_2.cu

diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index b9ebf8d049..f2f2c9aa82 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -11,6 +11,7 @@ project : requirements
 
 # Github Issues
 run github_issue_1383.cu ;
+run github_issue_1383_pt_2.cu ;
 
 # Quad
 run test_exp_sinh_quad_float.cu ;
diff --git a/test/github_issue_1383_pt_2.cu b/test/github_issue_1383_pt_2.cu
new file mode 100644
index 0000000000..8e746abfce
--- /dev/null
+++ b/test/github_issue_1383_pt_2.cu
@@ -0,0 +1,116 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// Purposefully pull in headers that caused errors in the linked issue
+#include <boost/math/special_functions.hpp>
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/constants/constants.hpp>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cmath>
+#include <random>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cos(in[i]);
+        if (out[i] > boost::math::numeric_limits<float_type>::max() || !boost::math::numeric_limits<float_type>::is_signed)
+        {
+            __trap();
+        }
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main()
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    // Check some of our numeric_limits for viability
+    std::mt19937_64 rng {42};
+    std::uniform_real_distribution<float_type> dist(0, boost::math::constants::pi<float_type>());
+    static_assert(boost::math::numeric_limits<float_type>::is_specialized, "Should be since it's a double");
+    static_assert(boost::math::numeric_limits<float_type>::is_signed, "Should be since it's a double");
+
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = dist(rng);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(std::cos(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}

From 1d9e9279da978bc3cd68f5f4f25d3d5cb9f69945 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 30 Mar 2026 10:45:00 -0400
Subject: [PATCH 10/10] Change macro definition

---
 include/boost/math/tools/config.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp
index c97dbdede2..40346c9b0f 100644
--- a/include/boost/math/tools/config.hpp
+++ b/include/boost/math/tools/config.hpp
@@ -774,7 +774,7 @@ BOOST_MATH_GPU_ENABLED constexpr T gpu_safe_max(const T& a, const T& b) { return
 #    define BOOST_MATH_STATIC_LOCAL_VARIABLE
 #  else
 #    define BOOST_MATH_INLINE_CONSTEXPR constexpr
-#    define BOOST_MATH_STATIC constexpr
+#    define BOOST_MATH_STATIC static
 #    define BOOST_MATH_STATIC_LOCAL_VARIABLE static
 #  endif
 #endif