From c936a099fc90fc82b8452f92c44044ee939a8e32 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 30 Mar 2026 10:02:40 -0400 Subject: [PATCH 01/10] User must explicitly enable CUDA support via CMake --- include/boost/math/tools/config.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp index 4829ebe36e..26a3a7f73b 100644 --- a/include/boost/math/tools/config.hpp +++ b/include/boost/math/tools/config.hpp @@ -11,7 +11,7 @@ #pragma once #endif -#ifndef __CUDACC_RTC__ +#if !(defined(__CUDACC_RTC__) && defined(BOOST_MATH_ENABLE_NVRTC)) #include @@ -678,7 +678,7 @@ namespace boost{ namespace math{ // CUDA support: // -#ifdef __CUDACC__ +#if defined(__CUDACC__) && defined(BOOST_MATH_ENABLE_CUDA) // We have to get our include order correct otherwise you get compilation failures #include From f6ca42f320e2828e5f1e6afd6a6225a8b8606d1b Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 30 Mar 2026 10:02:52 -0400 Subject: [PATCH 02/10] Fix naked NVRTC macro check --- include/boost/math/special_functions/sign.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/math/special_functions/sign.hpp b/include/boost/math/special_functions/sign.hpp index 4f76522654..5d2dfc2e23 100644 --- a/include/boost/math/special_functions/sign.hpp +++ b/include/boost/math/special_functions/sign.hpp @@ -14,7 +14,7 @@ #pragma once #endif -#ifndef __CUDACC_RTC__ +#ifndef BOOST_MATH_HAS_NVRTC #include #include @@ -234,7 +234,7 @@ BOOST_MATH_GPU_ENABLED T sign(T z) } // namespace math } // namespace boost -#endif // __CUDACC_RTC__ +#endif // BOOST_MATH_HAS_NVRTC #endif // BOOST_MATH_TOOLS_SIGN_HPP From d6368ee45ed1c15eb8cdae059aab2a15fb7edc33 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 30 Mar 2026 10:05:19 -0400 Subject: [PATCH 03/10] Replace naked NVCC checks --- include/boost/math/special_functions/lanczos.hpp | 2 +- include/boost/math/special_functions/next.hpp | 2 +- include/boost/math/tools/config.hpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/boost/math/special_functions/lanczos.hpp b/include/boost/math/special_functions/lanczos.hpp index 0ec24bddbf..409e14c5e1 100644 --- a/include/boost/math/special_functions/lanczos.hpp +++ b/include/boost/math/special_functions/lanczos.hpp @@ -2751,7 +2751,7 @@ struct lanczos } // namespace math } // namespace boost -#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3))) +#if !defined(_CRAYC) && !defined(BOOST_MATH_ENABLE_CUDA) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3))) #if ((defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)) && !defined(_MANAGED) && !defined(BOOST_MATH_HAS_GPU_SUPPORT) #include #endif diff --git a/include/boost/math/special_functions/next.hpp b/include/boost/math/special_functions/next.hpp index 74c34f06ad..f73cb2f6a5 100644 --- a/include/boost/math/special_functions/next.hpp +++ b/include/boost/math/special_functions/next.hpp @@ -25,7 +25,7 @@ #include -#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3))) +#if !defined(_CRAYC) && !defined(BOOST_MATH_ENABLE_CUDA) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3))) #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) #include "xmmintrin.h" #define BOOST_MATH_CHECK_SSE2 diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp index 26a3a7f73b..c97dbdede2 100644 --- a/include/boost/math/tools/config.hpp +++ b/include/boost/math/tools/config.hpp @@ -168,7 +168,7 @@ # define BOOST_MATH_NOINLINE __declspec(noinline) # elif defined(__GNUC__) && __GNUC__ > 3 // Clang also defines __GNUC__ (as 4) -# if defined(__CUDACC__) +# if defined(__CUDACC__) && defined(BOOST_MATH_ENABLE_CUDA) // nvcc doesn't always parse __noinline__, // see: https://svn.boost.org/trac/boost/ticket/9392 # define BOOST_MATH_NOINLINE __attribute__ ((noinline)) From b0cef13e8e63d83363d1cd4c92740ae76512227d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 30 Mar 2026 10:09:55 -0400 Subject: [PATCH 04/10] Add test set --- test/cuda_jamfile | 3 ++ test/github_issue_1383.cu | 111 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 test/github_issue_1383.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index e4d7505da8..b9ebf8d049 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -9,6 +9,9 @@ project : requirements [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ] ; +# Github Issues +run github_issue_1383.cu ; + # Quad run test_exp_sinh_quad_float.cu ; run test_exp_sinh_quad_double.cu ; diff --git a/test/github_issue_1383.cu b/test/github_issue_1383.cu new file mode 100644 index 0000000000..4ec82e785f --- /dev/null +++ b/test/github_issue_1383.cu @@ -0,0 +1,111 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifdef BOOST_MATH_ENABLE_CUDA +# undef BOOST_MATH_ENABLE_CUDA +#endif // BOOST_MATH_ENABLE_CUDA + +// Purposefully pull in headers that caused errors in the linked issue +#include +#include +#include + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#ifdef BOOST_MATH_ENABLE_CUDA +# error "We should not be enabling this ourselves" +#endif // BOOST_MATH_ENABLE_CUDA + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + + if (i < numElements) + { + out[i] = cos(in[i]); + } +} + +/** + * Host main routine + */ +int main() +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(std::cos(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 61e689d98435a8999c09eeabc0fb4358bd2b9530 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 30 Mar 2026 10:15:38 -0400 Subject: [PATCH 05/10] Fix deprecated find package in CML --- test/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4715cf379b..9d673adc7d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -10,13 +10,13 @@ if(HAVE_BOOST_TEST) message(STATUS "Building boost.math with CUDA") - find_package(CUDA REQUIRED) enable_language(CUDA) + find_package(CUDAToolkit REQUIRED) set(CMAKE_CUDA_EXTENSIONS OFF) enable_testing() - boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::throw_exception Boost::unit_test_framework ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} ) + boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::throw_exception Boost::unit_test_framework CUDA::cudart COMPILE_DEFINITIONS BOOST_MATH_ENABLE_CUDA=1 ) elseif (BOOST_MATH_ENABLE_NVRTC) From e45257161113be0f0e1fc8a239c5999303702cfa Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 30 Mar 2026 10:17:06 -0400 Subject: [PATCH 06/10] Update CI script --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 578fb5e527..5e91faa944 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -820,7 +820,7 @@ jobs: run: | cd ../boost-root mkdir __build__ && cd __build__ - cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=86 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 .. + cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCMAKE_CUDA_STANDARD=17 .. - name: Build tests run: | cd ../boost-root/__build__ From 55d82b06d479e8f30a50c69ab2a746c2cd56b46c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 30 Mar 2026 10:20:17 -0400 Subject: [PATCH 07/10] Fix copy-paste error --- test/github_issue_1383.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/test/github_issue_1383.cu b/test/github_issue_1383.cu index 4ec82e785f..b3544077de 100644 --- a/test/github_issue_1383.cu +++ b/test/github_issue_1383.cu @@ -36,6 +36,7 @@ typedef double float_type; __global__ void cuda_test(const float_type *in, float_type *out, int numElements) { using std::cos; + const int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < numElements) { From ecf9fc1afb8c4c1c834e1e63878776e8b96427de Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 30 Mar 2026 10:39:46 -0400 Subject: [PATCH 08/10] Use more internal functions --- test/github_issue_1383.cu | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/github_issue_1383.cu b/test/github_issue_1383.cu index b3544077de..119f5d1d5e 100644 --- a/test/github_issue_1383.cu +++ b/test/github_issue_1383.cu @@ -12,11 +12,13 @@ #include #include #include +#include #include #include #include #include +#include #include "cuda_managed_ptr.hpp" #include "stopwatch.hpp" @@ -63,9 +65,15 @@ int main() cuda_managed_ptr output_vector(numElements); // Initialize the input vectors + // Check some of our numeric_limits for viability + std::mt19937_64 rng {42}; + std::uniform_real_distribution dist(0, boost::math::constants::pi()); + static_assert(boost::math::numeric_limits::is_specialized, "Should be since it's a double"); + static_assert(boost::math::numeric_limits::is_signed, "Should be since it's a double"); + for (int i = 0; i < numElements; ++i) { - input_vector[i] = rand()/(float_type)RAND_MAX; + input_vector[i] = dist(rng); } // Launch the Vector Add CUDA Kernel From 936df78a477e2e72969bac5686420b5811f72646 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 30 Mar 2026 10:42:46 -0400 Subject: [PATCH 09/10] Add additional test this time with math's CUDA support --- test/cuda_jamfile | 1 + test/github_issue_1383_pt_2.cu | 116 +++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 test/github_issue_1383_pt_2.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index b9ebf8d049..f2f2c9aa82 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -11,6 +11,7 @@ project : requirements # Github Issues run github_issue_1383.cu ; +run github_issue_1383_pt_2.cu ; # Quad run test_exp_sinh_quad_float.cu ; diff --git a/test/github_issue_1383_pt_2.cu b/test/github_issue_1383_pt_2.cu new file mode 100644 index 0000000000..8e746abfce --- /dev/null +++ b/test/github_issue_1383_pt_2.cu @@ -0,0 +1,116 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// Purposefully pull in headers that caused errors in the linked issue +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + const int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cos(in[i]); + if (out[i] > boost::math::numeric_limits::max() || !boost::math::numeric_limits::is_signed) + { + __trap(); + } + } +} + +/** + * Host main routine + */ +int main() +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + // Check some of our numeric_limits for viability + std::mt19937_64 rng {42}; + std::uniform_real_distribution dist(0, boost::math::constants::pi()); + static_assert(boost::math::numeric_limits::is_specialized, "Should be since it's a double"); + static_assert(boost::math::numeric_limits::is_signed, "Should be since it's a double"); + + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(std::cos(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 1d9e9279da978bc3cd68f5f4f25d3d5cb9f69945 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Mon, 30 Mar 2026 10:45:00 -0400 Subject: [PATCH 10/10] Change macro definition --- include/boost/math/tools/config.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp index c97dbdede2..40346c9b0f 100644 --- a/include/boost/math/tools/config.hpp +++ b/include/boost/math/tools/config.hpp @@ -774,7 +774,7 @@ BOOST_MATH_GPU_ENABLED constexpr T gpu_safe_max(const T& a, const T& b) { return # define BOOST_MATH_STATIC_LOCAL_VARIABLE # else # define BOOST_MATH_INLINE_CONSTEXPR constexpr -# define BOOST_MATH_STATIC constexpr +# define BOOST_MATH_STATIC static # define BOOST_MATH_STATIC_LOCAL_VARIABLE static # endif #endif