Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
dc932cc
Support BOUT_FOR_RAJA GPU field operators
ggeorgakoudis Feb 25, 2025
b83d2b5
Working WIP
ggeorgakoudis May 27, 2025
9708709
WIP 2
ggeorgakoudis May 28, 2025
e20da9d
Working - WIP 3
ggeorgakoudis May 28, 2025
096f576
WIP 3
ggeorgakoudis May 28, 2025
34cba4d
WIP 4
ggeorgakoudis May 28, 2025
7d75b9d
Used managed array for indices
ggeorgakoudis May 28, 2025
b8e7e97
Better SFINAE for specializations
ggeorgakoudis May 28, 2025
d830f8d
Fix major bug in the binary expr operator() and add operators
ggeorgakoudis May 29, 2025
c5f9fd6
WIP
ggeorgakoudis May 30, 2025
4d64ad2
More operators
ggeorgakoudis May 31, 2025
5dfa66a
Add more operators
ggeorgakoudis May 31, 2025
026645a
More operators
ggeorgakoudis Jun 1, 2025
338920e
More operators
ggeorgakoudis Jun 1, 2025
6e88181
More operators and cleanup
ggeorgakoudis Jun 1, 2025
25d1272
More operators and cleanup
ggeorgakoudis Jun 1, 2025
56a8678
Cleanup
ggeorgakoudis Jun 1, 2025
3e3a0ea
Add __host__ to make evaluator host-callable, remove offset
ggeorgakoudis Jun 3, 2025
56fe675
Update
ggeorgakoudis Jun 4, 2025
b75103d
Add FFT GPU shiftZ
ggeorgakoudis Oct 24, 2025
40974a9
Fixup: add twiddle header in cmake
ggeorgakoudis Oct 24, 2025
ba5eabf
Default to pinned memory for performance
ggeorgakoudis Oct 24, 2025
c079bb6
Fixup: remove unused twiddles
ggeorgakoudis Oct 24, 2025
932f156
Fixup: cleanup and run GPU fft on its own stream
ggeorgakoudis Oct 24, 2025
b6c738c
Fixup: remove comments, avoid temp for inverse
ggeorgakoudis Oct 24, 2025
11ebfcd
Fixup: preprocessor guards, better variable naming
ggeorgakoudis Oct 24, 2025
cf5a882
Fixup: saner split with BOUT_HAS_CUDA
ggeorgakoudis Oct 24, 2025
9f42dcb
Fixup: remove redundant conditional
ggeorgakoudis Oct 25, 2025
35e6f42
Use streams to reduce synchronization overhead
ggeorgakoudis Nov 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ set(BOUT_SOURCES
./include/bout/sys/range.hxx
./include/bout/sys/timer.hxx
./include/bout/sys/type_name.hxx
./include/bout/twiddle.hxx
./include/bout/sys/uncopyable.hxx
./include/bout/sys/uuid.h
./include/bout/sys/variant.hxx
Expand Down Expand Up @@ -239,7 +240,7 @@ set(BOUT_SOURCES
./include/bout/invert/laplacexy2.hxx
./src/invert/laplacexy2/laplacexy2.cxx
./include/bout/invert/laplacexy2_hypre.hxx
./src/invert/laplacexy2/laplacexy2_hypre.cxx
./src/invert/laplacexy2/laplacexy2_hypre.cxx
./src/invert/laplacexz/impls/cyclic/laplacexz-cyclic.cxx
./src/invert/laplacexz/impls/cyclic/laplacexz-cyclic.hxx
./src/invert/laplacexz/impls/petsc/laplacexz-petsc.cxx
Expand Down Expand Up @@ -386,8 +387,15 @@ if (BOUT_GENERATE_FIELDOPS)
if (NOT ClangFormat_FOUND)
message(FATAL_ERROR "clang-format not found, but you have requested to generate code!")
endif()
if (BOUT_ENABLE_RAJA)
set(GEN_LOOP_EXEC "raja")
elseif (BOUT_ENABLE_OPENMP)
set(GEN_LOOP_EXEC "openmp")
else()
set(GEN_LOOP_EXEC "serial")
endif()
add_custom_command( OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/field/generated_fieldops.cxx
COMMAND ${Python3_EXECUTABLE} gen_fieldops.py --filename generated_fieldops.cxx.tmp
COMMAND ${Python3_EXECUTABLE} gen_fieldops.py --loop-exec ${GEN_LOOP_EXEC} --filename generated_fieldops.cxx.tmp
COMMAND ${ClangFormat_BIN} generated_fieldops.cxx.tmp -i
COMMAND ${CMAKE_COMMAND} -E rename generated_fieldops.cxx.tmp generated_fieldops.cxx
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/field/gen_fieldops.jinja ${CMAKE_CURRENT_SOURCE_DIR}/src/field/gen_fieldops.py
Expand Down Expand Up @@ -518,7 +526,7 @@ if (BOUT_ENABLE_WARNINGS)
$<$<OR:$<CXX_COMPILER_ID:GNU>,$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>:
-Wall -Wextra > >
$<$<CXX_COMPILER_ID:MSVC>:
/W4 >
/W4 >
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wall -Xcompiler=-Wextra >
)

Expand Down
8 changes: 5 additions & 3 deletions examples/elm-pb-outerloop/elm_pb_outerloop.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1031,7 +1031,8 @@ class ELMpb : public PhysicsModel {
vacuum_trans *= pnorm;

// Transitions from 0 in core to 1 in vacuum
vac_mask = (1.0 - tanh((P0 - vacuum_pressure) / vacuum_trans)) / 2.0;
Field2D tanh_res = tanh((P0 - vacuum_pressure) / vacuum_trans);
vac_mask = (1.0 - tanh_res) / 2.0;

if (spitzer_resist) {
// Use Spitzer resistivity
Expand Down Expand Up @@ -1213,7 +1214,7 @@ class ELMpb : public PhysicsModel {
// Only if not restarting: Check initial perturbation

// Set U to zero where P0 < vacuum_pressure
U = where(P0 - vacuum_pressure, U, 0.0);
U = where(Field2D{P0 - vacuum_pressure}, U, 0.0);

if (constn0) {
ubyn = U;
Expand Down Expand Up @@ -1840,7 +1841,8 @@ class ELMpb : public PhysicsModel {
ddt(U) -= 0.5 * Upara2 * bracket(Pi0, Dperp2Phi, bm_exb) / B0;
Field3D B0phi = B0 * phi;
mesh->communicate(B0phi);
Field3D B0phi0 = B0 * phi0;
Field2D res = B0 * phi0;
Field3D B0phi0 = res;
mesh->communicate(B0phi0);
ddt(U) += 0.5 * Upara2 * bracket(B0phi, Dperp2Pi0, bm_exb) / B0;
ddt(U) += 0.5 * Upara2 * bracket(B0phi0, Dperp2Pi, bm_exb) / B0;
Expand Down
1 change: 1 addition & 0 deletions include/bout/array.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ struct ArrayData {
auto& rm = umpire::ResourceManager::getInstance();
#if BOUT_HAS_CUDA
auto allocator = rm.getAllocator(umpire::resource::Pinned);
//auto allocator = rm.getAllocator(umpire::resource::Unified);
#else
auto allocator = rm.getAllocator("HOST");
#endif
Expand Down
1 change: 1 addition & 0 deletions include/bout/assert.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
if (!(condition)) { \
throw BoutException("Assertion failed in {:s}, line {:d}: {:s}", __FILE__, __LINE__, \
#condition); \
abort(); \
}
#else // CHECKLEVEL >= 1
#define ASSERT1(condition)
Expand Down
14 changes: 13 additions & 1 deletion include/bout/bout_types.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu
*
* Contact Ben Dudson, bd512@york.ac.uk
*
*
* This file is part of BOUT++.
*
* BOUT++ is free software: you can redistribute it and/or modify
Expand Down Expand Up @@ -140,4 +140,16 @@ struct enumWrapper {
/// Boundary condition function
using FuncPtr = BoutReal (*)(BoutReal t, BoutReal x, BoutReal y, BoutReal z);

template<typename T>
struct Constant {
T val;
struct View {
T v;
cudaStream_t stream = 0;
View(T v) : v(v) {}
__host__ __device__ T operator()(int) const { return v; }
};
operator View() const { return {val}; }
};

#endif // BOUT_TYPES_H
2 changes: 1 addition & 1 deletion include/bout/coordinates_accessor.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
/// -> If Coordinates data is changed, the cache should be cleared
/// by calling CoordinatesAccessor::clear()
struct CoordinatesAccessor {
CoordinatesAccessor() = delete;
CoordinatesAccessor() {}

/// Constructor from Coordinates
/// Copies data from coords, doesn't modify it
Expand Down
52 changes: 41 additions & 11 deletions include/bout/field.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ class Field;
#include <bout/globals.hxx>
#include <bout/rvec.hxx>

#include "bout/fieldops.hxx"

class Mesh;

/// Base class for scalar fields
Expand Down Expand Up @@ -327,6 +329,12 @@ inline BoutReal min(const T& f, bool allpe = false,
return result;
}

template <typename ResT, typename L, typename R, typename Func>
inline BoutReal min(const BinaryExpr<ResT, L, R, Func>& f, bool allpe = false,
const std::string& rgn = "RGN_NOBNDRY") {
return min(ResT{f}, allpe, rgn);
}

/// Returns true if all elements of \p f over \p region are equal. By
/// default only checks the local processor, use \p allpe to check
/// globally
Expand Down Expand Up @@ -412,6 +420,12 @@ inline BoutReal max(const T& f, bool allpe = false,
return result;
}

template <typename ResT, typename L, typename R, typename Func>
inline BoutReal max(const BinaryExpr<ResT, L, R, Func>& f, bool allpe = false,
const std::string& rgn = "RGN_NOBNDRY") {
return max(ResT{f}, allpe, rgn);
}

/// Mean of \p f, excluding the boundary/guard cells by default (can
/// be changed with \p rgn argument).
///
Expand Down Expand Up @@ -519,17 +533,33 @@ T pow(BoutReal lhs, const T& rhs, const std::string& rgn = "RGN_ALL") {
#ifdef FIELD_FUNC
#error This macro has already been defined
#else
#define FIELD_FUNC(name, func) \
template <typename T, typename = bout::utils::EnableIfField<T>> \
inline T name(const T& f, const std::string& rgn = "RGN_ALL") { \
AUTO_TRACE(); \
/* Check if the input is allocated */ \
checkData(f); \
/* Define and allocate the output result */ \
T result{emptyFrom(f)}; \
BOUT_FOR(d, result.getRegion(rgn)) { result[d] = func(f[d]); } \
checkData(result); \
return result; \
#define FIELD_FUNC(name, func) \
namespace bout::op { \
struct name { \
template <typename LView, typename RView> \
__host__ __device__ BoutReal operator()(int idx, const LView& L, \
const RView& R) const { \
return func(L(idx)); \
} \
}; \
}; \
template <typename T, typename = bout::utils::EnableIfField<T>> \
inline BinaryExpr<T, T, T, bout::op::name> name(const T& f, \
const std::string& rgn = "RGN_ALL") { \
std::cout << "RUNNING " #name " with CUDA\n"; \
return BinaryExpr<T, T, T, bout::op::name>{static_cast<typename T::View>(f), \
static_cast<typename T::View>(f), \
bout::op::name{}, \
f.getMesh(), \
f.getLocation(), \
f.getDirections(), \
std::nullopt, \
f.getRegion(rgn)}; \
} \
template <typename ResT, typename L, typename R, typename Func> \
inline BinaryExpr<ResT, ResT, ResT, bout::op::name> name( \
const BinaryExpr<ResT, L, R, Func>& f, const std::string& rgn = "RGN_ALL") { \
return name(ResT{f}, rgn); \
}
#endif

Expand Down
Loading
Loading