diff --git a/include/xsimd/config/xsimd_config.hpp b/include/xsimd/config/xsimd_config.hpp index 742dc70fc..2d0e89746 100644 --- a/include/xsimd/config/xsimd_config.hpp +++ b/include/xsimd/config/xsimd_config.hpp @@ -357,12 +357,23 @@ /** * @ingroup xsimd_config_macro * - * Set to 1 if NEON64 is available at compile-time, to 0 otherwise. + * Set to 1 if the target is in the ARM architecture family in 64 bits, to 0 otherwise */ #if defined(__aarch64__) || defined(_M_ARM64) -#define XSIMD_WITH_NEON64 1 +#define XSIMD_TARGET_ARM64 1 #else -#define XSIMD_WITH_NEON64 0 +#define XSIMD_TARGET_ARM64 0 +#endif + +/** + * @ingroup xsimd_config_macro + * + * Set to 1 if the target is in the ARM architecture family, to 0 otherwise + */ +#if defined(__arm__) || defined(_M_ARM) || XSIMD_TARGET_ARM64 +#define XSIMD_TARGET_ARM 1 +#else +#define XSIMD_TARGET_ARM 0 #endif /** @@ -370,12 +381,26 @@ * * Set to 1 if NEON is available at compile-time, to 0 otherwise. */ -#if (defined(__ARM_NEON) && __ARM_ARCH >= 7) || XSIMD_WITH_NEON64 +#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && defined(__ARM_NEON)) || XSIMD_TARGET_ARM64 #define XSIMD_WITH_NEON 1 #else #define XSIMD_WITH_NEON 0 #endif +// Neon is always available on Arm64, though it is theoritially possible to compile +// without it, such as -march=armv8-a+nosimd. +// Note that MSVC may never define __ARM_NEON even when available. +/** + * @ingroup xsimd_config_macro + * + * Set to 1 if NEON64 is available at compile-time, to 0 otherwise. + */ +#if XSIMD_TARGET_ARM64 +#define XSIMD_WITH_NEON64 1 +#else +#define XSIMD_WITH_NEON64 0 +#endif + /** * @ingroup xsimd_config_macro * @@ -497,4 +522,15 @@ #define XSIMD_NO_SUPPORTED_ARCHITECTURE #endif +/** + * @ingroup xsimd_config_macro + * + * Set to 1 if the target is a linux + */ +#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18) +#define XSIMD_WITH_LINUX_GETAUXVAL 1 +#else +#define XSIMD_WITH_LINUX_GETAUXVAL 0 +#endif + #endif diff --git a/include/xsimd/config/xsimd_cpu_features_arm.hpp b/include/xsimd/config/xsimd_cpu_features_arm.hpp new file mode 100644 index 000000000..c9e16422e --- /dev/null +++ b/include/xsimd/config/xsimd_cpu_features_arm.hpp @@ -0,0 +1,121 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ***************************************************************************/ + +#ifndef XSIMD_CPU_FEATURES_ARM_HPP +#define XSIMD_CPU_FEATURES_ARM_HPP + +#include "./xsimd_config.hpp" + +#if XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL +#include "../utils/bits.hpp" +#include "./xsimd_getauxval.hpp" + +// HWCAP_XXX masks to use on getauxval results. +// Header does not exists on all architectures and masks are architecture +// specific. +#include + +// Port possibly missing mask. Should only be defined on Arm64. +#if XSIMD_TARGET_ARM64 && !defined(HWCAP2_I8MM) +#define HWCAP2_I8MM (1 << 13) +#endif +#endif // XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL + +namespace xsimd +{ + /** + * An opinionated CPU feature detection utility for ARM. + * + * Combines compile-time knowledge with runtime detection when available. + * On Linux, runtime detection uses getauxval to query the auxiliary vector. + * On other platforms, only compile-time information is used. + * + * This is well defined on all architectures. + * It will always return false on non-ARM architectures. + */ + class arm_cpu_features + { + public: + arm_cpu_features() noexcept = default; + + inline bool neon() const noexcept + { +#if XSIMD_TARGET_ARM && !XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL + return hwcap().has_feature(HWCAP_NEON); +#else + return static_cast(XSIMD_WITH_NEON); +#endif + } + + constexpr bool neon64() const noexcept + { + return static_cast(XSIMD_WITH_NEON64); + } + + inline bool sve() const noexcept + { +#if XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL + return hwcap().has_feature(HWCAP_SVE); +#else + return false; +#endif + } + + inline bool i8mm() const noexcept + { +#if XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL + return hwcap2().has_feature(HWCAP2_I8MM); +#else + return false; +#endif + } + + private: +#if XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL + enum class status + { + hwcap_valid = 0, + hwcap2_valid = 1, + }; + + using status_bitset = utils::uint_bitset; + + mutable status_bitset m_status {}; + + mutable xsimd::linux_auxval m_hwcap {}; + + inline xsimd::linux_auxval const& hwcap() const noexcept + { + if (!m_status.bit_is_set()) + { + m_hwcap = xsimd::linux_auxval::read(AT_HWCAP); + m_status.set_bit(); + } + return m_hwcap; + } + +#if XSIMD_TARGET_ARM64 + mutable xsimd::linux_auxval m_hwcap2 {}; + + inline xsimd::linux_auxval const& hwcap2() const noexcept + { + if (!m_status.bit_is_set()) + { + m_hwcap2 = xsimd::linux_auxval::read(AT_HWCAP2); + m_status.set_bit(); + } + return m_hwcap2; + } +#endif +#endif // XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL + }; +} +#endif diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 6871637e3..2f78f90bd 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -13,17 +13,13 @@ #define XSIMD_CPUID_HPP #include "../types/xsimd_all_registers.hpp" +#include "./xsimd_cpu_features_arm.hpp" #include "./xsimd_cpu_features_x86.hpp" -#include "xsimd_inline.hpp" +#include "./xsimd_inline.hpp" -#if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector)) +#if XSIMD_WITH_LINUX_GETAUXVAL && defined(__riscv_vector) #include #include - -#ifndef HWCAP2_I8MM -#define HWCAP2_I8MM (1 << 13) -#endif - #endif namespace xsimd @@ -92,29 +88,24 @@ namespace xsimd vsx = 1; #endif -#if defined(__aarch64__) || defined(_M_ARM64) - neon = 1; - neon64 = 1; -#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18) - i8mm_neon64 = bool(getauxval(AT_HWCAP2) & HWCAP2_I8MM); - sve = bool(getauxval(AT_HWCAP) & HWCAP_SVE); -#endif - -#elif defined(__ARM_NEON) || defined(_M_ARM) +#if XSIMD_WITH_LINUX_GETAUXVAL +#if defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0 -#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18) - neon = bool(getauxval(AT_HWCAP) & HWCAP_NEON); -#endif - -#elif defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0 - -#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18) #ifndef HWCAP_V #define HWCAP_V (1 << ('V' - 'A')) #endif rvv = bool(getauxval(AT_HWCAP) & HWCAP_V); #endif #endif + + // Safe on all platforms, it will be all false if non arm. + const auto arm_cpu = xsimd::arm_cpu_features(); + + neon = arm_cpu.neon(); + neon64 = arm_cpu.neon64(); + i8mm_neon64 = arm_cpu.neon64() && arm_cpu.i8mm(); + sve = arm_cpu.sve(); + // Safe on all platforms, it will be all false if non x86. const auto x86_cpu = xsimd::x86_cpu_features(); diff --git a/include/xsimd/config/xsimd_getauxval.hpp b/include/xsimd/config/xsimd_getauxval.hpp new file mode 100644 index 000000000..5a24279de --- /dev/null +++ b/include/xsimd/config/xsimd_getauxval.hpp @@ -0,0 +1,91 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ***************************************************************************/ + +#ifndef XSIMD_GETAUXVAL_HPP +#define XSIMD_GETAUXVAL_HPP + +#include "./xsimd_config.hpp" + +#if XSIMD_WITH_LINUX_GETAUXVAL +#include // getauxval +#endif + +namespace xsimd +{ + namespace detail + { + using linux_getauxval_t = unsigned long; + + inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept; + } + + /* + * Holds the value of a Linux auxiliary vector entry (e.g. AT_HWCAP). + * + * On Linux systems, the kernel exposes some CPU features through the + * auxiliary vector, which can be queried via `getauxval(AT_HWCAP)`. + * Well defined on all platforms, and will return always falsw on + * non-linux platforms. + * + * Usage: + * auto hwcap = linux_auxval::read(AT_HWCAP); + * bool neon = hwcap.has_feature(HWCAP_NEON); + * + * @see https://www.kernel.org/doc/Documentation/arm64/elf_hwcaps.txt + */ + class linux_auxval + { + private: + using getauxval_t = detail::linux_getauxval_t; + + public: + constexpr linux_auxval() noexcept = default; + + inline static linux_auxval read(getauxval_t type) noexcept + { + return linux_auxval(detail::linux_getauxval(type)); + } + + constexpr bool has_feature(getauxval_t feat) const noexcept + { + return (m_auxval & feat) == feat; + } + + private: + getauxval_t m_auxval = {}; + + constexpr explicit linux_auxval(getauxval_t v) noexcept + : m_auxval(v) + { + } + }; + + /******************** + * Implementation * + ********************/ + + namespace detail + { +#if XSIMD_WITH_LINUX_GETAUXVAL + inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept + { + return getauxval(type); + } +#else + inline linux_getauxval_t linux_getauxval(linux_getauxval_t) noexcept + { + return {}; // All bits set to 0 + } +#endif + } +} + +#endif