Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 190 additions & 26 deletions include/xsimd/config/xsimd_cpu_features_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
#include <cassert>
#include <cstdint>
#include <cstring>
#if __cplusplus >= 201703L
#include <string_view>
#endif

#include "../utils/bits.hpp"
#include "./xsimd_config.hpp"
Expand Down Expand Up @@ -52,8 +55,13 @@ namespace xsimd
template <typename E>
using x86_reg32_bitset = utils::uint_bitset<E, x86_reg32_t>;

template <x86_reg32_t leaf, x86_reg32_t subleaf, typename A, typename B, typename C, typename D>
class x86_cpuid_regs : private x86_reg32_bitset<A>, x86_reg32_bitset<B>, x86_reg32_bitset<C>, x86_reg32_bitset<D>
template <x86_reg32_t leaf_num, x86_reg32_t subleaf_num,
typename A, typename B, typename C, typename D>
class x86_cpuid_regs
: private x86_reg32_bitset<A>,
private x86_reg32_bitset<B>,
private x86_reg32_bitset<C>,
private x86_reg32_bitset<D>
{
private:
using eax_bitset = x86_reg32_bitset<A>;
Expand All @@ -75,6 +83,8 @@ namespace xsimd
using ebx = B;
using ecx = C;
using edx = D;
static constexpr x86_reg32_t leaf = leaf_num;
static constexpr x86_reg32_t subleaf = subleaf_num;

inline static x86_cpuid_regs read()
{
Expand All @@ -84,9 +94,13 @@ namespace xsimd
constexpr x86_cpuid_regs() noexcept = default;

using eax_bitset::all_bits_set;
using eax_bitset::get_range;
using ebx_bitset::all_bits_set;
using ebx_bitset::get_range;
using ecx_bitset::all_bits_set;
using ecx_bitset::get_range;
using edx_bitset::all_bits_set;
using edx_bitset::get_range;
};

template <typename T>
Expand All @@ -95,8 +109,84 @@ namespace xsimd
typename T::ebx,
typename T::ecx,
typename T::edx>;

template <bool extended>
struct x86_cpuid_highest_func
{
private:
using x86_reg32_t = detail::x86_reg32_t;
using manufacturer_str = std::array<char, 3 * sizeof(x86_reg32_t)>;

public:
static constexpr x86_reg32_t leaf = extended ? 0x80000000 : 0x0;

inline static x86_cpuid_highest_func read()
{
auto regs = detail::x86_cpuid(0);
x86_cpuid_highest_func out {};
// Highest function parameter in EAX
out.m_highest_leaf = regs[0];

// Manufacturer string in EBX, EDX, ECX (in that order)
char* manuf = out.m_manufacturer_id.data();
std::memcpy(manuf + 0 * sizeof(x86_reg32_t), &regs[1], sizeof(x86_reg32_t));
std::memcpy(manuf + 1 * sizeof(x86_reg32_t), &regs[3], sizeof(x86_reg32_t));
std::memcpy(manuf + 2 * sizeof(x86_reg32_t), &regs[2], sizeof(x86_reg32_t));

return out;
}

constexpr x86_cpuid_highest_func() noexcept = default;

/**
* Highest available leaf in CPUID non-extended range.
*
* This is the highest function parameter (EAX) that can be passed to CPUID.
* This is valid in the specified range:
* - if `extended` is `false`, that is below `0x80000000`,
* - if `extended` is `true`, that is above `0x80000000`,
*/
constexpr x86_reg32_t highest_leaf() const noexcept
{
return m_highest_leaf;
}

/**
* The manufacturer ID string in a static array.
*
* This raw character array is case specific and may contain both leading
* and trailing whitespaces.
* It cannot be assumed to be null terminated.
* This is not implemented for all manufacturer when `extended` is `true`.
*/
constexpr manufacturer_str manufacturer_id_raw() const noexcept
{
return m_manufacturer_id;
}

#if __cplusplus >= 201703L
constexpr std::string_view manufacturer_id() const noexcept
{
return { m_manufacturer_id.data(), m_manufacturer_id.size() };
}
#endif

private:
manufacturer_str m_manufacturer_id {};
x86_reg32_t m_highest_leaf {};
};
}

/**
* Highest CPUID Function Parameter and Manufacturer ID (EAX=0).
*
* Returns the highest leaf value supported by CPUID in the standard range
* (below 0x80000000), and the processor manufacturer ID string.
*
* @see https://en.wikipedia.org/wiki/CPUID
*/
using x86_cpuid_leaf0 = detail::x86_cpuid_highest_func<false>;

struct x86_cpuid_leaf1_traits
{
static constexpr detail::x86_reg32_t leaf = 1;
Expand Down Expand Up @@ -153,6 +243,10 @@ namespace xsimd

enum class eax
{
/* Start bit for the encoding of the highest subleaf available. */
highest_subleaf_start = 0,
/* End bit for the encoding of the highest subleaf available. */
highest_subleaf_end = 32,
};
enum class ebx
{
Expand Down Expand Up @@ -236,6 +330,16 @@ namespace xsimd
*/
using x86_cpuid_leaf7sub1 = detail::make_x86_cpuid_regs<x86_cpuid_leaf7sub1_traits>;

/**
* Highest Extended CPUID Function Parameter (EAX=0x80000000).
*
* Returns the highest leaf value supported by CPUID in the extended range
* (at or above 0x80000000), and the processor manufacturer ID string.
*
* @see https://en.wikipedia.org/wiki/CPUID
*/
using x86_cpuid_leaf80000000 = detail::x86_cpuid_highest_func<true>;

struct x86_cpuid_leaf80000001_traits
{
static constexpr detail::x86_reg32_t leaf = 0x80000001;
Expand Down Expand Up @@ -449,20 +553,24 @@ namespace xsimd
private:
enum class status
{
leaf1_valid = 0,
leaf7_valid = 1,
leaf7sub1_valid = 2,
leaf80000001_valid = 3,
xcr0_valid = 4,
leaf0_valid = 0,
leaf1_valid = 1,
leaf7_valid = 2,
leaf7sub1_valid = 3,
leaf80000000_valid = 4,
leaf80000001_valid = 5,
xcr0_valid = 6,
};

using status_bitset = utils::uint_bitset<status, std::uint32_t>;

mutable x86_xcr0 m_xcr0 {};
mutable x86_cpuid_leaf0 m_leaf0 {};
mutable x86_cpuid_leaf1 m_leaf1 {};
mutable x86_cpuid_leaf7 m_leaf7 {};
mutable x86_cpuid_leaf7sub1 m_leaf7sub1 {};
mutable x86_cpuid_leaf80000000 m_leaf80000000 {};
mutable x86_cpuid_leaf80000001 m_leaf80000001 {};
mutable x86_xcr0 m_xcr0 {};
mutable status_bitset m_status {};

inline x86_xcr0 const& xcr0() const noexcept
Expand All @@ -475,44 +583,100 @@ namespace xsimd
return m_xcr0;
}

inline x86_cpuid_leaf1 const& leaf1() const
inline x86_cpuid_leaf0 const& leaf0() const
{
if (!m_status.bit_is_set<status::leaf1_valid>())
if (!m_status.bit_is_set<status::leaf0_valid>())
{
m_leaf1 = x86_cpuid_leaf1::read();
m_status.set_bit<status::leaf1_valid>();
m_leaf0 = x86_cpuid_leaf0::read();
m_status.set_bit<status::leaf0_valid>();
}
return m_leaf1;
return m_leaf0;
}

inline x86_cpuid_leaf7 const& leaf7() const
/* Return the cached CPUID leaf 0x80000000 data, reading it on first use. */
inline x86_cpuid_leaf80000000 const& leaf80000000() const
{
    const bool already_read = m_status.bit_is_set<status::leaf80000000_valid>();
    if (already_read)
    {
        return m_leaf80000000;
    }

    // First access: query the hardware once and remember that it was done.
    m_leaf80000000 = x86_cpuid_leaf80000000::read();
    m_status.set_bit<status::leaf80000000_valid>();
    return m_leaf80000000;
}

template <status status_id, typename L>
inline auto const& safe_get_leaf(L& leaf) const
{
if (!m_status.bit_is_set<status::leaf7_valid>())
// Check if already initialized
if (m_status.bit_is_set<status_id>())
{
return leaf;
}

// Limit where we need to check leaf0 or leaf 80000000.
constexpr auto extended_threshold = x86_cpuid_leaf80000000::leaf;

// Check if safe to call CPUID with this value.
// First we identify if the leaf is in the regular or extended range.
// TODO(C++17): if constexpr
if (L::leaf < extended_threshold)
{
// Check leaf0 in regular range
if (L::leaf <= leaf0().highest_leaf())
{
leaf = L::read();
}
}
else
{
m_leaf7 = x86_cpuid_leaf7::read();
m_status.set_bit<status::leaf7_valid>();
// Check leaf80000000 in extended range
if (L::leaf <= leaf80000000().highest_leaf())
{
leaf = L::read();
}
}
return m_leaf7;

// Mark as valid in all cases, including if it was not read.
// In this case it will be filled with zeros (all false).
m_status.set_bit<status_id>();
return leaf;
}

/* Access CPUID leaf 1 through `safe_get_leaf`, tracked by `status::leaf1_valid`. */
inline x86_cpuid_leaf1 const& leaf1() const
{
    x86_cpuid_leaf1 const& cached_leaf = safe_get_leaf<status::leaf1_valid>(m_leaf1);
    return cached_leaf;
}

/* Access CPUID leaf 7 through `safe_get_leaf`, tracked by `status::leaf7_valid`. */
inline x86_cpuid_leaf7 const& leaf7() const
{
    x86_cpuid_leaf7 const& cached_leaf = safe_get_leaf<status::leaf7_valid>(m_leaf7);
    return cached_leaf;
}

inline x86_cpuid_leaf7sub1 const& leaf7sub1() const
{
if (!m_status.bit_is_set<status::leaf7sub1_valid>())
// Check if already initialized
if (m_status.bit_is_set<status::leaf7sub1_valid>())
{
return m_leaf7sub1;
}

// Check if safe to call CPUID with this value as subleaf.
constexpr auto start = x86_cpuid_leaf7::eax::highest_subleaf_start;
constexpr auto end = x86_cpuid_leaf7::eax::highest_subleaf_end;
const auto highest_subleaf7 = leaf7().get_range<start, end>();
if (x86_cpuid_leaf7sub1::subleaf <= highest_subleaf7)
{
m_leaf7sub1 = x86_cpuid_leaf7sub1::read();
m_status.set_bit<status::leaf7sub1_valid>();
}

// Mark as valid in all cases, including if it was not read.
// In this case it will be filled with zeros (all false).
m_status.set_bit<status::leaf7sub1_valid>();
return m_leaf7sub1;
}

inline x86_cpuid_leaf80000001 const& leaf80000001() const
{
if (!m_status.bit_is_set<status::leaf80000001_valid>())
{
m_leaf80000001 = x86_cpuid_leaf80000001::read();
m_status.set_bit<status::leaf80000001_valid>();
}
return m_leaf80000001;
return safe_get_leaf<status::leaf80000001_valid>(m_leaf80000001);
}
};

Expand Down
26 changes: 26 additions & 0 deletions include/xsimd/utils/bits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ namespace xsimd
return value | mask;
}

/**
 * Build a mask whose `width` least significant bits are set.
 *
 * `width` must lie in `[0, 8 * sizeof(I)]`, which is checked with `assert`.
 */
template <typename I>
constexpr I make_low_mask(I width) noexcept
{
    constexpr I bit_count = static_cast<I>(8 * sizeof(I));
    assert(width >= 0);
    assert(width <= bit_count);
    // A full-width mask is built by complement rather than by shifting,
    // since the shift amount would then equal the number of bits in `I`.
    return (width == bit_count) ? ~I { 0 } : (I { 1 } << width) - I { 1 };
}

/* A bitset over an unsigned integer type, indexed by an enum key type. */
template <typename K, typename U>
struct uint_bitset
Expand Down Expand Up @@ -82,6 +97,17 @@ namespace xsimd
m_bitset = utils::set_bit<static_cast<storage_type>(bit)>(m_bitset);
}

/**
 * Extract the bits in [start, end[, shifted down to start at bit 0.
 *
 * `start` and `end` are bit positions given as keys: `start` is the first
 * bit included and `end` is one past the last bit included.
 * The range is validated at compile time: it must not be reversed and must
 * fit inside the storage type.
 */
template <key_type start, key_type end>
constexpr storage_type get_range() const noexcept
{
    constexpr storage_type start_bit = static_cast<storage_type>(start);
    constexpr storage_type end_bit = static_cast<storage_type>(end);
    constexpr storage_type bit_count = static_cast<storage_type>(8 * sizeof(storage_type));
    // Without these checks a reversed range would wrap around in the
    // unsigned subtraction below and yield a meaningless width.
    static_assert(start_bit <= end_bit, "get_range: start must not be past end");
    static_assert(end_bit <= bit_count, "get_range: end exceeds the number of bits in the storage type");
    constexpr storage_type width = end_bit - start_bit;
    constexpr storage_type mask = make_low_mask<storage_type>(width);
    return (m_bitset >> start_bit) & mask;
}

private:
storage_type m_bitset = { 0 };
};
Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ set(XSIMD_TESTS
test_sum.cpp
test_traits.cpp
test_trigonometric.cpp
test_utils_bits.cpp
test_xsimd_api.cpp
test_utils.hpp
)
Expand Down
Loading
Loading