diff --git a/examples/automated/CMakeLists.txt b/examples/automated/CMakeLists.txt index 5880c2ac0..2fbf257cb 100644 --- a/examples/automated/CMakeLists.txt +++ b/examples/automated/CMakeLists.txt @@ -1,3 +1,9 @@ # @author Tyson Jones add_all_local_examples() + +include(CheckCXXCompilerFlag) +check_cxx_compiler_flag("-mbmi2" QUEST_COMPILER_SUPPORTS_MBMI2) +if (QUEST_COMPILER_SUPPORTS_MBMI2) + target_compile_options(benchmark_bmi2_bitwise_cpp PRIVATE -mbmi2) +endif() diff --git a/examples/automated/benchmark_bmi2_bitwise.cpp b/examples/automated/benchmark_bmi2_bitwise.cpp new file mode 100644 index 000000000..396f7e2fb --- /dev/null +++ b/examples/automated/benchmark_bmi2_bitwise.cpp @@ -0,0 +1,113 @@ +/** @file + * Quick benchmark for BMI2-assisted bit-index helpers. + * + * @author tzh476 + */ + +#include "quest/src/core/bitwise.hpp" + +#include +#include +#include +#include +#include +#include +#include + +static volatile qindex sinkValue = 0; + +template +qindex makeMask(const std::array& indices, qindex pattern) { + qindex mask = 0; + for (size_t i=0; i> i) & 1) + mask |= QINDEX_ONE << indices[i]; + return mask; +} + +template +double benchGet(const std::string& name, const std::array& indices, const std::vector& inputs, qindex ampMask) { + constexpr qindex numIterations = 5000000; + constexpr int numReps = 5; + + size_t inputMask = inputs.size() - 1; + double best = std::numeric_limits::max(); + + for (int r=0; r(0x13579BDF); + auto start = std::chrono::steady_clock::now(); + + for (qindex i=0; i(i) & inputMask] + acc) & ampMask; + acc ^= getValueOfBits(n, indices.data(), static_cast(N)) + (i & 7); + } + + auto end = std::chrono::steady_clock::now(); + sinkValue ^= acc; + + double nsPerCall = std::chrono::duration(end - start).count() / static_cast(numIterations); + best = std::min(best, nsPerCall); + } + + std::cout << std::left << std::setw(30) << name << " " << std::fixed << std::setprecision(3) << best << " ns/call\n"; + return best; +} + +template +double benchInsert(const std::string& name, const std::array& indices, const std::vector& inputs, qindex valueMask, qindex insertedMask) { + constexpr qindex numIterations = 5000000; + constexpr int numReps = 5; + + size_t inputMask = inputs.size() - 1; + double best = std::numeric_limits::max(); + + for (int r=0; r(0x2468ACE0); + auto start = std::chrono::steady_clock::now(); + + for (qindex i=0; i(i) & inputMask] + acc) & valueMask; + acc ^= insertBitsWithMaskedValues(n, indices.data(), static_cast(N), insertedMask) + (i & 15); + } + + auto end = std::chrono::steady_clock::now(); + sinkValue ^= acc; + + double nsPerCall = std::chrono::duration(end - start).count() / static_cast(numIterations); + best = std::min(best, nsPerCall); + } + + std::cout << std::left << std::setw(30) << name << " " << std::fixed << std::setprecision(3) << best << " ns/call\n"; + return best; +} + +int main() { +#if defined(QUEST_USE_BMI2_INTRINSICS) + std::cout << "BMI2 intrinsics: enabled\n"; +#else + std::cout << "BMI2 intrinsics: disabled\n"; +#endif + + std::vector inputs(1 << 15); + qindex state = static_cast(0x123456789ABCDEFULL); + for (qindex& input : inputs) { + state = state * static_cast(0x5851F42D4C957F2DULL) + static_cast(0x14057B7EF767814FULL); + input = state; + } + + qindex nineQubitMask = (QINDEX_ONE << 9) - QINDEX_ONE; + const std::array inds2 = {2, 7}; + const std::array inds5 = {0, 2, 4, 6, 8}; + const std::array inds6 = {0, 1, 3, 5, 7, 8}; + + benchGet("getValueOfBits 2 bits", inds2, inputs, nineQubitMask); + benchGet("getValueOfBits 5 bits", inds5, inputs, nineQubitMask); + benchGet("getValueOfBits 6 bits", inds6, inputs, nineQubitMask); + + benchInsert("insertBitsWithMask 2 bits", inds2, inputs, (QINDEX_ONE << 7) - QINDEX_ONE, makeMask(inds2, 0b01)); + benchInsert("insertBitsWithMask 5 bits", inds5, inputs, (QINDEX_ONE << 4) - QINDEX_ONE, makeMask(inds5, 0b10101)); + benchInsert("insertBitsWithMask 6 bits", inds6, inputs, (QINDEX_ONE << 3) - QINDEX_ONE, makeMask(inds6, 0b101011)); + + std::cout << "sink: " << sinkValue << "\n"; + return 0; +} diff --git a/quest/src/core/bitwise.hpp b/quest/src/core/bitwise.hpp index f5266afa4..e6053572d 100644 --- a/quest/src/core/bitwise.hpp +++ b/quest/src/core/bitwise.hpp @@ -14,6 +14,11 @@ #include #endif +#if defined(__BMI2__) && (defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)) && !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) + #include + #define QUEST_USE_BMI2_INTRINSICS +#endif + #include "quest/include/types.h" #include "quest/src/core/inliner.hpp" @@ -116,6 +121,35 @@ INLINE qindex setBit(qindex number, int bitIndex, int bitValue) { } +INLINE bool getBitMaskAndCheckIsIncreasing(qindex* maskPtr, const int* bitIndices, int numIndices) { + + // bitIndices can be arbitrarily ordered, though PEXT requires increasing order + qindex mask = 0; + bool isIncreasing = true; + + for (int i=0; i 0) + isIncreasing = isIncreasing && bitIndices[i-1] < bitIndices[i]; + } + + *maskPtr = mask; + return isIncreasing; +} + + +INLINE qindex getBitMaskOfIndices(const int* bitIndices, int numIndices) { + + qindex mask = 0; + + for (int i=0; i(_pdep_u64(static_cast(number), ~static_cast(mask))); + return bitValue? result | mask : result; +#endif // bitIndices must be strictly increasing for (int i=0; i(_pext_u64(static_cast(number), static_cast(mask))); +#endif + for (int i=0; i(_pdep_u64(static_cast(number), ~static_cast(mask))); +#endif + return mask | insertBits(number, bitInds, numBits, 0); } @@ -379,4 +431,4 @@ INLINE void setToBitsOfInteger(int* bits, qindex number, int numBits) { -#endif // BITWISE_HPP \ No newline at end of file +#endif // BITWISE_HPP diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 59341759f..7e689de33 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -2,6 +2,7 @@ target_sources(tests PUBLIC + bitwise.cpp calculations.cpp channels.cpp debug.cpp @@ -16,4 +17,4 @@ target_sources(tests qureg.cpp trotterisation.cpp types.cpp -) \ No newline at end of file +) diff --git a/tests/unit/bitwise.cpp b/tests/unit/bitwise.cpp new file mode 100644 index 000000000..80a0fc5dd --- /dev/null +++ b/tests/unit/bitwise.cpp @@ -0,0 +1,126 @@ +/** @file + * Unit tests of internal bitwise helpers. + * + * @defgroup unitbitwise Bitwise + * @ingroup unittests + */ + +#include "quest/src/core/bitwise.hpp" + +#include + +#include "tests/utils/macros.hpp" + + + +/* + * UTILITIES + */ + +#define TEST_CATEGORY \ + LABEL_UNIT_TAG "[bitwise]" + + +static qindex getReferenceInsertBits(qindex number, const int* bitIndices, int numIndices, int bitValue) { + + for (int i=0; i