Skip to content

Commit

Permalink
Merge pull request #1480 from evoskuil/master
Browse files Browse the repository at this point in the history
Remove intrinsics portability functions.
  • Loading branch information
evoskuil committed Jun 14, 2024
2 parents 452ca77 + 3a33215 commit b619ff2
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 138 deletions.
60 changes: 8 additions & 52 deletions include/bitcoin/system/intrinsics/haves.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ BC_PUSH_WARNING(USE_CONSTEXPR_FOR_FUNCTION)
constexpr auto with_neon = false;
#endif

/// Runtime checks for Intel SIMD and ARM Neon availability.
/// Runtime checks for Intel SIMD and ARM SIMD (Neon) availability.
/// ---------------------------------------------------------------------------

namespace cpu1_0
Expand Down Expand Up @@ -154,51 +154,7 @@ constexpr bool try_neon() NOEXCEPT
return false;
}

/// Runtime tests for Intel SIMD, and ARM SIMD (Neon) availability.
/// ---------------------------------------------------------------------------
/// These keep binary portable, otherwise can rely on "with" symbols.
/// TODO: evaluate performance impact of removing the thread statics.

/// True only when SHANI is compiled in (with_shani) and try_shani() succeeds.
inline bool have_shani() NOEXCEPT
{
    // Not compiled in: report unavailable without invoking try_shani().
    if constexpr (!with_shani)
    {
        return false;
    }
    else
    {
        return try_shani();
    }
}

/// True only when AVX512 is compiled in (with_avx512) and try_avx512() succeeds.
inline bool have_avx512() NOEXCEPT
{
    // Not compiled in: report unavailable without invoking try_avx512().
    if constexpr (!with_avx512)
    {
        return false;
    }
    else
    {
        return try_avx512();
    }
}

/// True only when AVX2 is compiled in (with_avx2) and try_avx2() succeeds.
inline bool have_avx2() NOEXCEPT
{
    // Not compiled in: report unavailable without invoking try_avx2().
    if constexpr (!with_avx2)
    {
        return false;
    }
    else
    {
        return try_avx2();
    }
}

/// True only when SSE4.1 is compiled in (with_sse41) and try_sse41() succeeds.
inline bool have_sse41() NOEXCEPT
{
    // Not compiled in: report unavailable without invoking try_sse41().
    if constexpr (!with_sse41)
    {
        return false;
    }
    else
    {
        return try_sse41();
    }
}

/// True only when Neon is compiled in (with_neon) and try_neon() succeeds.
/// Previously this consulted with_shani/try_shani(), so the result reflected
/// SHA-NI availability rather than ARM Neon availability.
inline bool have_neon() NOEXCEPT
{
    // Gate on the Neon build constant and delegate to the Neon runtime check.
    if constexpr (with_neon)
        return try_neon();
    else
        return false;
}

/// Type system helpers.
/// ---------------------------------------------------------------------------
/// xint types are always defined, though are mocked when not compiled.
/// Use with_ constants to check for compiled option and have_ functions to
Expand Down Expand Up @@ -232,11 +188,11 @@ template <typename Extended, if_extended<Extended> = true>
inline bool have() NOEXCEPT
{
if constexpr (is_same_type<Extended, xint512_t>)
return have_avx512();
return with_avx512;
else if constexpr (is_same_type<Extended, xint256_t>)
return have_avx2();
return with_avx2;
else if constexpr (is_same_type<Extended, xint128_t>)
return have_sse41();
return with_sse41;
else return false;
}

Expand All @@ -246,11 +202,11 @@ template <typename Integral, size_t Lanes,
inline bool have_lanes() NOEXCEPT
{
if constexpr (capacity<xint512_t, Integral> == Lanes)
return have_avx512();
return with_avx512;
else if constexpr (capacity<xint256_t, Integral> == Lanes)
return have_avx2();
return with_avx2;
else if constexpr (capacity<xint128_t, Integral> == Lanes)
return have_sse41();
return with_sse41;
else return false;
}

Expand Down
90 changes: 16 additions & 74 deletions test/intrinsics/haves.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@

BOOST_AUTO_TEST_SUITE(intrinsics_haves_tests)

// Helper: constant that is true for any type argument supplied.
template <class>
constexpr bool is_defined{ true };

// Build symbols to constexpr.
// ----------------------------------------------------------------------------

Expand Down Expand Up @@ -56,10 +52,6 @@ constexpr bool is_defined = true;
// try()
// ----------------------------------------------------------------------------

////test/intrinsics/haves.cpp(99): error: in "intrinsics_haves_tests/intrinsics_haves__try_avx2__always__match": check tryit == with_avx2 has failed [true != false]
////test/intrinsics/haves.cpp(113): error: in "intrinsics_haves_tests/intrinsics_haves__try_sse41__always__match": check tryit == with_sse41 has failed [true != false]
////test/intrinsics/haves.cpp(129): error: in "intrinsics_haves_tests/intrinsics_haves__try_shani__always__match": check tryit == with_shani has failed [true != false]

BOOST_AUTO_TEST_CASE(intrinsics_haves__try_avx512__always__match)
{
uint64_t extended{};
Expand Down Expand Up @@ -124,17 +116,16 @@ BOOST_AUTO_TEST_CASE(intrinsics_haves__try_shani__always__match)

BOOST_AUTO_TEST_CASE(intrinsics__have_lanes__avx512__expected)
{
const auto have512 = have_avx512();
auto have = false;

have = have_lanes<uint64_t, 8>();
BOOST_CHECK_EQUAL(have, have512);
BOOST_CHECK_EQUAL(have, with_avx512);
have = have_lanes<uint32_t, 16>();
BOOST_CHECK_EQUAL(have, have512);
BOOST_CHECK_EQUAL(have, with_avx512);
have = have_lanes<uint16_t, 32>();
BOOST_CHECK_EQUAL(have, have512);
BOOST_CHECK_EQUAL(have, with_avx512);
have = have_lanes<uint8_t, 64>();
BOOST_CHECK_EQUAL(have, have512);
BOOST_CHECK_EQUAL(have, with_avx512);

have = have_lanes<uint64_t, 7>();
BOOST_CHECK(!have);
Expand All @@ -148,17 +139,16 @@ BOOST_AUTO_TEST_CASE(intrinsics__have_lanes__avx512__expected)

BOOST_AUTO_TEST_CASE(intrinsics__have_lanes__avx2__expected)
{
const auto have256 = have_avx2();
auto have = false;

have = have_lanes<uint64_t, 4>();
BOOST_CHECK_EQUAL(have, have256);
BOOST_CHECK_EQUAL(have, with_avx2);
have = have_lanes<uint32_t, 8>();
BOOST_CHECK_EQUAL(have, have256);
BOOST_CHECK_EQUAL(have, with_avx2);
have = have_lanes<uint16_t, 16>();
BOOST_CHECK_EQUAL(have, have256);
BOOST_CHECK_EQUAL(have, with_avx2);
have = have_lanes<uint8_t, 32>();
BOOST_CHECK_EQUAL(have, have256);
BOOST_CHECK_EQUAL(have, with_avx2);

have = have_lanes<uint64_t, 3>();
BOOST_CHECK(!have);
Expand All @@ -172,17 +162,16 @@ BOOST_AUTO_TEST_CASE(intrinsics__have_lanes__avx2__expected)

BOOST_AUTO_TEST_CASE(intrinsics__have_lanes__sse41__expected)
{
const auto have128 = have_sse41();
auto have = false;

have = have_lanes<uint64_t, 2>();
BOOST_CHECK_EQUAL(have, have128);
BOOST_CHECK_EQUAL(have, with_sse41);
have = have_lanes<uint32_t, 4>();
BOOST_CHECK_EQUAL(have, have128);
BOOST_CHECK_EQUAL(have, with_sse41);
have = have_lanes<uint16_t, 8>();
BOOST_CHECK_EQUAL(have, have128);
BOOST_CHECK_EQUAL(have, with_sse41);
have = have_lanes<uint8_t, 16>();
BOOST_CHECK_EQUAL(have, have128);
BOOST_CHECK_EQUAL(have, with_sse41);

have = have_lanes<uint64_t, 1>();
BOOST_CHECK(!have);
Expand All @@ -194,60 +183,13 @@ BOOST_AUTO_TEST_CASE(intrinsics__have_lanes__sse41__expected)
BOOST_CHECK(!have);
}

// have() [CI matrix platform assumptions]
// ----------------------------------------------------------------------------
// These use BOOST_WARN to let us know if vectorization did not execute due to
// CI platform processor configuration. Currently all CI platforms have SSE41
// and AVX2, while about 50% have AVX512BW. Windows platforms now have SHANI.

// Without HAVE_AVX512 the function must report false; with it, a false result
// is only a warning.
BOOST_AUTO_TEST_CASE(intrinsics_haves__have_avx512__when_defined__true)
{
#if !defined(HAVE_AVX512)
    BOOST_REQUIRE(!have_avx512());
#else
    BOOST_WARN(have_avx512());
#endif
}

// Without HAVE_AVX2 the function must report false; with it, a false result
// is only a warning.
BOOST_AUTO_TEST_CASE(intrinsics_haves__have_avx2__when_defined__true)
{
#if !defined(HAVE_AVX2)
    BOOST_REQUIRE(!have_avx2());
#else
    BOOST_WARN(have_avx2());
#endif
}

// Without HAVE_SSE41 the function must report false; with it, a false result
// is only a warning.
BOOST_AUTO_TEST_CASE(intrinsics_haves__have_sse41__when_defined__true)
{
#if !defined(HAVE_SSE41)
    BOOST_REQUIRE(!have_sse41());
#else
    BOOST_WARN(have_sse41());
#endif
}

// Without HAVE_SHANI the function must report false; with it, a false result
// is only a warning.
BOOST_AUTO_TEST_CASE(intrinsics_haves__have_shani__when_defined__true)
{
#if !defined(HAVE_SHANI)
    BOOST_REQUIRE(!have_shani());
#else
    BOOST_WARN(have_shani());
#endif
}

// Without HAVE_NEON the function must report false; with it, a false result
// is only a warning.
BOOST_AUTO_TEST_CASE(intrinsics_haves__have_neon__always__when_defined__true)
{
#if !defined(HAVE_NEON)
    BOOST_REQUIRE(!have_neon());
#else
    BOOST_WARN(have_neon());
#endif
}

// is_extended
// ----------------------------------------------------------------------------

// Helper: constant that is true for any type argument supplied.
template <class>
constexpr bool is_defined{ true };

// is_extended is true even with mock type.
static_assert(!is_extended<uint32_t>);
static_assert(is_extended<xint128_t>);
Expand Down
24 changes: 12 additions & 12 deletions test/intrinsics/xcpu/functional.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ BOOST_AUTO_TEST_SUITE(functional_tests)
#if defined(HAVE_SSE41)
BOOST_AUTO_TEST_CASE(functional__sse4__set32__get_expected)
{
if (have_sse41())
if (with_sse41)
{
const auto xword = set<xint128_t>(0, 1, 2, 3);
const auto word0 = get<uint32_t, 0>(xword);
Expand All @@ -43,7 +43,7 @@ BOOST_AUTO_TEST_CASE(functional__sse4__set64__get_expected)
{
if constexpr (!build_x32)
{
if (have_sse41())
if (with_sse41)
{
const auto xword = set<xint128_t>(0, 1);
const auto word0 = get<uint64_t, 0>(xword);
Expand All @@ -58,7 +58,7 @@ BOOST_AUTO_TEST_CASE(functional__sse4__set64__get_expected)
#if defined(HAVE_AVX2)
BOOST_AUTO_TEST_CASE(functional__avx2__set32__get_expected)
{
if (have_avx2())
if (with_avx2)
{
const auto xword = set<xint256_t>(0, 1, 2, 3, 4, 5, 6, 7);
const auto word0 = get<uint32_t, 0>(xword);
Expand All @@ -83,7 +83,7 @@ BOOST_AUTO_TEST_CASE(functional__avx2__set64__get_expected)
{
if constexpr (!build_x32)
{
if (have_avx2())
if (with_avx2)
{
const auto xword = set<xint256_t>(0, 1, 2, 3);
const auto word0 = get<uint64_t, 0>(xword);
Expand All @@ -102,7 +102,7 @@ BOOST_AUTO_TEST_CASE(functional__avx2__set64__get_expected)
#if defined(HAVE_AVX512)
BOOST_AUTO_TEST_CASE(functional__avx512__set32__get_expected)
{
if (have_avx512())
if (with_avx512)
{
const auto xword = set<xint512_t>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const auto word0 = get<uint32_t, 0>(xword);
Expand Down Expand Up @@ -143,7 +143,7 @@ BOOST_AUTO_TEST_CASE(functional__avx512__set64__get_expected)
{
if constexpr (!build_x32)
{
if (have_avx512())
if (with_avx512)
{
const auto xword = set<xint512_t>(0, 1, 2, 3, 4, 5, 6, 7);
const auto word0 = get<uint64_t, 0>(xword);
Expand Down Expand Up @@ -174,7 +174,7 @@ BOOST_AUTO_TEST_CASE(functional__avx512__set64__get_expected)
#if defined(HAVE_SSE41)
BOOST_AUTO_TEST_CASE(functional__sse4__byteswap32__expected)
{
if (have_sse41())
if (with_sse41)
{
const auto xword = byteswap<uint32_t>(set<xint128_t>(
0x00000001, 0x00000002, 0x00000003, 0x00000004));
Expand All @@ -192,7 +192,7 @@ BOOST_AUTO_TEST_CASE(functional__sse4__byteswap64__expected)
{
if constexpr (!build_x32)
{
if (have_sse41())
if (with_sse41)
{
const auto xword = byteswap<uint64_t>(set<xint128_t>(
0x0000000000000001, 0x0000000000000002));
Expand All @@ -208,7 +208,7 @@ BOOST_AUTO_TEST_CASE(functional__sse4__byteswap64__expected)
#if defined(HAVE_AVX2)
BOOST_AUTO_TEST_CASE(functional__avx2__byteswap32__expected)
{
if (have_avx2())
if (with_avx2)
{
const auto xword = byteswap<uint32_t>(set<xint256_t>(
0x00000001, 0x00000002, 0x00000003, 0x00000004,
Expand All @@ -235,7 +235,7 @@ BOOST_AUTO_TEST_CASE(functional__avx2__byteswap64__expected)
{
if constexpr (!build_x32)
{
if (have_avx2())
if (with_avx2)
{
const auto xword = byteswap<uint64_t>(set<xint256_t>(
0x0000000000000001, 0x0000000000000002,
Expand All @@ -256,7 +256,7 @@ BOOST_AUTO_TEST_CASE(functional__avx2__byteswap64__expected)
#if defined(HAVE_AVX512)
BOOST_AUTO_TEST_CASE(functional__avx512__byteswap32__get_expected)
{
if (have_avx512())
if (with_avx512)
{
const auto xword = byteswap<uint32_t>(set<xint512_t>(
0x00000001, 0x00000002, 0x00000003, 0x00000004,
Expand Down Expand Up @@ -301,7 +301,7 @@ BOOST_AUTO_TEST_CASE(functional__avx512__byteswap64__get_expected)
{
if constexpr (!build_x32)
{
if (have_avx512())
if (with_avx512)
{
const auto xword = byteswap<uint64_t>(set<xint512_t>(
0x0000000000000001,
Expand Down

0 comments on commit b619ff2

Please sign in to comment.