diff --git a/stl/inc/atomic b/stl/inc/atomic index 307168b01e..69bb22d98a 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -28,29 +28,10 @@ _STL_DISABLE_CLANG_WARNINGS #pragma push_macro("new") #undef new -#ifdef _WIN64 -#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 -#define _STD_COMPARE_EXCHANGE_128 _InterlockedCompareExchange128 -#else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 / _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv -// 16-byte atomics are separately compiled for x64, as not all x64 hardware has the cmpxchg16b -// instruction; in the event this instruction is not available, the fallback is a global -// synchronization object shared by all 16-byte atomics. -// (Note: machines without this instruction typically have 2 cores or fewer, so this isn't too bad) -// All pointer parameters must be 16-byte aligned. -extern "C" _NODISCARD unsigned char __stdcall __std_atomic_compare_exchange_128( - _Inout_bytecount_(16) long long* _Destination, _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow, - _Inout_bytecount_(16) long long* _ComparandResult) noexcept; -extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; -#define _STD_COMPARE_EXCHANGE_128 __std_atomic_compare_exchange_128 -#endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 ^^^ -#endif // defined(_WIN64) - -// Controls whether atomic::is_always_lock_free triggers for sizeof(void *) or 2 * sizeof(void *) -#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 || !defined(_M_X64) || defined(_M_ARM64EC) -#define _ATOMIC_HAS_DCAS 1 -#else // ^^^ We always have DCAS / We only sometimes have DCAS vvv -#define _ATOMIC_HAS_DCAS 0 -#endif // ^^^ We only sometimes have DCAS ^^^ +// Allow _InterlockedCompareExchange128 to be used: +#if defined(__clang__) && defined(_M_X64) +#pragma clang attribute _STD_ATOMIC_HEADER.push([[gnu::target("cx16")]], apply_to = function) +#endif // ^^^ defined(__clang__) && defined(_M_X64) ^^^ // Controls whether ARM64 ldar/ldapr/stlr should be used #ifndef _STD_ATOMIC_USE_ARM64_LDAR_STLR @@ -603,7 +584,7 @@ inline bool __stdcall _Atomic_wait_compare_16_bytes(const void* _Storage, void* const auto _Cmp = static_cast(_Comparand); alignas(16) long long _Tmp[2] = {_Cmp[0], _Cmp[1]}; #if defined(_M_X64) && !defined(_M_ARM64EC) - return _STD_COMPARE_EXCHANGE_128(_Dest, _Tmp[1], _Tmp[0], _Tmp) != 0; + return _InterlockedCompareExchange128(_Dest, _Tmp[1], _Tmp[0], _Tmp) != 0; #else // ^^^ _M_X64 / ARM64, _M_ARM64EC vvv return _InterlockedCompareExchange128_nf(_Dest, _Tmp[1], _Tmp[0], _Tmp) != 0; #endif // ^^^ ARM64, _M_ARM64EC ^^^ @@ -1199,7 +1180,7 @@ struct _Atomic_storage<_Ty&, 16> { // lock-free using 16-byte intrinsics _NODISCARD _TVal load() const noexcept { // load with sequential consistency long long* const _Storage_ptr = const_cast(_STD _Atomic_address_as(_Storage)); _Int128 _Result{}; // atomic CAS 0 with 0 - (void) _STD_COMPARE_EXCHANGE_128(_Storage_ptr, 0, 0, &_Result._Low); + (void) _InterlockedCompareExchange128(_Storage_ptr, 0, 0, &_Result._Low); return reinterpret_cast<_TVal&>(_Result); } @@ -1270,7 +1251,7 @@ struct _Atomic_storage<_Ty&, 16> { // lock-free using 16-byte intrinsics &_Expected_temp._Low); #else // ^^^ _M_ARM64, _M_ARM64EC / _M_X64 vvv (void) _Order; - _Result = _STD_COMPARE_EXCHANGE_128(&reinterpret_cast(_Storage), _Desired_bytes._High, + _Result = _InterlockedCompareExchange128(&reinterpret_cast(_Storage), _Desired_bytes._High, _Desired_bytes._Low, &_Expected_temp._Low); #endif // ^^^ _M_X64 ^^^ if (_Result) { @@ -1296,7 +1277,7 @@ struct _Atomic_storage<_Ty&, 16> { // lock-free using 16-byte intrinsics &_Expected_temp._Low); #else // ^^^ _M_ARM64, _M_ARM64EC / _M_X64 vvv (void) _Order; - _Result = _STD_COMPARE_EXCHANGE_128( + _Result = _InterlockedCompareExchange128( &reinterpret_cast(_Storage), _Desired_bytes._High, _Desired_bytes._Low, &_Expected_temp._Low); #endif // ^^^ _M_X64 ^^^ if (_Result == 0) { @@ -1649,13 +1630,8 @@ struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral oper template constexpr bool _Is_always_lock_free = _TypeSize <= 8 && (_TypeSize & (_TypeSize - 1)) == 0; #else // ^^^ don't break ABI / break ABI vvv -#if _ATOMIC_HAS_DCAS template constexpr bool _Is_always_lock_free = _TypeSize <= 2 * sizeof(void*); -#else // ^^^ _ATOMIC_HAS_DCAS / !_ATOMIC_HAS_DCAS vvv -template -constexpr bool _Is_always_lock_free = _TypeSize <= sizeof(void*); -#endif // ^^^ !_ATOMIC_HAS_DCAS ^^^ #endif // ^^^ break ABI ^^^ template > @@ -2175,23 +2151,10 @@ public: static constexpr bool is_always_lock_free = _Is_always_lock_free; #endif // _HAS_CXX17 -#if 1 // TRANSITION, ABI _NODISCARD bool is_lock_free() const volatile noexcept { - constexpr bool _Result = sizeof(_Ty) <= 8 && (sizeof(_Ty) & sizeof(_Ty) - 1) == 0; - return _Result; + return _Is_always_lock_free; } -#else // ^^^ don't break ABI / break ABI vvv - - _NODISCARD bool is_lock_free() const volatile noexcept { -#if _ATOMIC_HAS_DCAS - return sizeof(_Ty) <= 2 * sizeof(void*); -#else // ^^^ _ATOMIC_HAS_DCAS / !_ATOMIC_HAS_DCAS vvv - return sizeof(_Ty) <= sizeof(void*) || (sizeof(_Ty) <= 2 * sizeof(void*) && __std_atomic_has_cmpxchg16b()); -#endif // ^^^ !_ATOMIC_HAS_DCAS ^^^ - } -#endif // ^^^ break ABI ^^^ - _NODISCARD bool is_lock_free() const noexcept { return static_cast(this)->is_lock_free(); } @@ -2341,7 +2304,7 @@ public: using value_type = _Ty; explicit atomic_ref(_Ty& _Value) noexcept /* strengthened */ : _Base(_Value) { - if constexpr (_Is_potentially_lock_free) { + if constexpr (is_always_lock_free) { _Check_alignment(_Value); } else { this->_Init_spinlock_for_ref(); @@ -2352,30 +2315,13 @@ public: atomic_ref& operator=(const atomic_ref&) = delete; - static constexpr bool _Is_potentially_lock_free = - sizeof(_Ty) <= 2 * sizeof(void*) && (sizeof(_Ty) & (sizeof(_Ty) - 1)) == 0; - static constexpr bool is_always_lock_free = -#if _ATOMIC_HAS_DCAS - _Is_potentially_lock_free; -#else // ^^^ _ATOMIC_HAS_DCAS / !_ATOMIC_HAS_DCAS vvv - _Is_potentially_lock_free && sizeof(_Ty) <= sizeof(void*); -#endif // ^^^ !_ATOMIC_HAS_DCAS ^^^ + sizeof(_Ty) <= 2 * sizeof(void*) && (sizeof(_Ty) & (sizeof(_Ty) - 1)) == 0; - static constexpr size_t required_alignment = _Is_potentially_lock_free ? sizeof(_Ty) : alignof(_Ty); + static constexpr size_t required_alignment = is_always_lock_free ? sizeof(_Ty) : alignof(_Ty); _NODISCARD bool is_lock_free() const noexcept { -#if _ATOMIC_HAS_DCAS return is_always_lock_free; -#else // ^^^ _ATOMIC_HAS_DCAS / !_ATOMIC_HAS_DCAS vvv - if constexpr (is_always_lock_free) { - return true; - } else if constexpr (_Is_potentially_lock_free) { - return __std_atomic_has_cmpxchg16b() != 0; - } else { - return false; - } -#endif // ^^^ !_ATOMIC_HAS_DCAS ^^^ } void store(const _Ty _Value) const noexcept { @@ -3049,16 +2995,18 @@ _STD_END #undef _ATOMIC_STORE_32_SEQ_CST #undef _ATOMIC_STORE_64_SEQ_CST #undef _ATOMIC_STORE_64_SEQ_CST_IX86 -#undef _ATOMIC_HAS_DCAS #undef _ATOMIC_STORE_SEQ_CST_ARM64 #undef __LOAD_ACQUIRE_ARM64 #undef _ATOMIC_LOAD_ARM64 #undef __STORE_RELEASE #undef _STD_ATOMIC_USE_ARM64_LDAR_STLR -#undef _STD_COMPARE_EXCHANGE_128 #undef _INVALID_MEMORY_ORDER +#if defined(__clang__) && defined(_M_X64) +#pragma clang attribute _STD_ATOMIC_HEADER.pop +#endif // ^^^ defined(__clang__) && defined(_M_X64) ^^^ + #pragma pop_macro("new") _STL_RESTORE_CLANG_WARNINGS #pragma warning(pop) diff --git a/stl/inc/yvals.h b/stl/inc/yvals.h index d2201390f5..33ac7c5120 100644 --- a/stl/inc/yvals.h +++ b/stl/inc/yvals.h @@ -315,22 +315,6 @@ _EMIT_STL_WARNING(STL4001, "/clr:pure is deprecated and will be REMOVED."); #define _LOCK_STREAM 2 #define _LOCK_DEBUG 3 -#ifndef _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B -#if _STL_WIN32_WINNT >= _STL_WIN32_WINNT_WINBLUE && defined(_WIN64) -#define _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B 1 -#else // ^^^ modern 64-bit / less modern or 32-bit vvv -#define _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B 0 -#endif // _STL_WIN32_WINNT >= _STL_WIN32_WINNT_WINBLUE && defined(_WIN64) -#endif // !defined(_STD_ATOMIC_ALWAYS_USE_CMPXCHG16B) - -#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 && defined(_M_ARM64) -#error ARM64 requires _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B to be 1. -#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 && defined(_M_ARM64) - -#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 && !defined(_WIN64) -#error _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 requires 64-bit. -#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 && !defined(_WIN64) - _STD_BEGIN enum _Uninitialized { // tag for suppressing initialization _Noinit diff --git a/stl/inc/yvals_core.h b/stl/inc/yvals_core.h index 30b02bfd14..94fb2b4573 100644 --- a/stl/inc/yvals_core.h +++ b/stl/inc/yvals_core.h @@ -1936,10 +1936,9 @@ compiler option, or define _ALLOW_RTCc_IN_STL to suppress this error. #error In yvals_core.h, defined(MRTDLL) implies defined(_M_CEE_PURE); !defined(_M_CEE_PURE) implies !defined(MRTDLL) #endif // defined(MRTDLL) && !defined(_M_CEE_PURE) -#define _STL_WIN32_WINNT_VISTA 0x0600 // _WIN32_WINNT_VISTA from sdkddkver.h -#define _STL_WIN32_WINNT_WIN8 0x0602 // _WIN32_WINNT_WIN8 from sdkddkver.h -#define _STL_WIN32_WINNT_WINBLUE 0x0603 // _WIN32_WINNT_WINBLUE from sdkddkver.h -#define _STL_WIN32_WINNT_WIN10 0x0A00 // _WIN32_WINNT_WIN10 from sdkddkver.h +#define _STL_WIN32_WINNT_VISTA 0x0600 // _WIN32_WINNT_VISTA from sdkddkver.h +#define _STL_WIN32_WINNT_WIN8 0x0602 // _WIN32_WINNT_WIN8 from sdkddkver.h +#define _STL_WIN32_WINNT_WIN10 0x0A00 // _WIN32_WINNT_WIN10 from sdkddkver.h // Note that the STL DLL builds will set this to XP for ABI compatibility with VS2015 which supported XP. #ifndef _STL_WIN32_WINNT diff --git a/stl/src/atomic_wait.cpp b/stl/src/atomic_wait.cpp index b9d907150b..4bafb1a86b 100644 --- a/stl/src/atomic_wait.cpp +++ b/stl/src/atomic_wait.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -90,24 +91,6 @@ namespace { } #endif // defined(_DEBUG) } - - [[nodiscard]] unsigned char __std_atomic_compare_exchange_128_fallback( - _Inout_bytecount_(16) long long* _Destination, _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow, - _Inout_bytecount_(16) long long* _ComparandResult) noexcept { - static SRWLOCK _Mtx = SRWLOCK_INIT; - _SrwLock_guard _Guard{_Mtx}; - if (_Destination[0] == _ComparandResult[0] && _Destination[1] == _ComparandResult[1]) { - _ComparandResult[0] = _Destination[0]; - _ComparandResult[1] = _Destination[1]; - _Destination[0] = _ExchangeLow; - _Destination[1] = _ExchangeHigh; - return static_cast(true); - } else { - _ComparandResult[0] = _Destination[0]; - _ComparandResult[1] = _Destination[1]; - return static_cast(false); - } - } } // unnamed namespace extern "C" { @@ -249,41 +232,27 @@ _Smtx_t* __stdcall __std_atomic_get_mutex(const void* const _Key) noexcept { } #pragma warning(pop) +// TRANSITION, ABI: preserved for binary compatibility [[nodiscard]] unsigned char __stdcall __std_atomic_compare_exchange_128(_Inout_bytecount_(16) long long* _Destination, _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow, _Inout_bytecount_(16) long long* _ComparandResult) noexcept { -#if !defined(_WIN64) - return __std_atomic_compare_exchange_128_fallback(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); -#elif _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 +#ifdef _WIN64 return _InterlockedCompareExchange128(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); -#else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 / _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv - if (__std_atomic_has_cmpxchg16b()) { - return _InterlockedCompareExchange128(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); - } - - return __std_atomic_compare_exchange_128_fallback(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); -#endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 ^^^ +#else // ^^^ 64-bit / 32-bit vvv + (void) _Destination; + (void) _ExchangeHigh; + (void) _ExchangeLow; + (void) _ComparandResult; + _CSTD abort(); +#endif // ^^^ 32-bit ^^^ } +// TRANSITION, ABI: preserved for binary compatibility [[nodiscard]] char __stdcall __std_atomic_has_cmpxchg16b() noexcept { -#if !defined(_WIN64) - return false; -#elif _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 +#ifdef _WIN64 return true; -#else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 / _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv - constexpr char _Cmpxchg_Absent = 0; - constexpr char _Cmpxchg_Present = 1; - constexpr char _Cmpxchg_Unknown = 2; - - static std::atomic _Cached_value{_Cmpxchg_Unknown}; - - char _Value = _Cached_value.load(std::memory_order_relaxed); - if (_Value == _Cmpxchg_Unknown) { - _Value = IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) ? _Cmpxchg_Present : _Cmpxchg_Absent; - _Cached_value.store(_Value, std::memory_order_relaxed); - } - - return _Value; -#endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 ^^^ +#else + _CSTD abort(); +#endif } } // extern "C" diff --git a/tests/std/tests/P0019R8_atomic_ref/env.lst b/tests/std/tests/P0019R8_atomic_ref/env.lst index b3f3ea4864..351a8293d9 100644 --- a/tests/std/tests/P0019R8_atomic_ref/env.lst +++ b/tests/std/tests/P0019R8_atomic_ref/env.lst @@ -2,6 +2,3 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception RUNALL_INCLUDE ..\usual_20_matrix.lst -RUNALL_CROSSLIST -* PM_CL="" -* PM_CL="/D_STD_ATOMIC_ALWAYS_USE_CMPXCHG16B=1 /DTEST_CMPXCHG16B" diff --git a/tests/std/tests/P0019R8_atomic_ref/test.cpp b/tests/std/tests/P0019R8_atomic_ref/test.cpp index 6cdfd4518a..8edf08cfd9 100644 --- a/tests/std/tests/P0019R8_atomic_ref/test.cpp +++ b/tests/std/tests/P0019R8_atomic_ref/test.cpp @@ -1,12 +1,6 @@ // Copyright (c) Microsoft Corporation. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#if defined(TEST_CMPXCHG16B) && (defined(__clang__) || !defined(_M_X64)) -// Skip Clang because it would require the -mcx16 compiler option. -// Skip non-x64 because _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B is always 1 for ARM64, and is forbidden to be 1 for 32-bit. -int main() {} -#else // ^^^ skip test / run test vvv - #include #include #include @@ -436,13 +430,8 @@ void test_gh_4472() { static_assert(std::atomic_ref::required_alignment == sizeof(two_pointers_t)); -#ifdef _WIN64 - static_assert(std::atomic_ref::is_always_lock_free == _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B); -#else static_assert(std::atomic_ref::is_always_lock_free); -#endif - // We expect tests to run on machines that support DCAS, which is required by Win8+. std::atomic_ref ar{two_pointers}; assert(ar.is_lock_free()); } @@ -518,5 +507,3 @@ int main() { test_gh_4472(); test_gh_4728(); } - -#endif // ^^^ run test ^^^