Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vectorize bitset from string construction #4839

Open
wants to merge 39 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
9f574ff
test and benchmark
AlexGuteniev Jul 12, 2024
4318dc5
null hypothesis
AlexGuteniev Jul 12, 2024
70374d9
use threshold
AlexGuteniev Jul 12, 2024
551eef7
assume bitset word size
AlexGuteniev Jul 12, 2024
a38fe93
Revert "use threshold"
AlexGuteniev Jul 12, 2024
2b64c6e
SSE4.2 algorithm
AlexGuteniev Jul 12, 2024
73097bc
more cases
AlexGuteniev Jul 13, 2024
afaa90d
use threshold, now separate one
AlexGuteniev Jul 13, 2024
2e3a816
AVX2 algorithm
AlexGuteniev Jul 13, 2024
22e5bae
mighty ARM
AlexGuteniev Jul 13, 2024
36108d1
assumption-shmassumption
AlexGuteniev Jul 13, 2024
adfde23
more useful row
AlexGuteniev Jul 13, 2024
4eeb157
fewer instructions for AVX2 wchar_t case
AlexGuteniev Jul 13, 2024
c360e88
constexpr
AlexGuteniev Jul 13, 2024
9fe1973
early return
AlexGuteniev Jul 13, 2024
89f9806
pick correct range of too long string
AlexGuteniev Jul 13, 2024
bc993cc
this check is unnecessary
AlexGuteniev Jul 13, 2024
fce8222
TMP!
AlexGuteniev Jul 13, 2024
692dd76
clarify flow
AlexGuteniev Jul 13, 2024
8c04d1a
more TMP!
AlexGuteniev Jul 13, 2024
db18ee2
mighty ARM again
AlexGuteniev Jul 13, 2024
dc191d0
fallback fixup
AlexGuteniev Jul 14, 2024
93abc9e
fallback move
AlexGuteniev Jul 14, 2024
b48b553
array
AlexGuteniev Jul 14, 2024
3529d5c
padding
AlexGuteniev Jul 14, 2024
3d075c6
common word increment
AlexGuteniev Jul 14, 2024
e63ab56
TMP loop
AlexGuteniev Jul 14, 2024
e6b36ec
Merge branch 'main' into bitstring
StephanTLavavej Sep 9, 2024
c5931a1
Include what you use.
StephanTLavavej Sep 9, 2024
30bf2d5
`typename` => `class`
StephanTLavavej Sep 9, 2024
1a6625d
`random_digits_init()` shouldn't return a `const` prvalue.
StephanTLavavej Sep 9, 2024
d2ed6cc
Drop unused `basic_string`.
StephanTLavavej Sep 9, 2024
6fedf3e
Drop unnecessary `std::`.
StephanTLavavej Sep 9, 2024
c61c454
`bit_string/data/max` => `digit_array/arr_data/arr_size` and extract …
StephanTLavavej Sep 9, 2024
9cd3687
Replace nested loops with separate loops.
StephanTLavavej Sep 9, 2024
467e01a
Consistent param order for `random_digits_init()`.
StephanTLavavej Sep 10, 2024
1f48693
Drop unnecessary `static`.
StephanTLavavej Sep 10, 2024
03b1fdd
Merge branch 'main' into bitstring
StephanTLavavej Sep 12, 2024
1bb6faf
FizzBuzz!
AlexGuteniev Sep 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ function(add_benchmark name)
target_compile_definitions(benchmark-${name} PRIVATE BENCHMARK_STATIC_DEFINE)
endfunction()

add_benchmark(bitset_from_string src/bitset_from_string.cpp)
add_benchmark(bitset_to_string src/bitset_to_string.cpp)
add_benchmark(find_and_count src/find_and_count.cpp)
add_benchmark(find_first_of src/find_first_of.cpp)
Expand Down
93 changes: 93 additions & 0 deletions benchmarks/src/bitset_from_string.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <array>
#include <benchmark/benchmark.h>
#include <bitset>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
#include <random>

using namespace std;

namespace {
// Builds the input data for the bitset-from-string benchmarks.
//
// The result is a std::array<charT, K * (N + 1)> where K = ceil(Min_length / N). It holds K consecutive
// records, each made of N pseudo-random '0'/'1' characters followed by one null terminator, so every
// record can be consumed either as a counted range of N characters or as a null-terminated string.
//
// The generator is default-seeded, so the same template arguments always produce the same data.
// Returned by non-const value so that callers can move from (or directly initialize with) the result.
template <class charT, size_t Min_length, size_t N>
auto random_digits_init() {
    static_assert(N != 0); // a record must contain at least one digit (also guards the division below)

    constexpr size_t number_of_bitsets = (Min_length + N - 1) / N; // round up
    static_assert(number_of_bitsets != 0);

    constexpr size_t actual_size = number_of_bitsets * (N + 1); // +1 for \0

    std::mt19937_64 rnd{};
    std::uniform_int_distribution<> dis('0', '1');

    std::array<charT, actual_size> result;

    // actual_size is an exact multiple of (N + 1), so the outer loop lands precisely on end().
    for (auto dest = result.begin(); dest != result.end();) {
        for (size_t i = 0; i != N; ++i, ++dest) {
            *dest = static_cast<charT>(dis(rnd));
        }

        *dest = charT{'\0'};
        ++dest;
    }

    return result;
}

// Selects how the benchmark passes each digit record to the bitset constructor:
// char_count - pointer plus an explicit character count;
// null_term  - pointer only, relying on the record's null terminator.
enum class length_type : bool { char_count, null_term };

// Benchmark input for a given bitset size N and character type, built once per instantiation by
// random_digits_init with a minimum length of 2048 characters.
template <size_t N, typename charT>
const auto random_digits = random_digits_init<charT, 2048, N>();

// Benchmarks constructing bitset<N> from character data.
//
// Template parameters:
//   Length - whether each record is passed as (pointer, count) or as a null-terminated pointer.
//   N      - number of bits in the bitset, which is also the number of digits per record.
//   charT  - character type of the input.
//
// Each benchmark iteration walks every record of random_digits<N, charT>; records are N digits plus
// one terminator, hence the stride of N + 1.
template <length_type Length, size_t N, class charT>
void BM_bitset_from_string(benchmark::State& state) {
    const auto& digit_array = random_digits<N, charT>;

    for (auto _ : state) {
        benchmark::DoNotOptimize(digit_array);

        const charT* const arr_data = digit_array.data();
        const size_t arr_size       = digit_array.size();

        // The constructor form is chosen at compile time, so pick the loop once instead of per record.
        if constexpr (Length == length_type::char_count) {
            for (size_t pos = 0; pos != arr_size; pos += N + 1) {
                bitset<N> bs(arr_data + pos, N);
                benchmark::DoNotOptimize(bs);
            }
        } else {
            for (size_t pos = 0; pos != arr_size; pos += N + 1) {
                bitset<N> bs(arr_data + pos);
                benchmark::DoNotOptimize(bs);
            }
        }
    }
}
} // namespace

// Registered cases: every combination of length_type (char_count, null_term), character type
// (char, wchar_t), and bitset size (15, 16, 36, 64, 512, 2048).

// Counted input, char.
BENCHMARK(BM_bitset_from_string<length_type::char_count, 15, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 16, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 36, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 64, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 512, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 2048, char>);

// Counted input, wchar_t.
BENCHMARK(BM_bitset_from_string<length_type::char_count, 15, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 16, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 36, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 64, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 512, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 2048, wchar_t>);

// Null-terminated input, char.
BENCHMARK(BM_bitset_from_string<length_type::null_term, 15, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 16, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 36, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 64, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 512, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 2048, char>);

// Null-terminated input, wchar_t.
BENCHMARK(BM_bitset_from_string<length_type::null_term, 15, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 16, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 36, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 64, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 512, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 2048, wchar_t>);

BENCHMARK_MAIN();
36 changes: 34 additions & 2 deletions stl/inc/bitset
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,19 @@ _STL_DISABLE_CLANG_WARNINGS
#endif // !defined(_STD_BITSET_TO_STREAM_STACK_RESERVATION)

#if _USE_STD_VECTOR_ALGORITHMS
// These bitset functions sometimes assume that the bit array is zero-padded to a multiple of 2 or 4 bytes.
// These assumptions hold true even for the vNext suggestion to use smaller types for small bitsets,
// due to the vectorization thresholds.

// Declarations of the separately compiled vectorized bitset helpers; their definitions are not in this header.
// The _1 variants take 1-byte characters (char), the _2 variants take 2-byte characters (wchar_t).
extern "C" {
// bitset => string: _Elem0/_Elem1 are the characters used for the two bit values.
__declspec(noalias) void __stdcall __std_bitset_to_string_1(
char* _Dest, const void* _Src, size_t _Size_bits, char _Elem0, char _Elem1) noexcept;
__declspec(noalias) void __stdcall __std_bitset_to_string_2(
wchar_t* _Dest, const void* _Src, size_t _Size_bits, wchar_t _Elem0, wchar_t _Elem1) noexcept;
// string => bitset: bitset::_Construct passes its word array as _Dest, sizeof of that array as _Size_bytes,
// the bitset's bit count as _Size_bits, and the input character count as _Size_chars.
// A false return makes the caller report an invalid argument via _Xinv()
// (presumably meaning a character other than _Elem0/_Elem1 was found; confirm against the implementation).
__declspec(noalias) bool __stdcall __std_bitset_from_string_1(void* _Dest, const char* _Src, size_t _Size_bytes,
size_t _Size_bits, size_t _Size_chars, char _Elem0, char _Elem1) noexcept;
__declspec(noalias) bool __stdcall __std_bitset_from_string_2(void* _Dest, const wchar_t* _Src, size_t _Size_bytes,
size_t _Size_bits, size_t _Size_chars, wchar_t _Elem0, wchar_t _Elem1) noexcept;
} // extern "C"
#endif // _USE_STD_VECTOR_ALGORITHMS

Expand Down Expand Up @@ -115,6 +123,30 @@ public:
private:
template <class _Traits, class _Elem>
_CONSTEXPR23 void _Construct(const _Elem* const _Ptr, size_t _Count, const _Elem _Elem0, const _Elem _Elem1) {
#if _USE_STD_VECTOR_ALGORITHMS
constexpr size_t _Bitset_from_string_vector_threshold = 16;
if constexpr (_Bits >= _Bitset_from_string_vector_threshold
&& _Is_specialization_v<_Traits, char_traits> && sizeof(_Elem) <= 2) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ 🦄🦄🦄 ⚠️

Maybe it would be best to wait for #4951 and replace _Is_specialization_v<_Traits, char_traits> with _Is_implementation_handled_char_traits<_Traits>

if (!_STD _Is_constant_evaluated()) {
bool _Result;

if constexpr (sizeof(_Elem) == 1) {
_Result = __std_bitset_from_string_1(_Array, reinterpret_cast<const char*>(_Ptr), sizeof(_Array),
_Bits, _Count, static_cast<char>(_Elem0), static_cast<char>(_Elem1));
} else {
_STL_INTERNAL_STATIC_ASSERT(sizeof(_Elem) == 2);
_Result = __std_bitset_from_string_2(_Array, reinterpret_cast<const wchar_t*>(_Ptr), sizeof(_Array),
_Bits, _Count, static_cast<wchar_t>(_Elem0), static_cast<wchar_t>(_Elem1));
}

if (!_Result) {
_Xinv();
}

return;
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
if (_Count > _Bits) {
for (size_t _Idx = _Bits; _Idx < _Count; ++_Idx) {
const auto _Ch = _Ptr[_Idx];
Expand Down Expand Up @@ -462,8 +494,8 @@ public:
_CONSTEXPR23 void _To_string(
_Elem* const _Buf, const size_t _Len, const _Elem _Elem0, const _Elem _Elem1) const noexcept {
#if _USE_STD_VECTOR_ALGORITHMS
constexpr size_t _Bitset_vector_threshold = 32;
if constexpr (_Bits >= _Bitset_vector_threshold && is_integral_v<_Elem> && sizeof(_Elem) <= 2) {
constexpr size_t _Bitset_to_string_vector_threshold = 32;
if constexpr (_Bits >= _Bitset_to_string_vector_threshold && is_integral_v<_Elem> && sizeof(_Elem) <= 2) {
if (!_Is_constant_evaluated()) {
if constexpr (sizeof(_Elem) == 1) {
__std_bitset_to_string_1(reinterpret_cast<char*>(_Buf), _Array, _Len, static_cast<char>(_Elem0),
Expand Down
Loading