Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

<string>: Fix handling for char_traits<unicorn> #4951

Merged
merged 7 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions stl/inc/__msvc_string_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,17 @@ template <>
struct char_traits<unsigned short> : _WChar_traits<unsigned short> {};
#endif // defined(_CRTBLD)

// Element-type predicate: true when _Ty is nullptr_t or any arithmetic type.
// Of these, signed char and other unsigned integral types are supported as an extension;
// use of other arithmetic types and nullptr_t should be rejected.
template <class _Ty>
constexpr bool _Is_implementation_handled_char_like_type = is_null_pointer_v<_Ty> || is_arithmetic_v<_Ty>;

// Traits-class predicate.
// Primary template: a traits class that is not a specialization of char_traits is never implementation-handled.
template <class _Traits>
constexpr bool _Is_implementation_handled_char_traits = false;

// char_traits<_Elem>: the answer is decided by the element type alone.
template <class _Elem>
constexpr bool _Is_implementation_handled_char_traits<char_traits<_Elem>> =
    _Is_implementation_handled_char_like_type<_Elem>;

#if defined(__cpp_char8_t) && !defined(__clang__) && !defined(__EDG__)
#define _HAS_U8_INTRINSICS 1
#else // ^^^ Use intrinsics for char8_t / don't use said intrinsics vvv
Expand Down Expand Up @@ -675,9 +686,9 @@ class _String_bitmap { // _String_bitmap for character types
template <class _Elem>
class _String_bitmap<_Elem, false> { // _String_bitmap for wchar_t/unsigned short/char16_t/char32_t/etc. types
public:
static_assert(is_unsigned_v<_Elem>,
"Standard char_traits is only provided for char, wchar_t, char16_t, and char32_t. See N4950 [char.traits]. "
"Visual C++ accepts other unsigned integral types as an extension.");
static_assert(is_unsigned_v<_Elem>, "Standard char_traits is only provided for char, wchar_t, char8_t, char16_t, "
"and char32_t. See N4988 [char.traits]. "
"Visual C++ accepts other unsigned integral types as an extension.");

constexpr bool _Mark(const _Elem* _First, const _Elem* const _Last) noexcept {
// mark this bitmap such that the characters in [_First, _Last) are intended to match
Expand All @@ -702,7 +713,7 @@ class _String_bitmap<_Elem, false> { // _String_bitmap for wchar_t/unsigned shor
bool _Matches[256] = {};
};

template <class _Traits, bool _Special = _Is_specialization_v<_Traits, char_traits>>
template <class _Traits, bool _Special = _Is_implementation_handled_char_traits<_Traits>>
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
constexpr size_t _Traits_find_first_of(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits> _Haystack,
const size_t _Hay_size, const size_t _Start_at, _In_reads_(_Needle_size) const _Traits_ptr_t<_Traits> _Needle,
const size_t _Needle_size) noexcept {
Expand Down Expand Up @@ -764,7 +775,7 @@ constexpr size_t _Traits_find_first_of(_In_reads_(_Hay_size) const _Traits_ptr_t
return static_cast<size_t>(-1); // no match
}

template <class _Traits, bool _Special = _Is_specialization_v<_Traits, char_traits>>
template <class _Traits, bool _Special = _Is_implementation_handled_char_traits<_Traits>>
constexpr size_t _Traits_find_last_of(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits> _Haystack,
const size_t _Hay_size, const size_t _Start_at, _In_reads_(_Needle_size) const _Traits_ptr_t<_Traits> _Needle,
const size_t _Needle_size) noexcept {
Expand Down Expand Up @@ -802,7 +813,7 @@ constexpr size_t _Traits_find_last_of(_In_reads_(_Hay_size) const _Traits_ptr_t<
return static_cast<size_t>(-1); // no match
}

template <class _Traits, bool _Special = _Is_specialization_v<_Traits, char_traits>>
template <class _Traits, bool _Special = _Is_implementation_handled_char_traits<_Traits>>
constexpr size_t _Traits_find_first_not_of(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits> _Haystack,
const size_t _Hay_size, const size_t _Start_at, _In_reads_(_Needle_size) const _Traits_ptr_t<_Traits> _Needle,
const size_t _Needle_size) noexcept {
Expand Down Expand Up @@ -851,7 +862,7 @@ constexpr size_t _Traits_find_not_ch(_In_reads_(_Hay_size) const _Traits_ptr_t<_
return static_cast<size_t>(-1); // no match
}

template <class _Traits, bool _Special = _Is_specialization_v<_Traits, char_traits>>
template <class _Traits, bool _Special = _Is_implementation_handled_char_traits<_Traits>>
constexpr size_t _Traits_find_last_not_of(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits> _Haystack,
const size_t _Hay_size, const size_t _Start_at, _In_reads_(_Needle_size) const _Traits_ptr_t<_Traits> _Needle,
const size_t _Needle_size) noexcept {
Expand Down
22 changes: 16 additions & 6 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -193,19 +193,29 @@ struct _Char_traits_lt {
}
};

// signed char and other unsigned integral types are supported as an extension.
template <class _Ty>
constexpr bool _Is_predefined_char_like_type = _Is_character<_Ty>::value || is_unsigned_v<_Ty>;

// library-provided char_traits::eq behaves like equal_to<_Elem>
// TRANSITION: This should not be activated for user-defined specializations of char_traits
template <class _Elem>
constexpr bool _Can_memcmp_elements_with_pred<_Elem, _Elem, _Char_traits_eq<char_traits<_Elem>>> =
_Can_memcmp_elements<_Elem, _Elem>;
_Is_predefined_char_like_type<_Elem> && _Can_memcmp_elements<_Elem, _Elem>;
frederick-vs-ja marked this conversation as resolved.
Show resolved Hide resolved

// library-provided char_traits::lt behaves like less<make_unsigned_t<_Elem>>
// TRANSITION: This should not be activated for user-defined specializations of char_traits
template <class _Elem>
struct _Lex_compare_memcmp_classify_pred<_Elem, _Elem, _Char_traits_lt<char_traits<_Elem>>> {
template <class _Elem, bool = _Is_predefined_char_like_type<_Elem>>
struct _Lex_compare_memcmp_classify_pred_for_char_traits_lt {
using _UElem = make_unsigned_t<_Elem>;
using _Pred = conditional_t<_Lex_compare_memcmp_classify_elements<_UElem, _UElem>, less<int>, void>;
};
template <class _Elem>
struct _Lex_compare_memcmp_classify_pred_for_char_traits_lt<_Elem, false> {
using _Pred = void;
};

// library-provided char_traits::lt behaves like less<make_unsigned_t<_Elem>>
template <class _Elem>
struct _Lex_compare_memcmp_classify_pred<_Elem, _Elem, _Char_traits_lt<char_traits<_Elem>>>
: _Lex_compare_memcmp_classify_pred_for_char_traits_lt<_Elem> {};

template <class _RxTraits>
struct _Cmp_cs { // functor to compare two character values for equality
Expand Down
1 change: 1 addition & 0 deletions tests/std/test.lst
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ tests\GH_004618_mixed_operator_usage_keeps_statistical_properties
tests\GH_004618_normal_distribution_avoids_resets
tests\GH_004657_expected_constraints_permissive
tests\GH_004845_logical_operator_traits_with_non_bool_constant
tests\GH_004930_char_traits_user_specialization
tests\LWG2381_num_get_floating_point
tests\LWG2597_complex_branch_cut
tests\LWG3018_shared_ptr_function
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

RUNALL_INCLUDE ..\usual_matrix.lst
210 changes: 210 additions & 0 deletions tests/std/tests/GH_004930_char_traits_user_specialization/test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <cassert>
#include <cstring>
#include <cwchar>
#include <iosfwd>
#include <string>
#if _HAS_CXX17
#include <string_view>
#endif // _HAS_CXX17
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
#include <type_traits>

// CONSTEXPR20 expands to constexpr in C++20 mode and to inline otherwise, so that functions which need
// C++20 constant-evaluation features can still be defined in earlier language modes.
#if _HAS_CXX20
#define CONSTEXPR20 constexpr
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
#define CONSTEXPR20 inline
#endif // ^^^ !_HAS_CXX20 ^^^

using namespace std;

// Only the bits selected by this mask (the low nibble) take part in odd_char comparisons.
static constexpr unsigned char odd_mask = 0xF;

// A user-defined character-like type; it is an enumeration, so it is neither arithmetic nor nullptr_t.
enum odd_char : unsigned char {};

// User-provided specialization of char_traits whose notion of equality and ordering differs from the
// value-wise comparison of the underlying bytes: two odd_char values are equal when their low nibbles match.
// Library code must therefore call these members instead of comparing the bytes directly.
template <>
class std::char_traits<odd_char> {
public:
    using char_type  = odd_char;
    using int_type   = int;
    using off_type   = streamoff;
    using pos_type   = streampos;
    using state_type = mbstate_t;

    // Equality: compares only the bits selected by odd_mask.
    static constexpr bool eq(const char_type c, const char_type d) noexcept {
        return ((static_cast<unsigned char>(c) ^ static_cast<unsigned char>(d)) & odd_mask) == 0;
    }

    // Ordering: compares only the bits selected by odd_mask.
    static constexpr bool lt(const char_type c, const char_type d) noexcept {
        return (static_cast<unsigned char>(c) & odd_mask) < (static_cast<unsigned char>(d) & odd_mask);
    }

    // Lexicographically compares the first n elements of c and d using the masked values.
    // Returns a negative value, zero, or a positive value.
    static constexpr int compare(const char_type* const c, const char_type* const d, const size_t n) noexcept {
        for (size_t i = 0; i != n; ++i) {
            const int ci = static_cast<unsigned char>(c[i]) & odd_mask;
            const int di = static_cast<unsigned char>(d[i]) & odd_mask;
            const int r  = ci - di; // both operands are in [0, 15], so this cannot overflow
            if (r != 0) {
                return r;
            }
        }

        return 0;
    }

    // Returns the smallest i such that eq(p[i], char_type()) is true, as the traits requirements specify.
    // The terminator test goes through eq() so that it agrees with find() and compare().
    static constexpr size_t length(const char_type* const p) noexcept {
        const char_type* c = p;
        while (!eq(*c, char_type{})) {
            ++c;
        }

        return static_cast<size_t>(c - p);
    }

    // Returns a pointer to the first of the n elements at p that is eq() to c, or nullptr if there is none.
    static constexpr const char_type* find(const char_type* p, const size_t n, const char_type c) noexcept {
        for (size_t i = 0; i != n; ++i) {
            if (eq(p[i], c)) {
                return p + i;
            }
        }

        return nullptr;
    }

    // Copies n elements from p to s; the ranges may overlap. Returns s.
    static CONSTEXPR20 char_type* move(char_type* s, const char_type* p, const size_t n) noexcept {
#if _HAS_CXX20
        if (is_constant_evaluated()) {
            // memmove is not usable during constant evaluation, so pick the copy direction by hand:
            // when the destination starts inside the source range, copy backwards.
            bool is_dst_in_src_range = false;
            for (size_t i = 0; i != n; ++i) {
                if (p + i == s) {
                    is_dst_in_src_range = true;
                    break;
                }
            }

            if (is_dst_in_src_range) {
                for (size_t i = n; i != 0;) {
                    --i;
                    s[i] = p[i];
                }
            } else {
                for (size_t i = 0; i != n; ++i) {
                    s[i] = p[i];
                }
            }
        } else
#endif // _HAS_CXX20
        {
            // char_type is one byte wide, so n elements are n bytes
            memmove(s, p, n);
        }
        return s;
    }

    // Copies n elements from p to s. Returns s.
    static CONSTEXPR20 char_type* copy(char_type* s, const char_type* p, const size_t n) noexcept {
#if _HAS_CXX20
        if (is_constant_evaluated()) {
            for (size_t i = 0; i != n; ++i) {
                s[i] = p[i];
            }
        } else
#endif // _HAS_CXX20
        {
            memmove(s, p, n);
        }
        return s;
    }

    // Assigns d to r.
    static constexpr void assign(char_type& r, const char_type& d) noexcept {
        r = d;
    }

    // Fills the n elements at s with c. Returns s.
    static CONSTEXPR20 char_type* assign(char_type* const s, const size_t n, const char_type c) noexcept {
#if _HAS_CXX20
        if (is_constant_evaluated()) {
            for (size_t i = 0; i != n; ++i) {
                s[i] = c;
            }
        } else
#endif // _HAS_CXX20
        {
            memset(s, static_cast<unsigned char>(c), n);
        }
        return s;
    }

    // Returns i unchanged unless it is eof(); for eof() returns 0, which is not eq_int_type() to eof().
    static constexpr int_type not_eof(const int_type i) noexcept {
        return i == eof() ? 0 : i;
    }

    // Converts an int_type value to the character with the same low eight bits.
    static constexpr char_type to_char_type(const int_type i) noexcept {
        return static_cast<char_type>(static_cast<unsigned char>(i));
    }

    // Converts a character to a non-negative int_type value, so it never collides with eof().
    static constexpr int_type to_int_type(const char_type i) noexcept {
        return static_cast<unsigned char>(i);
    }

    // Equality on int_type: eof() is equal only to itself (whichever argument it is passed as);
    // all other values are compared like eq(), i.e. by the bits selected by odd_mask.
    static constexpr bool eq_int_type(const int_type c, const int_type d) noexcept {
        if (c == eof() || d == eof()) {
            return c == d;
        }

        return ((c ^ d) & odd_mask) == 0;
    }

    // The end-of-file marker; distinct from every value produced by to_int_type().
    static constexpr int_type eof() noexcept {
        return -1;
    }
};

// Exercises the searching members of basic_string and basic_string_view when they are instantiated with the
// user-specialized char_traits<odd_char>, whose eq() compares only the low four bits of each element.
// All four "find_*_of" families are covered, so a search that compares raw element values instead of
// calling the traits is detected. Returns true so that the function can also be used in a static_assert.
CONSTEXPR20 bool test_gh_4930() {
    // low nibbles: 5, 4, 3, 2, 1
    constexpr odd_char s_init[]{static_cast<odd_char>(0x55), static_cast<odd_char>(0x44), static_cast<odd_char>(0x33),
        static_cast<odd_char>(0x22), static_cast<odd_char>(0x11), static_cast<odd_char>(0)};
    // low nibbles: 3, 2 (the raw values 0x83 and 0x12 do not occur in s_init)
    constexpr odd_char s2_init[]{static_cast<odd_char>(0x83), static_cast<odd_char>(0x12), static_cast<odd_char>(0)};

    using odd_string = basic_string<odd_char>;
    {
        odd_string s(s_init);

        assert(s.length() == 5);
        assert(s.find(static_cast<odd_char>(0x54)) == 1);
        assert(s.find(static_cast<odd_char>(0x26)) == s.npos);

        odd_string s2(s2_init);

        assert(s.find_first_of(s2) == 2);
        assert(s2.find_first_of(s) == 0);

        assert(s.find_last_of(s2) == 3);
        assert(s2.find_last_of(s) == 1);

        // every element of s2 matches some element of s, but not vice versa
        assert(s.find_first_not_of(s2) == 0);
        assert(s2.find_first_not_of(s) == s2.npos);

        assert(s.find_last_not_of(s2) == 4);
        assert(s2.find_last_not_of(s) == s2.npos);
    }

#if _HAS_CXX17
    using odd_string_view = basic_string_view<odd_char>;
    {
        odd_string_view sv(s_init);

        assert(sv.length() == 5);
        assert(sv.find(static_cast<odd_char>(0x54)) == 1);
        assert(sv.find(static_cast<odd_char>(0x26)) == sv.npos);

        odd_string_view sv2(s2_init);

        assert(sv.find_first_of(sv2) == 2);
        assert(sv2.find_first_of(sv) == 0);

        assert(sv.find_last_of(sv2) == 3);
        assert(sv2.find_last_of(sv) == 1);

        // every element of sv2 matches some element of sv, but not vice versa
        assert(sv.find_first_not_of(sv2) == 0);
        assert(sv2.find_first_not_of(sv) == sv2.npos);

        assert(sv.find_last_not_of(sv2) == 4);
        assert(sv2.find_last_not_of(sv) == sv2.npos);
    }
#endif // _HAS_CXX17

    return true;
}

#if _HAS_CXX20
static_assert(test_gh_4930());
#endif // _HAS_CXX20

// Runtime entry point: runs the GH-4930 checks.
// The call is made inside assert(), so it is not evaluated at all when NDEBUG is defined.
int main() {
assert(test_gh_4930());
}