[libcxx-commits] [libcxx] 29c8c07 - [libc++] Use bit field for checking if string is in long or short mode
Nikolas Klauser via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Apr 21 05:20:27 PDT 2022
Author: Nikolas Klauser
Date: 2022-04-21T14:20:21+02:00
New Revision: 29c8c070a1770fc510ccad3be753f6f50336f8cc
URL: https://github.com/llvm/llvm-project/commit/29c8c070a1770fc510ccad3be753f6f50336f8cc
DIFF: https://github.com/llvm/llvm-project/commit/29c8c070a1770fc510ccad3be753f6f50336f8cc.diff
LOG: [libc++] Use bit field for checking if string is in long or short mode
This makes the code a bit simpler and (I think) removes the undefined behaviour from the normal string layout.
Reviewed By: ldionne, Mordante, #libc
Spies: labath, dblaikie, JDevlieghere, krytarowski, jgorbe, jingham, saugustine, arichardson, libcxx-commits
Differential Revision: https://reviews.llvm.org/D123580
Added:
libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp
Modified:
libcxx/include/string
libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp
libcxx/utils/gdb/libcxx/printers.py
Removed:
################################################################################
diff --git a/libcxx/include/string b/libcxx/include/string
index 9fd19d2ad9570..bd1c4303e8a40 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -532,6 +532,8 @@ basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++1
#include <__utility/auto_cast.h>
#include <__utility/move.h>
#include <__utility/swap.h>
+#include <__utility/unreachable.h>
+#include <climits>
#include <compare>
#include <cstdio> // EOF
#include <cstdlib>
@@ -539,6 +541,7 @@ basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++1
#include <initializer_list>
#include <iosfwd>
#include <iterator>
+#include <limits>
#include <memory>
#include <stdexcept>
#include <string_view>
@@ -674,17 +677,10 @@ private:
{
pointer __data_;
size_type __size_;
- size_type __cap_;
+ size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1;
+ size_type __is_long_ : 1;
};
-#ifdef _LIBCPP_BIG_ENDIAN
- static const size_type __short_mask = 0x01;
- static const size_type __long_mask = 0x1ul;
-#else // _LIBCPP_BIG_ENDIAN
- static const size_type __short_mask = 0x80;
- static const size_type __long_mask = ~(size_type(~0) >> 1);
-#endif // _LIBCPP_BIG_ENDIAN
-
enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
(sizeof(__long) - 1)/sizeof(value_type) : 2};
@@ -692,26 +688,46 @@ private:
{
value_type __data_[__min_cap];
unsigned char __padding[sizeof(value_type) - 1];
- unsigned char __size_;
+ unsigned char __size_ : 7;
+ unsigned char __is_long_ : 1;
};
+// The __endian_factor is required because the field we use to store the size
+// (either size_type or unsigned char depending on long/short) has one fewer
+// bit than it would if it were not a bitfield.
+//
+// If the LSB is used to store the short-flag in the short string representation,
+// we have to multiply the size by two when it is stored and divide it by two when
+// it is loaded to make sure that we always store an even number. In the long string
+// representation, we can ignore this because we can assume that we always allocate
+// an even amount of value_types.
+//
+// If the MSB is used for the short-flag, the max_size() is numeric_limits<size_type>::max() / 2.
+// This does not impact the short string representation, since we never need the MSB
+// for representing the size of a short string anyway.
+
+#ifdef _LIBCPP_BIG_ENDIAN
+ static const size_type __endian_factor = 2;
#else
+ static const size_type __endian_factor = 1;
+#endif
+
+#else // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
+
+#ifdef _LIBCPP_BIG_ENDIAN
+ static const size_type __endian_factor = 1;
+#else
+ static const size_type __endian_factor = 2;
+#endif
struct __long
{
- size_type __cap_;
+ size_type __is_long_ : 1;
+ size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1;
size_type __size_;
pointer __data_;
};
-#ifdef _LIBCPP_BIG_ENDIAN
- static const size_type __short_mask = 0x80;
- static const size_type __long_mask = ~(size_type(~0) >> 1);
-#else // _LIBCPP_BIG_ENDIAN
- static const size_type __short_mask = 0x01;
- static const size_type __long_mask = 0x1ul;
-#endif // _LIBCPP_BIG_ENDIAN
-
enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
(sizeof(__long) - 1)/sizeof(value_type) : 2};
@@ -719,7 +735,10 @@ private:
{
union
{
- unsigned char __size_;
+ struct {
+ unsigned char __is_long_ : 1;
+ unsigned char __size_ : 7;
+ };
value_type __lx;
};
value_type __data_[__min_cap];
@@ -1426,8 +1445,9 @@ public:
_LIBCPP_INLINE_VISIBILITY void __shrink_or_extend(size_type __target_capacity);
_LIBCPP_INLINE_VISIBILITY
- bool __is_long() const _NOEXCEPT
- {return bool(__r_.first().__s.__size_ & __short_mask);}
+ bool __is_long() const _NOEXCEPT {
+ return __r_.first().__s.__is_long_;
+ }
#if _LIBCPP_DEBUG_LEVEL == 2
@@ -1474,43 +1494,18 @@ private:
_LIBCPP_HIDE_FROM_ABI allocator_type& __alloc() _NOEXCEPT { return __r_.second(); }
_LIBCPP_HIDE_FROM_ABI const allocator_type& __alloc() const _NOEXCEPT { return __r_.second(); }
-#ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
-
- _LIBCPP_INLINE_VISIBILITY
- void __set_short_size(size_type __s) _NOEXCEPT
-# ifdef _LIBCPP_BIG_ENDIAN
- {__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
-# else
- {__r_.first().__s.__size_ = (unsigned char)(__s);}
-# endif
-
- _LIBCPP_INLINE_VISIBILITY
- size_type __get_short_size() const _NOEXCEPT
-# ifdef _LIBCPP_BIG_ENDIAN
- {return __r_.first().__s.__size_ >> 1;}
-# else
- {return __r_.first().__s.__size_;}
-# endif
-
-#else // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
-
_LIBCPP_INLINE_VISIBILITY
- void __set_short_size(size_type __s) _NOEXCEPT
-# ifdef _LIBCPP_BIG_ENDIAN
- {__r_.first().__s.__size_ = (unsigned char)(__s);}
-# else
- {__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
-# endif
+ void __set_short_size(size_type __s) _NOEXCEPT {
+ _LIBCPP_ASSERT(__s < __min_cap, "__s should never be greater than or equal to the short string capacity");
+ __r_.first().__s.__size_ = __s;
+ __r_.first().__s.__is_long_ = false;
+ }
_LIBCPP_INLINE_VISIBILITY
- size_type __get_short_size() const _NOEXCEPT
-# ifdef _LIBCPP_BIG_ENDIAN
- {return __r_.first().__s.__size_;}
-# else
- {return __r_.first().__s.__size_ >> 1;}
-# endif
-
-#endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
+ size_type __get_short_size() const _NOEXCEPT {
+ _LIBCPP_ASSERT(!__r_.first().__s.__is_long_, "String has to be short when trying to get the short size");
+ return __r_.first().__s.__size_;
+ }
_LIBCPP_INLINE_VISIBILITY
void __set_long_size(size_type __s) _NOEXCEPT
@@ -1523,11 +1518,15 @@ private:
{if (__is_long()) __set_long_size(__s); else __set_short_size(__s);}
_LIBCPP_INLINE_VISIBILITY
- void __set_long_cap(size_type __s) _NOEXCEPT
- {__r_.first().__l.__cap_ = __long_mask | __s;}
+ void __set_long_cap(size_type __s) _NOEXCEPT {
+ __r_.first().__l.__cap_ = __s / __endian_factor;
+ __r_.first().__l.__is_long_ = true;
+ }
+
_LIBCPP_INLINE_VISIBILITY
- size_type __get_long_cap() const _NOEXCEPT
- {return __r_.first().__l.__cap_ & size_type(~__long_mask);}
+ size_type __get_long_cap() const _NOEXCEPT {
+ return __r_.first().__l.__cap_ * __endian_factor;
+ }
_LIBCPP_INLINE_VISIBILITY
void __set_long_pointer(pointer __p) _NOEXCEPT
@@ -3225,11 +3224,12 @@ typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::max_size() const _NOEXCEPT
{
size_type __m = __alloc_traits::max_size(__alloc());
-#ifdef _LIBCPP_BIG_ENDIAN
- return (__m <= ~__long_mask ? __m : __m/2) - __alignment;
-#else
- return __m - __alignment;
-#endif
+ if (__m <= std::numeric_limits<size_type>::max() / 2) {
+ return __m - __alignment;
+ } else {
+ bool __uses_lsb = __endian_factor == 2;
+ return __uses_lsb ? __m - __alignment : (__m / 2) - __alignment;
+ }
}
template <class _CharT, class _Traits, class _Allocator>
diff --git a/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp
new file mode 100644
index 0000000000000..65dfead25cdf0
--- /dev/null
+++ b/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp
@@ -0,0 +1,125 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED:
+
+// <string>
+
+// This test ensures that the correct max_size() is returned depending on the platform.
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <string>
+
+#include "test_macros.h"
+
+// alignment of the string heap buffer is hardcoded to 16
+static const size_t alignment = 16;
+
+void full_size() {
+ std::string str;
+ assert(str.max_size() == std::numeric_limits<size_t>::max() - alignment);
+
+#ifndef TEST_HAS_NO_CHAR8_T
+ std::u8string u8str;
+ assert(u8str.max_size() == std::numeric_limits<size_t>::max() - alignment);
+#endif
+
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+ std::wstring wstr;
+ assert(wstr.max_size() == std::numeric_limits<size_t>::max() / sizeof(wchar_t) - alignment);
+#endif
+
+#ifndef TEST_HAS_NO_UNICODE_CHARS
+ std::u16string u16str;
+ std::u32string u32str;
+ assert(u16str.max_size() == std::numeric_limits<size_t>::max() / 2 - alignment);
+ assert(u32str.max_size() == std::numeric_limits<size_t>::max() / 4 - alignment);
+#endif
+}
+
+void half_size() {
+ std::string str;
+ assert(str.max_size() == std::numeric_limits<size_t>::max() / 2 - alignment);
+
+#ifndef TEST_HAS_NO_CHAR8_T
+ std::u8string u8str;
+ assert(u8str.max_size() == std::numeric_limits<size_t>::max() / 2 - alignment);
+#endif
+
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+ std::wstring wstr;
+ assert(wstr.max_size() == std::numeric_limits<size_t>::max() / std::max<size_t>(2ul, sizeof(wchar_t)) - alignment);
+#endif
+
+#ifndef TEST_HAS_NO_UNICODE_CHARS
+ std::u16string u16str;
+ std::u32string u32str;
+ assert(u16str.max_size() == std::numeric_limits<size_t>::max() / 2 - alignment);
+ assert(u32str.max_size() == std::numeric_limits<size_t>::max() / 4 - alignment);
+#endif
+}
+
+bool test() {
+#if _LIBCPP_ABI_VERSION == 1
+#ifdef __APPLE__
+
+# ifdef __aarch64__
+ half_size();
+# elif defined(__x86_64__)
+ full_size();
+# else
+# error "Your target system seems to be unsupported."
+# endif
+
+#elif defined(__linux__)
+
+# ifdef __x86_64__
+ full_size();
+# elif defined(__arm__) || defined(__aarch64__)
+# ifdef __BIG_ENDIAN__
+ half_size();
+# else
+ full_size();
+# endif
+
+# else
+# error "Your target system seems to be unsupported."
+# endif
+
+#elif defined(__powerpc__)
+ half_size();
+#elif defined(__powepc64__)
+ half_size();
+
+#elif defined(_WIN64)
+
+# ifdef __x86_64__
+ full_size();
+# else
+# error "Your target system seems to be unsupported."
+# endif
+
+#else
+# error "Your target system seems to be unsupported."
+#endif
+
+#endif
+
+ return true;
+}
+
+int main(int, char**) {
+ test();
+#if TEST_STD_VER > 17
+ // static_assert(test());
+#endif
+
+ return 0;
+}
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp
index 2b6b0e2ae66ea..a2c699456d858 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp
@@ -9,6 +9,11 @@
// UNSUPPORTED: no-exceptions
// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}}
+// Prior to http://llvm.org/D123580, there was a bug with how the max_size()
+// was calculated. That was inlined into some functions in the dylib, which leads
+// to failures when running this test against an older system dylib.
+// XFAIL: use_system_cxx_lib && target=arm64-apple-macosx{{11.0|12.0}}
+
// <string>
// size_type max_size() const;
diff --git a/libcxx/utils/gdb/libcxx/printers.py b/libcxx/utils/gdb/libcxx/printers.py
index acf77b648cb26..d98b6269f1f9d 100644
--- a/libcxx/utils/gdb/libcxx/printers.py
+++ b/libcxx/utils/gdb/libcxx/printers.py
@@ -192,26 +192,6 @@ def _value_of_pair_first(value):
class StdStringPrinter(object):
"""Print a std::string."""
- def _get_short_size(self, short_field, short_size):
- """Short size depends on both endianness and a compile-time define."""
-
- # If the padding field is present after all this indirection, then string
- # was compiled with _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT defined.
- field = short_field.type.fields()[1].type.fields()[0]
- libcpp_abi_alternate_string_layout = field.name and "__padding" in field.name
-
- # This logical structure closely follows the original code (which is clearer
- # in C++). Keep them parallel to make them easier to compare.
- if libcpp_abi_alternate_string_layout:
- if _libcpp_big_endian:
- return short_size >> 1
- else:
- return short_size
- elif _libcpp_big_endian:
- return short_size
- else:
- return short_size >> 1
-
def __init__(self, val):
self.val = val
@@ -223,18 +203,13 @@ def to_string(self):
short_size = short_field["__size_"]
if short_size == 0:
return ""
- short_mask = self.val["__short_mask"]
- # Counter intuitive to compare the size and short_mask to see if the string
- # is long, but that's the way the implementation does it. Note that
- # __is_long() doesn't use get_short_size in C++.
- is_long = short_size & short_mask
- if is_long:
+ if short_field["__is_long_"]:
long_field = value_field["__l"]
data = long_field["__data_"]
size = long_field["__size_"]
else:
data = short_field["__data_"]
- size = self._get_short_size(short_field, short_size)
+ size = short_field["__size_"]
return data.lazy_string(length=size)
def display_hint(self):
More information about the libcxx-commits
mailing list