[libcxx-commits] [libcxx] [libc++] Add tombstone traits and use them in optional (PR #98498)

Louis Dionne via libcxx-commits libcxx-commits at lists.llvm.org
Mon Dec 2 07:02:42 PST 2024


================
@@ -0,0 +1,288 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TYPE_TRAITS_DISENGAGED_TRAITS_H
+#define _LIBCPP___TYPE_TRAITS_DISENGAGED_TRAITS_H
+
+#include <__assert>
+#include <__config>
+#include <__cstddef/size_t.h>
+#include <__memory/construct_at.h>
+#include <__type_traits/datasizeof.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/is_constant_evaluated.h>
+#include <__type_traits/is_fundamental.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_trivial.h>
+#include <__type_traits/is_trivially_destructible.h>
+#include <__type_traits/remove_cv.h>
+#include <__type_traits/void_t.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+#include <__utility/piecewise_construct.h>
+#include <cstdint>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class>
+struct __tombstone_traits;
+
+#if _LIBCPP_STD_VER >= 17
+
+// A valid bool is stored as either 0 or 1, so at most one bit is ever set. If more than one bit is set, it's disengaged.
+template <>
+struct __tombstone_traits<bool> {
+  static constexpr uint8_t __disengaged_value_    = 3;
+  static constexpr size_t __is_disengaged_offset_ = 0;
+};
+
+struct __tombstone_traits_assume_aligned_pointer {
+  static constexpr uint8_t __disengaged_value_ = 1;
+#  ifdef _LIBCPP_LITTLE_ENDIAN
+  static constexpr size_t __is_disengaged_offset_ = 0;
+#  else
+  static constexpr size_t __is_disengaged_offset_ = sizeof(void*) - 1;
+#  endif
+};
+
+// TODO: Look into
+// - filesystem::directory_iterator
+// - vector<T> with alignof(T) == 1
+// - string_view (basic_string_view<T> works with alignof(T) >= 2)
+
+// This is constrained on fundamental types because we might not always know the alignment of a user-defined type.
+// For example, in one TU there may only be a forward declaration and in another there is already the definition
+// available. If we made this optimization conditional on the completeness of the type this would result in a non-benign
+// ODR violation.
+template <class _Tp>
+struct __tombstone_traits<__enable_specialization_if<is_fundamental_v<_Tp> && alignof(_Tp) >= 2, _Tp*>>
+    : __tombstone_traits_assume_aligned_pointer {};
+
+template <class _Tp>
+struct __tombstone_traits<_Tp**> : __tombstone_traits_assume_aligned_pointer {
+  static_assert(alignof(_Tp*) >= 2, "alignment of a pointer isn't at least 2!?");
+};
+
+inline constexpr struct __init_engaged_t {
+} __init_engaged;
+inline constexpr struct __init_disengaged_t {
+} __init_disengaged;
+
+template <class _Tp, class _Payload, bool = __tombstone_traits<_Tp>::__is_disengaged_offset_ == 0>
+struct __tombstone_is_disengaged {
+  using _TombstoneLayout = __tombstone_traits<_Tp>;
+  using _IsDisengagedT   = remove_cv_t<decltype(_TombstoneLayout::__disengaged_value_)>;
+
+  char __padding_[_TombstoneLayout::__is_disengaged_offset_];
+  _IsDisengagedT __is_disengaged_;
+};
+
+template <class _Tp, class _Payload>
+struct __tombstone_is_disengaged<_Tp, _Payload, true> {
+  using _TombstoneLayout = __tombstone_traits<_Tp>;
+  using _IsDisengagedT   = remove_cv_t<decltype(_TombstoneLayout::__disengaged_value_)>;
+
+  _IsDisengagedT __is_disengaged_;
+};
+
+template <class _Tp, class _Payload, bool = __tombstone_traits<_Tp>::__is_disengaged_offset_ == 0>
+struct __tombstone_data {
+  using _TombstoneLayout = __tombstone_traits<_Tp>;
+  using _IsDisengagedT   = remove_cv_t<decltype(_TombstoneLayout::__disengaged_value_)>;
+
+  static_assert(is_trivial<_IsDisengagedT>::value, "disengaged type has to be trivial!");
+  static_assert(_TombstoneLayout::__is_disengaged_offset_ >= __datasizeof_v<_Payload>);
+
+  _LIBCPP_NO_UNIQUE_ADDRESS _Payload __payload_;
+  char __padding_[_TombstoneLayout::__is_disengaged_offset_ - __datasizeof_v<_Payload>];
+  _IsDisengagedT __is_disengaged_;
+
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI constexpr __tombstone_data(_Args&&... __args)
+      : __payload_(std::forward<_Args>(__args)...), __is_disengaged_(_TombstoneLayout::__disengaged_value_) {}
+};
+
+template <class _Tp, class _Payload>
+struct __tombstone_data<_Tp, _Payload, true> {
+  using _TombstoneLayout = __tombstone_traits<_Tp>;
+  using _IsDisengagedT   = remove_cv_t<decltype(_TombstoneLayout::__disengaged_value_)>;
+
+  _IsDisengagedT __is_disengaged_;
+  _LIBCPP_NO_UNIQUE_ADDRESS _Payload __payload_;
+
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI constexpr __tombstone_data(_Args&&... __args)
+      : __is_disengaged_(_TombstoneLayout::__disengaged_value_), __payload_(std::forward<_Args>(__args)...) {}
+};
+
+template <class _Tp, class _Payload, bool = is_trivially_destructible_v<_Tp> && is_trivially_destructible_v<_Payload>>
+union _MaybeTombstone {
+  using _TombstoneLayout = __tombstone_traits<_Tp>;
+  using _TombstoneData   = __tombstone_data<_Tp, _Payload>;
+
+  _Tp __value_;
+  _TombstoneData __tombstone_;
+
+  template <class... _Args>
+  constexpr _MaybeTombstone(__init_disengaged_t, _Args&&... __args) : __tombstone_(std::forward<_Args>(__args)...) {}
+
+  template <class... _Args>
+  constexpr _MaybeTombstone(__init_engaged_t, _Args&&... __args) : __value_(std::forward<_Args>(__args)...) {}
+
+  _MaybeTombstone(const _MaybeTombstone&)            = default;
+  _MaybeTombstone(_MaybeTombstone&&)                 = default;
+  _MaybeTombstone& operator=(const _MaybeTombstone&) = default;
+  _MaybeTombstone& operator=(_MaybeTombstone&&)      = default;
+
+  _LIBCPP_HIDE_FROM_ABI constexpr bool __is_engaged() const noexcept {
----------------
ldionne wrote:

I understand it might not actually be possible to implement it that way, but I would find an interface like this for `tombstone_traits` to be more intuitive:

```c++
// Option #1
// Sketch: the trait itself decides, from the raw storage bytes, whether a std::string is disengaged.
template <>
struct __tombstone_traits<std::string> {
  // Inspects the raw bytes of the storage instead of taking a std::string&.
  static constexpr bool __is_disengaged(std::byte (&__bytes)[sizeof(std::string)]) {
    bool __is_short = __bytes[__builtin_offsetof(std::string, something)] == 0; // `something` is a placeholder member
    std::size_t __size = ...; // elided in this sketch
    // NOTE(review): presumably a short string whose size exceeds __min_cap can never occur in a live
    // string, which is what would free that state up to act as the tombstone -- confirm.
    return __is_short && __size > __min_cap;
  }
};


// Option #2
// Helper for the common case: a single marker value stored at a fixed byte offset inside the
// storage of `_Tp` identifies the disengaged state.
//   _Tp              - the type whose storage is inspected
//   _Offset          - byte offset of the marker within that storage
//   _DisengagedValue - marker value meaning "this storage is disengaged"
template <class _Tp, std::size_t _Offset, auto _DisengagedValue>
struct __tombstone_traits_from_offset {
  static constexpr auto __disengaged_value_            = _DisengagedValue;
  static constexpr std::size_t __is_disengaged_offset_ = _Offset;

  // Returns true when the marker read from `__bytes` at `_Offset` equals `_DisengagedValue`.
  // NOTE(review): whether this can be constant-evaluated depends on the compiler's support for
  // `__builtin_memcpy` in constant expressions -- confirm before relying on it at compile time.
  static constexpr bool __is_disengaged(std::byte (&__bytes)[sizeof(_Tp)]) {
    using _IsDisengagedT = std::remove_cv_t<decltype(_DisengagedValue)>;
    static_assert(_Offset + sizeof(_IsDisengagedT) <= sizeof(_Tp), "the marker has to lie inside the storage of _Tp");

    // Copy the marker out instead of casting: there may be no object of that type at this address.
    _IsDisengagedT __marker{};
    __builtin_memcpy(&__marker, __bytes + _Offset, sizeof(_IsDisengagedT));
    return __marker == _DisengagedValue;
  }
};

// Example use of the offset-based helper: offset 0 and the marker value 65 are the template
// arguments chosen for this sketch.
template <>
struct __tombstone_traits<std::string> : __tombstone_traits_from_offset<std::string, 0, std::uint8_t(65)> { };
```

I think what I'm after is to avoid basing the whole tombstone mechanism on passing a magic value and a magic offset, and instead to use something where (in theory) arbitrary code could run. In practice, I understand that we're limited in what we can do, since there may not be an actual object in memory, but it still seems like defining `__is_disengaged` in terms of a function (potentially with a commonly-used helper) is more flexible. This would allow, for example, checking more than just integral equality to determine whether the object is engaged or not.

https://github.com/llvm/llvm-project/pull/98498


More information about the libcxx-commits mailing list