[clang] [libcxx] [clang & libcxx] constexpr pointer tagging (DO NOT MERGE) (PR #111861)

Louis Dionne via cfe-commits cfe-commits at lists.llvm.org
Fri Oct 11 06:26:11 PDT 2024


Hana Dusíková <hanicka at hanicka.net>,
Hana Dusíková <hanicka at hanicka.net>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/111861 at github.com>


================
@@ -0,0 +1,517 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TAGGED_PTR_H
+#define _LIBCPP___TAGGED_PTR_H
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 26
+  
+#include <__config>
+#include <__type_traits/is_trivially_copyable.h>
+#include <__assert>
+#include "__bit/has_single_bit.h"
+#include <__type_traits/rank.h>
+#include "pointer_traits.h"
+#include <compare>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// Satisfied when Y is implicitly convertible to T and T is implicitly convertible to Y.
+template <typename T, typename Y>
+concept convertible_to_from =
+    std::convertible_to<Y, T> && std::convertible_to<T, Y>;
+  
+// Requirements on a tagging schema T:
+//  - member types clean_pointer, dirty_pointer and tag_type,
+//  - tag_type implicitly convertible to and from uintptr_t,
+//  - clean_pointer is a raw pointer type,
+//  - three noexcept static functions converting between the clean pointer + tag
+//    and the dirty (tagged) pointer.
+template <typename T>
+concept pointer_tagging_schema =
+    requires(typename T::dirty_pointer payload, typename T::clean_pointer clean, typename T::tag_type tag) {
+      requires convertible_to_from<typename T::tag_type, uintptr_t>;
+      requires std::is_pointer_v<typename T::clean_pointer>;
+
+      // clean pointer + tag -> dirty pointer
+      { T::encode_pointer_with_tag(clean, tag) } noexcept -> std::same_as<typename T::dirty_pointer>;
+      // dirty pointer -> clean pointer
+      { T::recover_pointer(payload) } noexcept -> std::same_as<typename T::clean_pointer>;
+      // dirty pointer -> tag
+      { T::recover_value(payload) } noexcept -> std::same_as<typename T::tag_type>;
+    };
+
+// A tagging schema that additionally offers a noexcept static
+// recover_aliasing_pointer(dirty_pointer) returning clean_pointer.
+template <typename T>
+concept pointer_tagging_schema_with_aliasing =
+    pointer_tagging_schema<T> &&
+    requires(typename T::dirty_pointer payload) {
+      { T::recover_aliasing_pointer(payload) } noexcept -> std::same_as<typename T::clean_pointer>;
+    };
+
+// No-op schema, present mainly to illustrate the shape of a schema:
+// the pointer is stored unchanged and no tag is kept.
+struct no_tag {
+  template <typename T, typename Tag>
+  struct schema {
+    using tag_type      = Tag;
+    using clean_pointer = T *;
+    using dirty_pointer = void *;
+
+    // The tag argument is ignored; the pointer is only cast to dirty_pointer.
+    [[clang::always_inline]] static constexpr dirty_pointer
+    encode_pointer_with_tag(clean_pointer _ptr, tag_type) noexcept {
+      return (dirty_pointer)_ptr;
+    }
+
+    // Casts the stored pointer back to clean_pointer.
+    [[clang::always_inline]] static constexpr clean_pointer
+    recover_pointer(dirty_pointer _ptr) noexcept {
+      return (clean_pointer)_ptr;
+    }
+
+    // Same cast as recover_pointer.
+    [[clang::always_inline]] static constexpr clean_pointer
+    recover_aliasing_pointer(dirty_pointer _ptr) noexcept {
+      return (clean_pointer)_ptr;
+    }
+
+    // Nothing was stored, so a value-initialized tag is returned.
+    [[clang::always_inline]] static constexpr tag_type
+    recover_value(dirty_pointer) noexcept {
+      return {};
+    }
+  };
+};
+
+// Most basic tagging schema: the user provides the mask directly.
+// Bits set in Mask hold the tag and the remaining bits hold the pointer:
+// recover_value keeps `bits & Mask`, recover_pointer keeps `bits & ~Mask`.
+template <uintptr_t Mask> struct bitmask_tag {
+  static constexpr uintptr_t _mask = Mask;
+
+  template <typename T, typename Tag> struct schema {
+    using clean_pointer = T *;
+    using dirty_pointer = void *;
+    using tag_type = Tag;
+
+    // Merges _value into the masked bits of _ptr and returns the tagged pointer.
+    // NOTE(review): the builtin is presumed to apply the mask the same way as the
+    // fallback below; confirm against the compiler side of this patch.
+    [[clang::always_inline]] static constexpr dirty_pointer encode_pointer_with_tag(clean_pointer _ptr, tag_type _value) noexcept {
+#if __has_builtin(__builtin_tag_pointer_mask_or)
+      return static_cast<dirty_pointer>(__builtin_tag_pointer_mask_or((void *)(_ptr), static_cast<uintptr_t>(_value), _mask));
+#else
+      // Pointer bits live outside the mask and tag bits inside it, so that this
+      // function is the inverse of recover_pointer / recover_value.
+      return reinterpret_cast<dirty_pointer>((reinterpret_cast<uintptr_t>(_ptr) & ~static_cast<uintptr_t>(_mask)) | (static_cast<uintptr_t>(_value) & static_cast<uintptr_t>(_mask)));
+#endif
+    }
+    // Clears the masked (tag) bits and returns what is left as clean_pointer.
+    [[clang::always_inline]] static constexpr clean_pointer recover_pointer(dirty_pointer _ptr) noexcept {
+#if __has_builtin(__builtin_tag_pointer_mask)
+      return static_cast<clean_pointer>(__builtin_tag_pointer_mask((void *)_ptr, ~_mask));
+#else
+      return reinterpret_cast<clean_pointer>(reinterpret_cast<uintptr_t>(_ptr) & ~static_cast<uintptr_t>(_mask));
+#endif
+    }
+    // Extracts the masked (tag) bits and converts them to tag_type.
+    [[clang::always_inline]] static constexpr tag_type recover_value(dirty_pointer _ptr) noexcept {
+#if __has_builtin(__builtin_tag_pointer_mask_as_int)
+      return static_cast<tag_type>(__builtin_tag_pointer_mask_as_int((void *)_ptr, _mask));
+#else
+      return static_cast<tag_type>(reinterpret_cast<uintptr_t>(_ptr) & static_cast<uintptr_t>(_mask));
+#endif
+    }
+  };
+};
+
+// Schema which only accepts pointers that have at least the provided alignment
+// (Alignment is in bytes and must be a power of two); the low bits that such an
+// alignment guarantees to be zero are used for the tag.
+// Otherwise it behaves as the custom mask schema.
+template <unsigned Alignment> struct custom_alignment_tag {
+  static_assert(std::has_single_bit(Alignment), "Alignment must be a non-zero power of two");
+
+  // An address that is a multiple of Alignment has its low log2(Alignment) bits clear,
+  // which is exactly the bit pattern `Alignment - 1`.
+  static constexpr uintptr_t mask = static_cast<uintptr_t>(Alignment) - 1u;
+  template <typename T, typename Tag> struct schema: bitmask_tag<mask>::template schema<T, Tag> {
+    using _underlying_schema = typename bitmask_tag<mask>::template schema<T, Tag>;
+
+    using clean_pointer = typename _underlying_schema::clean_pointer;
+    using dirty_pointer = typename _underlying_schema::dirty_pointer;
+    using tag_type = typename _underlying_schema::tag_type;
+
+    // Asserts that _ptr is aligned to Alignment bytes, then defers to the bitmask schema.
+    [[clang::always_inline]] static constexpr dirty_pointer encode_pointer_with_tag(clean_pointer _ptr, tag_type _value) noexcept {
+#if __has_builtin(__builtin_is_aligned)
+      _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__builtin_is_aligned(_ptr, Alignment), "Pointer must be aligned by provided alignment for tagging");
+#else
+      // The integer value of a pointer is only inspected outside of constant evaluation.
+      if !consteval {
+        // Check the pointer value itself, not the address of the parameter.
+        _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(reinterpret_cast<uintptr_t>(_ptr) % Alignment == 0, "Pointer must be aligned by provided alignment for tagging");
+      }
+#endif
+      return _underlying_schema::encode_pointer_with_tag(_ptr, _value);
+    }
+
+    using _underlying_schema::recover_pointer;
+    using _underlying_schema::recover_value;
+  };
+};
+
+// Default scheme: the only tag bits offered are those derived from the pointee's alignment.
+// alignof(T) is forwarded as the Alignment argument of custom_alignment_tag.
+struct alignment_low_bits_tag {
+  template <typename T>
+  static constexpr unsigned alignment = alignof(T);
+
+  template <typename T, typename Tag>
+  using schema = typename custom_alignment_tag<alignment<T>>::template schema<T, Tag>;
+};
+
+// Scheme which shifts the pointer left by Bits bits and uses the freed low bits for the tag.
+template <unsigned Bits> struct shift_tag {
+  static constexpr unsigned _shift = Bits;
+  // low _shift bits set: the part of the dirty pointer that holds the tag
+  static constexpr uintptr_t _mask = (uintptr_t{1u} << _shift) - 1u;
+
+  template <typename T, typename Tag>
+  struct schema {
+    using tag_type      = Tag;
+    using clean_pointer = T *;
+    using dirty_pointer = void *;
+
+    // Shifts the pointer up by _shift bits and places the tag in the low bits.
+    [[clang::always_inline]] static constexpr dirty_pointer
+    encode_pointer_with_tag(clean_pointer _ptr, tag_type _value) noexcept {
+#if __has_builtin(__builtin_tag_pointer_shift_or)
+      return static_cast<dirty_pointer>(__builtin_tag_pointer_shift_or((void *)(_ptr), (uintptr_t)_value, _shift));
+#else
+      const auto __shifted_address = reinterpret_cast<uintptr_t>(_ptr) << _shift;
+      // only the low _shift bits of the tag are kept
+      const auto __low_tag_bits = static_cast<uintptr_t>(_value) & ((1ull << static_cast<uintptr_t>(_shift)) - 1ull);
+      return reinterpret_cast<dirty_pointer>(__shifted_address | __low_tag_bits);
+#endif
+    }
+
+    // Shifts the stored bits back down by _shift to obtain the pointer.
+    [[clang::always_inline]] static constexpr clean_pointer
+    recover_pointer(dirty_pointer _ptr) noexcept {
+#if __has_builtin(__builtin_tag_pointer_unshift)
+      return static_cast<clean_pointer>(__builtin_tag_pointer_unshift((void *)_ptr, _shift));
+#else
+      const auto __stored_bits = reinterpret_cast<uintptr_t>(_ptr);
+      return reinterpret_cast<clean_pointer>(__stored_bits >> _shift);
+#endif
+    }
+
+    // Reads the low _shift bits and converts them to tag_type.
+    [[clang::always_inline]] static constexpr tag_type
+    recover_value(dirty_pointer _ptr) noexcept {
+#if __has_builtin(__builtin_tag_pointer_mask_as_int)
+      return static_cast<tag_type>(__builtin_tag_pointer_mask_as_int((void *)_ptr, _mask));
+#else
+      const auto __stored_bits = reinterpret_cast<uintptr_t>(_ptr);
+      return static_cast<tag_type>(__stored_bits & static_cast<uintptr_t>(_mask));
+#endif
+    }
+  };
+};
+
+// Scheme which shifts the pointer left by 8 bits and offers that low byte
+// as guaranteed space for tagging (an alias for shift_tag<8>'s schema).
+struct low_byte_tag {
+  template <typename T, typename Tag>
+  using schema = typename shift_tag<8>::template schema<T, Tag>;
+};
+
+// Gives the user access to the upper byte of a pointer (intended for aarch64).
+// Recovering an aliasing pointer is a no-op (fast path): the dirty pointer is
+// returned with only a cast.
+struct upper_byte_tag {
+  // bit position where the top byte of a T* starts
+  template <typename T>
+  static constexpr unsigned _shift = sizeof(T *) * 8ull - 8ull;
+  // eight set bits placed in the top byte
+  template <typename T>
+  static constexpr uintptr_t _mask = 0b1111'1111ull << _shift<T>;
+
+  template <typename T, typename Tag>
+  struct schema : bitmask_tag<_mask<T>>::template schema<T, Tag> {
+    using _underlying_schema = bitmask_tag<_mask<T>>::template schema<T, Tag>;
+
+    using tag_type      = _underlying_schema::tag_type;
+    using clean_pointer = _underlying_schema::clean_pointer;
+    using dirty_pointer = _underlying_schema::dirty_pointer;
+
+    // encoding and decoding are inherited unchanged from the bitmask schema
+    using _underlying_schema::encode_pointer_with_tag;
+    using _underlying_schema::recover_pointer;
+    using _underlying_schema::recover_value;
+
+    // Returns the dirty pointer as clean_pointer without touching any bits.
+    [[clang::always_inline]] static constexpr clean_pointer
+    recover_aliasing_pointer(dirty_pointer _ptr) noexcept {
+      return static_cast<clean_pointer>(_ptr);
+    }
+  };
+};
+
+// Variant of the upper byte scheme that shifts the tag value into position itself,
+// so the tag does not need to know the exact bit position of the upper byte.
+struct upper_byte_shifted_tag : upper_byte_tag {
+  template <typename T, typename Tag>
+  struct schema : upper_byte_tag::template schema<T, uintptr_t> {
+    // the underlying schema works on raw uintptr_t tags already placed in the upper byte
+    using _underlying_schema = upper_byte_tag::template schema<T, uintptr_t>;
+
+    using tag_type      = Tag;
+    using clean_pointer = _underlying_schema::clean_pointer;
+    using dirty_pointer = _underlying_schema::dirty_pointer;
+
+    using _underlying_schema::recover_pointer;
+    using _underlying_schema::recover_aliasing_pointer;
+
+    // Moves the tag up into the upper byte, then encodes through the underlying schema.
+    [[clang::always_inline]] static constexpr dirty_pointer
+    encode_pointer_with_tag(clean_pointer _ptr, tag_type _value) noexcept {
+      const auto __positioned_tag = static_cast<uintptr_t>(_value) << upper_byte_tag::_shift<T>;
+      return _underlying_schema::encode_pointer_with_tag(_ptr, __positioned_tag);
+    }
+
+    // Reads the raw upper-byte bits and shifts them back down before converting to tag_type.
+    [[clang::always_inline]] static constexpr tag_type
+    recover_value(dirty_pointer _ptr) noexcept {
+      const auto __raw_tag_bits = _underlying_schema::recover_value(_ptr);
+      return static_cast<tag_type>(__raw_tag_bits >> upper_byte_tag::_shift<T>);
+    }
+  };
+};
+
----------------
ldionne wrote:

IMO the most important schema is one where you specify only the number of bits you need, and then it finds those bits in the pointer somewhere (anywhere really). Something like

```
template <size_t Bits>
struct free_bits_tag /* bad name? */ {
  ...
};
```

Basically, the point I'm trying to make is that the most basic interface we want for `tagged_ptr` is

```
using MyTaggedPtr = std::tagged_ptr<SomethingLarge, bool, bits_needed<1>>;
```

90% of the time, the problem we're faced with is that we have a pointer and we have a desire to store K bits somewhere in the pointer. We don't really care where or how those bits are stored; we just want them to be stored, or to be told that the pointer doesn't have space. I think all the other schemas are nice additions on top that provide low-level control over how things happen, but the base interface of this pointer should be based on a simple request for a number of bits, IMO.

https://github.com/llvm/llvm-project/pull/111861


More information about the cfe-commits mailing list