[clang] [libcxx] [clang & libcxx] constexpr pointer tagging (DO NOT MERGE) (PR #111861)

Hana Dusíková via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 10 12:06:19 PDT 2024


https://github.com/hanickadot updated https://github.com/llvm/llvm-project/pull/111861

>From e1c8d5e689fe8d3d0338eb64220aaf6371aed48a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hana=20Dusi=CC=81kova=CC=81?= <hanicka at hanicka.net>
Date: Thu, 10 Oct 2024 21:05:55 +0200
Subject: [PATCH] [clang & libcxx] constexpr pointer tagging (not for merging,
 just for review)

---
 clang/include/clang/AST/APValue.h             |   4 +
 clang/include/clang/Basic/Builtins.td         |  37 ++
 .../include/clang/Basic/DiagnosticASTKinds.td |   6 +
 clang/lib/AST/APValue.cpp                     |  10 +
 clang/lib/AST/ExprConstant.cpp                | 112 +++++
 clang/lib/CodeGen/CGBuiltin.cpp               | 102 ++++
 clang/lib/CodeGen/CodeGenFunction.h           |   7 +
 libcxx/include/CMakeLists.txt                 |   1 +
 libcxx/include/__memory/tagged_ptr.h          | 447 ++++++++++++++++++
 libcxx/include/memory                         |   4 +
 libcxx/include/module.modulemap               |   1 +
 11 files changed, 731 insertions(+)
 create mode 100644 libcxx/include/__memory/tagged_ptr.h

diff --git a/clang/include/clang/AST/APValue.h b/clang/include/clang/AST/APValue.h
index 7869ee386689d7..278cf61dd56f89 100644
--- a/clang/include/clang/AST/APValue.h
+++ b/clang/include/clang/AST/APValue.h
@@ -198,6 +198,8 @@ class APValue {
       /// The QualType, if this is a DynamicAllocLValue.
       void *DynamicAllocType;
     };
+  public:
+    uint64_t Metadata{0};
   };
 
   /// A FieldDecl or CXXRecordDecl, along with a flag indicating whether we
@@ -527,6 +529,8 @@ class APValue {
   }
 
   const LValueBase getLValueBase() const;
+  uint64_t getLValueMetadata() const;
+  uint64_t & getLValueMetadata();
   CharUnits &getLValueOffset();
   const CharUnits &getLValueOffset() const {
     return const_cast<APValue*>(this)->getLValueOffset();
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 9ebee81fcb0d3d..6231f1c8af2830 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4867,3 +4867,40 @@ def ArithmeticFence : LangBuiltin<"ALL_LANGUAGES"> {
   let Attributes = [CustomTypeChecking, Constexpr];
   let Prototype = "void(...)";
 }
+
+// Support for pointer tagging.
+// __builtin_tag_pointer_mask_or(ptr, value, mask): (ptr & ~mask) | (value & mask) -> void *
+def TagPointerMaskOr : Builtin {
+  let Spellings = ["__builtin_tag_pointer_mask_or"];
+  let Attributes = [Constexpr, NoThrow];
+  let Prototype = "void*(void*, size_t, size_t)";
+}
+
+// __builtin_tag_pointer_mask(ptr, mask): (ptr & mask) -> void *
+def TagPointerMask : Builtin {
+  let Spellings = ["__builtin_tag_pointer_mask"];
+  let Attributes = [Constexpr, NoThrow];
+  let Prototype = "void*(void*, size_t)";
+}
+
+// __builtin_tag_pointer_mask_as_int(ptr, mask): (ptr & mask) -> size_t
+def TagPointerMaskAsInt : Builtin {
+  let Spellings = ["__builtin_tag_pointer_mask_as_int"];
+  let Attributes = [Constexpr, NoThrow];
+  let Prototype = "size_t(void*, size_t)";
+}
+
+// __builtin_tag_pointer_shift_or(ptr, value, shift): (ptr << shift) | (value & mask) -> void *
+// where mask = (1 << shift) - 1
+def TagPointerShiftOr : Builtin {
+  let Spellings = ["__builtin_tag_pointer_shift_or"];
+  let Attributes = [Constexpr, NoThrow];
+  let Prototype = "void*(void*, size_t, size_t)";
+}
+
+// __builtin_tag_pointer_unshift(ptr, shift): (ptr >> shift) -> void *
+def TagPointerUnshift : Builtin {
+  let Spellings = ["__builtin_tag_pointer_unshift"];
+  let Attributes = [Constexpr, NoThrow];
+  let Prototype = "void*(void*, size_t)";
+}
diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td
index 6a658cf14356f5..3d6b0cbebc7002 100644
--- a/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -218,6 +218,12 @@ def note_constexpr_access_null : Note<
 def note_constexpr_access_past_end : Note<
   "%sub{access_kind}0 dereferenced one-past-the-end pointer "
   "is not allowed in a constant expression">;
+def note_constexpr_dereferencing_tagged_pointer: Note<
+  "dereferencing tagged pointer">;
+def note_constexpr_tagging_with_shift_zero: Note<
+  "you must shift pointer at least by one bit to store a tag">;
+def note_constexpr_tagging_with_empty_mask: Note<
+  "you must provide non-zero mask for pointer tagging">;
 def note_constexpr_access_unsized_array : Note<
   "%sub{access_kind}0 element of array without known bound "
   "is not allowed in a constant expression">;
diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp
index 4f5d14cbd59bbf..02ba5cf51d4176 100644
--- a/clang/lib/AST/APValue.cpp
+++ b/clang/lib/AST/APValue.cpp
@@ -975,6 +975,16 @@ const APValue::LValueBase APValue::getLValueBase() const {
   return ((const LV *)(const void *)&Data)->Base;
 }
 
+uint64_t APValue::getLValueMetadata() const {
+  assert(isLValue() && "Invalid accessor");
+  return ((const LV *)(const void *)&Data)->Base.Metadata;
+}
+
+uint64_t & APValue::getLValueMetadata() {
+  assert(isLValue() && "Invalid accessor");
+  return ((LV *)(void *)&Data)->Base.Metadata;
+}
+
 bool APValue::isLValueOnePastTheEnd() const {
   assert(isLValue() && "Invalid accessor");
   return ((const LV *)(const void *)&Data)->IsOnePastTheEnd;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 4d5af96093cfeb..ed7eeeba20fcaf 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -4498,6 +4498,11 @@ handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv, QualType Type,
                                bool WantObjectRepresentation = false) {
   if (LVal.Designator.Invalid)
     return false;
+  
+  if (LVal.Base.Metadata != 0) {
+    Info.FFDiag(Conv, diag::note_constexpr_dereferencing_tagged_pointer);
+    return false;
+  }
 
   // Check for special cases where there is no existing APValue to look at.
   const Expr *Base = LVal.Base.dyn_cast<const Expr*>();
@@ -9735,6 +9740,87 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     return Success(E);
 
   switch (BuiltinOp) {
+  // emulation of pointer tagging without actually touching pointer value
+  // as there is no such thing as address here, so tag is stored as a metadata in lvalue base
+  case Builtin::BI__builtin_tag_pointer_mask_or: { 
+      APSInt Value, Mask;
+      if (!evaluatePointer(E->getArg(0), Result))
+        return Error(E);
+  
+      if (!EvaluateInteger(E->getArg(1), Value, Info))
+        return Error(E);
+  
+      if (!EvaluateInteger(E->getArg(2), Mask, Info))
+        return Error(E);
+      
+      if (Mask.getLimitedValue() == 0) {
+        CCEDiag(E->getArg(2), diag::note_constexpr_tagging_with_empty_mask);
+        return false;
+      }
+  
+      Result.Base.Metadata = (Result.Base.Metadata & ~Mask.getLimitedValue()) | (Value.getLimitedValue() & Mask.getLimitedValue());
+      return true;
+    }
+
+  // Alternative approach to tagging which shifts the pointer: (ptr << shift) | (value & mask).
+  // During constant evaluation only the metadata stored in the lvalue base is shifted.
+  case Builtin::BI__builtin_tag_pointer_shift_or: {
+    APSInt Value, Shift;
+    if (!evaluatePointer(E->getArg(0), Result))
+      return Error(E);
+    if (!EvaluateInteger(E->getArg(1), Value, Info))
+      return Error(E);
+    if (!EvaluateInteger(E->getArg(2), Shift, Info))
+      return Error(E);
+
+    if (Shift.getLimitedValue() == 0) {
+      CCEDiag(E->getArg(2), diag::note_constexpr_tagging_with_shift_zero);
+      return false;
+    }
+
+    // Shifting a 64-bit value by >= 64 bits is undefined in C++: shift every old bit out and keep the whole value.
+    const uint64_t Bits = Shift.getLimitedValue();
+    const uint64_t Mask = Bits < 64 ? (1ull << Bits) - 1ull : ~0ull;
+    Result.Base.Metadata = (Bits < 64 ? Result.Base.Metadata << Bits : 0) | (Value.getLimitedValue() & Mask);
+    return true;
+  }
+  
+  // Recover the pointer by masking the tag metadata: Metadata &= Mask.
+  // The constant evaluator only allows dereferencing pointers whose metadata is 0.
+  case Builtin::BI__builtin_tag_pointer_mask: {
+    APSInt Mask;
+    if (!evaluatePointer(E->getArg(0), Result))
+      return Error(E);
+
+    if (!EvaluateInteger(E->getArg(1), Mask, Info))
+      return Error(E);
+    // The builtin takes (ptr, mask): the mask is argument 1 and there is no argument 2 to point at.
+    if (Mask.getLimitedValue() == 0) {
+      CCEDiag(E->getArg(1), diag::note_constexpr_tagging_with_empty_mask);
+      return false;
+    }
+
+    Result.Base.Metadata = (Result.Base.Metadata & Mask.getLimitedValue());
+    return true;
+  }
+
+  // Shift the metadata back down (this can also turn a tagged pointer back into a normal one).
+  case Builtin::BI__builtin_tag_pointer_unshift: {
+    APSInt Shift;
+    if (!evaluatePointer(E->getArg(0), Result))
+      return Error(E);
+
+    if (!EvaluateInteger(E->getArg(1), Shift, Info))
+      return Error(E);
+    // The builtin takes (ptr, shift): the shift is argument 1 and there is no argument 2 to point at.
+    if (Shift.getLimitedValue() == 0) {
+      CCEDiag(E->getArg(1), diag::note_constexpr_tagging_with_shift_zero);
+      return false;
+    }
+    // Shifting a 64-bit value by >= 64 bits is undefined in C++; every metadata bit is shifted out in that case.
+    Result.Base.Metadata = Shift.getLimitedValue() < 64 ? Result.Base.Metadata >> Shift.getLimitedValue() : 0;
+    return true;
+  }
   case Builtin::BIaddressof:
   case Builtin::BI__addressof:
   case Builtin::BI__builtin_addressof:
@@ -12662,6 +12748,25 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   default:
     return false;
 
+  case Builtin::BI__builtin_tag_pointer_mask_as_int: {
+    // Yields the masked tag metadata of the pointer's lvalue base as an integer.
+    LValue Pointer;
+    APSInt Mask;
+    if (!EvaluatePointer(E->getArg(0), Pointer, Info))
+      return Error(E);
+
+    if (!EvaluateInteger(E->getArg(1), Mask, Info))
+      return Error(E);
+    // The builtin takes (ptr, mask): the mask is argument 1 and there is no argument 2 to point at.
+    if (Mask.getLimitedValue() == 0) {
+      CCEDiag(E->getArg(1), diag::note_constexpr_tagging_with_empty_mask);
+      return false;
+    }
+
+    const uint64_t Result = Pointer.Base.Metadata & (static_cast<uint64_t>(Mask.getLimitedValue()));
+    return Success(Result, E);
+  }
+
   case Builtin::BI__builtin_dynamic_object_size:
   case Builtin::BI__builtin_object_size: {
     // The type was checked when we built the expression.
@@ -14219,6 +14324,13 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E,
       return Success(CmpResult::Less, E);
     if (CompareLHS > CompareRHS)
       return Success(CmpResult::Greater, E);
+    
+    // this makes tagged pointer not equal to original pointer
+    if (LHSValue.Base.Metadata < RHSValue.Base.Metadata)
+      return Success(CmpResult::Less, E);
+    if (LHSValue.Base.Metadata > RHSValue.Base.Metadata)
+      return Success(CmpResult::Greater, E);
+    
     return Success(CmpResult::Equal, E);
   }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2449b90a0e7902..4ac09e7a317931 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5320,6 +5320,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     return RValue::get(Carry);
   }
+  
+  // support for pointer tagging
+  case Builtin::BI__builtin_tag_pointer_mask_or:
+    return EmitBuiltinTagPointerMaskOr(E);
+  case Builtin::BI__builtin_tag_pointer_mask:
+    return EmitBuiltinTagPointerMask(E);
+  case Builtin::BI__builtin_tag_pointer_mask_as_int:
+    return EmitBuiltinTagPointerMaskAsInt(E);
+  case Builtin::BI__builtin_tag_pointer_shift_or:
+    return EmitBuiltinTagPointerShiftOr(E);
+  case Builtin::BI__builtin_tag_pointer_unshift:
+    return EmitBuiltinTagPointerUnshift(E);
+
   case Builtin::BIaddressof:
   case Builtin::BI__addressof:
   case Builtin::BI__builtin_addressof:
@@ -21245,6 +21258,95 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
   }
 }
 
+/// Generate (x & ~mask) | (value & mask).
+RValue CodeGenFunction::EmitBuiltinTagPointerMaskOr(const CallExpr *E) {
+  llvm::Value * Ptr = EmitScalarExpr(E->getArg(0));
+  llvm::Value * Value = EmitScalarExpr(E->getArg(1));
+  llvm::Value * Mask = EmitScalarExpr(E->getArg(2));
+  
+  llvm::IntegerType * IntType = IntegerType::get(getLLVMContext(), CGM.getDataLayout().getIndexTypeSizeInBits(Ptr->getType()));
+  
+  // TODO: avoid using bitcast and go path of ptr.tag (mirror to ptr.mask)
+  // to keep pointer's provenance, but this turns out a bit harder to do as it touches 
+  // a lot of places in llvm
+  llvm::Value * PointerInt = Builder.CreateBitOrPointerCast(Ptr, IntType, "pointer_int");
+  llvm::Value * InvertedMask = Builder.CreateNot(Mask, "inverted_mask");
+  
+  llvm::Value * MaskedPtr = Builder.CreateAnd(PointerInt, InvertedMask, "masked_ptr");
+  llvm::Value * MaskedValue = Builder.CreateAnd(Value, Mask, "masked_value");
+  
+  llvm::Value * ResultInt = Builder.CreateOr(MaskedPtr, MaskedValue, "result_int");
+  llvm::Value * Result = Builder.CreateBitOrPointerCast(ResultInt, Ptr->getType(), "result_ptr");
+   
+  return RValue::get(Result);
+}
+
+/// Emit (ptr << shift) | (value & ((1 << shift) - 1)) for __builtin_tag_pointer_shift_or(ptr, value, shift).
+RValue CodeGenFunction::EmitBuiltinTagPointerShiftOr(const CallExpr *E) {
+  llvm::Value * Ptr = EmitScalarExpr(E->getArg(0));
+  llvm::Value * Value = EmitScalarExpr(E->getArg(1));
+  llvm::Value * Shift = EmitScalarExpr(E->getArg(2));
+
+  llvm::IntegerType * IntType = IntegerType::get(getLLVMContext(), CGM.getDataLayout().getIndexTypeSizeInBits(Ptr->getType()));
+
+  // TODO: again, for now a bitcast, later ptr.shift_tag
+  llvm::Value * PointerInt = Builder.CreateBitOrPointerCast(Ptr, IntType, "pointer_int");
+  llvm::Value * ShiftedPointerInt = Builder.CreateShl(PointerInt, Shift);
+
+  auto *One = llvm::ConstantInt::get(IntType, 1);
+  // mask = (1 << shift) - 1; the tag is AND-ed with it (not added) so it only occupies the freed low bits
+  llvm::Value * Mask = Builder.CreateSub(Builder.CreateShl(One, Shift), One, "mask");
+  llvm::Value * MaskedValue = Builder.CreateAnd(Value, Mask, "masked_value");
+  llvm::Value * PointerWithTag = Builder.CreateOr(ShiftedPointerInt, MaskedValue, "pointer_with_tag_int");
+
+  llvm::Value * Result = Builder.CreateBitOrPointerCast(PointerWithTag, Ptr->getType(), "result_ptr");
+  return RValue::get(Result);
+}
+
+/// Generate (x >> shift)
+RValue CodeGenFunction::EmitBuiltinTagPointerUnshift(const CallExpr *E) {
+  llvm::Value * Ptr = EmitScalarExpr(E->getArg(0));
+  llvm::Value * Shift = EmitScalarExpr(E->getArg(1));
+  
+  llvm::IntegerType * IntType = IntegerType::get(getLLVMContext(), CGM.getDataLayout().getIndexTypeSizeInBits(Ptr->getType()));
+  
+  // for now I'm going path of bitcast
+  llvm::Value * PointerInt = Builder.CreateBitOrPointerCast(Ptr, IntType, "pointer_int");
+  llvm::Value * UnShiftedPointerInt = Builder.CreateAShr(PointerInt, Shift, "unshifted_pointer_int");
+  
+  llvm::Value * Result = Builder.CreateBitOrPointerCast(UnShiftedPointerInt, Ptr->getType(), "result_ptr");
+  return RValue::get(Result);
+}
+
+/// Generate (x & mask).
+RValue CodeGenFunction::EmitBuiltinTagPointerMask(const CallExpr *E) {
+  llvm::Value * Ptr = EmitScalarExpr(E->getArg(0));
+  llvm::Value * Mask = EmitScalarExpr(E->getArg(1));
+  
+  llvm::Value *Result = Builder.CreateIntrinsic(
+        Intrinsic::ptrmask, {Ptr->getType(), Mask->getType()},
+        {Ptr, Mask}, nullptr, "result");
+  
+  return RValue::get(Result);
+}
+
+/// Generate (x & mask) (but return it as number).
+RValue CodeGenFunction::EmitBuiltinTagPointerMaskAsInt(const CallExpr *E) {
+  llvm::Value * Ptr = EmitScalarExpr(E->getArg(0));
+  llvm::Value * Mask = EmitScalarExpr(E->getArg(1));
+  
+  llvm::IntegerType * IntType = IntegerType::get(getLLVMContext(), CGM.getDataLayout().getIndexTypeSizeInBits(Ptr->getType()));
+  
+  llvm::Value *Result = Builder.CreateIntrinsic(
+        Intrinsic::ptrmask, {Ptr->getType(), Mask->getType()},
+        {Ptr, Mask}, nullptr, "result");
+
+  llvm::Value * IntResult = Builder.CreateBitOrPointerCast(Result, IntType, "int_result");
+  
+  return RValue::get(IntResult);
+}
+
+
 namespace {
 struct BuiltinAlignArgs {
   llvm::Value *Src = nullptr;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 9ba0ed02a564dd..7938593f8532cc 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4556,6 +4556,13 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   RValue emitRotate(const CallExpr *E, bool IsRotateRight);
 
+  /// Emit IR for pointer tagging
+  RValue EmitBuiltinTagPointerMaskOr(const CallExpr *E);
+  RValue EmitBuiltinTagPointerMask(const CallExpr *E);
+  RValue EmitBuiltinTagPointerMaskAsInt(const CallExpr *E);
+  RValue EmitBuiltinTagPointerShiftOr(const CallExpr *E);
+  RValue EmitBuiltinTagPointerUnshift(const CallExpr *E);
+
   /// Emit IR for __builtin_os_log_format.
   RValue emitBuiltinOSLogFormat(const CallExpr &E);
 
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index c2a597f49e317f..edb17efb0c72bc 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -553,6 +553,7 @@ set(files
   __memory/raw_storage_iterator.h
   __memory/shared_ptr.h
   __memory/swap_allocator.h
+  __memory/tagged_ptr.h
   __memory/temp_value.h
   __memory/temporary_buffer.h
   __memory/uninitialized_algorithms.h
diff --git a/libcxx/include/__memory/tagged_ptr.h b/libcxx/include/__memory/tagged_ptr.h
new file mode 100644
index 00000000000000..7c99d0e9299f48
--- /dev/null
+++ b/libcxx/include/__memory/tagged_ptr.h
@@ -0,0 +1,447 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TAGGED_PTR_H
+#define _LIBCPP___TAGGED_PTR_H
+
+#include <__config> // must come first: the configuration macros tested below are expected to come from it
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 26
+#  include <__assert>
+#  include <__bit/has_single_bit.h>
+#  include <__memory/pointer_traits.h>
+#  include <__type_traits/is_trivially_copyable.h>
+#  include <__type_traits/rank.h>
+#  include <compare>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <typename T, typename Y> concept convertible_to_from = std::convertible_to<Y, T> && std::convertible_to<T, Y>;
+  
+template <typename T> concept pointer_tagging_schema = requires(T::dirty_pointer payload, T::clean_pointer clean, T::tag_type tag) {
+  //requires convertible_to_from<typename T::tag_type, uintptr_t>;
+  requires std::is_pointer_v<typename T::clean_pointer>;
+  
+  { T::encode_pointer_with_tag(clean, tag) } noexcept -> std::same_as<typename T::dirty_pointer>;
+  { T::recover_pointer(payload) } noexcept -> std::same_as<typename T::clean_pointer>;
+  { T::recover_value(payload) } noexcept -> std::same_as<typename T::tag_type>;
+};
+
+template <typename T> concept pointer_tagging_schema_with_aliasing = pointer_tagging_schema<T> && requires(T::dirty_pointer payload) {
+  { T::recover_aliasing_pointer(payload) } noexcept -> std::same_as<typename T::clean_pointer>;
+};
+
+struct no_tag {
+  template <typename T, typename Tag> struct schema {
+    using clean_pointer = T *;
+    using dirty_pointer = void *;
+    using tag_type = Tag;
+
+    [[clang::always_inline]] static constexpr dirty_pointer encode_pointer_with_tag(clean_pointer _ptr, tag_type) noexcept {
+      return (dirty_pointer)_ptr;
+    }
+    [[clang::always_inline]] static constexpr clean_pointer recover_pointer(dirty_pointer _ptr) noexcept {
+      return (clean_pointer)_ptr;
+    }
+    [[clang::always_inline]] static constexpr tag_type recover_value(dirty_pointer) noexcept {
+      return {};
+    }
+  };
+};
+
+template <uintptr_t Mask> struct bitmask_tag {
+  static constexpr uintptr_t _mask = Mask;
+
+  template <typename T, typename Tag> struct schema {
+    using clean_pointer = T *;
+    using dirty_pointer = void *;
+    using tag_type = Tag;
+
+    [[clang::always_inline]] static constexpr dirty_pointer encode_pointer_with_tag(clean_pointer _ptr, tag_type _value) noexcept {
+      return static_cast<dirty_pointer>(__builtin_tag_pointer_mask_or((void *)(_ptr), static_cast<uintptr_t>(_value), _mask));
+    }
+    [[clang::always_inline]] static constexpr clean_pointer recover_pointer(dirty_pointer _ptr) noexcept {
+      return static_cast<clean_pointer>(__builtin_tag_pointer_mask((void *)_ptr, ~_mask));
+    }
+    [[clang::always_inline]] static constexpr tag_type recover_value(dirty_pointer _ptr) noexcept {
+      return static_cast<tag_type>(__builtin_tag_pointer_mask_as_int((void *)_ptr, _mask));
+    }
+  };
+};
+
+template <unsigned Alignment> struct custom_alignment_tag {
+  static constexpr uintptr_t mask = static_cast<uintptr_t>(Alignment) - 1u; // Alignment is a power-of-two byte count: only the bits below it are free
+  template <typename T, typename Tag> using schema = typename bitmask_tag<mask>::template schema<T, Tag>;
+};
+// stores the tag in the low bits that are zero in a pointer suitably aligned for T
+struct alignment_low_bits_tag {
+  template <typename T> static constexpr unsigned alignment = alignof(T);
+  template <typename T, typename Tag> using schema = typename custom_alignment_tag<alignment<T>>::template schema<T, Tag>;
+};
+
+template <unsigned Bits> struct shift_tag {
+  static constexpr unsigned _shift = Bits;
+  static constexpr uintptr_t _mask = (uintptr_t{1u} << _shift) - 1u;
+
+  template <typename T, typename Tag> struct schema {
+    using clean_pointer = T *;
+    using dirty_pointer = void *;
+    using tag_type = Tag;
+
+    [[clang::always_inline]] static constexpr dirty_pointer encode_pointer_with_tag(clean_pointer _ptr, tag_type _value) noexcept {
+      return static_cast<dirty_pointer>(__builtin_tag_pointer_shift_or((void *)(_ptr), (uintptr_t)_value, _shift));
+    }
+    [[clang::always_inline]] static constexpr clean_pointer recover_pointer(dirty_pointer _ptr) noexcept {
+      return static_cast<clean_pointer>(__builtin_tag_pointer_unshift((void *)_ptr, _shift));
+    }
+    [[clang::always_inline]] static constexpr tag_type recover_value(dirty_pointer _ptr) noexcept {
+      return static_cast<tag_type>(__builtin_tag_pointer_mask_as_int((void *)_ptr, _mask));
+    }
+  };
+};
+
+struct low_byte_tag {
+  template <typename T, typename Tag> using schema = typename shift_tag<8>::template schema<T, Tag>;
+};
+
+struct upper_byte_tag {
+  template <typename T> static constexpr unsigned _shift = sizeof(T *) * 8ull - 8ull;
+  template <typename T> static constexpr uintptr_t _mask = 0b1111'1111ull << _shift<T>;
+  
+  template <typename T, typename Tag> using schema = typename bitmask_tag<_mask<T>>::template schema<T, Tag>;
+};
+
+struct upper_byte_shifted_tag: upper_byte_tag {
+  template <typename T, typename Tag> struct schema {
+    using _underlying_schema = typename upper_byte_tag::template schema<T, uintptr_t>;
+    static constexpr unsigned _shift = upper_byte_tag::template _shift<T>;
+    
+    using clean_pointer = T *;
+    using dirty_pointer = void *;
+    using tag_type = Tag;
+  
+    [[clang::always_inline]] static constexpr dirty_pointer encode_pointer_with_tag(clean_pointer _ptr, tag_type _value) noexcept {
+      return _underlying_schema::encode_pointer_with_tag(_ptr, static_cast<uintptr_t>(_value) << _shift);
+    }
+    [[clang::always_inline]] static constexpr clean_pointer recover_pointer(dirty_pointer _ptr) noexcept {
+      return _underlying_schema::recover_pointer(_ptr);
+    }
+    [[clang::always_inline]] static constexpr tag_type recover_value(dirty_pointer _ptr) noexcept {
+      return static_cast<tag_type>(_underlying_schema::recover_value(_ptr) >> _shift);
+    }
+  };
+};
+
+
+
+// forward declaration
+template <typename _T, typename _Tag = uintptr_t, typename _Schema = alignment_low_bits_tag> class tagged_ptr;
+
+
+template <typename _Schema, typename _T, typename _Tag = uintptr_t> constexpr auto tag_ptr(_T * _ptr, _Tag _tag = {}) noexcept {
+  return tagged_ptr<_T, _Tag, _Schema>{_ptr, _tag};
+}
+
+template <typename _T, typename _Tag, typename _Schema = alignment_low_bits_tag> constexpr auto tagged_pointer_cast(typename _Schema::template schema<_T, _Tag>::dirty_pointer _ptr) noexcept -> tagged_ptr<_T, _Tag, _Schema> {
+  using result_type = tagged_ptr<_T, _Tag, _Schema>;
+  return result_type{typename result_type::already_tagged_tag{_ptr}};
+}
+
+template <typename _Schema2, typename _T, typename _Tag, typename _Schema> constexpr auto scheme_pointer_cast(tagged_ptr<_T, _Tag, _Schema> in) noexcept {
+  return tagged_ptr<_T, _Tag, _Schema2>{in.pointer(), in.tag()};
+}
+
+template <typename _Y, typename _T, typename _Tag, typename _Schema> constexpr auto const_pointer_cast(tagged_ptr<_T, _Tag, _Schema> in) noexcept {
+  // TODO we can just use native pointer here
+  return tagged_ptr<_Y, _Tag, _Schema>{const_cast<_Y*>(in.pointer()), in.tag()};
+}
+
+template <typename _Y, typename _T, typename _Tag, typename _Schema> constexpr auto static_pointer_cast(tagged_ptr<_T, _Tag, _Schema> in) noexcept {
+  return tagged_ptr<_Y, _Tag, _Schema>{static_cast<_Y*>(in.pointer()), in.tag()};
+}
+
+template <typename _Y, typename _T, typename _Tag, typename _Schema> constexpr auto dynamic_pointer_cast(tagged_ptr<_T, _Tag, _Schema> in) noexcept {
+  return tagged_ptr<_Y, _Tag, _Schema>{dynamic_cast<_Y*>(in.pointer()), in.tag()};
+}
+
+template <typename _Y, typename _T, typename _Tag, typename _Schema> auto reinterpret_pointer_cast(tagged_ptr<_T, _Tag, _Schema> in) noexcept {
+  return tagged_ptr<_Y, _Tag, _Schema>{reinterpret_cast<_Y*>(in.pointer()), in.tag()};
+}
+
+
+// wrapper class containing the pointer value and provides access
+template <typename _T, typename _Tag, typename _Schema> class tagged_ptr {
+public:
+  using schema = typename _Schema::template schema<_T, _Tag>;
+  using dirty_pointer = typename schema::dirty_pointer;
+  using clean_pointer = typename schema::clean_pointer;
+  using tag_type = typename schema::tag_type;
+  
+  using value_type = std::remove_cvref_t<decltype(*std::declval<clean_pointer>())>;
+  using difference_type = typename std::pointer_traits<clean_pointer>::difference_type;
+  
+  
+  template <typename _Y> using rebind = tagged_ptr<_Y, _Tag, _Schema>;
+  
+private:
+  
+  dirty_pointer _pointer{nullptr};
+  
+  friend constexpr auto tagged_pointer_cast<_T, _Tag, _Schema>(typename _Schema::template schema<_T, _Tag>::dirty_pointer ptr) noexcept -> tagged_ptr<_T, _Tag, _Schema>;
+  
+  struct already_tagged_tag {
+    dirty_pointer _ptr;
+  };
+ 
+  // special hidden constructor to allow constructing unsafely
+  [[clang::always_inline]] constexpr tagged_ptr(already_tagged_tag _in) noexcept: _pointer{_in._ptr} { }
+  
+  template <typename _Y, typename _T2, typename _Tag2, typename _Schema2> constexpr auto const_pointer_cast(tagged_ptr<_T2, _Tag2, _Schema2> in) noexcept -> rebind<_T>;
+  
+public:
+  tagged_ptr() = default;
+  consteval tagged_ptr(nullptr_t) noexcept: _pointer{nullptr} { }
+  tagged_ptr(const tagged_ptr &) = default;
+  tagged_ptr(tagged_ptr &&) = default;
+  ~tagged_ptr() = default;
+  tagged_ptr & operator=(const tagged_ptr &) = default;
+  tagged_ptr & operator=(tagged_ptr &&) = default;
+  
+  [[clang::always_inline]] explicit constexpr tagged_ptr(clean_pointer _ptr, tag_type _tag = {}) noexcept: _pointer{schema::encode_pointer_with_tag(_ptr, _tag)} {
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(pointer() == _ptr, "pointer must be recoverable after untagging");
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(tag() == _tag, "stored tag must be recoverable and within schema provided bit capacity");
+  } 
+
+  // accessors
+  [[clang::always_inline]] constexpr decltype(auto) operator*() const noexcept {
+    return *pointer();
+  }
+  
+  [[clang::always_inline]] constexpr clean_pointer operator->() const noexcept {
+    return pointer();
+  }
+   
+  template <typename...Ts> [[clang::always_inline]] constexpr decltype(auto) operator[](Ts... args) const noexcept requires std::is_array_v<value_type> && (sizeof...(Ts) == std::rank_v<value_type>) {
+    return (*pointer())[args...]; // subscript the pointed-to array through the untagged pointer
+  }
+  
+  [[clang::always_inline]] constexpr decltype(auto) operator[](difference_type diff) const noexcept requires (!std::is_array_v<value_type>) {
+    return *(pointer() + diff);
+  }
+  
+  // swap
+  [[clang::always_inline]] friend constexpr void swap(tagged_ptr & lhs, tagged_ptr & rhs) noexcept {
+    std::swap(lhs._pointer, rhs._pointer);
+  }
+  
+  // modifiers for tag
+  [[clang::always_inline]] constexpr auto & set(tag_type new_tag) noexcept {
+    // this is here so I can avoid checks
+    // TODO we should be able to check what bits available
+    _pointer = schema::encode_pointer_with_tag(pointer(), new_tag);
+    return *this;
+  }
+  
+  [[clang::always_inline]] constexpr auto & set_union(tag_type addition) noexcept {
+    return set(tag() | addition);
+  }
+  
+  [[clang::always_inline]] constexpr auto & set_difference(tag_type mask) noexcept {
+    return set(tag() & (~static_cast<uintptr_t>(mask)));
+  }
+  
+  [[clang::always_inline]] constexpr auto & set_intersection(tag_type mask) noexcept {
+    return set(tag() & mask);
+  }
+  
+  [[clang::always_inline]] constexpr auto & set_all() noexcept {
+    return set(static_cast<tag_type>(0xFFFFFFFF'FFFFFFFFull));
+  }
+
+  // modifiers for pointer
+  [[clang::always_inline]] constexpr auto & operator++() noexcept {
+    _pointer = tagged_ptr{pointer()+1u, tag()}._pointer;
+    return *this;
+  }
+  
+  [[clang::always_inline]] constexpr auto operator++(int) noexcept {
+    auto copy = auto(*this);
+    this->operator++();
+    return copy;
+  }
+  
+  [[clang::always_inline]] constexpr auto & operator+=(difference_type diff) noexcept {
+    _pointer = tagged_ptr{pointer()+diff, tag()}._pointer;
+    return *this;
+  }
+  
+  [[clang::always_inline]] friend constexpr auto operator+(tagged_ptr lhs, difference_type diff) noexcept {
+    lhs += diff;
+    return lhs;
+  }
+  
+  [[clang::always_inline]] friend constexpr auto operator+(difference_type diff, tagged_ptr rhs) noexcept {
+    rhs += diff;
+    return rhs;
+  }
+  
+  [[clang::always_inline]] friend constexpr auto operator-(tagged_ptr lhs, difference_type diff) noexcept {
+    lhs -= diff;
+    return lhs;
+  }
+  
+  [[clang::always_inline]] friend constexpr auto operator-(difference_type diff, tagged_ptr rhs) noexcept {
+    rhs -= diff;
+    return rhs;
+  }
+  
+  [[clang::always_inline]] constexpr auto & operator-=(difference_type diff) noexcept {
+    _pointer = tagged_ptr{pointer()-diff, tag()}._pointer;
+    return *this;
+  }
+  
+  [[clang::always_inline]] constexpr auto & operator--() noexcept {
+    _pointer = tagged_ptr{pointer()-1u, tag()}._pointer;
+    return *this;
+  }
+  
+  [[clang::always_inline]] constexpr auto operator--(int) noexcept {
+    auto copy = auto(*this);
+    this->operator--();
+    return copy;
+  }
+  
+  // observers
+  constexpr dirty_pointer unsafe_dirty_pointer() const noexcept {
+    // Returns the raw stored (still tagged) pointer; meant only for interacting with existing runtime code.
+    // NOTE(review): the original note called this intentionally non-constexpr, yet it is declared constexpr -- confirm.
+    return _pointer;
+  } 
+  
+  static constexpr bool support_aliasing_masking = pointer_tagging_schema_with_aliasing<schema>;
+  
+  [[clang::always_inline]] constexpr clean_pointer aliasing_pointer() const noexcept {
+    if constexpr (support_aliasing_masking) {
+      if !consteval {
+        return schema::recover_aliasing_pointer(_pointer);
+      }
+    }
+    
+    return schema::recover_pointer(_pointer);
+  }
+  
+  [[clang::always_inline]] constexpr clean_pointer pointer() const noexcept {
+    return schema::recover_pointer(_pointer);
+  }
+  
+  [[clang::always_inline]] constexpr tag_type tag() const noexcept {
+    return schema::recover_value(_pointer);
+  }
+  
+  template <std::size_t I> [[nodiscard, clang::always_inline]] friend constexpr decltype(auto) get(tagged_ptr _pair) noexcept {
+    static_assert(I < 2); // tuple protocol: index 0 is the clean pointer, index 1 is the tag
+    if constexpr (I == 0) {
+      return _pair.pointer();
+    } else {
+      return _pair.tag();
+    }
+  }
+  
+  [[clang::always_inline]] constexpr explicit operator bool() const noexcept {
+    return pointer() != nullptr;
+  }
+  
+  [[clang::always_inline]] friend constexpr ptrdiff_t operator-(tagged_ptr lhs, tagged_ptr rhs) noexcept {
+    return lhs.pointer() - rhs.pointer();
+  }
+  
+  // comparison operators
+  [[clang::always_inline]] friend bool operator==(tagged_ptr, tagged_ptr) = default;
+  
+  struct _compare_object {
+    clean_pointer pointer;
+    tag_type tag;
+    
+    friend auto operator<=>(_compare_object, _compare_object) = default;
+  };
+  
+  [[clang::always_inline]] friend constexpr auto operator<=>(tagged_ptr lhs, tagged_ptr rhs) noexcept {
+    return _compare_object{lhs.pointer(), lhs.tag()} <=> _compare_object{rhs.pointer(), rhs.tag()};
+  }
+  [[clang::always_inline]] friend constexpr bool operator==(tagged_ptr lhs, clean_pointer rhs) noexcept {
+    return lhs.pointer() == rhs;
+  }
+  [[clang::always_inline]] friend constexpr auto operator<=>(tagged_ptr lhs, clean_pointer rhs) noexcept {
+    return lhs.pointer() <=> rhs;
+  }
+  [[clang::always_inline]] friend constexpr bool operator==(tagged_ptr lhs, nullptr_t) noexcept {
+    return lhs.pointer() == nullptr;
+  }
+};
+
+// to_address overload for tagged_ptr: returns the clean (untagged) pointer, i.e. the result of p.pointer()
+template <typename _T, typename _Tag, typename _Schema> static constexpr auto to_address(tagged_ptr<_T, _Tag, _Schema> p) noexcept -> typename tagged_ptr<_T, _Tag, _Schema>::clean_pointer {
+  return p.pointer();
+}
+
+// iterator traits
+template <typename _T, typename _Tag, typename _Schema>
+struct _LIBCPP_TEMPLATE_VIS iterator_traits<tagged_ptr<_T, _Tag, _Schema>> {
+  using _tagged_ptr = tagged_ptr<_T, _Tag, _Schema>;
+  
+  using iterator_category = std::random_access_iterator_tag;
+  using iterator_concept = std::contiguous_iterator_tag;
+  
+  using value_type = _tagged_ptr::value_type;
+  using reference = value_type &;
+  using pointer = _tagged_ptr::clean_pointer;
+  using difference_type = _tagged_ptr::difference_type;
+};
+
+// pointer traits
+template <typename _T, typename _Tag, typename _Schema>
+struct _LIBCPP_TEMPLATE_VIS pointer_traits<tagged_ptr<_T, _Tag, _Schema>> {
+  using _tagged_ptr = tagged_ptr<_T, _Tag, _Schema>;
+  using pointer = _tagged_ptr::clean_pointer;
+  using element_type = _tagged_ptr::value_type;
+  using difference_type = _tagged_ptr::difference_type;
+
+  // what to do with this?
+  template <typename _Up> using rebind = typename _tagged_ptr::template rebind<_Up>;
+  // Wraps a clean pointer into a tagged_ptr carrying the default tag; the result is the tagged_ptr itself.
+public:
+  _LIBCPP_HIDE_FROM_ABI constexpr static _tagged_ptr pointer_to(pointer ptr) _NOEXCEPT {
+    return _tagged_ptr{ptr};
+  }
+};
+
+// we are defaulting always to low_bits schema
+template <typename _T> tagged_ptr(_T *) -> tagged_ptr<_T>;
+template <typename _T, typename _Tag> tagged_ptr(_T *, _Tag) -> tagged_ptr<_T, _Tag>;
+
+// support for tuple protocol so we can split tagged pointer to structured bindings:
+// auto [ptr, tag] = tagged_ptr
+template <typename _T, typename _Tag, typename _Schema>
+struct tuple_size<tagged_ptr<_T, _Tag, _Schema>>: std::integral_constant<std::size_t, 2> {};
+
+template <std::size_t I, typename _T, typename _Tag, typename _Schema>
+struct tuple_element<I, tagged_ptr<_T, _Tag, _Schema>> {
+  using _pair_type = tagged_ptr<_T, _Tag, _Schema>;
+  using type = std::conditional_t<I == 0, typename _pair_type::clean_pointer, typename _pair_type::tag_type>;
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 26
+
+#endif // _LIBCPP___TAGGED_PTR_H
diff --git a/libcxx/include/memory b/libcxx/include/memory
index db3386cca48009..db491c0c92e1e3 100644
--- a/libcxx/include/memory
+++ b/libcxx/include/memory
@@ -969,6 +969,10 @@ template<class Pointer = void, class Smart, class... Args>
 #  include <__memory/allocate_at_least.h>
 #endif
 
+#if _LIBCPP_STD_VER >= 26
+#  include <__memory/tagged_ptr.h>
+#endif
+
 #include <version>
 
 // [memory.syn]
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index 3ea91274a9cc9a..a52431653392ec 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -1514,6 +1514,7 @@ module std [system] {
     module raw_storage_iterator               { header "__memory/raw_storage_iterator.h" }
     module shared_ptr                         { header "__memory/shared_ptr.h" }
     module swap_allocator                     { header "__memory/swap_allocator.h" }
+    module tagged_ptr                         { header "__memory/tagged_ptr.h" }
     module temp_value                         { header "__memory/temp_value.h" }
     module temporary_buffer                   {
       header "__memory/temporary_buffer.h"



More information about the cfe-commits mailing list