[clang] [libcxx] [clang & libcxx] constexpr pointer tagging (DO NOT MERGE) (PR #111861)

Hana Dusíková via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 10 08:47:28 PDT 2024


https://github.com/hanickadot created https://github.com/llvm/llvm-project/pull/111861

This PR is not for merging! It's just for reviewing design meant for standard library proposal P3125 (pointer tagging)

This code allows us to do pointer tagging in a safe way and also in constexpr. It's allowed by introducing builtins:

```c++
__builtin_tag_pointer_mask_or(void *, uintptr_t value, uintptr_t mask) -> void *; // (ptr & ~mask) | (value & mask)
__builtin_tag_pointer_mask(void *, uintptr_t mask) -> void *; // (ptr & mask)
__builtin_tag_pointer_mask_as_int(void *, uintptr_t mask) -> uintptr_t;  // (ptr & mask)
__builtin_tag_pointer_shift_or(void *, uintptr_t value, unsigned shift) -> void *; // (ptr << shift) | (mask & value) ... (mask = 1 << shift - 1)
__builtin_tag_pointer_unshift(void *, unsigned shift) -> void *; // (ptr >> shift)
```

This builtins are implemented both for codegen and also in exprconstant where they use a metadata variable put into lvalue base, so technically in constexpr you are not changing pointer, but storing the tag next to pointer. Also exprconstant implementation makes dereferencing tagged pointer an error. Comparing pointers with tag and without tag is sorted based on tag value (in constexpr).

Part of PR is standard library style interface which wraps the builtins in "schemas" used for `tagged_ptr<T, Tag, schema>` type. Each schema has internal definition how to tag (bitmask, alignment bits from pointer type, custom alignment bits, upper byte, low byte). 

Part of the `tagged_ptr` type are functions to access pointer or tag:
```c++
.tag() -> TagType;
.pointer() -> T *; // returns exact original pointer user put into tagged_ptr
.aliasing_pointer() -> T *; // returns pointer which points to same object as original, but can have different representation, it's a fast path for schemas for platforms which allows you to change bits of pointer without changing meaning (aarch64)
.dirty_unsafe_pointer() -> T *; // or void *, depending on schema
```

>From 6679082fa13048516a2b1ea27edf8de4cecb6f07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hana=20Dusi=CC=81kova=CC=81?= <hanicka at hanicka.net>
Date: Thu, 10 Oct 2024 17:35:34 +0200
Subject: [PATCH] [clang & libcxx] constexpr pointer tagging (not for merging,
 just for review)

---
 clang/include/clang/AST/APValue.h             |   4 +
 clang/include/clang/Basic/Builtins.td         |  37 ++
 .../include/clang/Basic/DiagnosticASTKinds.td |   6 +
 clang/lib/AST/APValue.cpp                     |  10 +
 clang/lib/AST/ExprConstant.cpp                | 112 +++++
 clang/lib/CodeGen/CGBuiltin.cpp               | 102 ++++
 clang/lib/CodeGen/CodeGenFunction.h           |   7 +
 libcxx/include/CMakeLists.txt                 |   1 +
 libcxx/include/__memory/tagged_ptr.h          | 447 ++++++++++++++++++
 libcxx/include/memory                         |   4 +
 libcxx/include/module.modulemap               |   1 +
 11 files changed, 731 insertions(+)
 create mode 100644 libcxx/include/__memory/tagged_ptr.h

diff --git a/clang/include/clang/AST/APValue.h b/clang/include/clang/AST/APValue.h
index 7869ee386689d7..278cf61dd56f89 100644
--- a/clang/include/clang/AST/APValue.h
+++ b/clang/include/clang/AST/APValue.h
@@ -198,6 +198,8 @@ class APValue {
       /// The QualType, if this is a DynamicAllocLValue.
       void *DynamicAllocType;
     };
+  public:
+    uint64_t Metadata{0};
   };
 
   /// A FieldDecl or CXXRecordDecl, along with a flag indicating whether we
@@ -527,6 +529,8 @@ class APValue {
   }
 
   const LValueBase getLValueBase() const;
+  uint64_t getLValueMetadata() const;
+  uint64_t & getLValueMetadata();
   CharUnits &getLValueOffset();
   const CharUnits &getLValueOffset() const {
     return const_cast<APValue*>(this)->getLValueOffset();
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 9ebee81fcb0d3d..6231f1c8af2830 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4867,3 +4867,40 @@ def ArithmeticFence : LangBuiltin<"ALL_LANGUAGES"> {
   let Attributes = [CustomTypeChecking, Constexpr];
   let Prototype = "void(...)";
 }
+
+// support for pointer tagging
+// (ptr & mask) | (val & ~mask)
+def TagPointerMaskOr : Builtin {
+  let Spellings = ["__builtin_tag_pointer_mask_or"];
+  let Attributes = [Constexpr, NoThrow];
+  let Prototype = "void*(void*, size_t, size_t)";
+}
+
+// (ptr & mask) -> void *
+def TagPointerMask : Builtin {
+  let Spellings = ["__builtin_tag_pointer_mask"];
+  let Attributes = [Constexpr, NoThrow];
+  let Prototype = "void*(void*, size_t)";
+}
+
+// (ptr & mask) -> uintptr_t
+def TagPointerMaskAsInt : Builtin {
+  let Spellings = ["__builtin_tag_pointer_mask_as_int"];
+  let Attributes = [Constexpr, NoThrow];
+  let Prototype = "size_t(void*, size_t)";
+}
+
+// (ptr << shift) | (value & ~mask) -> void *
+// mask = (1 << shift) - 1
+def TagPointerShiftOr : Builtin {
+  let Spellings = ["__builtin_tag_pointer_shift_or"];
+  let Attributes = [Constexpr, NoThrow];
+  let Prototype = "void*(void*, size_t, size_t)";
+}
+
+// (ptr >> unshift) -> void *
+def TagPointerUnshift : Builtin {
+  let Spellings = ["__builtin_tag_pointer_unshift"];
+  let Attributes = [Constexpr, NoThrow];
+  let Prototype = "void*(void*, size_t)";
+}
diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td
index 6a658cf14356f5..3d6b0cbebc7002 100644
--- a/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -218,6 +218,12 @@ def note_constexpr_access_null : Note<
 def note_constexpr_access_past_end : Note<
   "%sub{access_kind}0 dereferenced one-past-the-end pointer "
   "is not allowed in a constant expression">;
+def note_constexpr_dereferencing_tagged_pointer: Note<
+  "dereferencing tagged pointer">;
+def note_constexpr_tagging_with_shift_zero: Note<
+  "you must shift pointer at least by one bit to store a tag">;
+def note_constexpr_tagging_with_empty_mask: Note<
+  "you must provide non-zero mask for pointer tagging">;
 def note_constexpr_access_unsized_array : Note<
   "%sub{access_kind}0 element of array without known bound "
   "is not allowed in a constant expression">;
diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp
index 4f5d14cbd59bbf..02ba5cf51d4176 100644
--- a/clang/lib/AST/APValue.cpp
+++ b/clang/lib/AST/APValue.cpp
@@ -975,6 +975,16 @@ const APValue::LValueBase APValue::getLValueBase() const {
   return ((const LV *)(const void *)&Data)->Base;
 }
 
+uint64_t APValue::getLValueMetadata() const {
+  assert(isLValue() && "Invalid accessor");
+  return ((const LV *)(const void *)&Data)->Base.Metadata;
+}
+
+uint64_t & APValue::getLValueMetadata() {
+  assert(isLValue() && "Invalid accessor");
+  return ((LV *)(void *)&Data)->Base.Metadata;
+}
+
 bool APValue::isLValueOnePastTheEnd() const {
   assert(isLValue() && "Invalid accessor");
   return ((const LV *)(const void *)&Data)->IsOnePastTheEnd;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 4d5af96093cfeb..ed7eeeba20fcaf 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -4498,6 +4498,11 @@ handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv, QualType Type,
                                bool WantObjectRepresentation = false) {
   if (LVal.Designator.Invalid)
     return false;
+  
+  if (LVal.Base.Metadata != 0) {
+    Info.FFDiag(Conv, diag::note_constexpr_dereferencing_tagged_pointer);
+    return false;
+  }
 
   // Check for special cases where there is no existing APValue to look at.
   const Expr *Base = LVal.Base.dyn_cast<const Expr*>();
@@ -9735,6 +9740,87 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     return Success(E);
 
   switch (BuiltinOp) {
+  // emulation of pointer tagging without actually touching pointer value
+  // as there is no such thing as address here, so tag is stored as a metadata in lvalue base
+  case Builtin::BI__builtin_tag_pointer_mask_or: { 
+      APSInt Value, Mask;
+      if (!evaluatePointer(E->getArg(0), Result))
+        return Error(E);
+  
+      if (!EvaluateInteger(E->getArg(1), Value, Info))
+        return Error(E);
+  
+      if (!EvaluateInteger(E->getArg(2), Mask, Info))
+        return Error(E);
+      
+      if (Mask.getLimitedValue() == 0) {
+        CCEDiag(E->getArg(2), diag::note_constexpr_tagging_with_empty_mask);
+        return false;
+      }
+  
+      Result.Base.Metadata = (Result.Base.Metadata & ~Mask.getLimitedValue()) | (Value.getLimitedValue() & Mask.getLimitedValue());
+      return true;
+    }
+
+  // alternative approach to tagging which shifts pointer
+  // here we are only shifting metadata
+  case Builtin::BI__builtin_tag_pointer_shift_or: {
+    APSInt Value, Shift;
+    if (!evaluatePointer(E->getArg(0), Result))
+      return Error(E);
+
+    if (!EvaluateInteger(E->getArg(1), Value, Info))
+      return Error(E);
+
+    if (!EvaluateInteger(E->getArg(2), Shift, Info))
+      return Error(E);
+
+    if (Shift.getLimitedValue() == 0) {
+      CCEDiag(E->getArg(2), diag::note_constexpr_tagging_with_shift_zero);
+      return false;
+    }
+
+    const uint64_t Mask = (1ull << static_cast<uint64_t>(Shift.getLimitedValue())) - 1ull;
+    Result.Base.Metadata = (Result.Base.Metadata << static_cast<uint64_t>(Shift.getLimitedValue())) | (Value.getLimitedValue() & Mask);
+    return true;
+  }
+  
+  // recover pointer by masking metadata
+  // exprconstant allows dereferencing only metadata == 0 pointer
+  case Builtin::BI__builtin_tag_pointer_mask: {
+    APSInt Mask;
+    if (!evaluatePointer(E->getArg(0), Result))
+        return Error(E);
+  
+    if (!EvaluateInteger(E->getArg(1), Mask, Info))
+      return Error(E);
+    
+    if (Mask.getLimitedValue() == 0) {
+      CCEDiag(E->getArg(2), diag::note_constexpr_tagging_with_empty_mask);
+      return false;
+    }
+  
+    Result.Base.Metadata = (Result.Base.Metadata & Mask.getLimitedValue());
+    return true;
+  }
+
+  // shifting back pointer (also can convert tagged pointer back to normal pointer)
+  case Builtin::BI__builtin_tag_pointer_unshift: {
+    APSInt Shift;
+    if (!evaluatePointer(E->getArg(0), Result))
+        return Error(E);
+  
+    if (!EvaluateInteger(E->getArg(1), Shift, Info))
+      return Error(E);
+    
+    if (Shift.getLimitedValue() == 0) {
+      CCEDiag(E->getArg(2), diag::note_constexpr_tagging_with_shift_zero);
+      return false;
+    }
+  
+    Result.Base.Metadata = (Result.Base.Metadata >> static_cast<uint64_t>(Shift.getLimitedValue()));
+    return true;
+  }
   case Builtin::BIaddressof:
   case Builtin::BI__addressof:
   case Builtin::BI__builtin_addressof:
@@ -12662,6 +12748,25 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   default:
     return false;
 
+  case Builtin::BI__builtin_tag_pointer_mask_as_int: {
+    LValue Pointer;
+    APSInt Mask;
+  
+    if (!EvaluatePointer(E->getArg(0), Pointer, Info))
+      return Error(E);
+
+    if (!EvaluateInteger(E->getArg(1), Mask, Info))
+      return Error(E);
+    
+    if (Mask.getLimitedValue() == 0) {
+      CCEDiag(E->getArg(2), diag::note_constexpr_tagging_with_empty_mask);
+      return false;
+    }
+  
+    const uint64_t Result = Pointer.Base.Metadata & (static_cast<uint64_t>(Mask.getLimitedValue()));
+    return Success(Result, E);
+  }
+
   case Builtin::BI__builtin_dynamic_object_size:
   case Builtin::BI__builtin_object_size: {
     // The type was checked when we built the expression.
@@ -14219,6 +14324,13 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E,
       return Success(CmpResult::Less, E);
     if (CompareLHS > CompareRHS)
       return Success(CmpResult::Greater, E);
+    
+    // this makes tagged pointer not equal to original pointer
+    if (LHSValue.Base.Metadata < RHSValue.Base.Metadata)
+      return Success(CmpResult::Less, E);
+    if (LHSValue.Base.Metadata > RHSValue.Base.Metadata)
+      return Success(CmpResult::Greater, E);
+    
     return Success(CmpResult::Equal, E);
   }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2449b90a0e7902..4ac09e7a317931 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5320,6 +5320,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     return RValue::get(Carry);
   }
+  
+  // support for pointer tagging
+  case Builtin::BI__builtin_tag_pointer_mask_or:
+    return EmitBuiltinTagPointerMaskOr(E);
+  case Builtin::BI__builtin_tag_pointer_mask:
+    return EmitBuiltinTagPointerMask(E);
+  case Builtin::BI__builtin_tag_pointer_mask_as_int:
+    return EmitBuiltinTagPointerMaskAsInt(E);
+  case Builtin::BI__builtin_tag_pointer_shift_or:
+    return EmitBuiltinTagPointerShiftOr(E);
+  case Builtin::BI__builtin_tag_pointer_unshift:
+    return EmitBuiltinTagPointerUnshift(E);
+
   case Builtin::BIaddressof:
   case Builtin::BI__addressof:
   case Builtin::BI__builtin_addressof:
@@ -21245,6 +21258,95 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
   }
 }
 
+/// Generate (x & ~mask) | (value & mask).
+RValue CodeGenFunction::EmitBuiltinTagPointerMaskOr(const CallExpr *E) {
+  llvm::Value * Ptr = EmitScalarExpr(E->getArg(0));
+  llvm::Value * Value = EmitScalarExpr(E->getArg(1));
+  llvm::Value * Mask = EmitScalarExpr(E->getArg(2));
+  
+  llvm::IntegerType * IntType = IntegerType::get(getLLVMContext(), CGM.getDataLayout().getIndexTypeSizeInBits(Ptr->getType()));
+  
+  // TODO: avoid using bitcast and go path of ptr.tag (mirror to ptr.mask)
+  // to keep pointer's provenance, but this turns out a bit harder to do as it touches 
+  // a lot of places in llvm
+  llvm::Value * PointerInt = Builder.CreateBitOrPointerCast(Ptr, IntType, "pointer_int");
+  llvm::Value * InvertedMask = Builder.CreateNot(Mask, "inverted_mask");
+  
+  llvm::Value * MaskedPtr = Builder.CreateAnd(PointerInt, InvertedMask, "masked_ptr");
+  llvm::Value * MaskedValue = Builder.CreateAnd(Value, Mask, "masked_value");
+  
+  llvm::Value * ResultInt = Builder.CreateOr(MaskedPtr, MaskedValue, "result_int");
+  llvm::Value * Result = Builder.CreateBitOrPointerCast(ResultInt, Ptr->getType(), "result_ptr");
+   
+  return RValue::get(Result);
+}
+
+/// Generate (x << shift) | (value & ((1 << shift) - 1)).
+RValue CodeGenFunction::EmitBuiltinTagPointerShiftOr(const CallExpr *E) {
+  llvm::Value * Ptr = EmitScalarExpr(E->getArg(0));
+  llvm::Value * Value = EmitScalarExpr(E->getArg(1));
+  llvm::Value * Shift = EmitScalarExpr(E->getArg(2));
+  
+  llvm::IntegerType * IntType = IntegerType::get(getLLVMContext(), CGM.getDataLayout().getIndexTypeSizeInBits(Ptr->getType()));
+  
+  // TODO: again, for now a bitcast, later ptr.shift_tag
+  llvm::Value * PointerInt = Builder.CreateBitOrPointerCast(Ptr, IntType, "pointer_int");
+  llvm::Value * ShiftedPointerInt = Builder.CreateShl(PointerInt, Shift);
+  
+  auto *One = llvm::ConstantInt::get(IntType, 1);
+  
+  llvm::Value * Mask = Builder.CreateSub(Builder.CreateShl(One, Shift), One, "mask");
+  llvm::Value * MaskedValue = Builder.CreateAdd(Value, Mask, "masked_value");
+  llvm::Value * PointerWithTag = Builder.CreateOr(ShiftedPointerInt, MaskedValue, "pointer_with_tag_int");
+  
+  llvm::Value * Result = Builder.CreateBitOrPointerCast(PointerWithTag, Ptr->getType(), "result_ptr");
+  return RValue::get(Result);
+}
+
+/// Generate (x >> shift)
+RValue CodeGenFunction::EmitBuiltinTagPointerUnshift(const CallExpr *E) {
+  llvm::Value * Ptr = EmitScalarExpr(E->getArg(0));
+  llvm::Value * Shift = EmitScalarExpr(E->getArg(1));
+  
+  llvm::IntegerType * IntType = IntegerType::get(getLLVMContext(), CGM.getDataLayout().getIndexTypeSizeInBits(Ptr->getType()));
+  
+  // for now I'm going path of bitcast
+  llvm::Value * PointerInt = Builder.CreateBitOrPointerCast(Ptr, IntType, "pointer_int");
+  llvm::Value * UnShiftedPointerInt = Builder.CreateAShr(PointerInt, Shift, "unshifted_pointer_int");
+  
+  llvm::Value * Result = Builder.CreateBitOrPointerCast(UnShiftedPointerInt, Ptr->getType(), "result_ptr");
+  return RValue::get(Result);
+}
+
+/// Generate (x & mask).
+RValue CodeGenFunction::EmitBuiltinTagPointerMask(const CallExpr *E) {
+  llvm::Value * Ptr = EmitScalarExpr(E->getArg(0));
+  llvm::Value * Mask = EmitScalarExpr(E->getArg(1));
+  
+  llvm::Value *Result = Builder.CreateIntrinsic(
+        Intrinsic::ptrmask, {Ptr->getType(), Mask->getType()},
+        {Ptr, Mask}, nullptr, "result");
+  
+  return RValue::get(Result);
+}
+
+/// Generate (x & mask) (but return it as number).
+RValue CodeGenFunction::EmitBuiltinTagPointerMaskAsInt(const CallExpr *E) {
+  llvm::Value * Ptr = EmitScalarExpr(E->getArg(0));
+  llvm::Value * Mask = EmitScalarExpr(E->getArg(1));
+  
+  llvm::IntegerType * IntType = IntegerType::get(getLLVMContext(), CGM.getDataLayout().getIndexTypeSizeInBits(Ptr->getType()));
+  
+  llvm::Value *Result = Builder.CreateIntrinsic(
+        Intrinsic::ptrmask, {Ptr->getType(), Mask->getType()},
+        {Ptr, Mask}, nullptr, "result");
+
+  llvm::Value * IntResult = Builder.CreateBitOrPointerCast(Result, IntType, "int_result");
+  
+  return RValue::get(IntResult);
+}
+
+
 namespace {
 struct BuiltinAlignArgs {
   llvm::Value *Src = nullptr;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 9ba0ed02a564dd..7938593f8532cc 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4556,6 +4556,13 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   RValue emitRotate(const CallExpr *E, bool IsRotateRight);
 
+  /// Emit IR for pointer tagging
+  RValue EmitBuiltinTagPointerMaskOr(const CallExpr *E);
+  RValue EmitBuiltinTagPointerMask(const CallExpr *E);
+  RValue EmitBuiltinTagPointerMaskAsInt(const CallExpr *E);
+  RValue EmitBuiltinTagPointerShiftOr(const CallExpr *E);
+  RValue EmitBuiltinTagPointerUnshift(const CallExpr *E);
+
   /// Emit IR for __builtin_os_log_format.
   RValue emitBuiltinOSLogFormat(const CallExpr &E);
 
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index c2a597f49e317f..edb17efb0c72bc 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -553,6 +553,7 @@ set(files
   __memory/raw_storage_iterator.h
   __memory/shared_ptr.h
   __memory/swap_allocator.h
+  __memory/tagged_ptr.h
   __memory/temp_value.h
   __memory/temporary_buffer.h
   __memory/uninitialized_algorithms.h
diff --git a/libcxx/include/__memory/tagged_ptr.h b/libcxx/include/__memory/tagged_ptr.h
new file mode 100644
index 00000000000000..7c99d0e9299f48
--- /dev/null
+++ b/libcxx/include/__memory/tagged_ptr.h
@@ -0,0 +1,447 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TAGGED_PTR_H
+#define _LIBCPP___TAGGED_PTR_H
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 26
+  
+#include <__config>
+#include <__type_traits/is_trivially_copyable.h>
+#include <__assert>
+#include "__bit/has_single_bit.h"
+#include <__type_traits/rank.h>
+#include "pointer_traits.h"
+#include <compare>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <typename T, typename Y> concept convertible_to_from = std::convertible_to<Y, T> && std::convertible_to<T, Y>;
+  
+template <typename T> concept pointer_tagging_schema = requires(T::dirty_pointer payload, T::clean_pointer clean, T::tag_type tag) {
+  //requires convertible_to_from<typename T::tag_type, uintptr_t>;
+  requires std::is_pointer_v<typename T::clean_pointer>;
+  
+  { T::encode_pointer_with_tag(clean, tag) } noexcept -> std::same_as<typename T::dirty_pointer>;
+  { T::recover_pointer(payload) } noexcept -> std::same_as<typename T::clean_pointer>;
+  { T::recover_value(payload) } noexcept -> std::same_as<typename T::tag_type>;
+};
+
+template <typename T> concept pointer_tagging_schema_with_aliasing = pointer_tagging_schema<T> && requires(T::dirty_pointer payload) {
+  { T::recover_aliasing_pointer(payload) } noexcept -> std::same_as<typename T::clean_pointer>;
+};
+
+struct no_tag {
+  template <typename T, typename Tag> struct schema {
+    using clean_pointer = T *;
+    using dirty_pointer = void *;
+    using tag_type = Tag;
+
+    [[clang::always_inline]] static constexpr dirty_pointer encode_pointer_with_tag(clean_pointer _ptr, tag_type) noexcept {
+      return (dirty_pointer)_ptr;
+    }
+    [[clang::always_inline]] static constexpr clean_pointer recover_pointer(dirty_pointer _ptr) noexcept {
+      return (clean_pointer)_ptr;
+    }
+    [[clang::always_inline]] static constexpr tag_type recover_value(dirty_pointer) noexcept {
+      return {};
+    }
+  };
+};
+
+template <uintptr_t Mask> struct bitmask_tag {
+  static constexpr uintptr_t _mask = Mask;
+
+  template <typename T, typename Tag> struct schema {
+    using clean_pointer = T *;
+    using dirty_pointer = void *;
+    using tag_type = Tag;
+
+    [[clang::always_inline]] static constexpr dirty_pointer encode_pointer_with_tag(clean_pointer _ptr, tag_type _value) noexcept {
+      return static_cast<dirty_pointer>(__builtin_tag_pointer_mask_or((void *)(_ptr), static_cast<uintptr_t>(_value), _mask));
+    }
+    [[clang::always_inline]] static constexpr clean_pointer recover_pointer(dirty_pointer _ptr) noexcept {
+      return static_cast<clean_pointer>(__builtin_tag_pointer_mask((void *)_ptr, ~_mask));
+    }
+    [[clang::always_inline]] static constexpr tag_type recover_value(dirty_pointer _ptr) noexcept {
+      return static_cast<tag_type>(__builtin_tag_pointer_mask_as_int((void *)_ptr, _mask));
+    }
+  };
+};
+
+template <unsigned Alignment> struct custom_alignment_tag {
+  static constexpr uintptr_t mask = (static_cast<uintptr_t>(1u) << static_cast<uintptr_t>(Alignment)) - 1ull;
+  template <typename T, typename Tag> using schema = typename bitmask_tag<mask>::template schema<T, Tag>;
+};
+
+struct alignment_low_bits_tag {
+  template <typename T> static constexpr unsigned alignment = alignof(T);
+  template <typename T, typename Tag> using schema = typename custom_alignment_tag<alignment<T>>::template schema<T, Tag>;
+};
+
+template <unsigned Bits> struct shift_tag {
+  static constexpr unsigned _shift = Bits;
+  static constexpr uintptr_t _mask = (uintptr_t{1u} << _shift) - 1u;
+
+  template <typename T, typename Tag> struct schema {
+    using clean_pointer = T *;
+    using dirty_pointer = void *;
+    using tag_type = Tag;
+
+    [[clang::always_inline]] static constexpr dirty_pointer encode_pointer_with_tag(clean_pointer _ptr, tag_type _value) noexcept {
+      return static_cast<dirty_pointer>(__builtin_tag_pointer_shift_or((void *)(_ptr), (uintptr_t)_value, _shift));
+    }
+    [[clang::always_inline]] static constexpr clean_pointer recover_pointer(dirty_pointer _ptr) noexcept {
+      return static_cast<clean_pointer>(__builtin_tag_pointer_unshift((void *)_ptr, _shift));
+    }
+    [[clang::always_inline]] static constexpr tag_type recover_value(dirty_pointer _ptr) noexcept {
+      return static_cast<tag_type>(__builtin_tag_pointer_mask_as_int((void *)_ptr, _mask));
+    }
+  };
+};
+
+struct low_byte_tag {
+  template <typename T, typename Tag> using schema = typename shift_tag<8>::template schema<T, Tag>;
+};
+
+struct upper_byte_tag {
+  template <typename T> static constexpr unsigned _shift = sizeof(T *) * 8ull - 8ull;
+  template <typename T> static constexpr uintptr_t _mask = 0b1111'1111ull << _shift<T>;
+  
+  template <typename T, typename Tag> using schema = typename bitmask_tag<_mask<T>>::template schema<T, Tag>;
+};
+
+struct upper_byte_shifted_tag: upper_byte_tag {
+  template <typename T, typename Tag> struct schema {
+    using _underlying_schema = typename upper_byte_tag::template schema<T, uintptr_t>;
+    static constexpr unsigned _shift = upper_byte_tag::template _shift<T>;
+    
+    using clean_pointer = T *;
+    using dirty_pointer = void *;
+    using tag_type = Tag;
+  
+    [[clang::always_inline]] static constexpr dirty_pointer encode_pointer_with_tag(clean_pointer _ptr, tag_type _value) noexcept {
+      return _underlying_schema::encode_pointer_with_tag(_ptr, static_cast<uintptr_t>(_value) << _shift);
+    }
+    [[clang::always_inline]] static constexpr clean_pointer recover_pointer(dirty_pointer _ptr) noexcept {
+      return _underlying_schema::recover_pointer(_ptr);
+    }
+    [[clang::always_inline]] static constexpr tag_type recover_value(dirty_pointer _ptr) noexcept {
+      return static_cast<tag_type>(_underlying_schema::recover_value(_ptr) >> _shift);
+    }
+  };
+};
+
+
+
+// forward declaration
+template <typename _T, typename _Tag = uintptr_t, typename _Schema = alignment_low_bits_tag> class tagged_ptr;
+
+
+template <typename _Schema, typename _T, typename _Tag = uintptr_t> constexpr auto tag_ptr(_T * _ptr, _Tag _tag = {}) noexcept {
+  return tagged_ptr<_T, _Tag, _Schema>{_ptr, _tag};
+}
+
+template <typename _T, typename _Tag, typename _Schema = alignment_low_bits_tag> constexpr auto tagged_pointer_cast(typename _Schema::template schema<_T, _Tag>::dirty_pointer _ptr) noexcept -> tagged_ptr<_T, _Tag, _Schema> {
+  using result_type = tagged_ptr<_T, _Tag, _Schema>;
+  return result_type{typename result_type::already_tagged_tag{_ptr}};
+}
+
+template <typename _Schema2, typename _T, typename _Tag, typename _Schema> constexpr auto scheme_pointer_cast(tagged_ptr<_T, _Tag, _Schema> in) noexcept {
+  return tagged_ptr<_T, _Tag, _Schema2>{in.pointer(), in.tag()};
+}
+
+template <typename _Y, typename _T, typename _Tag, typename _Schema> constexpr auto const_pointer_cast(tagged_ptr<_T, _Tag, _Schema> in) noexcept {
+  // TODO we can just use native pointer here
+  return tagged_ptr<_Y, _Tag, _Schema>{const_cast<_Y*>(in.pointer()), in.tag()};
+}
+
+template <typename _Y, typename _T, typename _Tag, typename _Schema> constexpr auto static_pointer_cast(tagged_ptr<_T, _Tag, _Schema> in) noexcept {
+  return tagged_ptr<_Y, _Tag, _Schema>{static_cast<_Y*>(in.pointer()), in.tag()};
+}
+
+template <typename _Y, typename _T, typename _Tag, typename _Schema> constexpr auto dynamic_pointer_cast(tagged_ptr<_T, _Tag, _Schema> in) noexcept {
+  return tagged_ptr<_Y, _Tag, _Schema>{dynamic_cast<_Y*>(in.pointer()), in.tag()};
+}
+
+template <typename _Y, typename _T, typename _Tag, typename _Schema> auto reinterpret_pointer_cast(tagged_ptr<_T, _Tag, _Schema> in) noexcept {
+  return tagged_ptr<_Y, _Tag, _Schema>{reinterpret_cast<_Y*>(in.pointer()), in.tag()};
+}
+
+
+// wrapper class containing the pointer value and provides access
+template <typename _T, typename _Tag, typename _Schema> class tagged_ptr {
+public:
+  using schema = typename _Schema::template schema<_T, _Tag>;
+  using dirty_pointer = typename schema::dirty_pointer;
+  using clean_pointer = typename schema::clean_pointer;
+  using tag_type = typename schema::tag_type;
+  
+  using value_type = std::remove_cvref_t<decltype(*std::declval<clean_pointer>())>;
+  using difference_type = typename std::pointer_traits<clean_pointer>::difference_type;
+  
+  
+  template <typename _Y> using rebind = tagged_ptr<_Y, _Tag, _Schema>;
+  
+private:
+  
+  dirty_pointer _pointer{nullptr};
+  
+  friend constexpr auto tagged_pointer_cast<_T, _Tag, _Schema>(typename _Schema::template schema<_T, _Tag>::dirty_pointer ptr) noexcept -> tagged_ptr<_T, _Tag, _Schema>;
+  
+  struct already_tagged_tag {
+    dirty_pointer _ptr;
+  };
+ 
+  // special hidden constructor to allow constructing unsafely
+  [[clang::always_inline]] constexpr tagged_ptr(already_tagged_tag _in) noexcept: _pointer{_in._ptr} { }
+  
+  template <typename _Y, typename _T2, typename _Tag2, typename _Schema2> constexpr auto const_pointer_cast(tagged_ptr<_T2, _Tag2, _Schema2> in) noexcept -> rebind<_T>;
+  
+public:
+  tagged_ptr() = default;
+  consteval tagged_ptr(nullptr_t) noexcept: _pointer{nullptr} { }
+  tagged_ptr(const tagged_ptr &) = default;
+  tagged_ptr(tagged_ptr &&) = default;
+  ~tagged_ptr() = default;
+  tagged_ptr & operator=(const tagged_ptr &) = default;
+  tagged_ptr & operator=(tagged_ptr &&) = default;
+  
+  [[clang::always_inline]] explicit constexpr tagged_ptr(clean_pointer _ptr, tag_type _tag = {}) noexcept: _pointer{schema::encode_pointer_with_tag(_ptr, _tag)} {
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(pointer() == _ptr, "pointer must be recoverable after untagging");
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(tag() == _tag, "stored tag must be recoverable and within schema provided bit capacity");
+  } 
+
+  // accessors
+  [[clang::always_inline]] constexpr decltype(auto) operator*() const noexcept {
+    return *pointer();
+  }
+  
+  [[clang::always_inline]] constexpr clean_pointer operator->() const noexcept {
+    return pointer();
+  }
+   
+  template <typename...Ts> [[clang::always_inline]] [[clang::always_inline]] constexpr decltype(auto) operator[](Ts... args) const noexcept requires std::is_array_v<value_type> && (sizeof...(Ts) == std::rank_v<value_type>) {
+    return (*pointer())[args...];
+  }
+  
+  [[clang::always_inline]] constexpr decltype(auto) operator[](difference_type diff) const noexcept requires (!std::is_array_v<value_type>) {
+    return *(pointer() + diff);
+  }
+  
+  // swap
+  [[clang::always_inline]] friend constexpr void swap(tagged_ptr & lhs, tagged_ptr & rhs) noexcept {
+    std::swap(lhs._pointer, rhs._pointer);
+  }
+  
+  // modifiers for tag
+  [[clang::always_inline]] constexpr auto & set(tag_type new_tag) noexcept {
+    // this is here so I can avoid checks
+    // TODO we should be able to check what bits available
+    _pointer = schema::encode_pointer_with_tag(pointer(), new_tag);
+    return *this;
+  }
+  
+  [[clang::always_inline]] constexpr auto & set_union(tag_type addition) noexcept {
+    return set(tag() | addition);
+  }
+  
+  [[clang::always_inline]] constexpr auto & set_difference(tag_type mask) noexcept {
+    return set(tag() & (~static_cast<uintptr_t>(mask)));
+  }
+  
+  [[clang::always_inline]] constexpr auto & set_intersection(tag_type mask) noexcept {
+    return set(tag() & mask);
+  }
+  
+  [[clang::always_inline]] constexpr auto & set_all() noexcept {
+    return set(static_cast<tag_type>(0xFFFFFFFF'FFFFFFFFull));
+  }
+
+  // modifiers for pointer
+  [[clang::always_inline]] constexpr auto & operator++() noexcept {
+    _pointer = tagged_ptr{pointer()+1u, tag()}._pointer;
+    return *this;
+  }
+  
+  [[clang::always_inline]] constexpr auto operator++(int) noexcept {
+    auto copy = auto(*this);
+    this->operator++();
+    return copy;
+  }
+  
+  [[clang::always_inline]] constexpr auto & operator+=(difference_type diff) noexcept {
+    _pointer = tagged_ptr{pointer()+diff, tag()}._pointer;
+    return *this;
+  }
+  
+  [[clang::always_inline]] friend constexpr auto operator+(tagged_ptr lhs, difference_type diff) noexcept {
+    lhs += diff;
+    return lhs;
+  }
+  
+  [[clang::always_inline]] friend constexpr auto operator+(difference_type diff, tagged_ptr rhs) noexcept {
+    rhs += diff;
+    return rhs;
+  }
+  
+  [[clang::always_inline]] friend constexpr auto operator-(tagged_ptr lhs, difference_type diff) noexcept {
+    lhs -= diff;
+    return lhs;
+  }
+  
+  [[clang::always_inline]] friend constexpr auto operator-(difference_type diff, tagged_ptr rhs) noexcept {
+    rhs -= diff;
+    return rhs;
+  }
+  
+  [[clang::always_inline]] constexpr auto & operator-=(difference_type diff) noexcept {
+    _pointer = tagged_ptr{pointer()-diff, tag()}._pointer;
+    return *this;
+  }
+  
+  [[clang::always_inline]] constexpr auto & operator--() noexcept {
+    _pointer = tagged_ptr{pointer()-1u, tag()}._pointer;
+    return *this;
+  }
+  
+  [[clang::always_inline]] constexpr auto operator--(int) noexcept {
+    auto copy = auto(*this);
+    this->operator--();
+    return copy;
+  }
+  
+  // observers
+  constexpr dirty_pointer unsafe_dirty_pointer() const noexcept {
+    // this function is not intentionally constexpr, as it is needed only to interact with
+    // existing runtime code
+    return _pointer;
+  } 
+  
+  static constexpr bool support_aliasing_masking = pointer_tagging_schema_with_aliasing<schema>;
+  
+  [[clang::always_inline]] constexpr clean_pointer aliasing_pointer() const noexcept {
+    if constexpr (support_aliasing_masking) {
+      if !consteval {
+        return schema::recover_aliasing_pointer(_pointer);
+      }
+    }
+    
+    return schema::recover_pointer(_pointer);
+  }
+  
+  [[clang::always_inline]] constexpr clean_pointer pointer() const noexcept {
+    return schema::recover_pointer(_pointer);
+  }
+  
+  [[clang::always_inline]] constexpr tag_type tag() const noexcept {
+    return schema::recover_value(_pointer);
+  }
+  
+  template <std::size_t I> [[nodiscard, clang::always_inline]] friend constexpr decltype(auto) get(tagged_ptr _pair) noexcept {
+    static_assert(I < 3);
+    if constexpr (I == 0) {
+      return _pair.pointer();
+    } else {
+      return _pair.tag();
+    }
+  }
+  
+  [[clang::always_inline]] constexpr explicit operator bool() const noexcept {
+    return pointer() != nullptr;
+  }
+  
+  [[clang::always_inline]] friend constexpr ptrdiff_t operator-(tagged_ptr lhs, tagged_ptr rhs) noexcept {
+    return lhs.pointer() - rhs.pointer();
+  }
+  
+  // comparison operators
+  [[clang::always_inline]] friend bool operator==(tagged_ptr, tagged_ptr) = default;
+  
+  struct _compare_object {
+    clean_pointer pointer;
+    tag_type tag;
+    
+    friend auto operator<=>(_compare_object, _compare_object) = default;
+  };
+  
+  [[clang::always_inline]] friend constexpr auto operator<=>(tagged_ptr lhs, tagged_ptr rhs) noexcept {
+    return _compare_object{lhs.pointer(), lhs.tag()} <=> _compare_object{rhs.pointer(), rhs.tag()};
+  }
+  [[clang::always_inline]] friend constexpr bool operator==(tagged_ptr lhs, clean_pointer rhs) noexcept {
+    return lhs.pointer() == rhs;
+  }
+  [[clang::always_inline]] friend constexpr auto operator<=>(tagged_ptr lhs, clean_pointer rhs) noexcept {
+    return lhs.pointer() <=> rhs;
+  }
+  [[clang::always_inline]] friend constexpr bool operator==(tagged_ptr lhs, nullptr_t) noexcept {
+    return lhs.pointer() == nullptr;
+  }
+};
+
+// to_address specialization
+template <typename _T, typename _Tag, typename _Schema> static constexpr auto to_address(tagged_ptr<_T, _Tag, _Schema> p) noexcept -> tagged_ptr<_T, _Tag, _Schema>::element_type * {
+  return p.pointer();
+}
+
+// iterator traits
+template <typename _T, typename _Tag, typename _Schema>
+struct _LIBCPP_TEMPLATE_VIS iterator_traits<tagged_ptr<_T, _Tag, _Schema>> {
+  using _tagged_ptr = tagged_ptr<_T, _Tag, _Schema>;
+  
+  using iterator_category = std::random_access_iterator_tag;
+  using iterator_concept = std::contiguous_iterator_tag;
+  
+  using value_type = _tagged_ptr::value_type;
+  using reference = value_type &;
+  using pointer = _tagged_ptr::clean_pointer;
+  using difference_type = _tagged_ptr::difference_type;
+};
+
+// pointer traits
+template <typename _T, typename _Tag, typename _Schema>
+struct _LIBCPP_TEMPLATE_VIS pointer_traits<tagged_ptr<_T, _Tag, _Schema>> {
+  using _tagged_ptr = tagged_ptr<_T, _Tag, _Schema>;
+  using pointer = _tagged_ptr::clean_pointer;
+  using element_type = _tagged_ptr::value_type;
+  using difference_type = _tagged_ptr::difference_type;
+  
+  // what to do with this?
+  template <typename _Up> using rebind = typename _tagged_ptr::template rebind<_Up>;
+
+public:
+  _LIBCPP_HIDE_FROM_ABI constexpr static pointer pointer_to(pointer ptr) _NOEXCEPT {
+    return _tagged_ptr{ptr};
+  }
+};
+
+// we are defaulting always to low_bits schema
+template <typename _T> tagged_ptr(_T *) -> tagged_ptr<_T>;
+template <typename _T, typename _Tag> tagged_ptr(_T *, _Tag) -> tagged_ptr<_T, _Tag>;
+
+// support for tuple protocol so we can split tagged pointer to structured bindings:
+// auto [ptr, tag] = tagged_ptr
+template <typename _T, typename _Tag, typename _Schema>
+struct tuple_size<tagged_ptr<_T, _Tag, _Schema>>: std::integral_constant<std::size_t, 2> {};
+
+template <std::size_t I, typename _T, typename _Tag, typename _Schema>
+struct tuple_element<I, tagged_ptr<_T, _Tag, _Schema>> {
+  using _pair_type = tagged_ptr<_T, _Tag, _Schema>;
+  using type = std::conditional_t<I == 0, typename _pair_type::clean_pointer, typename _pair_type::tag_type>;
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 26
+
+#endif // _LIBCPP___TAGGED_PTR_H
diff --git a/libcxx/include/memory b/libcxx/include/memory
index db3386cca48009..db491c0c92e1e3 100644
--- a/libcxx/include/memory
+++ b/libcxx/include/memory
@@ -969,6 +969,10 @@ template<class Pointer = void, class Smart, class... Args>
 #  include <__memory/allocate_at_least.h>
 #endif
 
+#if _LIBCPP_STD_VER >= 26
+#  include <__memory/tagged_ptr.h>
+#endif
+
 #include <version>
 
 // [memory.syn]
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index 3ea91274a9cc9a..a52431653392ec 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -1514,6 +1514,7 @@ module std [system] {
     module raw_storage_iterator               { header "__memory/raw_storage_iterator.h" }
     module shared_ptr                         { header "__memory/shared_ptr.h" }
     module swap_allocator                     { header "__memory/swap_allocator.h" }
+    module tagged_ptr                         { header "__memory/tagged_ptr.h" }
     module temp_value                         { header "__memory/temp_value.h" }
     module temporary_buffer                   {
       header "__memory/temporary_buffer.h"



More information about the cfe-commits mailing list