[clang] [clang][bytecode] Add degenerate pointers (PR #160086)

via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 22 05:36:52 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Timm Baeder (tbaederr)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/160086.diff


7 Files Affected:

- (modified) clang/lib/AST/ByteCode/Interp.h (+32-17) 
- (modified) clang/lib/AST/ByteCode/Pointer.cpp (+75-18) 
- (modified) clang/lib/AST/ByteCode/Pointer.h (+38-9) 
- (modified) clang/test/AST/ByteCode/codegen.c (+5-1) 
- (modified) clang/test/AST/ByteCode/const-eval.c (+8) 
- (modified) clang/test/CodeGen/const-arithmetic.c (+1) 
- (modified) clang/test/CodeGenCXX/PR19955.cpp (+1-1) 


``````````diff
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index b3b4b998439cc..00e56da265b1f 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -2234,7 +2234,7 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
   }
 
   // Arrays of unknown bounds cannot have pointers into them.
-  if (!CheckArray(S, OpPC, Ptr))
+  if (S.getLangOpts().CPlusPlus && !CheckArray(S, OpPC, Ptr))
     return std::nullopt;
 
   // This is much simpler for integral pointers, so handle them first.
@@ -2245,7 +2245,8 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
       return Pointer(V + O, Ptr.asIntPointer().Desc);
     else
       return Pointer(V - O, Ptr.asIntPointer().Desc);
-  } else if (Ptr.isFunctionPointer()) {
+  }
+  if (Ptr.isFunctionPointer()) {
     uint64_t O = static_cast<uint64_t>(Offset);
     uint64_t N;
     if constexpr (Op == ArithOp::Add)
@@ -2258,8 +2259,17 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
           << N << /*non-array*/ true << 0;
     return Pointer(Ptr.asFunctionPointer().getFunction(), N);
   }
+  if (Ptr.isUnknownSizeArray()) {
+    // Everything on unknown size arrays is invalid anyway. Compute it here
+    // manually and return a degenerate pointer.
+    assert(!Ptr.isZero());
+    if constexpr (Op == ArithOp::Add)
+      return Pointer(DegenPointer{Ptr.block()}, static_cast<uint64_t>(Offset));
+    else
+      return Pointer(DegenPointer{Ptr.block()}, static_cast<uint64_t>(-Offset));
+  }
 
-  assert(Ptr.isBlockPointer());
+  assert(Ptr.isBlockPointer() || Ptr.isDegenPointer());
 
   uint64_t MaxIndex = static_cast<uint64_t>(Ptr.getNumElems());
   uint64_t Index;
@@ -2305,10 +2315,6 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
     }
   }
 
-  if (Invalid && S.getLangOpts().CPlusPlus)
-    return std::nullopt;
-
-  // Offset is valid - compute it on unsigned.
   int64_t WideIndex = static_cast<int64_t>(Index);
   int64_t WideOffset = static_cast<int64_t>(Offset);
   int64_t Result;
@@ -2317,6 +2323,12 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
   else
     Result = WideIndex - WideOffset;
 
+  if (Invalid) {
+    if (S.getLangOpts().CPlusPlus)
+      return std::nullopt;
+
+    return Pointer(DegenPointer{Ptr.block()}, Result);
+  }
   // When the pointer is one-past-end, going back to index 0 is the only
   // useful thing we can do. Any other index has been diagnosed before and
   // we don't get here.
@@ -2326,6 +2338,9 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
     return Pointer(Ptr.asBlockPointer().Pointee, Ptr.asBlockPointer().Base);
   }
 
+  if (Ptr.isDegenPointer())
+    return Pointer(DegenPointer{Ptr.block()}, Result);
+
   return Ptr.atIndex(static_cast<uint64_t>(Result));
 }
 
@@ -2438,16 +2453,16 @@ inline bool SubPtr(InterpState &S, CodePtr OpPC) {
     }
   }
 
-  int64_t A64 =
-      LHS.isBlockPointer()
-          ? (LHS.isElementPastEnd() ? LHS.getNumElems() : LHS.getIndex())
-          : LHS.getIntegerRepresentation();
-
-  int64_t B64 =
-      RHS.isBlockPointer()
-          ? (RHS.isElementPastEnd() ? RHS.getNumElems() : RHS.getIndex())
-          : RHS.getIntegerRepresentation();
+  int64_t A64;
+  int64_t B64;
 
+  if (LHS.isBlockPointer() && RHS.isBlockPointer()) {
+    A64 = (LHS.isElementPastEnd() ? LHS.getNumElems() : LHS.getIndex());
+    B64 = (RHS.isElementPastEnd() ? RHS.getNumElems() : RHS.getIndex());
+  } else {
+    A64 = LHS.getIntegerRepresentation();
+    B64 = RHS.getIntegerRepresentation();
+  }
   int64_t R64 = A64 - B64;
   if (static_cast<int64_t>(T::from(R64)) != R64)
     return handleOverflow(S, OpPC, R64);
@@ -3196,7 +3211,7 @@ inline bool CopyArray(InterpState &S, CodePtr OpPC, uint32_t SrcIndex,
 inline bool ArrayDecay(InterpState &S, CodePtr OpPC) {
   const Pointer &Ptr = S.Stk.pop<Pointer>();
 
-  if (Ptr.isZero()) {
+  if (Ptr.isZero() || Ptr.isDegenPointer()) {
     S.Stk.push<Pointer>(Ptr);
     return true;
   }
diff --git a/clang/lib/AST/ByteCode/Pointer.cpp b/clang/lib/AST/ByteCode/Pointer.cpp
index 81d4ce14f9310..3c041465ccb81 100644
--- a/clang/lib/AST/ByteCode/Pointer.cpp
+++ b/clang/lib/AST/ByteCode/Pointer.cpp
@@ -57,6 +57,9 @@ Pointer::Pointer(const Pointer &P)
   case Storage::Typeid:
     Typeid = P.Typeid;
     break;
+  case Storage::Degen:
+    DP = P.DP;
+    break;
   }
 }
 
@@ -76,6 +79,9 @@ Pointer::Pointer(Pointer &&P) : Offset(P.Offset), StorageKind(P.StorageKind) {
   case Storage::Typeid:
     Typeid = P.Typeid;
     break;
+  case Storage::Degen:
+    DP = P.DP;
+    break;
   }
 }
 
@@ -110,20 +116,26 @@ Pointer &Pointer::operator=(const Pointer &P) {
   StorageKind = P.StorageKind;
   Offset = P.Offset;
 
-  if (P.isBlockPointer()) {
+  switch (StorageKind) {
+  case Storage::Int:
+    Int = P.Int;
+    break;
+  case Storage::Block:
     BS = P.BS;
-
     if (BS.Pointee)
       BS.Pointee->addPointer(this);
-  } else if (P.isIntegralPointer()) {
-    Int = P.Int;
-  } else if (P.isFunctionPointer()) {
+    break;
+  case Storage::Fn:
     Fn = P.Fn;
-  } else if (P.isTypeidPointer()) {
+    break;
+  case Storage::Typeid:
     Typeid = P.Typeid;
-  } else {
-    assert(false && "Unhandled storage kind");
+    break;
+  case Storage::Degen:
+    DP = P.DP;
+    break;
   }
+
   return *this;
 }
 
@@ -147,23 +159,37 @@ Pointer &Pointer::operator=(Pointer &&P) {
   StorageKind = P.StorageKind;
   Offset = P.Offset;
 
-  if (P.isBlockPointer()) {
+  switch (StorageKind) {
+  case Storage::Int:
+    Int = P.Int;
+    break;
+  case Storage::Block:
     BS = P.BS;
-
     if (BS.Pointee)
       BS.Pointee->addPointer(this);
-  } else if (P.isIntegralPointer()) {
-    Int = P.Int;
-  } else if (P.isFunctionPointer()) {
+    break;
+  case Storage::Fn:
     Fn = P.Fn;
-  } else if (P.isTypeidPointer()) {
+    break;
+  case Storage::Typeid:
     Typeid = P.Typeid;
-  } else {
-    assert(false && "Unhandled storage kind");
+    break;
+  case Storage::Degen:
+    DP = P.DP;
+    break;
   }
+
   return *this;
 }
 
+static QualType getPointeeOrElemType(QualType T) {
+  if (const ArrayType *AT = T->getAsArrayTypeUnsafe())
+    return AT->getElementType();
+  if (T->isPointerOrReferenceType())
+    return T->getPointeeType();
+  return T;
+}
+
 APValue Pointer::toAPValue(const ASTContext &ASTCtx) const {
   llvm::SmallVector<APValue::LValuePathEntry, 5> Path;
 
@@ -193,6 +219,26 @@ APValue Pointer::toAPValue(const ASTContext &ASTCtx) const {
                    /*OnePastTheEnd=*/false, /*IsNull=*/false);
   }
 
+  if (isDegenPointer()) {
+    assert(!isZero());
+    QualType PtrType = getType();
+    if (PtrType->isArrayType() || PtrType->isPointerOrReferenceType()) {
+      QualType ElemType = getPointeeOrElemType(PtrType);
+      CharUnits ByteOffset = Offset * ASTCtx.getTypeSizeInChars(ElemType);
+      Path.push_back(
+          APValue::LValuePathEntry::ArrayIndex(ByteOffset.getQuantity()));
+      return APValue(DP.Pointee->getDescriptor()->asVarDecl(), ByteOffset, Path,
+                     /*IsOnePastEnd=*/false, /*IsNullPtr=*/false);
+    }
+    // No LValuePath.
+    CharUnits ByteOffset = Offset * ASTCtx.getTypeSizeInChars(PtrType);
+    return APValue(
+        APValue::LValueBase(DP.Pointee->getDescriptor()->asVarDecl()),
+        ByteOffset, APValue::NoLValuePath());
+  }
+
+  assert(isBlockPointer());
+
   // Build the lvalue base from the block.
   const Descriptor *Desc = getDeclDesc();
   APValue::LValueBase Base;
@@ -354,6 +400,9 @@ void Pointer::print(llvm::raw_ostream &OS) const {
     OS << "(Typeid) { " << (const void *)asTypeidPointer().TypePtr << ", "
        << (const void *)asTypeidPointer().TypeInfoType << " + " << Offset
        << "}";
+    break;
+  case Storage::Degen:
+    OS << "(Degen) { " << DP.Pointee << " + " << Offset << "}";
   }
 }
 
@@ -362,6 +411,10 @@ size_t Pointer::computeOffsetForComparison() const {
     return asIntPointer().Value + Offset;
   if (isTypeidPointer())
     return reinterpret_cast<uintptr_t>(asTypeidPointer().TypePtr) + Offset;
+  if (isDegenPointer()) {
+    uint64_t ByteOffset = (Offset * elemSize());
+    return ByteOffset;
+  }
 
   if (!isBlockPointer())
     return Offset;
@@ -426,6 +479,7 @@ std::string Pointer::toDiagnosticString(const ASTContext &Ctx) const {
   if (isFunctionPointer())
     return asFunctionPointer().toDiagnosticString(Ctx);
 
+  toAPValue(Ctx).dump();
   return toAPValue(Ctx).getAsString(Ctx, getType());
 }
 
@@ -635,10 +689,13 @@ bool Pointer::hasSameBase(const Pointer &A, const Pointer &B) {
   if (A.isTypeidPointer() && B.isTypeidPointer())
     return true;
 
-  if (A.StorageKind != B.StorageKind)
+  if (!A.isBlockPointer() && !A.isDegenPointer() && !B.isBlockPointer() &&
+      !B.isDegenPointer())
     return false;
 
-  return A.asBlockPointer().Pointee == B.asBlockPointer().Pointee;
+  const Block *BlockA = A.isBlockPointer() ? A.BS.Pointee : A.DP.Pointee;
+  const Block *BlockB = B.isBlockPointer() ? B.BS.Pointee : B.DP.Pointee;
+  return BlockA == BlockB;
 }
 
 bool Pointer::pointToSameBlock(const Pointer &A, const Pointer &B) {
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index bbf20801ce923..d39cac6f6d6dd 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -56,7 +56,13 @@ struct TypeidPointer {
   const Type *TypeInfoType;
 };
 
-enum class Storage { Block, Int, Fn, Typeid };
+/// A pointer that points to valid memory, but the offset is degenerate in that
+/// it doesn't point to anything we can read from, e.g. before the object.
+struct DegenPointer {
+  const Block *Pointee;
+};
+
+enum class Storage { Int, Block, Fn, Typeid, Degen };
 
 /// A pointer to a memory block, live or dead.
 ///
@@ -110,6 +116,8 @@ class Pointer {
     Typeid.TypePtr = TypePtr;
     Typeid.TypeInfoType = TypeInfoType;
   }
+  Pointer(DegenPointer DP, uint64_t Offset = 0)
+      : Offset(Offset), StorageKind(Storage::Degen), DP(DP) {}
   Pointer(Block *Pointee, unsigned Base, uint64_t Offset);
   ~Pointer();
 
@@ -145,7 +153,11 @@ class Pointer {
       return Int.Value + (Offset * elemSize());
     if (isFunctionPointer())
       return Fn.getIntegerRepresentation() + Offset;
-    return reinterpret_cast<uint64_t>(BS.Pointee) + Offset;
+    if (isDegenPointer())
+      return reinterpret_cast<uint64_t>(DP.Pointee) + Offset;
+    assert(isBlockPointer());
+    return reinterpret_cast<uint64_t>(BS.Pointee) +
+           (Offset - BS.Pointee->getDescriptor()->getMetadataSize());
   }
 
   /// Converts the pointer to an APValue that is an rvalue.
@@ -252,14 +264,19 @@ class Pointer {
 
   /// Checks if the pointer is null.
   bool isZero() const {
-    if (isBlockPointer())
+    switch (StorageKind) {
+    case Storage::Int:
+      return Int.Value == 0 && Offset == 0;
+    case Storage::Block:
       return BS.Pointee == nullptr;
-    if (isFunctionPointer())
-      return Fn.isZero();
-    if (isTypeidPointer())
+    case Storage::Fn:
+      return asFunctionPointer().isZero();
+    case Storage::Typeid:
       return false;
-    assert(isIntegralPointer());
-    return Int.Value == 0 && Offset == 0;
+    case Storage::Degen:
+      return DP.Pointee == nullptr;
+    }
+    llvm_unreachable("huh²");
   }
   /// Checks if the pointer is live.
   bool isLive() const {
@@ -279,6 +296,11 @@ class Pointer {
   const Descriptor *getDeclDesc() const {
     if (isIntegralPointer())
       return Int.Desc;
+    if (isDegenPointer()) {
+      if (DP.Pointee)
+        return DP.Pointee->Desc;
+      return nullptr;
+    }
     if (isFunctionPointer() || isTypeidPointer())
       return nullptr;
 
@@ -323,6 +345,11 @@ class Pointer {
   const Descriptor *getFieldDesc() const {
     if (isIntegralPointer())
       return Int.Desc;
+    if (isDegenPointer()) {
+      if (DP.Pointee)
+        return DP.Pointee->Desc;
+      return nullptr;
+    }
 
     if (isRoot())
       return getDeclDesc();
@@ -462,10 +489,11 @@ class Pointer {
     return Typeid;
   }
 
-  bool isBlockPointer() const { return StorageKind == Storage::Block; }
   bool isIntegralPointer() const { return StorageKind == Storage::Int; }
+  bool isBlockPointer() const { return StorageKind == Storage::Block; }
   bool isFunctionPointer() const { return StorageKind == Storage::Fn; }
   bool isTypeidPointer() const { return StorageKind == Storage::Typeid; }
+  bool isDegenPointer() const { return StorageKind == Storage::Degen; }
 
   /// Returns the record descriptor of a class.
   const Record *getRecord() const { return getFieldDesc()->ElemRecord; }
@@ -822,6 +850,7 @@ class Pointer {
     BlockPointer BS;
     FunctionPointer Fn;
     TypeidPointer Typeid;
+    DegenPointer DP;
   };
 };
 
diff --git a/clang/test/AST/ByteCode/codegen.c b/clang/test/AST/ByteCode/codegen.c
index 3c6f17e2b8726..edf8acd46c733 100644
--- a/clang/test/AST/ByteCode/codegen.c
+++ b/clang/test/AST/ByteCode/codegen.c
@@ -1,8 +1,12 @@
-// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s                                         | FileCheck %s
 // RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s -fexperimental-new-constant-interpreter | FileCheck %s
 
 typedef __INTPTR_TYPE__ intptr_t;
 
+int x;
+const int *p = &x - 100;
+// CHECK: @p = global ptr getelementptr (i8, ptr @x, i64 -400), align 8
+
 const intptr_t Z1 = (intptr_t)(((char*)-1LL) + 1);
 // CHECK: @Z1 = constant i64 0
 
diff --git a/clang/test/AST/ByteCode/const-eval.c b/clang/test/AST/ByteCode/const-eval.c
index c6b51d16b811e..cb6e3be0980c0 100644
--- a/clang/test/AST/ByteCode/const-eval.c
+++ b/clang/test/AST/ByteCode/const-eval.c
@@ -130,6 +130,14 @@ EVAL_EXPR(47, &x < &x + 1 ? 1 : -1)
 EVAL_EXPR(48, &x != &x - 1 ? 1 : -1)
 EVAL_EXPR(49, &x < &x - 100 ? 1 : -1) // ref-error {{not an integer constant expression}}
 
+
+/// Offset wraps.
+EVAL_EXPR(59, (&x  + ((__UINTPTR_MAX__ - 35)/ 4)) == &x - 9 ? 1 : -1)
+
+EVAL_EXPR(60, &x != &x - 9 ? 1 : -1)
+EVAL_EXPR(61, (&x - 10 + 11) > &x ? 1 : -1)
+EVAL_EXPR(62, (g17 - 10 + 11) > g17 ? 1 : -1)
+
 extern struct Test50S Test50;
 EVAL_EXPR(50, &Test50 < (struct Test50S*)((unsigned long)&Test50 + 10)) // both-error {{not an integer constant expression}}
 
diff --git a/clang/test/CodeGen/const-arithmetic.c b/clang/test/CodeGen/const-arithmetic.c
index 78b9208e3f865..288873d1c0bcc 100644
--- a/clang/test/CodeGen/const-arithmetic.c
+++ b/clang/test/CodeGen/const-arithmetic.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -fexperimental-new-constant-interpreter | FileCheck %s
 
 // CHECK: @g1 ={{.*}} global [2 x ptr] [ptr getelementptr (i8, ptr @g0, i64 -2), ptr getelementptr (i8, ptr @g0, i64 -46)], align 16
 // CHECK: @g2 ={{.*}} global [2 x ptr] [ptr getelementptr (i8, ptr @g0, i64 -2), ptr getelementptr (i8, ptr @g0, i64 -46)], align 16
diff --git a/clang/test/CodeGenCXX/PR19955.cpp b/clang/test/CodeGenCXX/PR19955.cpp
index 808199cd64345..247a4043ada84 100644
--- a/clang/test/CodeGenCXX/PR19955.cpp
+++ b/clang/test/CodeGenCXX/PR19955.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple i686-windows-msvc -fms-extensions -fno-rtti -emit-llvm -std=c++1y -O0 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple i686-windows-msvc -fms-extensions -fno-rtti -emit-llvm -std=c++1y -O0 -o - %s -fexperimental-new-constant-interpreter | FileCheck %s
 // RUN: %clang_cc1 -triple x86_64-windows-msvc -fms-extensions -fno-rtti -emit-llvm -std=c++1y -O0 -o - %s | FileCheck %s --check-prefix X64
 
 extern int __declspec(dllimport) var;

``````````

</details>


https://github.com/llvm/llvm-project/pull/160086


More information about the cfe-commits mailing list