[clang] [clang][bytecode] Add degenerate pointers (PR #160086)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 22 05:36:52 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Timm Baeder (tbaederr)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/160086.diff
7 Files Affected:
- (modified) clang/lib/AST/ByteCode/Interp.h (+32-17)
- (modified) clang/lib/AST/ByteCode/Pointer.cpp (+75-18)
- (modified) clang/lib/AST/ByteCode/Pointer.h (+38-9)
- (modified) clang/test/AST/ByteCode/codegen.c (+5-1)
- (modified) clang/test/AST/ByteCode/const-eval.c (+8)
- (modified) clang/test/CodeGen/const-arithmetic.c (+1)
- (modified) clang/test/CodeGenCXX/PR19955.cpp (+1-1)
``````````diff
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index b3b4b998439cc..00e56da265b1f 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -2234,7 +2234,7 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
}
// Arrays of unknown bounds cannot have pointers into them.
- if (!CheckArray(S, OpPC, Ptr))
+ if (S.getLangOpts().CPlusPlus && !CheckArray(S, OpPC, Ptr))
return std::nullopt;
// This is much simpler for integral pointers, so handle them first.
@@ -2245,7 +2245,8 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
return Pointer(V + O, Ptr.asIntPointer().Desc);
else
return Pointer(V - O, Ptr.asIntPointer().Desc);
- } else if (Ptr.isFunctionPointer()) {
+ }
+ if (Ptr.isFunctionPointer()) {
uint64_t O = static_cast<uint64_t>(Offset);
uint64_t N;
if constexpr (Op == ArithOp::Add)
@@ -2258,8 +2259,17 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
<< N << /*non-array*/ true << 0;
return Pointer(Ptr.asFunctionPointer().getFunction(), N);
}
+ if (Ptr.isUnknownSizeArray()) {
+ // Everything on unknown size arrays is invalid anyway. Compute it here
+ // manually and return a degenerate pointer.
+ assert(!Ptr.isZero());
+ if constexpr (Op == ArithOp::Add)
+ return Pointer(DegenPointer{Ptr.block()}, static_cast<uint64_t>(Offset));
+ else
+ return Pointer(DegenPointer{Ptr.block()}, static_cast<uint64_t>(-Offset));
+ }
- assert(Ptr.isBlockPointer());
+ assert(Ptr.isBlockPointer() || Ptr.isDegenPointer());
uint64_t MaxIndex = static_cast<uint64_t>(Ptr.getNumElems());
uint64_t Index;
@@ -2305,10 +2315,6 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
}
}
- if (Invalid && S.getLangOpts().CPlusPlus)
- return std::nullopt;
-
- // Offset is valid - compute it on unsigned.
int64_t WideIndex = static_cast<int64_t>(Index);
int64_t WideOffset = static_cast<int64_t>(Offset);
int64_t Result;
@@ -2317,6 +2323,12 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
else
Result = WideIndex - WideOffset;
+ if (Invalid) {
+ if (S.getLangOpts().CPlusPlus)
+ return std::nullopt;
+
+ return Pointer(DegenPointer{Ptr.block()}, Result);
+ }
// When the pointer is one-past-end, going back to index 0 is the only
// useful thing we can do. Any other index has been diagnosed before and
// we don't get here.
@@ -2326,6 +2338,9 @@ std::optional<Pointer> OffsetHelper(InterpState &S, CodePtr OpPC,
return Pointer(Ptr.asBlockPointer().Pointee, Ptr.asBlockPointer().Base);
}
+ if (Ptr.isDegenPointer())
+ return Pointer(DegenPointer{Ptr.block()}, Result);
+
return Ptr.atIndex(static_cast<uint64_t>(Result));
}
@@ -2438,16 +2453,16 @@ inline bool SubPtr(InterpState &S, CodePtr OpPC) {
}
}
- int64_t A64 =
- LHS.isBlockPointer()
- ? (LHS.isElementPastEnd() ? LHS.getNumElems() : LHS.getIndex())
- : LHS.getIntegerRepresentation();
-
- int64_t B64 =
- RHS.isBlockPointer()
- ? (RHS.isElementPastEnd() ? RHS.getNumElems() : RHS.getIndex())
- : RHS.getIntegerRepresentation();
+ int64_t A64;
+ int64_t B64;
+ if (LHS.isBlockPointer() && RHS.isBlockPointer()) {
+ A64 = (LHS.isElementPastEnd() ? LHS.getNumElems() : LHS.getIndex());
+ B64 = (RHS.isElementPastEnd() ? RHS.getNumElems() : RHS.getIndex());
+ } else {
+ A64 = LHS.getIntegerRepresentation();
+ B64 = RHS.getIntegerRepresentation();
+ }
int64_t R64 = A64 - B64;
if (static_cast<int64_t>(T::from(R64)) != R64)
return handleOverflow(S, OpPC, R64);
@@ -3196,7 +3211,7 @@ inline bool CopyArray(InterpState &S, CodePtr OpPC, uint32_t SrcIndex,
inline bool ArrayDecay(InterpState &S, CodePtr OpPC) {
const Pointer &Ptr = S.Stk.pop<Pointer>();
- if (Ptr.isZero()) {
+ if (Ptr.isZero() || Ptr.isDegenPointer()) {
S.Stk.push<Pointer>(Ptr);
return true;
}
diff --git a/clang/lib/AST/ByteCode/Pointer.cpp b/clang/lib/AST/ByteCode/Pointer.cpp
index 81d4ce14f9310..3c041465ccb81 100644
--- a/clang/lib/AST/ByteCode/Pointer.cpp
+++ b/clang/lib/AST/ByteCode/Pointer.cpp
@@ -57,6 +57,9 @@ Pointer::Pointer(const Pointer &P)
case Storage::Typeid:
Typeid = P.Typeid;
break;
+ case Storage::Degen:
+ DP = P.DP;
+ break;
}
}
@@ -76,6 +79,9 @@ Pointer::Pointer(Pointer &&P) : Offset(P.Offset), StorageKind(P.StorageKind) {
case Storage::Typeid:
Typeid = P.Typeid;
break;
+ case Storage::Degen:
+ DP = P.DP;
+ break;
}
}
@@ -110,20 +116,26 @@ Pointer &Pointer::operator=(const Pointer &P) {
StorageKind = P.StorageKind;
Offset = P.Offset;
- if (P.isBlockPointer()) {
+ switch (StorageKind) {
+ case Storage::Int:
+ Int = P.Int;
+ break;
+ case Storage::Block:
BS = P.BS;
-
if (BS.Pointee)
BS.Pointee->addPointer(this);
- } else if (P.isIntegralPointer()) {
- Int = P.Int;
- } else if (P.isFunctionPointer()) {
+ break;
+ case Storage::Fn:
Fn = P.Fn;
- } else if (P.isTypeidPointer()) {
+ break;
+ case Storage::Typeid:
Typeid = P.Typeid;
- } else {
- assert(false && "Unhandled storage kind");
+ break;
+ case Storage::Degen:
+ DP = P.DP;
+ break;
}
+
return *this;
}
@@ -147,23 +159,37 @@ Pointer &Pointer::operator=(Pointer &&P) {
StorageKind = P.StorageKind;
Offset = P.Offset;
- if (P.isBlockPointer()) {
+ switch (StorageKind) {
+ case Storage::Int:
+ Int = P.Int;
+ break;
+ case Storage::Block:
BS = P.BS;
-
if (BS.Pointee)
BS.Pointee->addPointer(this);
- } else if (P.isIntegralPointer()) {
- Int = P.Int;
- } else if (P.isFunctionPointer()) {
+ break;
+ case Storage::Fn:
Fn = P.Fn;
- } else if (P.isTypeidPointer()) {
+ break;
+ case Storage::Typeid:
Typeid = P.Typeid;
- } else {
- assert(false && "Unhandled storage kind");
+ break;
+ case Storage::Degen:
+ DP = P.DP;
+ break;
}
+
return *this;
}
+static QualType getPointeeOrElemType(QualType T) {
+ if (const ArrayType *AT = T->getAsArrayTypeUnsafe())
+ return AT->getElementType();
+ if (T->isPointerOrReferenceType())
+ return T->getPointeeType();
+ return T;
+}
+
APValue Pointer::toAPValue(const ASTContext &ASTCtx) const {
llvm::SmallVector<APValue::LValuePathEntry, 5> Path;
@@ -193,6 +219,26 @@ APValue Pointer::toAPValue(const ASTContext &ASTCtx) const {
/*OnePastTheEnd=*/false, /*IsNull=*/false);
}
+ if (isDegenPointer()) {
+ assert(!isZero());
+ QualType PtrType = getType();
+ if (PtrType->isArrayType() || PtrType->isPointerOrReferenceType()) {
+ QualType ElemType = getPointeeOrElemType(PtrType);
+ CharUnits ByteOffset = Offset * ASTCtx.getTypeSizeInChars(ElemType);
+ Path.push_back(
+ APValue::LValuePathEntry::ArrayIndex(ByteOffset.getQuantity()));
+ return APValue(DP.Pointee->getDescriptor()->asVarDecl(), ByteOffset, Path,
+ /*IsOnePastEnd=*/false, /*IsNullPtr=*/false);
+ }
+ // No LValuePath.
+ CharUnits ByteOffset = Offset * ASTCtx.getTypeSizeInChars(PtrType);
+ return APValue(
+ APValue::LValueBase(DP.Pointee->getDescriptor()->asVarDecl()),
+ ByteOffset, APValue::NoLValuePath());
+ }
+
+ assert(isBlockPointer());
+
// Build the lvalue base from the block.
const Descriptor *Desc = getDeclDesc();
APValue::LValueBase Base;
@@ -354,6 +400,9 @@ void Pointer::print(llvm::raw_ostream &OS) const {
OS << "(Typeid) { " << (const void *)asTypeidPointer().TypePtr << ", "
<< (const void *)asTypeidPointer().TypeInfoType << " + " << Offset
<< "}";
+ break;
+ case Storage::Degen:
+ OS << "(Degen) { " << DP.Pointee << " + " << Offset << "}";
}
}
@@ -362,6 +411,10 @@ size_t Pointer::computeOffsetForComparison() const {
return asIntPointer().Value + Offset;
if (isTypeidPointer())
return reinterpret_cast<uintptr_t>(asTypeidPointer().TypePtr) + Offset;
+ if (isDegenPointer()) {
+ uint64_t ByteOffset = (Offset * elemSize());
+ return ByteOffset;
+ }
if (!isBlockPointer())
return Offset;
@@ -426,6 +479,7 @@ std::string Pointer::toDiagnosticString(const ASTContext &Ctx) const {
if (isFunctionPointer())
return asFunctionPointer().toDiagnosticString(Ctx);
+ toAPValue(Ctx).dump();
return toAPValue(Ctx).getAsString(Ctx, getType());
}
@@ -635,10 +689,13 @@ bool Pointer::hasSameBase(const Pointer &A, const Pointer &B) {
if (A.isTypeidPointer() && B.isTypeidPointer())
return true;
- if (A.StorageKind != B.StorageKind)
+ if (!A.isBlockPointer() && !A.isDegenPointer() && !B.isBlockPointer() &&
+ !B.isDegenPointer())
return false;
- return A.asBlockPointer().Pointee == B.asBlockPointer().Pointee;
+ const Block *BlockA = A.isBlockPointer() ? A.BS.Pointee : A.DP.Pointee;
+ const Block *BlockB = B.isBlockPointer() ? B.BS.Pointee : B.DP.Pointee;
+ return BlockA == BlockB;
}
bool Pointer::pointToSameBlock(const Pointer &A, const Pointer &B) {
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index bbf20801ce923..d39cac6f6d6dd 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -56,7 +56,13 @@ struct TypeidPointer {
const Type *TypeInfoType;
};
-enum class Storage { Block, Int, Fn, Typeid };
+/// A pointer that points to valid memory, but the offset is degenerate in that
+/// it doesn't point to anything we can read from, e.g. before the object.
+struct DegenPointer {
+ const Block *Pointee;
+};
+
+enum class Storage { Int, Block, Fn, Typeid, Degen };
/// A pointer to a memory block, live or dead.
///
@@ -110,6 +116,8 @@ class Pointer {
Typeid.TypePtr = TypePtr;
Typeid.TypeInfoType = TypeInfoType;
}
+ Pointer(DegenPointer DP, uint64_t Offset = 0)
+ : Offset(Offset), StorageKind(Storage::Degen), DP(DP) {}
Pointer(Block *Pointee, unsigned Base, uint64_t Offset);
~Pointer();
@@ -145,7 +153,11 @@ class Pointer {
return Int.Value + (Offset * elemSize());
if (isFunctionPointer())
return Fn.getIntegerRepresentation() + Offset;
- return reinterpret_cast<uint64_t>(BS.Pointee) + Offset;
+ if (isDegenPointer())
+ return reinterpret_cast<uint64_t>(DP.Pointee) + Offset;
+ assert(isBlockPointer());
+ return reinterpret_cast<uint64_t>(BS.Pointee) +
+ (Offset - BS.Pointee->getDescriptor()->getMetadataSize());
}
/// Converts the pointer to an APValue that is an rvalue.
@@ -252,14 +264,19 @@ class Pointer {
/// Checks if the pointer is null.
bool isZero() const {
- if (isBlockPointer())
+ switch (StorageKind) {
+ case Storage::Int:
+ return Int.Value == 0 && Offset == 0;
+ case Storage::Block:
return BS.Pointee == nullptr;
- if (isFunctionPointer())
- return Fn.isZero();
- if (isTypeidPointer())
+ case Storage::Fn:
+ return asFunctionPointer().isZero();
+ case Storage::Typeid:
return false;
- assert(isIntegralPointer());
- return Int.Value == 0 && Offset == 0;
+ case Storage::Degen:
+ return DP.Pointee == nullptr;
+ }
+ llvm_unreachable("huh²");
}
/// Checks if the pointer is live.
bool isLive() const {
@@ -279,6 +296,11 @@ class Pointer {
const Descriptor *getDeclDesc() const {
if (isIntegralPointer())
return Int.Desc;
+ if (isDegenPointer()) {
+ if (DP.Pointee)
+ return DP.Pointee->Desc;
+ return nullptr;
+ }
if (isFunctionPointer() || isTypeidPointer())
return nullptr;
@@ -323,6 +345,11 @@ class Pointer {
const Descriptor *getFieldDesc() const {
if (isIntegralPointer())
return Int.Desc;
+ if (isDegenPointer()) {
+ if (DP.Pointee)
+ return DP.Pointee->Desc;
+ return nullptr;
+ }
if (isRoot())
return getDeclDesc();
@@ -462,10 +489,11 @@ class Pointer {
return Typeid;
}
- bool isBlockPointer() const { return StorageKind == Storage::Block; }
bool isIntegralPointer() const { return StorageKind == Storage::Int; }
+ bool isBlockPointer() const { return StorageKind == Storage::Block; }
bool isFunctionPointer() const { return StorageKind == Storage::Fn; }
bool isTypeidPointer() const { return StorageKind == Storage::Typeid; }
+ bool isDegenPointer() const { return StorageKind == Storage::Degen; }
/// Returns the record descriptor of a class.
const Record *getRecord() const { return getFieldDesc()->ElemRecord; }
@@ -822,6 +850,7 @@ class Pointer {
BlockPointer BS;
FunctionPointer Fn;
TypeidPointer Typeid;
+ DegenPointer DP;
};
};
diff --git a/clang/test/AST/ByteCode/codegen.c b/clang/test/AST/ByteCode/codegen.c
index 3c6f17e2b8726..edf8acd46c733 100644
--- a/clang/test/AST/ByteCode/codegen.c
+++ b/clang/test/AST/ByteCode/codegen.c
@@ -1,8 +1,12 @@
-// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s -fexperimental-new-constant-interpreter | FileCheck %s
typedef __INTPTR_TYPE__ intptr_t;
+int x;
+const int *p = &x - 100;
+// CHECK: @p = global ptr getelementptr (i8, ptr @x, i64 -400), align 8
+
const intptr_t Z1 = (intptr_t)(((char*)-1LL) + 1);
// CHECK: @Z1 = constant i64 0
diff --git a/clang/test/AST/ByteCode/const-eval.c b/clang/test/AST/ByteCode/const-eval.c
index c6b51d16b811e..cb6e3be0980c0 100644
--- a/clang/test/AST/ByteCode/const-eval.c
+++ b/clang/test/AST/ByteCode/const-eval.c
@@ -130,6 +130,14 @@ EVAL_EXPR(47, &x < &x + 1 ? 1 : -1)
EVAL_EXPR(48, &x != &x - 1 ? 1 : -1)
EVAL_EXPR(49, &x < &x - 100 ? 1 : -1) // ref-error {{not an integer constant expression}}
+
+/// Offset wraps.
+EVAL_EXPR(59, (&x + ((__UINTPTR_MAX__ - 35)/ 4)) == &x - 9 ? 1 : -1)
+
+EVAL_EXPR(60, &x != &x - 9 ? 1 : -1)
+EVAL_EXPR(61, (&x - 10 + 11) > &x ? 1 : -1)
+EVAL_EXPR(62, (g17 - 10 + 11) > g17 ? 1 : -1)
+
extern struct Test50S Test50;
EVAL_EXPR(50, &Test50 < (struct Test50S*)((unsigned long)&Test50 + 10)) // both-error {{not an integer constant expression}}
diff --git a/clang/test/CodeGen/const-arithmetic.c b/clang/test/CodeGen/const-arithmetic.c
index 78b9208e3f865..288873d1c0bcc 100644
--- a/clang/test/CodeGen/const-arithmetic.c
+++ b/clang/test/CodeGen/const-arithmetic.c
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -fexperimental-new-constant-interpreter | FileCheck %s
// CHECK: @g1 ={{.*}} global [2 x ptr] [ptr getelementptr (i8, ptr @g0, i64 -2), ptr getelementptr (i8, ptr @g0, i64 -46)], align 16
// CHECK: @g2 ={{.*}} global [2 x ptr] [ptr getelementptr (i8, ptr @g0, i64 -2), ptr getelementptr (i8, ptr @g0, i64 -46)], align 16
diff --git a/clang/test/CodeGenCXX/PR19955.cpp b/clang/test/CodeGenCXX/PR19955.cpp
index 808199cd64345..247a4043ada84 100644
--- a/clang/test/CodeGenCXX/PR19955.cpp
+++ b/clang/test/CodeGenCXX/PR19955.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple i686-windows-msvc -fms-extensions -fno-rtti -emit-llvm -std=c++1y -O0 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple i686-windows-msvc -fms-extensions -fno-rtti -emit-llvm -std=c++1y -O0 -o - %s -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-windows-msvc -fms-extensions -fno-rtti -emit-llvm -std=c++1y -O0 -o - %s | FileCheck %s --check-prefix X64
extern int __declspec(dllimport) var;
``````````
</details>
https://github.com/llvm/llvm-project/pull/160086
More information about the cfe-commits
mailing list