[clang] [clang][bytecode] Always track item types in InterpStack (PR #151088)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 28 22:49:14 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Timm Baeder (tbaederr)
<details>
<summary>Changes</summary>
This has been a long-standing problem, but we didn't use to call the destructors of items on the stack unless we explicitly `pop()` or `discard()` them.
When interpretation was interrupted midway-through (because something failed), we left `Pointer`s on the stack. Since all `Block`s track what `Pointer`s point to them (via a doubly-linked list in the `Pointer`), that meant we potentially leave deallocated pointers in that list. We used to work around this by removing the `Pointer` from the list before deallocating the block.
However, we now want to track pointers to global blocks as well, which poses a problem since the blocks are never deallocated and thus those pointers are always left dangling.
I've tried a few different approaches to fixing this but in the end I just gave up on the idea of never knowing what items are in the stack. We already have an `ItemTypes` vector that we use for debugging assertions. This patch simply enables this vector unconditionally and uses it in the abort case to properly `discard()` all elements from the stack. That's a little sad IMO but I don't know of another way of solving this problem.
As expected, this is a slight hit to compile times: https://llvm-compile-time-tracker.com/compare.php?from=574d0a92060bf4808776b7a0239ffe91a092b15d&to=0317105f559093cfb909bfb01857a6b837991940&stat=instructions:u
---
Full diff: https://github.com/llvm/llvm-project/pull/151088.diff
11 Files Affected:
- (modified) clang/lib/AST/ByteCode/Context.cpp (+4-10)
- (modified) clang/lib/AST/ByteCode/Context.h (+1-1)
- (modified) clang/lib/AST/ByteCode/Descriptor.h (+1-1)
- (modified) clang/lib/AST/ByteCode/Function.h (+1-1)
- (modified) clang/lib/AST/ByteCode/InterpBlock.cpp (-12)
- (modified) clang/lib/AST/ByteCode/InterpBlock.h (+2-2)
- (modified) clang/lib/AST/ByteCode/InterpStack.cpp (+20-37)
- (modified) clang/lib/AST/ByteCode/InterpStack.h (+9-17)
- (modified) clang/lib/AST/ByteCode/InterpState.cpp (+9-6)
- (modified) clang/lib/AST/ByteCode/Pointer.h (+1-1)
- (modified) clang/lib/AST/ByteCode/PrimType.h (+4-3)
``````````diff
diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp
index aaeb52e0fa449..db10f830f4d4c 100644
--- a/clang/lib/AST/ByteCode/Context.cpp
+++ b/clang/lib/AST/ByteCode/Context.cpp
@@ -397,17 +397,11 @@ const llvm::fltSemantics &Context::getFloatSemantics(QualType T) const {
}
bool Context::Run(State &Parent, const Function *Func) {
-
- {
- InterpState State(Parent, *P, Stk, *this, Func);
- if (Interpret(State)) {
- assert(Stk.empty());
- return true;
- }
- // State gets destroyed here, so the Stk.clear() below doesn't accidentally
- // remove values the State's destructor might access.
+ InterpState State(Parent, *P, Stk, *this, Func);
+ if (Interpret(State)) {
+ assert(Stk.empty());
+ return true;
}
-
Stk.clear();
return false;
}
diff --git a/clang/lib/AST/ByteCode/Context.h b/clang/lib/AST/ByteCode/Context.h
index 62ef5297bd19f..d290b994cc32e 100644
--- a/clang/lib/AST/ByteCode/Context.h
+++ b/clang/lib/AST/ByteCode/Context.h
@@ -30,7 +30,7 @@ namespace interp {
class Function;
class Program;
class State;
-enum PrimType : unsigned;
+enum PrimType : uint8_t;
struct ParamOffset {
unsigned Offset;
diff --git a/clang/lib/AST/ByteCode/Descriptor.h b/clang/lib/AST/ByteCode/Descriptor.h
index 4c925f6f0af1e..497789293a15a 100644
--- a/clang/lib/AST/ByteCode/Descriptor.h
+++ b/clang/lib/AST/ByteCode/Descriptor.h
@@ -24,7 +24,7 @@ class Record;
class SourceInfo;
struct InitMap;
struct Descriptor;
-enum PrimType : unsigned;
+enum PrimType : uint8_t;
using DeclTy = llvm::PointerUnion<const Decl *, const Expr *>;
using InitMapPtr = std::optional<std::pair<bool, std::shared_ptr<InitMap>>>;
diff --git a/clang/lib/AST/ByteCode/Function.h b/clang/lib/AST/ByteCode/Function.h
index de88f3ded34dc..18274c2b53644 100644
--- a/clang/lib/AST/ByteCode/Function.h
+++ b/clang/lib/AST/ByteCode/Function.h
@@ -28,7 +28,7 @@ namespace interp {
class Program;
class ByteCodeEmitter;
class Pointer;
-enum PrimType : uint32_t;
+enum PrimType : uint8_t;
/// Describes a scope block.
///
diff --git a/clang/lib/AST/ByteCode/InterpBlock.cpp b/clang/lib/AST/ByteCode/InterpBlock.cpp
index f60307870ffcc..ef1203b6a1b10 100644
--- a/clang/lib/AST/ByteCode/InterpBlock.cpp
+++ b/clang/lib/AST/ByteCode/InterpBlock.cpp
@@ -18,10 +18,6 @@ using namespace clang::interp;
void Block::addPointer(Pointer *P) {
assert(P);
- if (IsStatic) {
- assert(!Pointers);
- return;
- }
#ifndef NDEBUG
assert(!hasPointer(P));
@@ -39,10 +35,6 @@ void Block::addPointer(Pointer *P) {
void Block::removePointer(Pointer *P) {
assert(P->isBlockPointer());
assert(P);
- if (IsStatic) {
- assert(!Pointers);
- return;
- }
#ifndef NDEBUG
assert(hasPointer(P));
@@ -70,10 +62,6 @@ void Block::replacePointer(Pointer *Old, Pointer *New) {
assert(Old);
assert(New);
assert(Old != New);
- if (IsStatic) {
- assert(!Pointers);
- return;
- }
#ifndef NDEBUG
assert(hasPointer(Old));
#endif
diff --git a/clang/lib/AST/ByteCode/InterpBlock.h b/clang/lib/AST/ByteCode/InterpBlock.h
index 51622238e275c..e1765cd564cb8 100644
--- a/clang/lib/AST/ByteCode/InterpBlock.h
+++ b/clang/lib/AST/ByteCode/InterpBlock.h
@@ -22,7 +22,7 @@ class Block;
class DeadBlock;
class InterpState;
class Pointer;
-enum PrimType : unsigned;
+enum PrimType : uint8_t;
/// A memory block, either on the stack or in the heap.
///
@@ -126,7 +126,7 @@ class Block final {
void dump() const { dump(llvm::errs()); }
void dump(llvm::raw_ostream &OS) const;
-private:
+public:
friend class Pointer;
friend class DeadBlock;
friend class InterpState;
diff --git a/clang/lib/AST/ByteCode/InterpStack.cpp b/clang/lib/AST/ByteCode/InterpStack.cpp
index 6b748d62b83bd..7920378f365f9 100644
--- a/clang/lib/AST/ByteCode/InterpStack.cpp
+++ b/clang/lib/AST/ByteCode/InterpStack.cpp
@@ -26,33 +26,33 @@ InterpStack::~InterpStack() {
std::free(Chunk);
Chunk = nullptr;
StackSize = 0;
-#ifndef NDEBUG
ItemTypes.clear();
-#endif
}
// We keep the last chunk around to reuse.
void InterpStack::clear() {
- if (!Chunk)
- return;
-
- if (Chunk->Next)
- std::free(Chunk->Next);
-
- assert(Chunk);
- StackSize = 0;
-#ifndef NDEBUG
- ItemTypes.clear();
-#endif
+ for (PrimType Item : llvm::reverse(ItemTypes)) {
+ TYPE_SWITCH(Item, { this->discard<T>(); });
+ }
+ assert(ItemTypes.empty());
+ assert(empty());
}
void InterpStack::clearTo(size_t NewSize) {
- assert(NewSize <= size());
- size_t ToShrink = size() - NewSize;
- if (ToShrink == 0)
+ if (NewSize == 0)
+ return clear();
+ if (NewSize == size())
return;
- shrink(ToShrink);
+ assert(NewSize <= size());
+ for (PrimType Item : llvm::reverse(ItemTypes)) {
+ TYPE_SWITCH(Item, { this->discard<T>(); });
+
+ if (size() == NewSize)
+ break;
+ }
+
+ // Note: discard() above already removed the types from ItemTypes.
assert(size() == NewSize);
}
@@ -105,25 +105,9 @@ void InterpStack::shrink(size_t Size) {
Chunk->End -= Size;
StackSize -= Size;
-
-#ifndef NDEBUG
- size_t TypesSize = 0;
- for (PrimType T : ItemTypes)
- TYPE_SWITCH(T, { TypesSize += aligned_size<T>(); });
-
- size_t StackSize = size();
- while (TypesSize > StackSize) {
- TYPE_SWITCH(ItemTypes.back(), {
- TypesSize -= aligned_size<T>();
- ItemTypes.pop_back();
- });
- }
- assert(TypesSize == StackSize);
-#endif
}
void InterpStack::dump() const {
-#ifndef NDEBUG
llvm::errs() << "Items: " << ItemTypes.size() << ". Size: " << size() << '\n';
if (ItemTypes.empty())
return;
@@ -133,11 +117,11 @@ void InterpStack::dump() const {
// The type of the item on the top of the stack is inserted to the back
// of the vector, so the iteration has to happen backwards.
- for (auto TyIt = ItemTypes.rbegin(); TyIt != ItemTypes.rend(); ++TyIt) {
- Offset += align(primSize(*TyIt));
+ for (PrimType Item : llvm::reverse(ItemTypes)) {
+ Offset += align(primSize(Item));
llvm::errs() << Index << '/' << Offset << ": ";
- TYPE_SWITCH(*TyIt, {
+ TYPE_SWITCH(Item, {
const T &V = peek<T>(Offset);
llvm::errs() << V;
});
@@ -145,5 +129,4 @@ void InterpStack::dump() const {
++Index;
}
-#endif
}
diff --git a/clang/lib/AST/ByteCode/InterpStack.h b/clang/lib/AST/ByteCode/InterpStack.h
index 0b76f1d650580..b0f9f6e225682 100644
--- a/clang/lib/AST/ByteCode/InterpStack.h
+++ b/clang/lib/AST/ByteCode/InterpStack.h
@@ -14,12 +14,9 @@
#define LLVM_CLANG_AST_INTERP_INTERPSTACK_H
#include "FixedPoint.h"
-#include "FunctionPointer.h"
#include "IntegralAP.h"
#include "MemberPointer.h"
#include "PrimType.h"
-#include <memory>
-#include <vector>
namespace clang {
namespace interp {
@@ -35,18 +32,14 @@ class InterpStack final {
/// Constructs a value in place on the top of the stack.
template <typename T, typename... Tys> void push(Tys &&...Args) {
new (grow(aligned_size<T>())) T(std::forward<Tys>(Args)...);
-#ifndef NDEBUG
ItemTypes.push_back(toPrimType<T>());
-#endif
}
/// Returns the value from the top of the stack and removes it.
template <typename T> T pop() {
-#ifndef NDEBUG
assert(!ItemTypes.empty());
assert(ItemTypes.back() == toPrimType<T>());
ItemTypes.pop_back();
-#endif
T *Ptr = &peekInternal<T>();
T Value = std::move(*Ptr);
shrink(aligned_size<T>());
@@ -55,22 +48,20 @@ class InterpStack final {
/// Discards the top value from the stack.
template <typename T> void discard() {
-#ifndef NDEBUG
assert(!ItemTypes.empty());
assert(ItemTypes.back() == toPrimType<T>());
ItemTypes.pop_back();
-#endif
T *Ptr = &peekInternal<T>();
- Ptr->~T();
+ if constexpr (!std::is_trivially_destructible_v<T>) {
+ Ptr->~T();
+ }
shrink(aligned_size<T>());
}
/// Returns a reference to the value on the top of the stack.
template <typename T> T &peek() const {
-#ifndef NDEBUG
assert(!ItemTypes.empty());
assert(ItemTypes.back() == toPrimType<T>());
-#endif
return peekInternal<T>();
}
@@ -85,7 +76,7 @@ class InterpStack final {
/// Returns the size of the stack in bytes.
size_t size() const { return StackSize; }
- /// Clears the stack without calling any destructors.
+ /// Clears the stack.
void clear();
void clearTo(size_t NewSize);
@@ -148,9 +139,11 @@ class InterpStack final {
/// Total size of the stack.
size_t StackSize = 0;
-#ifndef NDEBUG
- /// vector recording the type of data we pushed into the stack.
- std::vector<PrimType> ItemTypes;
+ /// SmallVector recording the type of data we pushed into the stack.
+ /// We don't usually need this during normal code interpretation but
+ /// when aborting, we need type information to call the destructors
+ /// for what's left on the stack.
+ llvm::SmallVector<PrimType> ItemTypes;
template <typename T> static constexpr PrimType toPrimType() {
if constexpr (std::is_same_v<T, Pointer>)
@@ -194,7 +187,6 @@ class InterpStack final {
llvm_unreachable("unknown type push()'ed into InterpStack");
}
-#endif
};
} // namespace interp
diff --git a/clang/lib/AST/ByteCode/InterpState.cpp b/clang/lib/AST/ByteCode/InterpState.cpp
index 7848f298cfec8..76dbc8dc654bb 100644
--- a/clang/lib/AST/ByteCode/InterpState.cpp
+++ b/clang/lib/AST/ByteCode/InterpState.cpp
@@ -43,6 +43,12 @@ InterpState::~InterpState() {
while (DeadBlocks) {
DeadBlock *Next = DeadBlocks->Next;
+
+ // There might be a pointer in a global structure pointing to the dead
+ // block.
+ for (Pointer *P = DeadBlocks->B.Pointers; P; P = P->Next)
+ DeadBlocks->B.removePointer(P);
+
std::free(DeadBlocks);
DeadBlocks = Next;
}
@@ -51,12 +57,6 @@ InterpState::~InterpState() {
void InterpState::cleanup() {
// As a last resort, make sure all pointers still pointing to a dead block
// don't point to it anymore.
- for (DeadBlock *DB = DeadBlocks; DB; DB = DB->Next) {
- for (Pointer *P = DB->B.Pointers; P; P = P->Next) {
- P->PointeeStorage.BS.Pointee = nullptr;
- }
- }
-
Alloc.cleanup();
}
@@ -77,6 +77,9 @@ void InterpState::deallocate(Block *B) {
const Descriptor *Desc = B->getDescriptor();
assert(Desc);
+ assert(!B->isStatic());
+ assert(!B->IsDead);
+
if (B->hasPointers()) {
size_t Size = B->getSize();
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index d17eba5da9ca6..cd052eb978bc1 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -29,7 +29,7 @@ class DeadBlock;
class Pointer;
class Context;
template <unsigned A, bool B> class Integral;
-enum PrimType : unsigned;
+enum PrimType : uint8_t;
class Pointer;
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Pointer &P);
diff --git a/clang/lib/AST/ByteCode/PrimType.h b/clang/lib/AST/ByteCode/PrimType.h
index 38c29b9f82672..d2ad7716e5e9c 100644
--- a/clang/lib/AST/ByteCode/PrimType.h
+++ b/clang/lib/AST/ByteCode/PrimType.h
@@ -32,7 +32,7 @@ template <bool Signed> class IntegralAP;
template <unsigned Bits, bool Signed> class Integral;
/// Enumeration of the primitive types of the VM.
-enum PrimType : unsigned {
+enum PrimType : uint8_t {
PT_Sint8 = 0,
PT_Uint8 = 1,
PT_Sint16 = 2,
@@ -52,14 +52,15 @@ enum PrimType : unsigned {
// Like std::optional<PrimType>, but only sizeof(PrimType).
class OptPrimType final {
- unsigned V = ~0u;
+ static constexpr uint8_t None = 255;
+ uint8_t V = None;
public:
OptPrimType() = default;
OptPrimType(std::nullopt_t) {}
OptPrimType(PrimType T) : V(static_cast<unsigned>(T)) {}
- explicit constexpr operator bool() const { return V != ~0u; }
+ explicit constexpr operator bool() const { return V != None; }
PrimType operator*() const {
assert(operator bool());
return static_cast<PrimType>(V);
``````````
</details>
https://github.com/llvm/llvm-project/pull/151088
More information about the cfe-commits
mailing list