[clang] [clang][bytecode] Improve __builtin_{,dynamic_}object_size implementation (PR #153601)
Timm Baeder via cfe-commits
cfe-commits at lists.llvm.org
Sat Aug 16 06:07:58 PDT 2025
https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/153601
>From 9a50e6a74a1f1111e253462e3afd12794da5f919 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Thu, 14 Aug 2025 17:13:56 +0200
Subject: [PATCH] bos
---
clang/lib/AST/ByteCode/Compiler.cpp | 52 ++++++--
clang/lib/AST/ByteCode/Compiler.h | 2 +
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 111 +++++++++++++++---
clang/lib/AST/ByteCode/Program.cpp | 4 +-
.../ByteCode/builtin-object-size-codegen.cpp | 53 +++++++++
clang/test/Sema/builtin-object-size.c | 4 +
clang/test/SemaCXX/new-delete.cpp | 14 +--
7 files changed, 199 insertions(+), 41 deletions(-)
create mode 100644 clang/test/AST/ByteCode/builtin-object-size-codegen.cpp
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 8e651cf060620..d0e1a1ca0f7de 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -60,16 +60,18 @@ template <class Emitter> class OptionScope final {
public:
/// Root constructor, compiling or discarding primitives.
+ /// Remembers the DiscardResult, Initializing and ToLValue flags currently
+ /// set on \p Ctx so the destructor can restore them, then installs the new
+ /// values for the lifetime of this scope.
OptionScope(Compiler<Emitter> *Ctx, bool NewDiscardResult,
- bool NewInitializing)
+ bool NewInitializing, bool NewToLValue)
: Ctx(Ctx), OldDiscardResult(Ctx->DiscardResult),
- OldInitializing(Ctx->Initializing) {
+ OldInitializing(Ctx->Initializing), OldToLValue(Ctx->ToLValue) {
Ctx->DiscardResult = NewDiscardResult;
Ctx->Initializing = NewInitializing;
+ Ctx->ToLValue = NewToLValue;
}
~OptionScope() {
Ctx->DiscardResult = OldDiscardResult;
Ctx->Initializing = OldInitializing;
+ Ctx->ToLValue = OldToLValue;
}
private:
@@ -78,6 +80,7 @@ template <class Emitter> class OptionScope final {
/// Old discard flag to restore.
bool OldDiscardResult;
bool OldInitializing;
+ bool OldToLValue;
};
template <class Emitter>
@@ -222,6 +225,9 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
switch (CE->getCastKind()) {
case CK_LValueToRValue: {
+ if (ToLValue && CE->getType()->isPointerType())
+ return this->delegate(SubExpr);
+
if (SubExpr->getType().isVolatileQualified())
return this->emitInvalidCast(CastKind::Volatile, /*Fatal=*/true, CE);
@@ -4140,13 +4146,13 @@ bool Compiler<Emitter>::VisitStmtExpr(const StmtExpr *E) {
template <class Emitter> bool Compiler<Emitter>::discard(const Expr *E) {
OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/true,
- /*NewInitializing=*/false);
+ /*NewInitializing=*/false, /*ToLValue=*/false);
return this->Visit(E);
}
template <class Emitter> bool Compiler<Emitter>::delegate(const Expr *E) {
// We're basically doing:
- // OptionScope<Emitter> Scope(this, DicardResult, Initializing);
+ // OptionScope<Emitter> Scope(this, DiscardResult, Initializing, ToLValue);
// but that's unnecessary of course.
return this->Visit(E);
}
@@ -4174,7 +4180,7 @@ template <class Emitter> bool Compiler<Emitter>::visit(const Expr *E) {
// Otherwise,we have a primitive return value, produce the value directly
// and push it on the stack.
OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/false,
- /*NewInitializing=*/false);
+ /*NewInitializing=*/false, /*ToLValue=*/ToLValue);
return this->Visit(E);
}
@@ -4183,7 +4189,13 @@ bool Compiler<Emitter>::visitInitializer(const Expr *E) {
assert(!canClassify(E->getType()));
OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/false,
- /*NewInitializing=*/true);
+ /*NewInitializing=*/true, /*ToLValue=*/false);
+ return this->Visit(E);
+}
+
+template <class Emitter> bool Compiler<Emitter>::visitAsLValue(const Expr *E) {
+ OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/false,
+ /*NewInitializing=*/false, /*ToLValue=*/true);
return this->Visit(E);
}
@@ -4944,7 +4956,6 @@ bool Compiler<Emitter>::visitAPValueInitializer(const APValue &Val,
template <class Emitter>
bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinID) {
-
if (BuiltinID == Builtin::BI__builtin_constant_p) {
// Void argument is always invalid and harder to handle later.
if (E->getArg(0)->getType()->isVoidType()) {
@@ -4989,12 +5000,31 @@ bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E,
return false;
}
- if (!Context::isUnevaluatedBuiltin(BuiltinID)) {
- // Put arguments on the stack.
- for (const auto *Arg : E->arguments()) {
- if (!this->visit(Arg))
+ // Prepare function arguments including special cases.
+ switch (BuiltinID) {
+ case Builtin::BI__builtin_object_size:
+ case Builtin::BI__builtin_dynamic_object_size: {
+ assert(E->getNumArgs() == 2);
+ if (E->getArg(0)->isGLValue()) {
+ if (!this->visit(E->getArg(0)))
+ return false;
+
+ } else {
+ if (!this->visitAsLValue(E->getArg(0)))
return false;
}
+ if (!this->visit(E->getArg(1)))
+ return false;
+
+ } break;
+ default:
+ if (!Context::isUnevaluatedBuiltin(BuiltinID)) {
+ // Put arguments on the stack.
+ for (const auto *Arg : E->arguments()) {
+ if (!this->visit(Arg))
+ return false;
+ }
+ }
}
if (!this->emitCallBI(E, BuiltinID, E))
diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h
index 901934e530ade..20571df0432f9 100644
--- a/clang/lib/AST/ByteCode/Compiler.h
+++ b/clang/lib/AST/ByteCode/Compiler.h
@@ -282,6 +282,7 @@ class Compiler : public ConstStmtVisitor<Compiler<Emitter>, bool>,
/// been created. visitInitializer() then relies on a pointer to this
/// variable being on top of the stack.
bool visitInitializer(const Expr *E);
+ bool visitAsLValue(const Expr *E);
/// Evaluates an expression for side effects and discards the result.
bool discard(const Expr *E);
/// Just pass evaluation on to \p E. This leaves all the parsing flags
@@ -426,6 +427,7 @@ class Compiler : public ConstStmtVisitor<Compiler<Emitter>, bool>,
bool DiscardResult = false;
bool InStmtExpr = false;
+ bool ToLValue = false;
/// Flag inidicating if we're initializing an already created
/// variable. This is set in visitInitializer().
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b602b9731a6e3..a9dfcccdff39d 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2170,8 +2170,8 @@ static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC,
return true;
}
-static unsigned computeFullDescSize(const ASTContext &ASTCtx,
- const Descriptor *Desc) {
+static std::optional<unsigned> computeFullDescSize(const ASTContext &ASTCtx,
+ const Descriptor *Desc) {
if (Desc->isPrimitive())
return ASTCtx.getTypeSizeInChars(Desc->getType()).getQuantity();
@@ -2183,8 +2183,7 @@ static unsigned computeFullDescSize(const ASTContext &ASTCtx,
if (Desc->isRecord())
return ASTCtx.getTypeSizeInChars(Desc->getType()).getQuantity();
- llvm_unreachable("Unhandled descriptor type");
- return 0;
+ return std::nullopt;
}
static unsigned computePointerOffset(const ASTContext &ASTCtx,
@@ -2192,7 +2191,7 @@ static unsigned computePointerOffset(const ASTContext &ASTCtx,
unsigned Result = 0;
Pointer P = Ptr;
- while (P.isArrayElement() || P.isField()) {
+ while (P.isField() || P.isArrayElement()) {
P = P.expand();
const Descriptor *D = P.getFieldDesc();
@@ -2205,7 +2204,6 @@ static unsigned computePointerOffset(const ASTContext &ASTCtx,
Result += ElemSize * P.getIndex();
P = P.expand().getArray();
} else if (P.isBaseClass()) {
-
const auto *RD = cast<CXXRecordDecl>(D->asDecl());
bool IsVirtual = Ptr.isVirtualBaseClass();
P = P.getBase();
@@ -2234,30 +2232,113 @@ static unsigned computePointerOffset(const ASTContext &ASTCtx,
return Result;
}
+/// Does Ptr point to the last subobject?
+static bool pointsToLastObject(const Pointer &Ptr) {
+ Pointer P = Ptr;
+ while (!P.isRoot()) {
+
+ if (P.isArrayElement()) {
+ P = P.expand().getArray();
+ continue;
+ }
+ if (P.isBaseClass()) {
+ P = P.getBase();
+ continue;
+ }
+
+ Pointer Base = P.getBase();
+ if (const Record *R = Base.getRecord()) {
+ assert(P.getField());
+ if (P.getField()->getFieldIndex() != R->getNumFields() - 1)
+ return false;
+ }
+ P = Base;
+ }
+
+ return true;
+}
+
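+/// Is Ptr a dummy pointer (Ptr.isDummy()) that points to the last subobject
+/// AND to an array that counts as a flexible array member under the current
+/// StrictFlexArraysLevel language option?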
+static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const Pointer &Ptr) {
+ auto isFlexibleArrayMember = [&](const Descriptor *FieldDesc) {
+ using FAMKind = LangOptions::StrictFlexArraysLevelKind;
+ FAMKind StrictFlexArraysLevel =
+ Ctx.getLangOpts().getStrictFlexArraysLevel();
+
+ if (StrictFlexArraysLevel == FAMKind::Default)
+ return true;
+
+ unsigned NumElems = FieldDesc->getNumElems();
+ if (NumElems == 0 && StrictFlexArraysLevel != FAMKind::IncompleteOnly)
+ return true;
+
+ if (NumElems == 1 && StrictFlexArraysLevel == FAMKind::OneZeroOrIncomplete)
+ return true;
+ return false;
+ };
+
+ const Descriptor *FieldDesc = Ptr.getFieldDesc();
+ if (!FieldDesc->isArray())
+ return false;
+
+ return Ptr.isDummy() && pointsToLastObject(Ptr) &&
+ isFlexibleArrayMember(FieldDesc);
+}
+
static bool interp__builtin_object_size(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call) {
PrimType KindT = *S.getContext().classify(Call->getArg(1));
- [[maybe_unused]] unsigned Kind = popToAPSInt(S.Stk, KindT).getZExtValue();
+ // From the GCC docs:
+ // Kind is an integer constant from 0 to 3. If the least significant bit is
+ // clear, objects are whole variables. If it is set, a closest surrounding
+ // subobject is considered the object a pointer points to. The second bit
+ // determines if maximum or minimum of remaining bytes is computed.
+ unsigned Kind = popToAPSInt(S.Stk, KindT).getZExtValue();
assert(Kind <= 3 && "unexpected kind");
-
+ bool UseFieldDesc = (Kind & 1u);
+ bool ReportMinimum = (Kind & 2u);
const Pointer &Ptr = S.Stk.pop<Pointer>();
+ const ASTContext &ASTCtx = S.getASTContext();
- if (Ptr.isZero())
+ if (Ptr.isZero() || !Ptr.isBlockPointer())
return false;
- const Descriptor *DeclDesc = Ptr.getDeclDesc();
- if (!DeclDesc)
+ // We can't load through pointers.
+ if (Ptr.isDummy() && Ptr.getType()->isPointerType())
return false;
- const ASTContext &ASTCtx = S.getASTContext();
+ bool DetermineForCompleteObject = Ptr.getFieldDesc() == Ptr.getDeclDesc();
- unsigned ByteOffset = computePointerOffset(ASTCtx, Ptr);
- unsigned FullSize = computeFullDescSize(ASTCtx, DeclDesc);
+ if (ReportMinimum) {
+ if (!DetermineForCompleteObject && !UseFieldDesc && Ptr.isDummy())
+ return false;
+ } else {
+ if (!DetermineForCompleteObject &&
+ isUserWritingOffTheEnd(ASTCtx, Ptr.expand()))
+ return false;
+ }
- pushInteger(S, FullSize - ByteOffset, Call->getType());
+ const Descriptor *Desc =
+ UseFieldDesc ? Ptr.getFieldDesc() : Ptr.getDeclDesc();
+ assert(Desc);
+ std::optional<unsigned> FullSize = computeFullDescSize(ASTCtx, Desc);
+ if (!FullSize)
+ return false;
+
+ unsigned ByteOffset;
+ if (UseFieldDesc && !Ptr.isBaseClass())
+ ByteOffset = computePointerOffset(ASTCtx, Ptr) -
+ computePointerOffset(ASTCtx, Ptr.expand().atIndex(0).narrow());
+ else
+ ByteOffset = computePointerOffset(ASTCtx, Ptr);
+
+ assert(ByteOffset <= *FullSize);
+ unsigned Result = *FullSize - ByteOffset;
+
+ pushInteger(S, Result, Call->getType());
return true;
}
diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp
index 2843b325fe025..749ae2510612c 100644
--- a/clang/lib/AST/ByteCode/Program.cpp
+++ b/clang/lib/AST/ByteCode/Program.cpp
@@ -164,8 +164,8 @@ unsigned Program::getOrCreateDummy(const DeclTy &D) {
const auto *VD = cast<ValueDecl>(cast<const Decl *>(D));
IsWeak = VD->isWeak();
QT = VD->getType();
- if (const auto *RT = QT->getAs<ReferenceType>())
- QT = RT->getPointeeType();
+ if (QT->isPointerOrReferenceType())
+ QT = QT->getPointeeType();
}
assert(!QT.isNull());
diff --git a/clang/test/AST/ByteCode/builtin-object-size-codegen.cpp b/clang/test/AST/ByteCode/builtin-object-size-codegen.cpp
new file mode 100644
index 0000000000000..4e0e087245d72
--- /dev/null
+++ b/clang/test/AST/ByteCode/builtin-object-size-codegen.cpp
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -triple x86_64-apple-darwin -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - %s | FileCheck %s
+
+void foo() {
+ struct A { char buf[16]; };
+ struct B : A {};
+ struct C { int i; B bs[1]; } *c;
+
+ int gi;
+ // CHECK: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+ gi = __builtin_object_size(&c->bs[0], 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+ gi = __builtin_object_size(&c->bs[0], 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false)
+ gi = __builtin_object_size(&c->bs[0], 2);
+ // CHECK: store i32 16
+ gi = __builtin_object_size(&c->bs[0], 3);
+}
+
+
+void foo2() {
+ struct A { int a; };
+ struct B { int b; };
+ struct C: public A, public B {};
+
+ C c;
+
+ int gi;
+ // CHECK: store i32 8
+ gi = __builtin_object_size(&c, 0);
+ // CHECK: store i32 8
+ gi = __builtin_object_size((A*)&c, 0);
+ // CHECK: store i32 4
+ gi = __builtin_object_size((B*)&c, 0);
+
+ // CHECK: store i32 8
+ gi = __builtin_object_size((char*)&c, 0);
+ // CHECK: store i32 8
+ gi = __builtin_object_size((char*)(A*)&c, 0);
+ // CHECK: store i32 4
+ gi = __builtin_object_size((char*)(B*)&c, 0);
+}
+
+
+typedef struct {
+ double c[0];
+ float f;
+} foofoo0_t;
+
+unsigned babar0(foofoo0_t *f) {
+ // CHECK: ret i32 0
+ return __builtin_object_size(f->c, 1);
+}
diff --git a/clang/test/Sema/builtin-object-size.c b/clang/test/Sema/builtin-object-size.c
index 20d4e2ab6da79..a763c24fd6620 100644
--- a/clang/test/Sema/builtin-object-size.c
+++ b/clang/test/Sema/builtin-object-size.c
@@ -2,6 +2,10 @@
// RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-darwin9 -verify %s
// RUN: %clang_cc1 -DDYNAMIC -fsyntax-only -triple x86_64-apple-darwin9 -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-darwin9 -verify %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -DDYNAMIC -fsyntax-only -triple x86_64-apple-darwin9 -verify %s -fexperimental-new-constant-interpreter
+
#ifndef DYNAMIC
#define OBJECT_SIZE_BUILTIN __builtin_object_size
#else
diff --git a/clang/test/SemaCXX/new-delete.cpp b/clang/test/SemaCXX/new-delete.cpp
index c05130bb30729..1adb993f46c8f 100644
--- a/clang/test/SemaCXX/new-delete.cpp
+++ b/clang/test/SemaCXX/new-delete.cpp
@@ -721,19 +721,7 @@ int (*const_fold)[12] = new int[3][&const_fold + 12 - &const_fold];
#if __cplusplus >= 201402L && !defined(NEW_INTERP)
// expected-error at -2 {{array size is not a constant expression}}
// expected-note at -3 {{cannot refer to element 12 of non-array}}
-#elif __cplusplus < 201103L && !defined(NEW_INTERP)
+#elif __cplusplus < 201103L
// expected-error at -5 {{cannot allocate object of variably modified type}}
// expected-warning at -6 {{variable length arrays in C++ are a Clang extension}}
#endif
-#ifdef NEW_INTERP
-#if __cplusplus >= 201402L
-// expected-error at -10 {{array size is not a constant expression}}
-// expected-note at -11 {{cannot refer to element 12 of non-array}}
-#elif __cplusplus >= 201103L
-// expected-error at -13 {{only the first dimension of an allocated array may have dynamic size}}
-// expected-note at -14 {{cannot refer to element 12 of non-array}}
-#else
-// expected-error at -16 {{only the first dimension of an allocated array may have dynamic size}}
-// expected-note at -17 {{cannot refer to element 12 of non-array}}
-#endif
-#endif
More information about the cfe-commits
mailing list