[clang] [WIP][Clang] Add __builtin_get_counted_by builtin (PR #102549)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 8 16:17:22 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Bill Wendling (bwendling)
<details>
<summary>Changes</summary>
The __builtin_get_counted_by builtin takes a pointer to a flexible array
member and returns a pointer to the field named by the COUNT argument of
that member's "counted_by" attribute, which is a field in the same
non-anonymous struct as the flexible array member. This is useful for
automatically setting the count field without needing the programmer's
intervention. Otherwise it's possible to get this anti-pattern, where the
array is accessed before the count field has been set:
ptr = alloc(<ty>, COUNT);
ptr->FAM[9] = 37; /* <<< Sanitizer will complain */
ptr->count = COUNT;
---
Full diff: https://github.com/llvm/llvm-project/pull/102549.diff
7 Files Affected:
- (modified) clang/include/clang/Basic/Builtins.td (+6)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+60)
- (modified) clang/lib/CodeGen/CGExpr.cpp (+17-12)
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+4)
- (modified) clang/lib/Sema/SemaExpr.cpp (+87-2)
- (added) clang/test/CodeGen/builtin-get-counted-by.c (+83)
- (added) clang/test/Sema/builtin-get-counted-by.c (+22)
``````````diff
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index b025a7681bfac..254cd157d5f9d 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4774,3 +4774,9 @@ def ArithmeticFence : LangBuiltin<"ALL_LANGUAGES"> {
let Attributes = [CustomTypeChecking, Constexpr];
let Prototype = "void(...)";
}
+
+def GetCountedBy : Builtin {
+ let Spellings = ["__builtin_get_counted_by"];
+ let Attributes = [NoThrow];
+ let Prototype = "size_t*(void*)";
+}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d1af7fde157b6..58fc0dfe45e1b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -27,6 +27,7 @@
#include "clang/AST/Decl.h"
#include "clang/AST/OSLog.h"
#include "clang/AST/OperationKinds.h"
+#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
@@ -2536,6 +2537,45 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
}
+namespace {
+
+/// MemberExprVisitor - Find the MemberExpr through all of the casts, array
+/// subscripts, and unary ops. This intentionally avoids all of them because
+/// we're interested only in the MemberExpr to check if it's a flexible array
+/// member.
+class MemberExprVisitor
+ : public ConstStmtVisitor<MemberExprVisitor, const Expr *> {
+public:
+ //===--------------------------------------------------------------------===//
+ // Visitor Methods
+ //===--------------------------------------------------------------------===//
+
+ const Expr *Visit(const Expr *E) {
+ return ConstStmtVisitor<MemberExprVisitor, const Expr *>::Visit(E);
+ }
+ const Expr *VisitStmt(const Stmt *S) { return nullptr; }
+
+ const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
+
+ const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+ return Visit(E->getBase());
+ }
+ const Expr *VisitCastExpr(const CastExpr *E) {
+ return Visit(E->getSubExpr());
+ }
+ const Expr *VisitParenExpr(const ParenExpr *E) {
+ return Visit(E->getSubExpr());
+ }
+ const Expr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
+ return Visit(E->getSubExpr());
+ }
+ const Expr *VisitUnaryDeref(const clang::UnaryOperator *E) {
+ return Visit(E->getSubExpr());
+ }
+};
+
+} // anonymous namespace
+
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
@@ -3563,6 +3603,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
/*EmittedE=*/nullptr, IsDynamic));
}
+ case Builtin::BI__builtin_get_counted_by: {
+ llvm::Value *Result = llvm::ConstantPointerNull::get(
+ cast<llvm::PointerType>(ConvertType(E->getType())));
+
+ if (const Expr *Ptr = MemberExprVisitor().Visit(E->getArg(0))) {
+ const MemberExpr *ME = cast<MemberExpr>(Ptr);
+ bool IsFlexibleArrayMember = ME->isFlexibleArrayMemberLike(
+ getContext(), getLangOpts().getStrictFlexArraysLevel(),
+ /*IgnoreTemplateOrMacroSubstitution=*/false);
+
+ if (!ME->HasSideEffects(getContext()) && IsFlexibleArrayMember &&
+ ME->getMemberDecl()->getType()->isCountAttributedType()) {
+ const FieldDecl *FAMDecl = dyn_cast<FieldDecl>(ME->getMemberDecl());
+ if (const FieldDecl *CountFD = FindCountedByField(FAMDecl))
+ Result = GetCountedByFieldExprGEP(ME, FAMDecl, CountFD);
+ }
+ }
+
+ return RValue::get(Result);
+ }
case Builtin::BI__builtin_prefetch: {
Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
// FIXME: Technically these constants should of type 'int', yes?
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index a1dce741c78a1..55cd95c08e3ff 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1100,15 +1100,7 @@ static bool getGEPIndicesToField(CodeGenFunction &CGF, const RecordDecl *RD,
return false;
}
-/// This method is typically called in contexts where we can't generate
-/// side-effects, like in __builtin_dynamic_object_size. When finding
-/// expressions, only choose those that have either already been emitted or can
-/// be loaded without side-effects.
-///
-/// - \p FAMDecl: the \p Decl for the flexible array member. It may not be
-/// within the top-level struct.
-/// - \p CountDecl: must be within the same non-anonymous struct as \p FAMDecl.
-llvm::Value *CodeGenFunction::EmitLoadOfCountedByField(
+llvm::Value *CodeGenFunction::GetCountedByFieldExprGEP(
const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl) {
const RecordDecl *RD = CountDecl->getParent()->getOuterLexicalRecordContext();
@@ -1141,12 +1133,25 @@ llvm::Value *CodeGenFunction::EmitLoadOfCountedByField(
return nullptr;
Indices.push_back(Builder.getInt32(0));
- Res = Builder.CreateInBoundsGEP(
+ return Builder.CreateInBoundsGEP(
ConvertType(QualType(RD->getTypeForDecl(), 0)), Res,
RecIndicesTy(llvm::reverse(Indices)), "..counted_by.gep");
+}
- return Builder.CreateAlignedLoad(ConvertType(CountDecl->getType()), Res,
- getIntAlign(), "..counted_by.load");
+/// This method is typically called in contexts where we can't generate
+/// side-effects, like in __builtin_dynamic_object_size. When finding
+/// expressions, only choose those that have either already been emitted or can
+/// be loaded without side-effects.
+///
+/// - \p FAMDecl: the \p Decl for the flexible array member. It may not be
+/// within the top-level struct.
+/// - \p CountDecl: must be within the same non-anonymous struct as \p FAMDecl.
+llvm::Value *CodeGenFunction::EmitLoadOfCountedByField(
+ const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl) {
+ if (llvm::Value *GEP = GetCountedByFieldExprGEP(Base, FAMDecl, CountDecl))
+ return Builder.CreateAlignedLoad(ConvertType(CountDecl->getType()), GEP,
+ getIntAlign(), "..counted_by.load");
+ return nullptr;
}
const FieldDecl *CodeGenFunction::FindCountedByField(const FieldDecl *FD) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 1c0a0e117e560..e5f5b94bba54b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3309,6 +3309,10 @@ class CodeGenFunction : public CodeGenTypeCache {
/// \p nullptr if either the attribute or the field doesn't exist.
const FieldDecl *FindCountedByField(const FieldDecl *FD);
+ llvm::Value *GetCountedByFieldExprGEP(const Expr *Base,
+ const FieldDecl *FAMDecl,
+ const FieldDecl *CountDecl);
+
/// Build an expression accessing the "counted_by" field.
llvm::Value *EmitLoadOfCountedByField(const Expr *Base,
const FieldDecl *FAMDecl,
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 8defc8e1c185c..33bc71d621ddd 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -28,6 +28,7 @@
#include "clang/AST/OperationKinds.h"
#include "clang/AST/ParentMapContext.h"
#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/StmtVisitor.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/Builtins.h"
@@ -6390,9 +6391,65 @@ ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
currentEvaluationContext().ReferenceToConsteval.erase(DRE);
}
}
+
return Call;
}
+const FieldDecl *FindCountedByField(const FieldDecl *FD) {
+ if (!FD)
+ return nullptr;
+
+ const auto *CAT = FD->getType()->getAs<CountAttributedType>();
+ if (!CAT)
+ return nullptr;
+
+ const auto *CountDRE = cast<DeclRefExpr>(CAT->getCountExpr());
+ const auto *CountDecl = CountDRE->getDecl();
+ if (const auto *IFD = dyn_cast<IndirectFieldDecl>(CountDecl))
+ CountDecl = IFD->getAnonField();
+
+ return dyn_cast<FieldDecl>(CountDecl);
+}
+
+namespace {
+
+/// MemberExprVisitor - Find the MemberExpr through all of the casts, array
+/// subscripts, and unary ops. This intentionally avoids all of them because
+/// we're interested only in the MemberExpr to check if it's a flexible array
+/// member.
+class MemberExprVisitor
+ : public ConstStmtVisitor<MemberExprVisitor, const Expr *> {
+public:
+ //===--------------------------------------------------------------------===//
+ // Visitor Methods
+ //===--------------------------------------------------------------------===//
+
+ const Expr *Visit(const Expr *E) {
+ return ConstStmtVisitor<MemberExprVisitor, const Expr *>::Visit(E);
+ }
+ const Expr *VisitStmt(const Stmt *S) { return nullptr; }
+
+ const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
+
+ const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+ return Visit(E->getBase());
+ }
+ const Expr *VisitCastExpr(const CastExpr *E) {
+ return Visit(E->getSubExpr());
+ }
+ const Expr *VisitParenExpr(const ParenExpr *E) {
+ return Visit(E->getSubExpr());
+ }
+ const Expr *VisitUnaryAddrOf(const UnaryOperator *E) {
+ return Visit(E->getSubExpr());
+ }
+ const Expr *VisitUnaryDeref(const UnaryOperator *E) {
+ return Visit(E->getSubExpr());
+ }
+};
+
+} // anonymous namespace
+
ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
MultiExprArg ArgExprs, SourceLocation RParenLoc,
Expr *ExecConfig, bool IsExecConfig,
@@ -6590,8 +6647,36 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
return CallExpr::Create(Context, Fn, ArgExprs, Context.DependentTy,
VK_PRValue, RParenLoc, CurFPFeatureOverrides());
}
- return BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc,
- ExecConfig, IsExecConfig);
+
+ Result = BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc,
+ ExecConfig, IsExecConfig);
+
+ if (FunctionDecl *FDecl = dyn_cast_or_null<FunctionDecl>(NDecl);
+ FDecl && FDecl->getBuiltinID() == Builtin::BI__builtin_get_counted_by) {
+ if (const Expr *Ptr = MemberExprVisitor().Visit(ArgExprs[0])) {
+ const MemberExpr *ME = cast<MemberExpr>(Ptr);
+ bool IsFlexibleArrayMember = ME->isFlexibleArrayMemberLike(
+ Context, getLangOpts().getStrictFlexArraysLevel(),
+ /*IgnoreTemplateOrMacroSubstitution=*/false);
+
+ if (!ME->HasSideEffects(Context) && IsFlexibleArrayMember &&
+ ME->getMemberDecl()->getType()->isCountAttributedType()) {
+ const FieldDecl *FAMDecl = dyn_cast<FieldDecl>(ME->getMemberDecl());
+ if (const FieldDecl *CountFD = FindCountedByField(FAMDecl)) {
+ // The builtin returns a 'size_t *', however 'size_t' might not be
+ // the type of the count field. Thus we create an explicit c-style
+ // cast to ensure the proper types going forward.
+ QualType PtrTy = Context.getPointerType(CountFD->getType());
+ Result = CStyleCastExpr::Create(
+ Context, PtrTy, VK_LValue, CK_BitCast, Result.get(), nullptr,
+ FPOptionsOverride(), Context.CreateTypeSourceInfo(PtrTy),
+ LParenLoc, RParenLoc);
+ }
+ }
+ }
+ }
+
+ return Result;
}
Expr *Sema::BuildBuiltinCallExpr(SourceLocation Loc, Builtin::ID Id,
diff --git a/clang/test/CodeGen/builtin-get-counted-by.c b/clang/test/CodeGen/builtin-get-counted-by.c
new file mode 100644
index 0000000000000..8209db6a77111
--- /dev/null
+++ b/clang/test/CodeGen/builtin-get-counted-by.c
@@ -0,0 +1,83 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=X86_64
+// RUN: %clang_cc1 -triple i386-unknown-unknown -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=I386
+
+struct s {
+ char x;
+ short count;
+ int array[] __attribute__((counted_by(count)));
+};
+
+// X86_64-LABEL: define dso_local noalias noundef ptr @test1(
+// X86_64-SAME: i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// X86_64-NEXT: [[ENTRY:.*:]]
+// X86_64-NEXT: [[CONV:%.*]] = sext i32 [[SIZE]] to i64
+// X86_64-NEXT: [[MUL:%.*]] = shl nsw i64 [[CONV]], 2
+// X86_64-NEXT: [[ADD:%.*]] = add nsw i64 [[MUL]], 4
+// X86_64-NEXT: [[CALL:%.*]] = tail call ptr @malloc(i64 noundef [[ADD]]) #[[ATTR3:[0-9]+]]
+// X86_64-NEXT: [[CONV1:%.*]] = trunc i32 [[SIZE]] to i16
+// X86_64-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i64 2
+// X86_64-NEXT: store i16 [[CONV1]], ptr [[DOT_COUNTED_BY_GEP]], align 2, !tbaa [[TBAA2:![0-9]+]]
+// X86_64-NEXT: ret ptr [[CALL]]
+//
+// I386-LABEL: define dso_local noalias noundef ptr @test1(
+// I386-SAME: i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// I386-NEXT: [[ENTRY:.*:]]
+// I386-NEXT: [[MUL:%.*]] = shl i32 [[SIZE]], 2
+// I386-NEXT: [[ADD:%.*]] = add i32 [[MUL]], 4
+// I386-NEXT: [[CALL:%.*]] = tail call ptr @malloc(i32 noundef [[ADD]]) #[[ATTR3:[0-9]+]]
+// I386-NEXT: [[CONV:%.*]] = trunc i32 [[SIZE]] to i16
+// I386-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i32 2
+// I386-NEXT: store i16 [[CONV]], ptr [[DOT_COUNTED_BY_GEP]], align 2, !tbaa [[TBAA3:![0-9]+]]
+// I386-NEXT: ret ptr [[CALL]]
+//
+struct s *test1(int size) {
+ struct s *p = __builtin_malloc(sizeof(struct s) + sizeof(int) * size);
+
+ *__builtin_get_counted_by(p->array) = size;
+ *__builtin_get_counted_by(&p->array[0]) = size;
+ return p;
+}
+
+struct z {
+ char x;
+ short count;
+ int array[];
+};
+
+// X86_64-LABEL: define dso_local noalias noundef ptr @test2(
+// X86_64-SAME: i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// X86_64-NEXT: [[ENTRY:.*:]]
+// X86_64-NEXT: [[CONV:%.*]] = sext i32 [[SIZE]] to i64
+// X86_64-NEXT: [[MUL:%.*]] = shl nsw i64 [[CONV]], 2
+// X86_64-NEXT: [[ADD:%.*]] = add nsw i64 [[MUL]], 4
+// X86_64-NEXT: [[CALL:%.*]] = tail call ptr @malloc(i64 noundef [[ADD]]) #[[ATTR3]]
+// X86_64-NEXT: ret ptr [[CALL]]
+//
+// I386-LABEL: define dso_local noalias noundef ptr @test2(
+// I386-SAME: i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// I386-NEXT: [[ENTRY:.*:]]
+// I386-NEXT: [[MUL:%.*]] = shl i32 [[SIZE]], 2
+// I386-NEXT: [[ADD:%.*]] = add i32 [[MUL]], 4
+// I386-NEXT: [[CALL:%.*]] = tail call ptr @malloc(i32 noundef [[ADD]]) #[[ATTR3]]
+// I386-NEXT: ret ptr [[CALL]]
+//
+struct z *test2(int size) {
+ struct z *p = __builtin_malloc(sizeof(struct z) + sizeof(int) * size);
+
+ if (__builtin_get_counted_by(&p->array[0]))
+ *__builtin_get_counted_by(&p->array[0]) = size;
+
+ return p;
+}
+//.
+// X86_64: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// X86_64: [[META3]] = !{!"short", [[META4:![0-9]+]], i64 0}
+// X86_64: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// X86_64: [[META5]] = !{!"Simple C/C++ TBAA"}
+//.
+// I386: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// I386: [[META4]] = !{!"short", [[META5:![0-9]+]], i64 0}
+// I386: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// I386: [[META6]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/Sema/builtin-get-counted-by.c b/clang/test/Sema/builtin-get-counted-by.c
new file mode 100644
index 0000000000000..18cef35b0509a
--- /dev/null
+++ b/clang/test/Sema/builtin-get-counted-by.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+struct fam_struct {
+ char x;
+ short count;
+ int array[] __attribute__((counted_by(count)));
+} *p;
+
+struct non_fam_struct {
+ char x;
+ short count;
+ int array[];
+} *q;
+
+void foo(int size) {
+ *__builtin_get_counted_by(p->array) = size;
+
+ if (__builtin_get_counted_by(q->array))
+ *__builtin_get_counted_by(q->array) = size;
+
+ *__builtin_get_counted_by(p->count) = size; // expected-error{{incompatible integer to pointer conversion passing 'short' to parameter of type 'void *'}}
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/102549
More information about the cfe-commits mailing list