[clang] [WIP][Clang] Add __builtin_get_counted_by builtin (PR #102549)

via cfe-commits cfe-commits at lists.llvm.org
Thu Aug 8 16:17:22 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Bill Wendling (bwendling)

<details>
<summary>Changes</summary>

The __builtin_get_counted_by builtin takes a pointer to a flexible array
member and returns a pointer to the "counted_by" attribute's COUNT
argument, which is a field in the same non-anonymous struct as the
flexible array member. This is useful for automatically setting the
count field without needing the programmer's intervention. Otherwise
it's possible to get this anti-pattern, where the flexible array member
is accessed before the count field has been set:

  ptr = alloc(<ty>, COUNT);
  ptr->FAM[9] = 37; /* <<< Sanitizer will complain */
  ptr->count = COUNT;

---
Full diff: https://github.com/llvm/llvm-project/pull/102549.diff


7 Files Affected:

- (modified) clang/include/clang/Basic/Builtins.td (+6) 
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+60) 
- (modified) clang/lib/CodeGen/CGExpr.cpp (+17-12) 
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+4) 
- (modified) clang/lib/Sema/SemaExpr.cpp (+87-2) 
- (added) clang/test/CodeGen/builtin-get-counted-by.c (+83) 
- (added) clang/test/Sema/builtin-get-counted-by.c (+22) 


``````````diff
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index b025a7681bfac..254cd157d5f9d 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4774,3 +4774,9 @@ def ArithmeticFence : LangBuiltin<"ALL_LANGUAGES"> {
   let Attributes = [CustomTypeChecking, Constexpr];
   let Prototype = "void(...)";
 }
+
+def GetCountedBy : Builtin {
+  let Spellings = ["__builtin_get_counted_by"];
+  let Attributes = [NoThrow];
+  let Prototype = "size_t*(void*)";
+}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d1af7fde157b6..58fc0dfe45e1b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -27,6 +27,7 @@
 #include "clang/AST/Decl.h"
 #include "clang/AST/OSLog.h"
 #include "clang/AST/OperationKinds.h"
+#include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
@@ -2536,6 +2537,45 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
   return RValue::get(CGF->Builder.CreateCall(UBF, Args));
 }
 
+namespace {
+
+/// MemberExprVisitor - Find the MemberExpr through all of the casts, array
+/// subscripts, and unary ops. This intentionally avoids all of them because
+/// we're interested only in the MemberExpr to check if it's a flexible array
+/// member.
+class MemberExprVisitor
+    : public ConstStmtVisitor<MemberExprVisitor, const Expr *> {
+public:
+  //===--------------------------------------------------------------------===//
+  //                            Visitor Methods
+  //===--------------------------------------------------------------------===//
+
+  const Expr *Visit(const Expr *E) {
+    return ConstStmtVisitor<MemberExprVisitor, const Expr *>::Visit(E);
+  }
+  const Expr *VisitStmt(const Stmt *S) { return nullptr; }
+
+  const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
+
+  const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+    return Visit(E->getBase());
+  }
+  const Expr *VisitCastExpr(const CastExpr *E) {
+    return Visit(E->getSubExpr());
+  }
+  const Expr *VisitParenExpr(const ParenExpr *E) {
+    return Visit(E->getSubExpr());
+  }
+  const Expr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
+    return Visit(E->getSubExpr());
+  }
+  const Expr *VisitUnaryDeref(const clang::UnaryOperator *E) {
+    return Visit(E->getSubExpr());
+  }
+};
+
+} // anonymous namespace
+
 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                         const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -3563,6 +3603,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
                                              /*EmittedE=*/nullptr, IsDynamic));
   }
+  case Builtin::BI__builtin_get_counted_by: {
+    llvm::Value *Result = llvm::ConstantPointerNull::get(
+        cast<llvm::PointerType>(ConvertType(E->getType())));
+
+    if (const Expr *Ptr = MemberExprVisitor().Visit(E->getArg(0))) {
+      const MemberExpr *ME = cast<MemberExpr>(Ptr);
+      bool IsFlexibleArrayMember = ME->isFlexibleArrayMemberLike(
+              getContext(), getLangOpts().getStrictFlexArraysLevel(),
+              /*IgnoreTemplateOrMacroSubstitution=*/false);
+
+      if (!ME->HasSideEffects(getContext()) && IsFlexibleArrayMember &&
+          ME->getMemberDecl()->getType()->isCountAttributedType()) {
+        const FieldDecl *FAMDecl = dyn_cast<FieldDecl>(ME->getMemberDecl());
+        if (const FieldDecl *CountFD = FindCountedByField(FAMDecl))
+          Result = GetCountedByFieldExprGEP(ME, FAMDecl, CountFD);
+      }
+    }
+
+    return RValue::get(Result);
+  }
   case Builtin::BI__builtin_prefetch: {
     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
     // FIXME: Technically these constants should of type 'int', yes?
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index a1dce741c78a1..55cd95c08e3ff 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1100,15 +1100,7 @@ static bool getGEPIndicesToField(CodeGenFunction &CGF, const RecordDecl *RD,
   return false;
 }
 
-/// This method is typically called in contexts where we can't generate
-/// side-effects, like in __builtin_dynamic_object_size. When finding
-/// expressions, only choose those that have either already been emitted or can
-/// be loaded without side-effects.
-///
-/// - \p FAMDecl: the \p Decl for the flexible array member. It may not be
-///   within the top-level struct.
-/// - \p CountDecl: must be within the same non-anonymous struct as \p FAMDecl.
-llvm::Value *CodeGenFunction::EmitLoadOfCountedByField(
+llvm::Value *CodeGenFunction::GetCountedByFieldExprGEP(
     const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl) {
   const RecordDecl *RD = CountDecl->getParent()->getOuterLexicalRecordContext();
 
@@ -1141,12 +1133,25 @@ llvm::Value *CodeGenFunction::EmitLoadOfCountedByField(
     return nullptr;
 
   Indices.push_back(Builder.getInt32(0));
-  Res = Builder.CreateInBoundsGEP(
+  return Builder.CreateInBoundsGEP(
       ConvertType(QualType(RD->getTypeForDecl(), 0)), Res,
       RecIndicesTy(llvm::reverse(Indices)), "..counted_by.gep");
+}
 
-  return Builder.CreateAlignedLoad(ConvertType(CountDecl->getType()), Res,
-                                   getIntAlign(), "..counted_by.load");
+/// This method is typically called in contexts where we can't generate
+/// side-effects, like in __builtin_dynamic_object_size. When finding
+/// expressions, only choose those that have either already been emitted or can
+/// be loaded without side-effects.
+///
+/// - \p FAMDecl: the \p Decl for the flexible array member. It may not be
+///   within the top-level struct.
+/// - \p CountDecl: must be within the same non-anonymous struct as \p FAMDecl.
+llvm::Value *CodeGenFunction::EmitLoadOfCountedByField(
+    const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl) {
+  if (llvm::Value *GEP = GetCountedByFieldExprGEP(Base, FAMDecl, CountDecl))
+    return Builder.CreateAlignedLoad(ConvertType(CountDecl->getType()), GEP,
+                                     getIntAlign(), "..counted_by.load");
+  return nullptr;
 }
 
 const FieldDecl *CodeGenFunction::FindCountedByField(const FieldDecl *FD) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 1c0a0e117e560..e5f5b94bba54b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3309,6 +3309,10 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// \p nullptr if either the attribute or the field doesn't exist.
   const FieldDecl *FindCountedByField(const FieldDecl *FD);
 
+  llvm::Value *GetCountedByFieldExprGEP(const Expr *Base,
+                                        const FieldDecl *FAMDecl,
+                                        const FieldDecl *CountDecl);
+
   /// Build an expression accessing the "counted_by" field.
   llvm::Value *EmitLoadOfCountedByField(const Expr *Base,
                                         const FieldDecl *FAMDecl,
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 8defc8e1c185c..33bc71d621ddd 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -28,6 +28,7 @@
 #include "clang/AST/OperationKinds.h"
 #include "clang/AST/ParentMapContext.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/StmtVisitor.h"
 #include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/Basic/Builtins.h"
@@ -6390,9 +6391,65 @@ ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
       currentEvaluationContext().ReferenceToConsteval.erase(DRE);
     }
   }
+
   return Call;
 }
 
+const FieldDecl *FindCountedByField(const FieldDecl *FD) {
+  if (!FD)
+    return nullptr;
+
+  const auto *CAT = FD->getType()->getAs<CountAttributedType>();
+  if (!CAT)
+    return nullptr;
+
+  const auto *CountDRE = cast<DeclRefExpr>(CAT->getCountExpr());
+  const auto *CountDecl = CountDRE->getDecl();
+  if (const auto *IFD = dyn_cast<IndirectFieldDecl>(CountDecl))
+    CountDecl = IFD->getAnonField();
+
+  return dyn_cast<FieldDecl>(CountDecl);
+}
+
+namespace {
+
+/// MemberExprVisitor - Find the MemberExpr through all of the casts, array
+/// subscripts, and unary ops. This intentionally avoids all of them because
+/// we're interested only in the MemberExpr to check if it's a flexible array
+/// member.
+class MemberExprVisitor
+    : public ConstStmtVisitor<MemberExprVisitor, const Expr *> {
+public:
+  //===--------------------------------------------------------------------===//
+  //                            Visitor Methods
+  //===--------------------------------------------------------------------===//
+
+  const Expr *Visit(const Expr *E) {
+    return ConstStmtVisitor<MemberExprVisitor, const Expr *>::Visit(E);
+  }
+  const Expr *VisitStmt(const Stmt *S) { return nullptr; }
+
+  const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
+
+  const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+    return Visit(E->getBase());
+  }
+  const Expr *VisitCastExpr(const CastExpr *E) {
+    return Visit(E->getSubExpr());
+  }
+  const Expr *VisitParenExpr(const ParenExpr *E) {
+    return Visit(E->getSubExpr());
+  }
+  const Expr *VisitUnaryAddrOf(const UnaryOperator *E) {
+    return Visit(E->getSubExpr());
+  }
+  const Expr *VisitUnaryDeref(const UnaryOperator *E) {
+    return Visit(E->getSubExpr());
+  }
+};
+
+} // anonymous namespace
+
 ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
                                MultiExprArg ArgExprs, SourceLocation RParenLoc,
                                Expr *ExecConfig, bool IsExecConfig,
@@ -6590,8 +6647,36 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
     return CallExpr::Create(Context, Fn, ArgExprs, Context.DependentTy,
                             VK_PRValue, RParenLoc, CurFPFeatureOverrides());
   }
-  return BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc,
-                               ExecConfig, IsExecConfig);
+
+  Result = BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc,
+                                 ExecConfig, IsExecConfig);
+
+  if (FunctionDecl *FDecl = dyn_cast_or_null<FunctionDecl>(NDecl);
+      FDecl && FDecl->getBuiltinID() == Builtin::BI__builtin_get_counted_by) {
+    if (const Expr *Ptr = MemberExprVisitor().Visit(ArgExprs[0])) {
+      const MemberExpr *ME = cast<MemberExpr>(Ptr);
+      bool IsFlexibleArrayMember = ME->isFlexibleArrayMemberLike(
+              Context, getLangOpts().getStrictFlexArraysLevel(),
+              /*IgnoreTemplateOrMacroSubstitution=*/false);
+
+      if (!ME->HasSideEffects(Context) && IsFlexibleArrayMember &&
+          ME->getMemberDecl()->getType()->isCountAttributedType()) {
+        const FieldDecl *FAMDecl = dyn_cast<FieldDecl>(ME->getMemberDecl());
+        if (const FieldDecl *CountFD = FindCountedByField(FAMDecl)) {
+          // The builtin returns a 'size_t *', however 'size_t' might not be
+          // the type of the count field. Thus we create an explicit c-style
+          // cast to ensure the proper types going forward.
+          QualType PtrTy = Context.getPointerType(CountFD->getType());
+          Result = CStyleCastExpr::Create(
+              Context, PtrTy, VK_LValue, CK_BitCast, Result.get(), nullptr,
+              FPOptionsOverride(), Context.CreateTypeSourceInfo(PtrTy),
+              LParenLoc, RParenLoc);
+        }
+      }
+    }
+  }
+
+  return Result;
 }
 
 Expr *Sema::BuildBuiltinCallExpr(SourceLocation Loc, Builtin::ID Id,
diff --git a/clang/test/CodeGen/builtin-get-counted-by.c b/clang/test/CodeGen/builtin-get-counted-by.c
new file mode 100644
index 0000000000000..8209db6a77111
--- /dev/null
+++ b/clang/test/CodeGen/builtin-get-counted-by.c
@@ -0,0 +1,83 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=X86_64
+// RUN: %clang_cc1 -triple i386-unknown-unknown -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=I386
+
+struct s {
+  char x;
+  short count;
+  int array[] __attribute__((counted_by(count)));
+};
+
+// X86_64-LABEL: define dso_local noalias noundef ptr @test1(
+// X86_64-SAME: i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// X86_64-NEXT:  [[ENTRY:.*:]]
+// X86_64-NEXT:    [[CONV:%.*]] = sext i32 [[SIZE]] to i64
+// X86_64-NEXT:    [[MUL:%.*]] = shl nsw i64 [[CONV]], 2
+// X86_64-NEXT:    [[ADD:%.*]] = add nsw i64 [[MUL]], 4
+// X86_64-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i64 noundef [[ADD]]) #[[ATTR3:[0-9]+]]
+// X86_64-NEXT:    [[CONV1:%.*]] = trunc i32 [[SIZE]] to i16
+// X86_64-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i64 2
+// X86_64-NEXT:    store i16 [[CONV1]], ptr [[DOT_COUNTED_BY_GEP]], align 2, !tbaa [[TBAA2:![0-9]+]]
+// X86_64-NEXT:    ret ptr [[CALL]]
+//
+// I386-LABEL: define dso_local noalias noundef ptr @test1(
+// I386-SAME: i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// I386-NEXT:  [[ENTRY:.*:]]
+// I386-NEXT:    [[MUL:%.*]] = shl i32 [[SIZE]], 2
+// I386-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], 4
+// I386-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i32 noundef [[ADD]]) #[[ATTR3:[0-9]+]]
+// I386-NEXT:    [[CONV:%.*]] = trunc i32 [[SIZE]] to i16
+// I386-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i32 2
+// I386-NEXT:    store i16 [[CONV]], ptr [[DOT_COUNTED_BY_GEP]], align 2, !tbaa [[TBAA3:![0-9]+]]
+// I386-NEXT:    ret ptr [[CALL]]
+//
+struct s *test1(int size) {
+  struct s *p = __builtin_malloc(sizeof(struct s) + sizeof(int) * size);
+
+  *__builtin_get_counted_by(p->array) = size;
+  *__builtin_get_counted_by(&p->array[0]) = size;
+  return p;
+}
+
+struct z {
+  char x;
+  short count;
+  int array[];
+};
+
+// X86_64-LABEL: define dso_local noalias noundef ptr @test2(
+// X86_64-SAME: i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// X86_64-NEXT:  [[ENTRY:.*:]]
+// X86_64-NEXT:    [[CONV:%.*]] = sext i32 [[SIZE]] to i64
+// X86_64-NEXT:    [[MUL:%.*]] = shl nsw i64 [[CONV]], 2
+// X86_64-NEXT:    [[ADD:%.*]] = add nsw i64 [[MUL]], 4
+// X86_64-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i64 noundef [[ADD]]) #[[ATTR3]]
+// X86_64-NEXT:    ret ptr [[CALL]]
+//
+// I386-LABEL: define dso_local noalias noundef ptr @test2(
+// I386-SAME: i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// I386-NEXT:  [[ENTRY:.*:]]
+// I386-NEXT:    [[MUL:%.*]] = shl i32 [[SIZE]], 2
+// I386-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], 4
+// I386-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i32 noundef [[ADD]]) #[[ATTR3]]
+// I386-NEXT:    ret ptr [[CALL]]
+//
+struct z *test2(int size) {
+  struct z *p = __builtin_malloc(sizeof(struct z) + sizeof(int) * size);
+
+  if (__builtin_get_counted_by(&p->array[0]))
+    *__builtin_get_counted_by(&p->array[0]) = size;
+
+  return p;
+}
+//.
+// X86_64: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// X86_64: [[META3]] = !{!"short", [[META4:![0-9]+]], i64 0}
+// X86_64: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// X86_64: [[META5]] = !{!"Simple C/C++ TBAA"}
+//.
+// I386: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// I386: [[META4]] = !{!"short", [[META5:![0-9]+]], i64 0}
+// I386: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// I386: [[META6]] = !{!"Simple C/C++ TBAA"}
+//.
diff --git a/clang/test/Sema/builtin-get-counted-by.c b/clang/test/Sema/builtin-get-counted-by.c
new file mode 100644
index 0000000000000..18cef35b0509a
--- /dev/null
+++ b/clang/test/Sema/builtin-get-counted-by.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+struct fam_struct {
+  char x;
+  short count;
+  int array[] __attribute__((counted_by(count)));
+} *p;
+
+struct non_fam_struct {
+  char x;
+  short count;
+  int array[];
+} *q;
+
+void foo(int size) {
+  *__builtin_get_counted_by(p->array) = size;
+
+  if (__builtin_get_counted_by(q->array))
+    *__builtin_get_counted_by(q->array) = size;
+
+  *__builtin_get_counted_by(p->count) = size; // expected-error{{incompatible integer to pointer conversion passing 'short' to parameter of type 'void *'}}
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/102549


More information about the cfe-commits mailing list