[clang] [Clang][counted_by] Refactor __builtin_dynamic_object_size on FAMs (PR #122198)

Bill Wendling via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 9 14:49:07 PST 2025


================
@@ -1060,238 +1061,331 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
 }
 
-const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
-    ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
-    uint64_t &Offset) {
+namespace {
+
+/// StructFieldAccess is a simple visitor class to grab the first MemberExpr
+/// from an Expr. It records any ArraySubscriptExpr we meet along the way.
+struct StructFieldAccess
+    : public ConstStmtVisitor<StructFieldAccess, const MemberExpr *> {
+  const ArraySubscriptExpr *ASE = nullptr;
+
+  const MemberExpr *VisitMemberExpr(const MemberExpr *E) { return E; }
+
+  const MemberExpr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+    ASE = E;
+    return Visit(E->getBase());
+  }
+  const MemberExpr *VisitCastExpr(const CastExpr *E) {
+    return Visit(E->getSubExpr());
+  }
+  const MemberExpr *VisitParenExpr(const ParenExpr *E) {
+    return Visit(E->getSubExpr());
+  }
+  const MemberExpr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
+    return Visit(E->getSubExpr());
+  }
+  const MemberExpr *VisitUnaryDeref(const clang::UnaryOperator *E) {
+    return Visit(E->getSubExpr());
+  }
+};
+
+} // end anonymous namespace
+
+/// Find a struct's flexible array member. It may be embedded inside multiple
+/// sub-structs, but must still be the last field.
+static const FieldDecl *FindFlexibleArrayMemberField(CodeGenFunction &CGF,
+                                                     ASTContext &Ctx,
+                                                     const RecordDecl *RD) {
   const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
-      getLangOpts().getStrictFlexArraysLevel();
-  uint32_t FieldNo = 0;
+      CGF.getLangOpts().getStrictFlexArraysLevel();
 
   if (RD->isImplicit())
     return nullptr;
 
   for (const FieldDecl *FD : RD->fields()) {
-    if ((!FAMDecl || FD == FAMDecl) &&
-        Decl::isFlexibleArrayMemberLike(
+    if (Decl::isFlexibleArrayMemberLike(
             Ctx, FD, FD->getType(), StrictFlexArraysLevel,
-            /*IgnoreTemplateOrMacroSubstitution=*/true)) {
-      const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
-      Offset += Layout.getFieldOffset(FieldNo);
+            /*IgnoreTemplateOrMacroSubstitution=*/true))
       return FD;
+
+    if (auto RT = FD->getType()->getAs<RecordType>())
+      if (const FieldDecl *FD =
+              FindFlexibleArrayMemberField(CGF, Ctx, RT->getAsRecordDecl()))
+        return FD;
+  }
+
+  return nullptr;
+}
+
+/// Calculate the offset of a struct field. It may be embedded inside multiple
+/// sub-structs.
+static bool GetFieldOffset(ASTContext &Ctx, const RecordDecl *RD,
+                           const FieldDecl *FD, int64_t &Offset) {
+  if (RD->isImplicit())
+    return false;
+
+  const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
+  uint32_t FieldNo = 0;
+
+  for (const FieldDecl *Field : RD->fields()) {
+    if (Field == FD) {
+      Offset += Layout.getFieldOffset(FieldNo);
+      return true;
     }
 
-    QualType Ty = FD->getType();
-    if (Ty->isRecordType()) {
-      if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
-              Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
-        const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
+    if (auto RT = Field->getType()->getAs<RecordType>()) {
+      if (GetFieldOffset(Ctx, RT->getAsRecordDecl(), FD, Offset)) {
         Offset += Layout.getFieldOffset(FieldNo);
-        return Field;
+        return true;
       }
     }
 
     if (!RD->isUnion())
       ++FieldNo;
   }
 
-  return nullptr;
+  return false;
 }
 
-static unsigned CountCountedByAttrs(const RecordDecl *RD) {
-  unsigned Num = 0;
+llvm::Value *
+CodeGenFunction::emitCountedByMemberSize(const Expr *E, llvm::Value *EmittedE,
+                                         unsigned Type,
+                                         llvm::IntegerType *ResType) {
+  ASTContext &Ctx = getContext();
 
-  for (const FieldDecl *FD : RD->fields()) {
-    if (FD->getType()->isCountAttributedType())
-      return ++Num;
+  // Note: If the whole struct is specificed in the __bdos (i.e. Visitor
+  // returns a DeclRefExpr). The calculation of the whole size of the structure
+  // with a flexible array member can be done in two ways:
+  //
+  //     1) sizeof(struct S) + count * sizeof(typeof(fam))
+  //     2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
+  //
+  // The first will add additional padding after the end of the array
+  // allocation while the second method is more precise, but not quite expected
+  // from programmers. See
+  // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a discussion
+  // of the topic.
+  //
+  // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
+  // structure. Therefore, because of the above issue, we choose to match what
+  // GCC does for consistency's sake.
 
-    QualType Ty = FD->getType();
-    if (Ty->isRecordType())
-      Num += CountCountedByAttrs(Ty->getAsRecordDecl());
-  }
+  StructFieldAccess Visitor;
+  const MemberExpr *ME = Visitor.Visit(E);
+  if (!ME)
+    return nullptr;
 
-  return Num;
-}
+  const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl());
+  if (!FD)
+    return nullptr;
 
-llvm::Value *
-CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
-                                             llvm::IntegerType *ResType) {
-  // The code generated here calculates the size of a struct with a flexible
-  // array member that uses the counted_by attribute. There are two instances
-  // we handle:
+  const RecordDecl *RD = FD->getDeclContext()->getOuterLexicalRecordContext();
+  const FieldDecl *FlexibleArrayMemberFD = nullptr;
+
+  if (Decl::isFlexibleArrayMemberLike(
+          Ctx, FD, FD->getType(), getLangOpts().getStrictFlexArraysLevel(),
+          /*IgnoreTemplateOrMacroSubstitution=*/true))
+    FlexibleArrayMemberFD = FD;
+  else
+    FlexibleArrayMemberFD = FindFlexibleArrayMemberField(*this, Ctx, RD);
+
+  if (!FlexibleArrayMemberFD ||
+      !FlexibleArrayMemberFD->getType()->isCountAttributedType())
+    return nullptr;
+
+  const FieldDecl *CountFD = FlexibleArrayMemberFD->findCountedByField();
+  if (!CountFD)
+    // Can't find the field referenced by the "counted_by" attribute.
+    return nullptr;
+
+  const Expr *Idx = nullptr;
+  if (Visitor.ASE) {
+    Idx = Visitor.ASE->getIdx();
+
+    if (Idx->HasSideEffects(Ctx))
+      // We can't have side-effects.
+      return getDefaultBuiltinObjectSizeResult(Type, ResType);
+
+    if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
+      int64_t Val = IL->getValue().getSExtValue();
+      if (Val < 0)
+        return getDefaultBuiltinObjectSizeResult(Type, ResType);
+
+      // The index is 0, so we don't need to take it into account.
+      if (Val == 0)
+        Idx = nullptr;
+    }
+  }
+
+  // Calculate the flexible array member's object size using these formulae
+  // (note: if the calculation is negative, we return 0.):
   //
-  //       struct s {
-  //         unsigned long flags;
-  //         int count;
-  //         int array[] __attribute__((counted_by(count)));
-  //       }
+  //      struct p;
+  //      struct s {
+  //          /* ... */
+  //          int count;
+  //          struct p *array[] __attribute__((counted_by(count)));
+  //      };
   //
-  //   1) bdos of the flexible array itself:
+  // 1) 'ptr->array':
   //
-  //     __builtin_dynamic_object_size(p->array, 1) ==
-  //         p->count * sizeof(*p->array)
+  //    size_t count = (size_t) ptr->count;
   //
-  //   2) bdos of a pointer into the flexible array:
+  //    size_t flexible_array_member_base_size = sizeof (*ptr->array);
+  //    size_t flexible_array_member_size =
+  //            count * flexible_array_member_base_size;
   //
-  //     __builtin_dynamic_object_size(&p->array[42], 1) ==
-  //         (p->count - 42) * sizeof(*p->array)
+  //    return flexible_array_member_size;
   //
-  //   2) bdos of the whole struct, including the flexible array:
+  // 2) '&ptr->array[idx]':
   //
-  //     __builtin_dynamic_object_size(p, 1) ==
-  //        max(sizeof(struct s),
-  //            offsetof(struct s, array) + p->count * sizeof(*p->array))
+  //    size_t count = (size_t) ptr->count;
+  //    size_t index = (size_t) idx;
   //
-  ASTContext &Ctx = getContext();
-  const Expr *Base = E->IgnoreParenImpCasts();
-  const Expr *Idx = nullptr;
-
-  if (const auto *UO = dyn_cast<UnaryOperator>(Base);
-      UO && UO->getOpcode() == UO_AddrOf) {
-    Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
-    if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
-      Base = ASE->getBase()->IgnoreParenImpCasts();
-      Idx = ASE->getIdx()->IgnoreParenImpCasts();
-
-      if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
-        int64_t Val = IL->getValue().getSExtValue();
-        if (Val < 0)
-          return getDefaultBuiltinObjectSizeResult(Type, ResType);
-
-        if (Val == 0)
-          // The index is 0, so we don't need to take it into account.
-          Idx = nullptr;
-      }
-    } else {
-      // Potential pointer to another element in the struct.
-      Base = SubExpr;
-    }
-  }
+  //    size_t flexible_array_member_base_size = sizeof (*ptr->array);
+  //    size_t flexible_array_member_size =
+  //            count * flexible_array_member_base_size;
+  //
+  //    size_t index_size = index * flexible_array_member_base_size;
+  //
+  //    return flexible_array_member_size - index_size;
+  //
+  // 3) '&ptr->field':
+  //
+  //    size_t count = (size_t) ptr->count;
+  //    size_t sizeof_struct = sizeof (struct s);
+  //
+  //    size_t flexible_array_member_base_size = sizeof (*ptr->array);
+  //    size_t flexible_array_member_size =
+  //            count * flexible_array_member_base_size;
+  //
+  //    size_t field_offset = offsetof (struct s, field);
+  //    size_t offset_diff = struct_size - field_offset;
+  //
+  //    return flexible_array_member_size + offset_diff;
+  //
+  // 4) '&ptr->field_array[idx]':
+  //
+  //    size_t count = (size_t) ptr->count;
+  //    size_t index = (size_t) idx;
+  //    size_t sizeof_struct = sizeof (struct s);
+  //
+  //    size_t flexible_array_member_base_size = sizeof (*ptr->array);
+  //    size_t flexible_array_member_size =
+  //            count * flexible_array_member_base_size;
+  //
+  //    size_t field_base_size = sizeof (*ptr->field_array);
+  //    size_t field_offset = offsetof (struct s, field)
+  //    field_offset += index * field_base_size;
+  //
+  //    size_t offset_diff = sizeof_struct - field_offset;
+  //
+  //    return offset_diff + flexible_array_member_size;
 
-  // Get the flexible array member Decl.
-  const RecordDecl *OuterRD = nullptr;
-  const FieldDecl *FAMDecl = nullptr;
-  if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
-    // Check if \p Base is referencing the FAM itself.
-    const ValueDecl *VD = ME->getMemberDecl();
-    OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
-    FAMDecl = dyn_cast<FieldDecl>(VD);
-    if (!FAMDecl)
-      return nullptr;
-  } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
-    // Check if we're pointing to the whole struct.
-    QualType Ty = DRE->getDecl()->getType();
-    if (Ty->isPointerType())
-      Ty = Ty->getPointeeType();
-    OuterRD = Ty->getAsRecordDecl();
-
-    // If we have a situation like this:
-    //
-    //     struct union_of_fams {
-    //         int flags;
-    //         union {
-    //             signed char normal_field;
-    //             struct {
-    //                 int count1;
-    //                 int arr1[] __counted_by(count1);
-    //             };
-    //             struct {
-    //                 signed char count2;
-    //                 int arr2[] __counted_by(count2);
-    //             };
-    //         };
-    //    };
-    //
-    // We don't know which 'count' to use in this scenario:
-    //
-    //     size_t get_size(struct union_of_fams *p) {
-    //         return __builtin_dynamic_object_size(p, 1);
-    //     }
-    //
-    // Instead of calculating a wrong number, we give up.
-    if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
-      return nullptr;
-  }
+  QualType CountTy = CountFD->getType();
+  bool IsSigned = CountTy->isSignedIntegerType();
 
-  if (!OuterRD)
-    return nullptr;
+  QualType FlexibleArrayMemberTy = FlexibleArrayMemberFD->getType();
+  QualType FieldTy = FD->getType();
 
-  // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to
-  // get its offset.
-  uint64_t Offset = 0;
-  FAMDecl =
-      FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
-  Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
+  // Explicit cast because otherwise the CharWidth will promote an i32's into
+  // u64's leading to overflows..
+  int64_t CharWidth = static_cast<int64_t>(CGM.getContext().getCharWidth());
 
-  if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
-    // No flexible array member found or it doesn't have the "counted_by"
-    // attribute.
+  //  size_t count = (size_t) ptr->count;
+  Value *Count = EmitLoadOfCountedByField(ME, FlexibleArrayMemberFD, CountFD);
+  if (!Count)
     return nullptr;
+  Count = Builder.CreateIntCast(Count, ResType, IsSigned, "count");
 
-  const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
-  if (!CountedByFD)
-    // Can't find the field referenced by the "counted_by" attribute.
-    return nullptr;
-
-  if (isa<DeclRefExpr>(Base))
-    // The whole struct is specificed in the __bdos. The calculation of the
-    // whole size of the structure can be done in two ways:
-    //
-    //     1) sizeof(struct S) + count * sizeof(typeof(fam))
-    //     2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
-    //
-    // The first will add additional padding after the end of the array,
-    // allocation while the second method is more precise, but not quite
-    // expected from programmers. See
-    // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
-    // discussion of the topic.
-    //
-    // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
-    // structure. Therefore, because of the above issue, we'll choose to match
-    // what GCC does for consistency's sake.
-    return nullptr;
+  //  size_t index = (size_t) ptr->index;
+  Value *Index = nullptr;
+  if (Idx) {
+    bool IdxSigned = Idx->getType()->isSignedIntegerType();
+    Index = EmitAnyExprToTemp(Idx).getScalarVal();
+    Index = Builder.CreateIntCast(Index, ResType, IdxSigned, "index");
+  }
+
+  //  size_t sizeof_struct = sizeof (struct s);
+  llvm::StructType *StructTy = getTypes().getCGRecordLayout(RD).getLLVMType();
+  const llvm::DataLayout &Layout = Builder.GetInsertBlock()->getDataLayout();
----------------
bwendling wrote:

Ah! Much nicer. Thanks. :-)

https://github.com/llvm/llvm-project/pull/122198


More information about the cfe-commits mailing list