[clang] [clang] Move tailclipping to bitfield allocation (PR #87090)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 29 10:13:41 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: Nathan Sidwell (urnathan)
<details>
<summary>Changes</summary>
This follows on from PR65742. As I suspected, clipTailPadding's functionality can be moved into accumulateBitfields, which now has sufficient information. It turns out that clipTailPadding was undoing some of the work just added to accumulateBitfields. Specifically, on a strict-alignment LP64 machine, accumulateBitfields could determine that a 24-bit access unit was the best unit to use (loading 3 separate bytes being better than loading 4). But if those 3 bytes were followed by a padding byte, clipTailPadding would not make the storage a 3-byte array, and the access unit would end up as an (unaligned) i32. Boo!

The new testcase shows this -- it's written to work on both ILP32 and LP64, but the common case, I think, is LP64 with a pointer following the bitfields and 4 bytes of alignment padding between them.
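To make the scenario concrete, here is a minimal C sketch of the shape involved (the struct and member names are hypothetical, modelled on the new testcase):

```c
/* Hypothetical illustration of the case above. On a strict-alignment LP64
 * target, 'b' occupies bytes 1-3 and 'p' is 8-byte aligned, so bytes 4-7
 * are alignment padding. accumulateBitfields picks a 24-bit access unit
 * for 'b'; previously its storage type stayed i24 (alloc size 4 bytes),
 * yielding an unaligned 4-byte access. With this change the storage is
 * clipped to [3 x i8], so only the 3 bytes holding 'b' are accessed. */
struct Example {
  char a;          /* byte 0 */
  unsigned b : 24; /* bytes 1-3 */
  void *p;         /* offset 8 on LP64; bytes 4-7 are padding */
};
```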
The fine-grained bitfield accesses needed adjusting to preserve the existing behaviour of extending them into usable tail padding. You'll see we now check for fine-grained access later -- allowing a span to grow into such padding, but preventing accumulation of a subsequent span. I think that's in keeping with the desired behaviour of that option.
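As a rough sketch (not taken from the patch) of that intended behaviour under -ffine-grained-bitfield-accesses:

```c
/* Hypothetical example of the fine-grained behaviour described above:
 * adjacent bitfield spans are kept as separate access units, but a unit
 * may still widen into trailing padding that no other member can use. */
struct FineGrained {
  unsigned a : 16; /* its own access unit; not merged with 'b' */
  unsigned b : 16; /* a separate access unit under fine-grained access */
  unsigned c : 24; /* may still grow into the padding byte before 'd' */
  int d;           /* 4-byte aligned; one padding byte precedes it */
};
```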
---
Full diff: https://github.com/llvm/llvm-project/pull/87090.diff
2 Files Affected:
- (modified) clang/lib/CodeGen/CGRecordLayoutBuilder.cpp (+43-35)
- (modified) clang/test/CodeGen/bitfield-access-unit.c (+18)
``````````diff
diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
index e32023aeac1e6f..acf1746294ef73 100644
--- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -41,10 +41,11 @@ namespace {
/// contains enough information to determine where the runs break. Microsoft
/// and Itanium follow different rules and use different codepaths.
/// * It is desired that, when possible, bitfields use the appropriate iN type
-/// when lowered to llvm types. For example unsigned x : 24 gets lowered to
+/// when lowered to llvm types. For example unsigned x : 24 gets lowered to
/// i24. This isn't always possible because i24 has storage size of 32 bit
-/// and if it is possible to use that extra byte of padding we must use
-/// [i8 x 3] instead of i24. The function clipTailPadding does this.
+/// and if it is possible to use that extra byte of padding we must use [i8 x
+/// 3] instead of i24. This is computed when accumulating bitfields in
+/// accumulateBitfields.
/// C++ examples that require clipping:
/// struct { int a : 24; char b; }; // a must be clipped, b goes at offset 3
/// struct A { int a : 24; ~A(); }; // a must be clipped because:
@@ -197,9 +198,7 @@ struct CGRecordLowering {
/// not the primary vbase of some base class.
bool hasOwnStorage(const CXXRecordDecl *Decl, const CXXRecordDecl *Query);
void calculateZeroInit();
- /// Lowers bitfield storage types to I8 arrays for bitfields with tail
- /// padding that is or can potentially be used.
- void clipTailPadding();
+ void checkTailPadding();
/// Determines if we need a packed llvm struct.
void determinePacked(bool NVBaseType);
/// Inserts padding everywhere it's needed.
@@ -302,7 +301,7 @@ void CGRecordLowering::lower(bool NVBaseType) {
}
llvm::stable_sort(Members);
Members.push_back(StorageInfo(Size, getIntNType(8)));
- clipTailPadding();
+ checkTailPadding();
determinePacked(NVBaseType);
insertPadding();
Members.pop_back();
@@ -521,6 +520,7 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
// available padding characters.
RecordDecl::field_iterator BestEnd = Begin;
CharUnits BestEndOffset;
+ bool BestClipped; // Whether the representation must be in a byte array.
for (;;) {
// AtAlignedBoundary is true iff Field is the (potential) start of a new
@@ -583,10 +583,9 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
// this is the best seen so far.
BestEnd = Field;
BestEndOffset = BeginOffset + AccessSize;
- if (Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
- // Fine-grained access, so no merging of spans.
- InstallBest = true;
- else if (!BitSizeSinceBegin)
+ // Assume clipped until proven not below.
+ BestClipped = true;
+ if (!BitSizeSinceBegin)
// A zero-sized initial span -- this will install nothing and reset
// for another.
InstallBest = true;
@@ -614,6 +613,12 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
// The access unit is not at a naturally aligned offset within the
// structure.
InstallBest = true;
+
+ if (InstallBest && BestEnd == Field)
+        // We're installing the first span, whose clipping was
+        // conservatively presumed above. Compute it correctly.
+ if (getSize(Type) == AccessSize)
+ BestClipped = false;
}
if (!InstallBest) {
@@ -642,11 +647,15 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
// access unit.
BestEndOffset = BeginOffset + TypeSize;
BestEnd = Field;
+ BestClipped = false;
}
if (Barrier)
// The next field is a barrier that we cannot merge across.
InstallBest = true;
+ else if (Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
+ // Fine-grained access, so no merging of spans.
+ InstallBest = true;
else
// Otherwise, we're not installing. Update the bit size
// of the current span to go all the way to LimitOffset, which is
@@ -665,7 +674,17 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
// Add the storage member for the access unit to the record. The
// bitfields get the offset of their storage but come afterward and
// remain there after a stable sort.
- llvm::Type *Type = getIntNType(Context.toBits(AccessSize));
+ llvm::Type *Type;
+ if (BestClipped) {
+ assert(getSize(getIntNType(Context.toBits(AccessSize))) >
+ AccessSize &&
+ "Clipped access need not be clipped");
+ Type = getByteArrayType(AccessSize);
+ } else {
+ Type = getIntNType(Context.toBits(AccessSize));
+ assert(getSize(Type) == AccessSize &&
+ "Unclipped access must be clipped");
+ }
Members.push_back(StorageInfo(BeginOffset, Type));
for (; Begin != BestEnd; ++Begin)
if (!Begin->isZeroLengthBitField(Context))
@@ -911,32 +930,21 @@ void CGRecordLowering::calculateZeroInit() {
}
}
-void CGRecordLowering::clipTailPadding() {
- std::vector<MemberInfo>::iterator Prior = Members.begin();
- CharUnits Tail = getSize(Prior->Data);
- for (std::vector<MemberInfo>::iterator Member = Prior + 1,
- MemberEnd = Members.end();
- Member != MemberEnd; ++Member) {
+// Verify accumulateBitfields computed the correct storage representations.
+void CGRecordLowering::checkTailPadding() {
+#ifndef NDEBUG
+ auto Tail = CharUnits::Zero();
+ for (auto M : Members) {
// Only members with data and the scissor can cut into tail padding.
- if (!Member->Data && Member->Kind != MemberInfo::Scissor)
+ if (!M.Data && M.Kind != MemberInfo::Scissor)
continue;
- if (Member->Offset < Tail) {
- assert(Prior->Kind == MemberInfo::Field &&
- "Only storage fields have tail padding!");
- if (!Prior->FD || Prior->FD->isBitField())
- Prior->Data = getByteArrayType(bitsToCharUnits(llvm::alignTo(
- cast<llvm::IntegerType>(Prior->Data)->getIntegerBitWidth(), 8)));
- else {
- assert(Prior->FD->hasAttr<NoUniqueAddressAttr>() &&
- "should not have reused this field's tail padding");
- Prior->Data = getByteArrayType(
- Context.getTypeInfoDataSizeInChars(Prior->FD->getType()).Width);
- }
- }
- if (Member->Data)
- Prior = Member;
- Tail = Prior->Offset + getSize(Prior->Data);
+
+ assert(M.Offset >= Tail && "bitfield access unit not already clipped");
+ Tail = M.Offset;
+ if (M.Data)
+ Tail += getSize(M.Data);
}
+#endif
}
void CGRecordLowering::determinePacked(bool NVBaseType) {
diff --git a/clang/test/CodeGen/bitfield-access-unit.c b/clang/test/CodeGen/bitfield-access-unit.c
index 1aed2e7202fc65..d0553c5183eeff 100644
--- a/clang/test/CodeGen/bitfield-access-unit.c
+++ b/clang/test/CodeGen/bitfield-access-unit.c
@@ -222,6 +222,24 @@ struct G {
// LAYOUT-DWN32-NEXT: <CGBitFieldInfo Offset:{{[0-9]+}} Size:7 IsSigned:1 StorageSize:8 StorageOffset:4
// CHECK-NEXT: ]>
+struct __attribute__((aligned(8))) H {
+ char a;
+ unsigned b : 24; // on expensive alignment we want this to stay 24
+ unsigned c __attribute__((aligned(8))); // Think 'long long' or lp64 ptr
+} h;
+// CHECK-LABEL: LLVMType:%struct.H =
+// LAYOUT-FLEX-SAME: type <{ i8, i32, [3 x i8], i32, [4 x i8] }>
+// LAYOUT-STRICT-SAME: type { i8, [3 x i8], [4 x i8], i32, [4 x i8] }
+// LAYOUT-DWN32-FLEX-SAME: type <{ i8, i32, [3 x i8], i32, [4 x i8] }>
+// LAYOUT-DWN32-STRICT-SAME: type { i8, [3 x i8], [4 x i8], i32, [4 x i8] }
+// CHECK: BitFields:[
+// LAYOUT-FLEX-NEXT: <CGBitFieldInfo Offset:{{[0-9]+}} Size:24 IsSigned:0 StorageSize:32 StorageOffset:1
+// LAYOUT-STRICT-NEXT: <CGBitFieldInfo Offset:{{[0-9]+}} Size:24 IsSigned:0 StorageSize:24 StorageOffset:1
+
+// LAYOUT-DWN32-FLEX-NEXT: <CGBitFieldInfo Offset:{{[0-9]+}} Size:24 IsSigned:0 StorageSize:32 StorageOffset:1
+// LAYOUT-DWN32-STRICT-NEXT: <CGBitFieldInfo Offset:{{[0-9]+}} Size:24 IsSigned:0 StorageSize:24 StorageOffset:1
+// CHECK-NEXT: ]>
+
#if _LP64
struct A64 {
int a : 16;
``````````
</details>
https://github.com/llvm/llvm-project/pull/87090