[clang] bcdea60 - [CIR] Implement constant expression bitfield initialization (#162143)

via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 7 04:39:53 PDT 2025


Author: Morris Hafner
Date: 2025-10-07T13:39:50+02:00
New Revision: bcdea6011790a2fd5af3dd40ef9fa8a05cd58b79

URL: https://github.com/llvm/llvm-project/commit/bcdea6011790a2fd5af3dd40ef9fa8a05cd58b79
DIFF: https://github.com/llvm/llvm-project/commit/bcdea6011790a2fd5af3dd40ef9fa8a05cd58b79.diff

LOG: [CIR] Implement constant expression bitfield initialization (#162143)

This adds support for emitting bitfields in constant aggregate
initializers, with the exception of single-element records and
decomposing large bitfields into smaller constants.

---------

Co-authored-by: Andy Kaylor <akaylor at nvidia.com>

Added: 
    

Modified: 
    clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
    clang/test/CIR/CodeGen/constant-inits.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
index e20a4fc3c63aa..59aa25726a749 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
@@ -118,6 +118,9 @@ class ConstantAggregateBuilder : private ConstantAggregateBuilderUtils {
   /// non-packed LLVM struct will give the correct layout.
   bool naturalLayout = true;
 
+  bool split(size_t index, CharUnits hint);
+  std::optional<size_t> splitAt(CharUnits pos);
+
   static mlir::Attribute buildFrom(CIRGenModule &cgm, ArrayRef<Element> elems,
                                    CharUnits startOffset, CharUnits size,
                                    bool naturalLayout, mlir::Type desiredTy,
@@ -137,6 +140,10 @@ class ConstantAggregateBuilder : private ConstantAggregateBuilderUtils {
   /// Update or overwrite the bits starting at \p offsetInBits with \p bits.
   bool addBits(llvm::APInt bits, uint64_t offsetInBits, bool allowOverwrite);
 
+  /// Attempt to condense the value starting at \p offset to a constant of type
+  /// \p desiredTy.
+  void condense(CharUnits offset, mlir::Type desiredTy);
+
   /// Produce a constant representing the entire accumulated value, ideally of
   /// the specified type. If \p allowOversized, the constant might be larger
   /// than implied by \p desiredTy (eg, if there is a flexible array member).
@@ -176,6 +183,195 @@ bool ConstantAggregateBuilder::add(mlir::TypedAttr typedAttr, CharUnits offset,
   return false;
 }
 
+bool ConstantAggregateBuilder::addBits(llvm::APInt bits, uint64_t offsetInBits,
+                                       bool allowOverwrite) {
+  const ASTContext &astContext = cgm.getASTContext();
+  const uint64_t charWidth = astContext.getCharWidth();
+  mlir::Type charTy = cgm.getBuilder().getUIntNTy(charWidth);
+
+  // Offset of where we want the first bit to go within the bits of the
+  // current char.
+  unsigned offsetWithinChar = offsetInBits % charWidth;
+
+  // We split bit-fields up into individual bytes. Walk over the bytes and
+  // update them.
+  for (CharUnits offsetInChars =
+           astContext.toCharUnitsFromBits(offsetInBits - offsetWithinChar);
+       /**/; ++offsetInChars) {
+    // Number of bits we want to fill in this char.
+    unsigned wantedBits =
+        std::min((uint64_t)bits.getBitWidth(), charWidth - offsetWithinChar);
+
+    // Get a char containing the bits we want in the right places. The other
+    // bits have unspecified values.
+    llvm::APInt bitsThisChar = bits;
+    if (bitsThisChar.getBitWidth() < charWidth)
+      bitsThisChar = bitsThisChar.zext(charWidth);
+    if (cgm.getDataLayout().isBigEndian()) {
+      // Figure out how much to shift by. We may need to left-shift if we have
+      // less than one byte of Bits left.
+      int shift = bits.getBitWidth() - charWidth + offsetWithinChar;
+      if (shift > 0)
+        bitsThisChar.lshrInPlace(shift);
+      else if (shift < 0)
+        bitsThisChar = bitsThisChar.shl(-shift);
+    } else {
+      bitsThisChar = bitsThisChar.shl(offsetWithinChar);
+    }
+    if (bitsThisChar.getBitWidth() > charWidth)
+      bitsThisChar = bitsThisChar.trunc(charWidth);
+
+    if (wantedBits == charWidth) {
+      // Got a full byte: just add it directly.
+      add(cir::IntAttr::get(charTy, bitsThisChar), offsetInChars,
+          allowOverwrite);
+    } else {
+      // Partial byte: update the existing integer if there is one. If we
+      // can't split out a 1-CharUnit range to update, then we can't add
+      // these bits and fail the entire constant emission.
+      std::optional<size_t> firstElemToUpdate = splitAt(offsetInChars);
+      if (!firstElemToUpdate)
+        return false;
+      std::optional<size_t> lastElemToUpdate =
+          splitAt(offsetInChars + CharUnits::One());
+      if (!lastElemToUpdate)
+        return false;
+      assert(*lastElemToUpdate - *firstElemToUpdate < 2 &&
+             "should have at most one element covering one byte");
+
+      // Figure out which bits we want and discard the rest.
+      llvm::APInt updateMask(charWidth, 0);
+      if (cgm.getDataLayout().isBigEndian())
+        updateMask.setBits(charWidth - offsetWithinChar - wantedBits,
+                           charWidth - offsetWithinChar);
+      else
+        updateMask.setBits(offsetWithinChar, offsetWithinChar + wantedBits);
+      bitsThisChar &= updateMask;
+      bool isNull = false;
+      if (*firstElemToUpdate < elements.size()) {
+        auto firstEltToUpdate =
+            mlir::dyn_cast<cir::IntAttr>(elements[*firstElemToUpdate].element);
+        isNull = firstEltToUpdate && firstEltToUpdate.isNullValue();
+      }
+
+      if (*firstElemToUpdate == *lastElemToUpdate || isNull) {
+        // All existing bits are either zero or undef.
+        add(cir::IntAttr::get(charTy, bitsThisChar), offsetInChars,
+            /*allowOverwrite*/ true);
+      } else {
+        cir::IntAttr ci =
+            mlir::dyn_cast<cir::IntAttr>(elements[*firstElemToUpdate].element);
+        // In order to perform a partial update, we need the existing bitwise
+        // value, which we can only extract for a constant int.
+        if (!ci)
+          return false;
+        // Because this is a 1-CharUnit range, the constant occupying it must
+        // be exactly one CharUnit wide.
+        assert(ci.getBitWidth() == charWidth && "splitAt failed");
+        assert((!(ci.getValue() & updateMask) || allowOverwrite) &&
+               "unexpectedly overwriting bitfield");
+        bitsThisChar |= (ci.getValue() & ~updateMask);
+        elements[*firstElemToUpdate].element =
+            cir::IntAttr::get(charTy, bitsThisChar);
+      }
+    }
+
+    // Stop if we've added all the bits.
+    if (wantedBits == bits.getBitWidth())
+      break;
+
+    // Remove the consumed bits from Bits.
+    if (!cgm.getDataLayout().isBigEndian())
+      bits.lshrInPlace(wantedBits);
+    bits = bits.trunc(bits.getBitWidth() - wantedBits);
+
+    // The remaining bits go at the start of the following bytes.
+    offsetWithinChar = 0;
+  }
+
+  return true;
+}
+
+/// Returns a position within elements such that all elements
+/// before the returned index end before pos and all elements at or after
+/// the returned index begin at or after pos. Splits elements as necessary
+/// to ensure this. Returns std::nullopt if we find something we can't split.
+std::optional<size_t> ConstantAggregateBuilder::splitAt(CharUnits pos) {
+  if (pos >= size)
+    return elements.size();
+
+  while (true) {
+    // Find the first element that starts after pos.
+    Element *iter =
+        llvm::upper_bound(elements, pos, [](CharUnits pos, const Element &elt) {
+          return pos < elt.offset;
+        });
+
+    if (iter == elements.begin())
+      return 0;
+
+    size_t index = iter - elements.begin() - 1;
+    const Element &elt = elements[index];
+
+    // If we already have an element starting at pos, we're done.
+    if (elt.offset == pos)
+      return index;
+
+    // Check for overlap with the element that starts before pos.
+    CharUnits eltEnd = elt.offset + getSize(elt.element);
+    if (eltEnd <= pos)
+      return index + 1;
+
+    // Try to decompose it into smaller constants.
+    if (!split(index, pos))
+      return std::nullopt;
+  }
+}
+
+/// Split the constant at index, if possible. Return true if we did.
+/// Hint indicates the location at which we'd like to split, but may be
+/// ignored.
+bool ConstantAggregateBuilder::split(size_t index, CharUnits hint) {
+  cgm.errorNYI("split constant at index");
+  return false;
+}
+
+void ConstantAggregateBuilder::condense(CharUnits offset,
+                                        mlir::Type desiredTy) {
+  CharUnits desiredSize = getSize(desiredTy);
+
+  std::optional<size_t> firstElemToReplace = splitAt(offset);
+  if (!firstElemToReplace)
+    return;
+  size_t first = *firstElemToReplace;
+
+  std::optional<size_t> lastElemToReplace = splitAt(offset + desiredSize);
+  if (!lastElemToReplace)
+    return;
+  size_t last = *lastElemToReplace;
+
+  size_t length = last - first;
+  if (length == 0)
+    return;
+
+  if (length == 1 && elements[first].offset == offset &&
+      getSize(elements[first].element) == desiredSize) {
+    cgm.errorNYI("re-wrapping single element records");
+    return;
+  }
+
+  // Build a new constant from the elements in the range.
+  SmallVector<Element> subElems(elements.begin() + first,
+                                elements.begin() + last);
+  mlir::Attribute replacement =
+      buildFrom(cgm, subElems, offset, desiredSize,
+                /*naturalLayout=*/false, desiredTy, false);
+
+  // Replace the range with the condensed constant.
+  Element newElt(mlir::cast<mlir::TypedAttr>(replacement), offset);
+  replace(elements, first, last, {newElt});
+}
+
 mlir::Attribute
 ConstantAggregateBuilder::buildFrom(CIRGenModule &cgm, ArrayRef<Element> elems,
                                     CharUnits startOffset, CharUnits size,
@@ -301,6 +497,9 @@ class ConstRecordBuilder {
   bool appendBytes(CharUnits fieldOffsetInChars, mlir::TypedAttr initCst,
                    bool allowOverwrite = false);
 
+  bool appendBitField(const FieldDecl *field, uint64_t fieldOffset,
+                      cir::IntAttr ci, bool allowOverwrite = false);
+
   bool build(InitListExpr *ile, bool allowOverwrite);
   bool build(const APValue &val, const RecordDecl *rd, bool isPrimaryBase,
              const CXXRecordDecl *vTableClass, CharUnits baseOffset);
@@ -325,6 +524,30 @@ bool ConstRecordBuilder::appendBytes(CharUnits fieldOffsetInChars,
   return builder.add(initCst, startOffset + fieldOffsetInChars, allowOverwrite);
 }
 
+bool ConstRecordBuilder::appendBitField(const FieldDecl *field,
+                                        uint64_t fieldOffset, cir::IntAttr ci,
+                                        bool allowOverwrite) {
+  const CIRGenRecordLayout &rl =
+      cgm.getTypes().getCIRGenRecordLayout(field->getParent());
+  const CIRGenBitFieldInfo &info = rl.getBitFieldInfo(field);
+  llvm::APInt fieldValue = ci.getValue();
+
+  // Promote the size of FieldValue if necessary
+  // FIXME: This should never occur, but currently it can because initializer
+  // constants are cast to bool, and because clang is not enforcing bitfield
+  // width limits.
+  if (info.size > fieldValue.getBitWidth())
+    fieldValue = fieldValue.zext(info.size);
+
+  // Truncate the size of FieldValue to the bit field size.
+  if (info.size < fieldValue.getBitWidth())
+    fieldValue = fieldValue.trunc(info.size);
+
+  return builder.addBits(fieldValue,
+                         cgm.getASTContext().toBits(startOffset) + fieldOffset,
+                         allowOverwrite);
+}
+
 bool ConstRecordBuilder::build(InitListExpr *ile, bool allowOverwrite) {
   RecordDecl *rd = ile->getType()
                        ->castAs<clang::RecordType>()
@@ -407,12 +630,14 @@ bool ConstRecordBuilder::build(InitListExpr *ile, bool allowOverwrite) {
     } else {
       // Otherwise we have a bitfield.
       if (auto constInt = dyn_cast<cir::IntAttr>(eltInit)) {
-        assert(!cir::MissingFeatures::bitfields());
-        cgm.errorNYI(field->getSourceRange(), "bitfields");
+        if (!appendBitField(field, layout.getFieldOffset(index), constInt,
+                            allowOverwrite))
+          return false;
+      } else {
+        // We are trying to initialize a bitfield with a non-trivial constant,
+        // this must require run-time code.
+        return false;
       }
-      // We are trying to initialize a bitfield with a non-trivial constant,
-      // this must require run-time code.
-      return false;
     }
   }
 
@@ -510,8 +735,16 @@ bool ConstRecordBuilder::build(const APValue &val, const RecordDecl *rd,
       if (field->hasAttr<NoUniqueAddressAttr>())
         allowOverwrite = true;
     } else {
-      assert(!cir::MissingFeatures::bitfields());
-      cgm.errorNYI(field->getSourceRange(), "bitfields");
+      // Otherwise we have a bitfield.
+      if (auto constInt = dyn_cast<cir::IntAttr>(eltInit)) {
+        if (!appendBitField(field, layout.getFieldOffset(index) + offsetBits,
+                            constInt, allowOverwrite))
+          return false;
+      } else {
+        // We are trying to initialize a bitfield with a non-trivial constant,
+        // this must require run-time code.
+        return false;
+      }
     }
   }
 

diff  --git a/clang/test/CIR/CodeGen/constant-inits.cpp b/clang/test/CIR/CodeGen/constant-inits.cpp
index c9153c91ebc22..d5a7bb9d57251 100644
--- a/clang/test/CIR/CodeGen/constant-inits.cpp
+++ b/clang/test/CIR/CodeGen/constant-inits.cpp
@@ -30,6 +30,41 @@ struct simple {
     int a, b;
 };
 
+// Byte-aligned bitfields
+struct byte_aligned_bitfields {
+    unsigned int a : 8;
+    unsigned int b : 8;
+    unsigned int c : 16;
+};
+
+struct signed_byte_aligned_bitfields {
+    int x : 8;
+    int y : 8;
+};
+
+struct single_byte_bitfield {
+    unsigned char a : 8;
+};
+
+// Partial bitfields (sub-byte)
+struct partial_bitfields {
+    unsigned int a : 3;
+    unsigned int b : 5;
+    unsigned int c : 8;
+};
+
+struct signed_partial_bitfields {
+    int x : 4;
+    int y : 4;
+};
+
+struct mixed_partial_bitfields {
+    unsigned char a : 1;
+    unsigned char b : 1;
+    unsigned char c : 1;
+    unsigned char d : 5;
+};
+
 void function() {
     constexpr static empty e;
 
@@ -54,8 +89,22 @@ void function() {
     constexpr static simple simple_array[] {
         s, {1111, 2222}, s
     };
+
+    // Byte-aligned bitfield tests
+    constexpr static byte_aligned_bitfields ba_bf1 = {0xFF, 0xAA, 0x1234};
+    constexpr static signed_byte_aligned_bitfields ba_bf2 = {-1, 127};
+    constexpr static single_byte_bitfield ba_bf3 = {42};
+
+    // Partial bitfield tests
+    constexpr static partial_bitfields p_bf1 = {1, 2, 3};
+    constexpr static signed_partial_bitfields p_bf2 = {-1, 7};
+    constexpr static mixed_partial_bitfields p_bf3 = {1, 0, 1, 15};
 }
 
+// Anonymous struct type definitions for bitfields
+// CIR-DAG: !rec_anon_struct = !cir.record<struct  {!u8i, !u8i, !u8i, !u8i}>
+// CIR-DAG: !rec_anon_struct1 = !cir.record<struct  {!u8i, !u8i, !cir.array<!u8i x 2>}>
+
 // CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE1e = #cir.zero : !rec_empty
 // CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE1s = #cir.const_record<{#cir.int<0> : !s32i, #cir.int<-1> : !s32i}> : !rec_simple
 // CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE2p1 = #cir.const_record<{#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.const_array<[#cir.int<99> : !s8i, #cir.int<88> : !s8i, #cir.int<77> : !s8i]> : !cir.array<!s8i x 3>, #cir.int<40> : !s32i}> : !rec_Point
@@ -83,6 +132,33 @@ void function() {
 // CIR-DAG-SAME:   #cir.zero : !rec_packed_and_aligned
 // CIR-DAG-SAME: ]> : !cir.array<!rec_packed_and_aligned x 2>
 
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE6ba_bf1 = #cir.const_record<{
+// CIR-DAG-SAME:   #cir.int<255> : !u8i,
+// CIR-DAG-SAME:   #cir.int<170> : !u8i,
+// CIR-DAG-SAME:   #cir.int<52> : !u8i,
+// CIR-DAG-SAME:   #cir.int<18> : !u8i
+// CIR-DAG-SAME: }> : !rec_anon_struct
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE6ba_bf2 = #cir.const_record<{
+// CIR-DAG-SAME:   #cir.int<255> : !u8i,
+// CIR-DAG-SAME:   #cir.int<127> : !u8i,
+// CIR-DAG-SAME:   #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>
+// CIR-DAG-SAME: }> : !rec_anon_struct1
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE6ba_bf3 = #cir.const_record<{
+// CIR-DAG-SAME:   #cir.int<42> : !u8i
+// CIR-DAG-SAME: }> : !rec_single_byte_bitfield
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5p_bf1 = #cir.const_record<{
+// CIR-DAG-SAME:   #cir.int<17> : !u8i,
+// CIR-DAG-SAME:   #cir.int<3> : !u8i,
+// CIR-DAG-SAME:   #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>
+// CIR-DAG-SAME: }> : !rec_anon_struct1
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5p_bf2 = #cir.const_record<{
+// CIR-DAG-SAME:   #cir.int<127> : !u8i,
+// CIR-DAG-SAME:   #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 3>
+// CIR-DAG-SAME: }> : !rec_signed_partial_bitfields
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5p_bf3 = #cir.const_record<{
+// CIR-DAG-SAME:   #cir.int<125> : !u8i
+// CIR-DAG-SAME: }> : !rec_mixed_partial_bitfields
+
 // CIR-LABEL: cir.func dso_local @_Z8functionv()
 // CIR:   cir.return
 
@@ -96,6 +172,12 @@ void function() {
 // LLVM-DAG: @_ZZ8functionvE3paa = internal global %struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 0 }>
 // LLVM-DAG: @_ZZ8functionvE5array = internal global [2 x %struct.Point] [%struct.Point { i32 123, i32 456, [3 x i8] c"\0B\16!", i32 789 }, %struct.Point { i32 10, i32 20, [3 x i8] zeroinitializer, i32 40 }]
 // LLVM-DAG: @_ZZ8functionvE9paa_array = internal global [2 x %struct.packed_and_aligned] [%struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 0 }>, %struct.packed_and_aligned zeroinitializer]
+// LLVM-DAG: @_ZZ8functionvE6ba_bf1 = internal global { i8, i8, i8, i8 } { i8 -1, i8 -86, i8 52, i8 18 }
+// LLVM-DAG: @_ZZ8functionvE6ba_bf2 = internal global { i8, i8, [2 x i8] } { i8 -1, i8 127, [2 x i8] zeroinitializer }
+// LLVM-DAG: @_ZZ8functionvE6ba_bf3 = internal global %struct.single_byte_bitfield { i8 42 }
+// LLVM-DAG: @_ZZ8functionvE5p_bf1 = internal global { i8, i8, [2 x i8] } { i8 17, i8 3, [2 x i8] zeroinitializer }
+// LLVM-DAG: @_ZZ8functionvE5p_bf2 = internal global %struct.signed_partial_bitfields { i8 127, [3 x i8] zeroinitializer }
+// LLVM-DAG: @_ZZ8functionvE5p_bf3 = internal global %struct.mixed_partial_bitfields { i8 125 }
 
 // LLVM-LABEL: define{{.*}} void @_Z8functionv
 // LLVM:   ret void
@@ -110,6 +192,12 @@ void function() {
 // OGCG-DAG: @_ZZ8functionvE3paa = internal constant %struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 undef }>
 // OGCG-DAG: @_ZZ8functionvE5array = internal constant [2 x %struct.Point] [%struct.Point { i32 123, i32 456, [3 x i8] c"\0B\16!", i32 789 }, %struct.Point { i32 10, i32 20, [3 x i8] zeroinitializer, i32 40 }]
 // OGCG-DAG: @_ZZ8functionvE9paa_array = internal constant [2 x %struct.packed_and_aligned] [%struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 undef }>, %struct.packed_and_aligned <{ i16 0, i8 0, float 0.000000e+00, i8 undef }>]
+// OGCG-DAG: @_ZZ8functionvE6ba_bf1 = internal constant { i8, i8, i8, i8 } { i8 -1, i8 -86, i8 52, i8 18 }
+// OGCG-DAG: @_ZZ8functionvE6ba_bf2 = internal constant { i8, i8, [2 x i8] } { i8 -1, i8 127, [2 x i8] undef }
+// OGCG-DAG: @_ZZ8functionvE6ba_bf3 = internal constant %struct.single_byte_bitfield { i8 42 }
+// OGCG-DAG: @_ZZ8functionvE5p_bf1 = internal constant { i8, i8, [2 x i8] } { i8 17, i8 3, [2 x i8] undef }
+// OGCG-DAG: @_ZZ8functionvE5p_bf2 = internal constant %struct.signed_partial_bitfields { i8 127, [3 x i8] undef }
+// OGCG-DAG: @_ZZ8functionvE5p_bf3 = internal constant %struct.mixed_partial_bitfields { i8 125 }
 
 // OGCG-LABEL: define{{.*}} void @_Z8functionv
 // OGCG:   ret void


        


More information about the cfe-commits mailing list