[clang] bcdea60 - [CIR] Implement constant expression bitfield initialization (#162143)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 7 04:39:53 PDT 2025
Author: Morris Hafner
Date: 2025-10-07T13:39:50+02:00
New Revision: bcdea6011790a2fd5af3dd40ef9fa8a05cd58b79
URL: https://github.com/llvm/llvm-project/commit/bcdea6011790a2fd5af3dd40ef9fa8a05cd58b79
DIFF: https://github.com/llvm/llvm-project/commit/bcdea6011790a2fd5af3dd40ef9fa8a05cd58b79.diff
LOG: [CIR] Implement constant expression bitfield initialization (#162143)
This adds support for emitting bitfields in constant aggregatge
initializers with the exception of single element records and
decomposing large bitfields into smaller constants.
---------
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
Added:
Modified:
clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
clang/test/CIR/CodeGen/constant-inits.cpp
Removed:
################################################################################
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
index e20a4fc3c63aa..59aa25726a749 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
@@ -118,6 +118,9 @@ class ConstantAggregateBuilder : private ConstantAggregateBuilderUtils {
/// non-packed LLVM struct will give the correct layout.
bool naturalLayout = true;
+ bool split(size_t index, CharUnits hint);
+ std::optional<size_t> splitAt(CharUnits pos);
+
static mlir::Attribute buildFrom(CIRGenModule &cgm, ArrayRef<Element> elems,
CharUnits startOffset, CharUnits size,
bool naturalLayout, mlir::Type desiredTy,
@@ -137,6 +140,10 @@ class ConstantAggregateBuilder : private ConstantAggregateBuilderUtils {
/// Update or overwrite the bits starting at \p offsetInBits with \p bits.
bool addBits(llvm::APInt bits, uint64_t offsetInBits, bool allowOverwrite);
+ /// Attempt to condense the value starting at \p offset to a constant of type
+ /// \p desiredTy.
+ void condense(CharUnits offset, mlir::Type desiredTy);
+
/// Produce a constant representing the entire accumulated value, ideally of
/// the specified type. If \p allowOversized, the constant might be larger
/// than implied by \p desiredTy (eg, if there is a flexible array member).
@@ -176,6 +183,195 @@ bool ConstantAggregateBuilder::add(mlir::TypedAttr typedAttr, CharUnits offset,
return false;
}
+bool ConstantAggregateBuilder::addBits(llvm::APInt bits, uint64_t offsetInBits,
+ bool allowOverwrite) {
+ const ASTContext &astContext = cgm.getASTContext();
+ const uint64_t charWidth = astContext.getCharWidth();
+ mlir::Type charTy = cgm.getBuilder().getUIntNTy(charWidth);
+
+ // Offset of where we want the first bit to go within the bits of the
+ // current char.
+ unsigned offsetWithinChar = offsetInBits % charWidth;
+
+ // We split bit-fields up into individual bytes. Walk over the bytes and
+ // update them.
+ for (CharUnits offsetInChars =
+ astContext.toCharUnitsFromBits(offsetInBits - offsetWithinChar);
+ /**/; ++offsetInChars) {
+ // Number of bits we want to fill in this char.
+ unsigned wantedBits =
+ std::min((uint64_t)bits.getBitWidth(), charWidth - offsetWithinChar);
+
+ // Get a char containing the bits we want in the right places. The other
+ // bits have unspecified values.
+ llvm::APInt bitsThisChar = bits;
+ if (bitsThisChar.getBitWidth() < charWidth)
+ bitsThisChar = bitsThisChar.zext(charWidth);
+ if (cgm.getDataLayout().isBigEndian()) {
+ // Figure out how much to shift by. We may need to left-shift if we have
+ // less than one byte of Bits left.
+ int shift = bits.getBitWidth() - charWidth + offsetWithinChar;
+ if (shift > 0)
+ bitsThisChar.lshrInPlace(shift);
+ else if (shift < 0)
+ bitsThisChar = bitsThisChar.shl(-shift);
+ } else {
+ bitsThisChar = bitsThisChar.shl(offsetWithinChar);
+ }
+ if (bitsThisChar.getBitWidth() > charWidth)
+ bitsThisChar = bitsThisChar.trunc(charWidth);
+
+ if (wantedBits == charWidth) {
+ // Got a full byte: just add it directly.
+ add(cir::IntAttr::get(charTy, bitsThisChar), offsetInChars,
+ allowOverwrite);
+ } else {
+ // Partial byte: update the existing integer if there is one. If we
+ // can't split out a 1-CharUnit range to update, then we can't add
+ // these bits and fail the entire constant emission.
+ std::optional<size_t> firstElemToUpdate = splitAt(offsetInChars);
+ if (!firstElemToUpdate)
+ return false;
+ std::optional<size_t> lastElemToUpdate =
+ splitAt(offsetInChars + CharUnits::One());
+ if (!lastElemToUpdate)
+ return false;
+ assert(*lastElemToUpdate - *firstElemToUpdate < 2 &&
+ "should have at most one element covering one byte");
+
+ // Figure out which bits we want and discard the rest.
+ llvm::APInt updateMask(charWidth, 0);
+ if (cgm.getDataLayout().isBigEndian())
+ updateMask.setBits(charWidth - offsetWithinChar - wantedBits,
+ charWidth - offsetWithinChar);
+ else
+ updateMask.setBits(offsetWithinChar, offsetWithinChar + wantedBits);
+ bitsThisChar &= updateMask;
+ bool isNull = false;
+ if (*firstElemToUpdate < elements.size()) {
+ auto firstEltToUpdate =
+ mlir::dyn_cast<cir::IntAttr>(elements[*firstElemToUpdate].element);
+ isNull = firstEltToUpdate && firstEltToUpdate.isNullValue();
+ }
+
+ if (*firstElemToUpdate == *lastElemToUpdate || isNull) {
+ // All existing bits are either zero or undef.
+ add(cir::IntAttr::get(charTy, bitsThisChar), offsetInChars,
+ /*allowOverwrite*/ true);
+ } else {
+ cir::IntAttr ci =
+ mlir::dyn_cast<cir::IntAttr>(elements[*firstElemToUpdate].element);
+ // In order to perform a partial update, we need the existing bitwise
+ // value, which we can only extract for a constant int.
+ if (!ci)
+ return false;
+ // Because this is a 1-CharUnit range, the constant occupying it must
+ // be exactly one CharUnit wide.
+ assert(ci.getBitWidth() == charWidth && "splitAt failed");
+ assert((!(ci.getValue() & updateMask) || allowOverwrite) &&
+ "unexpectedly overwriting bitfield");
+ bitsThisChar |= (ci.getValue() & ~updateMask);
+ elements[*firstElemToUpdate].element =
+ cir::IntAttr::get(charTy, bitsThisChar);
+ }
+ }
+
+ // Stop if we've added all the bits.
+ if (wantedBits == bits.getBitWidth())
+ break;
+
+ // Remove the consumed bits from Bits.
+ if (!cgm.getDataLayout().isBigEndian())
+ bits.lshrInPlace(wantedBits);
+ bits = bits.trunc(bits.getBitWidth() - wantedBits);
+
+ // The remaining bits go at the start of the following bytes.
+ offsetWithinChar = 0;
+ }
+
+ return true;
+}
+
+/// Returns a position within elements such that all elements
+/// before the returned index end before pos and all elements at or after
+/// the returned index begin at or after pos. Splits elements as necessary
+/// to ensure this. Returns std::nullopt if we find something we can't split.
+std::optional<size_t> ConstantAggregateBuilder::splitAt(CharUnits pos) {
+ if (pos >= size)
+ return elements.size();
+
+ while (true) {
+ // Find the first element that starts after pos.
+ Element *iter =
+ llvm::upper_bound(elements, pos, [](CharUnits pos, const Element &elt) {
+ return pos < elt.offset;
+ });
+
+ if (iter == elements.begin())
+ return 0;
+
+ size_t index = iter - elements.begin() - 1;
+ const Element &elt = elements[index];
+
+ // If we already have an element starting at pos, we're done.
+ if (elt.offset == pos)
+ return index;
+
+ // Check for overlap with the element that starts before pos.
+ CharUnits eltEnd = elt.offset + getSize(elt.element);
+ if (eltEnd <= pos)
+ return index + 1;
+
+ // Try to decompose it into smaller constants.
+ if (!split(index, pos))
+ return std::nullopt;
+ }
+}
+
+/// Split the constant at index, if possible. Return true if we did.
+/// Hint indicates the location at which we'd like to split, but may be
+/// ignored.
+bool ConstantAggregateBuilder::split(size_t index, CharUnits hint) {
+ cgm.errorNYI("split constant at index");
+ return false;
+}
+
+void ConstantAggregateBuilder::condense(CharUnits offset,
+ mlir::Type desiredTy) {
+ CharUnits desiredSize = getSize(desiredTy);
+
+ std::optional<size_t> firstElemToReplace = splitAt(offset);
+ if (!firstElemToReplace)
+ return;
+ size_t first = *firstElemToReplace;
+
+ std::optional<size_t> lastElemToReplace = splitAt(offset + desiredSize);
+ if (!lastElemToReplace)
+ return;
+ size_t last = *lastElemToReplace;
+
+ size_t length = last - first;
+ if (length == 0)
+ return;
+
+ if (length == 1 && elements[first].offset == offset &&
+ getSize(elements[first].element) == desiredSize) {
+ cgm.errorNYI("re-wrapping single element records");
+ return;
+ }
+
+ // Build a new constant from the elements in the range.
+ SmallVector<Element> subElems(elements.begin() + first,
+ elements.begin() + last);
+ mlir::Attribute replacement =
+ buildFrom(cgm, subElems, offset, desiredSize,
+ /*naturalLayout=*/false, desiredTy, false);
+
+ // Replace the range with the condensed constant.
+ Element newElt(mlir::cast<mlir::TypedAttr>(replacement), offset);
+ replace(elements, first, last, {newElt});
+}
+
mlir::Attribute
ConstantAggregateBuilder::buildFrom(CIRGenModule &cgm, ArrayRef<Element> elems,
CharUnits startOffset, CharUnits size,
@@ -301,6 +497,9 @@ class ConstRecordBuilder {
bool appendBytes(CharUnits fieldOffsetInChars, mlir::TypedAttr initCst,
bool allowOverwrite = false);
+ bool appendBitField(const FieldDecl *field, uint64_t fieldOffset,
+ cir::IntAttr ci, bool allowOverwrite = false);
+
bool build(InitListExpr *ile, bool allowOverwrite);
bool build(const APValue &val, const RecordDecl *rd, bool isPrimaryBase,
const CXXRecordDecl *vTableClass, CharUnits baseOffset);
@@ -325,6 +524,30 @@ bool ConstRecordBuilder::appendBytes(CharUnits fieldOffsetInChars,
return builder.add(initCst, startOffset + fieldOffsetInChars, allowOverwrite);
}
+bool ConstRecordBuilder::appendBitField(const FieldDecl *field,
+ uint64_t fieldOffset, cir::IntAttr ci,
+ bool allowOverwrite) {
+ const CIRGenRecordLayout &rl =
+ cgm.getTypes().getCIRGenRecordLayout(field->getParent());
+ const CIRGenBitFieldInfo &info = rl.getBitFieldInfo(field);
+ llvm::APInt fieldValue = ci.getValue();
+
+ // Promote the size of FieldValue if necessary
+ // FIXME: This should never occur, but currently it can because initializer
+ // constants are cast to bool, and because clang is not enforcing bitfield
+ // width limits.
+ if (info.size > fieldValue.getBitWidth())
+ fieldValue = fieldValue.zext(info.size);
+
+ // Truncate the size of FieldValue to the bit field size.
+ if (info.size < fieldValue.getBitWidth())
+ fieldValue = fieldValue.trunc(info.size);
+
+ return builder.addBits(fieldValue,
+ cgm.getASTContext().toBits(startOffset) + fieldOffset,
+ allowOverwrite);
+}
+
bool ConstRecordBuilder::build(InitListExpr *ile, bool allowOverwrite) {
RecordDecl *rd = ile->getType()
->castAs<clang::RecordType>()
@@ -407,12 +630,14 @@ bool ConstRecordBuilder::build(InitListExpr *ile, bool allowOverwrite) {
} else {
// Otherwise we have a bitfield.
if (auto constInt = dyn_cast<cir::IntAttr>(eltInit)) {
- assert(!cir::MissingFeatures::bitfields());
- cgm.errorNYI(field->getSourceRange(), "bitfields");
+ if (!appendBitField(field, layout.getFieldOffset(index), constInt,
+ allowOverwrite))
+ return false;
+ } else {
+ // We are trying to initialize a bitfield with a non-trivial constant,
+ // this must require run-time code.
+ return false;
}
- // We are trying to initialize a bitfield with a non-trivial constant,
- // this must require run-time code.
- return false;
}
}
@@ -510,8 +735,16 @@ bool ConstRecordBuilder::build(const APValue &val, const RecordDecl *rd,
if (field->hasAttr<NoUniqueAddressAttr>())
allowOverwrite = true;
} else {
- assert(!cir::MissingFeatures::bitfields());
- cgm.errorNYI(field->getSourceRange(), "bitfields");
+ // Otherwise we have a bitfield.
+ if (auto constInt = dyn_cast<cir::IntAttr>(eltInit)) {
+ if (!appendBitField(field, layout.getFieldOffset(index) + offsetBits,
+ constInt, allowOverwrite))
+ return false;
+ } else {
+ // We are trying to initialize a bitfield with a non-trivial constant,
+ // this must require run-time code.
+ return false;
+ }
}
}
diff --git a/clang/test/CIR/CodeGen/constant-inits.cpp b/clang/test/CIR/CodeGen/constant-inits.cpp
index c9153c91ebc22..d5a7bb9d57251 100644
--- a/clang/test/CIR/CodeGen/constant-inits.cpp
+++ b/clang/test/CIR/CodeGen/constant-inits.cpp
@@ -30,6 +30,41 @@ struct simple {
int a, b;
};
+// Byte-aligned bitfields
+struct byte_aligned_bitfields {
+ unsigned int a : 8;
+ unsigned int b : 8;
+ unsigned int c : 16;
+};
+
+struct signed_byte_aligned_bitfields {
+ int x : 8;
+ int y : 8;
+};
+
+struct single_byte_bitfield {
+ unsigned char a : 8;
+};
+
+// Partial bitfields (sub-byte)
+struct partial_bitfields {
+ unsigned int a : 3;
+ unsigned int b : 5;
+ unsigned int c : 8;
+};
+
+struct signed_partial_bitfields {
+ int x : 4;
+ int y : 4;
+};
+
+struct mixed_partial_bitfields {
+ unsigned char a : 1;
+ unsigned char b : 1;
+ unsigned char c : 1;
+ unsigned char d : 5;
+};
+
void function() {
constexpr static empty e;
@@ -54,8 +89,22 @@ void function() {
constexpr static simple simple_array[] {
s, {1111, 2222}, s
};
+
+ // Byte-aligned bitfield tests
+ constexpr static byte_aligned_bitfields ba_bf1 = {0xFF, 0xAA, 0x1234};
+ constexpr static signed_byte_aligned_bitfields ba_bf2 = {-1, 127};
+ constexpr static single_byte_bitfield ba_bf3 = {42};
+
+ // Partial bitfield tests
+ constexpr static partial_bitfields p_bf1 = {1, 2, 3};
+ constexpr static signed_partial_bitfields p_bf2 = {-1, 7};
+ constexpr static mixed_partial_bitfields p_bf3 = {1, 0, 1, 15};
}
+// Anonymous struct type definitions for bitfields
+// CIR-DAG: !rec_anon_struct = !cir.record<struct {!u8i, !u8i, !u8i, !u8i}>
+// CIR-DAG: !rec_anon_struct1 = !cir.record<struct {!u8i, !u8i, !cir.array<!u8i x 2>}>
+
// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE1e = #cir.zero : !rec_empty
// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE1s = #cir.const_record<{#cir.int<0> : !s32i, #cir.int<-1> : !s32i}> : !rec_simple
// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE2p1 = #cir.const_record<{#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.const_array<[#cir.int<99> : !s8i, #cir.int<88> : !s8i, #cir.int<77> : !s8i]> : !cir.array<!s8i x 3>, #cir.int<40> : !s32i}> : !rec_Point
@@ -83,6 +132,33 @@ void function() {
// CIR-DAG-SAME: #cir.zero : !rec_packed_and_aligned
// CIR-DAG-SAME: ]> : !cir.array<!rec_packed_and_aligned x 2>
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE6ba_bf1 = #cir.const_record<{
+// CIR-DAG-SAME: #cir.int<255> : !u8i,
+// CIR-DAG-SAME: #cir.int<170> : !u8i,
+// CIR-DAG-SAME: #cir.int<52> : !u8i,
+// CIR-DAG-SAME: #cir.int<18> : !u8i
+// CIR-DAG-SAME: }> : !rec_anon_struct
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE6ba_bf2 = #cir.const_record<{
+// CIR-DAG-SAME: #cir.int<255> : !u8i,
+// CIR-DAG-SAME: #cir.int<127> : !u8i,
+// CIR-DAG-SAME: #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>
+// CIR-DAG-SAME: }> : !rec_anon_struct1
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE6ba_bf3 = #cir.const_record<{
+// CIR-DAG-SAME: #cir.int<42> : !u8i
+// CIR-DAG-SAME: }> : !rec_single_byte_bitfield
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5p_bf1 = #cir.const_record<{
+// CIR-DAG-SAME: #cir.int<17> : !u8i,
+// CIR-DAG-SAME: #cir.int<3> : !u8i,
+// CIR-DAG-SAME: #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>
+// CIR-DAG-SAME: }> : !rec_anon_struct1
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5p_bf2 = #cir.const_record<{
+// CIR-DAG-SAME: #cir.int<127> : !u8i,
+// CIR-DAG-SAME: #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 3>
+// CIR-DAG-SAME: }> : !rec_signed_partial_bitfields
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5p_bf3 = #cir.const_record<{
+// CIR-DAG-SAME: #cir.int<125> : !u8i
+// CIR-DAG-SAME: }> : !rec_mixed_partial_bitfields
+
// CIR-LABEL: cir.func dso_local @_Z8functionv()
// CIR: cir.return
@@ -96,6 +172,12 @@ void function() {
// LLVM-DAG: @_ZZ8functionvE3paa = internal global %struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 0 }>
// LLVM-DAG: @_ZZ8functionvE5array = internal global [2 x %struct.Point] [%struct.Point { i32 123, i32 456, [3 x i8] c"\0B\16!", i32 789 }, %struct.Point { i32 10, i32 20, [3 x i8] zeroinitializer, i32 40 }]
// LLVM-DAG: @_ZZ8functionvE9paa_array = internal global [2 x %struct.packed_and_aligned] [%struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 0 }>, %struct.packed_and_aligned zeroinitializer]
+// LLVM-DAG: @_ZZ8functionvE6ba_bf1 = internal global { i8, i8, i8, i8 } { i8 -1, i8 -86, i8 52, i8 18 }
+// LLVM-DAG: @_ZZ8functionvE6ba_bf2 = internal global { i8, i8, [2 x i8] } { i8 -1, i8 127, [2 x i8] zeroinitializer }
+// LLVM-DAG: @_ZZ8functionvE6ba_bf3 = internal global %struct.single_byte_bitfield { i8 42 }
+// LLVM-DAG: @_ZZ8functionvE5p_bf1 = internal global { i8, i8, [2 x i8] } { i8 17, i8 3, [2 x i8] zeroinitializer }
+// LLVM-DAG: @_ZZ8functionvE5p_bf2 = internal global %struct.signed_partial_bitfields { i8 127, [3 x i8] zeroinitializer }
+// LLVM-DAG: @_ZZ8functionvE5p_bf3 = internal global %struct.mixed_partial_bitfields { i8 125 }
// LLVM-LABEL: define{{.*}} void @_Z8functionv
// LLVM: ret void
@@ -110,6 +192,12 @@ void function() {
// OGCG-DAG: @_ZZ8functionvE3paa = internal constant %struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 undef }>
// OGCG-DAG: @_ZZ8functionvE5array = internal constant [2 x %struct.Point] [%struct.Point { i32 123, i32 456, [3 x i8] c"\0B\16!", i32 789 }, %struct.Point { i32 10, i32 20, [3 x i8] zeroinitializer, i32 40 }]
// OGCG-DAG: @_ZZ8functionvE9paa_array = internal constant [2 x %struct.packed_and_aligned] [%struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 undef }>, %struct.packed_and_aligned <{ i16 0, i8 0, float 0.000000e+00, i8 undef }>]
+// OGCG-DAG: @_ZZ8functionvE6ba_bf1 = internal constant { i8, i8, i8, i8 } { i8 -1, i8 -86, i8 52, i8 18 }
+// OGCG-DAG: @_ZZ8functionvE6ba_bf2 = internal constant { i8, i8, [2 x i8] } { i8 -1, i8 127, [2 x i8] undef }
+// OGCG-DAG: @_ZZ8functionvE6ba_bf3 = internal constant %struct.single_byte_bitfield { i8 42 }
+// OGCG-DAG: @_ZZ8functionvE5p_bf1 = internal constant { i8, i8, [2 x i8] } { i8 17, i8 3, [2 x i8] undef }
+// OGCG-DAG: @_ZZ8functionvE5p_bf2 = internal constant %struct.signed_partial_bitfields { i8 127, [3 x i8] undef }
+// OGCG-DAG: @_ZZ8functionvE5p_bf3 = internal constant %struct.mixed_partial_bitfields { i8 125 }
// OGCG-LABEL: define{{.*}} void @_Z8functionv
// OGCG: ret void
More information about the cfe-commits
mailing list