[clang] [CIR] Add initial support for bitfields in structs (PR #142041)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 18 13:30:15 PDT 2025
https://github.com/Andres-Salamanca updated https://github.com/llvm/llvm-project/pull/142041
>From 8f154d2c1fd9c646966aa07c9a292d253f9e59cc Mon Sep 17 00:00:00 2001
From: Andres Salamanca <andrealebarbaritos at gmail.com>
Date: Thu, 29 May 2025 16:17:09 -0500
Subject: [PATCH 1/6] Add initial support for bitfields in structs and add
tests
---
clang/include/clang/CIR/MissingFeatures.h | 2 +
clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 114 +++++++++++++
.../CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp | 155 +++++++++++++++++-
clang/test/CIR/CodeGen/bitfields.cpp | 79 +++++++++
4 files changed, 344 insertions(+), 6 deletions(-)
create mode 100644 clang/test/CIR/CodeGen/bitfields.cpp
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 7f20424e9b675..7aac1f77dadd3 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -150,6 +150,8 @@ struct MissingFeatures {
static bool cxxabiUseARMGuardVarABI() { return false; }
static bool cxxabiAppleARM64CXXABI() { return false; }
static bool cxxabiStructorImplicitParam() { return false; }
+ static bool isDiscreteBitFieldABI() { return false; }
+ static bool isBigEndian() { return false; }
// Misc
static bool cirgenABIInfo() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index ac8832b8c9b24..f3c0e865ad537 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -14,6 +14,106 @@
namespace clang::CIRGen {
+/// Record with information about how a bitfield should be accessed. This is
+/// very similar to what LLVM codegen does. Once CIR evolves, it may be
+/// possible to use a higher-level representation.
+///
+/// Often we lay out a sequence of bitfields as a contiguous sequence of bits.
+/// When the AST record layout does this, we represent it in CIR as a
+/// `!cir.record` type, which directly reflects the structure's layout,
+/// including bitfield packing and padding, using CIR types such as
+/// `!cir.bool`, `!s8i`, `!u16i`.
+///
+/// To access a particular bitfield in CIR, we use the operations
+/// `cir.get_bitfield` (`GetBitfieldOp`) or `cir.set_bitfield`
+/// (`SetBitfieldOp`). These operations rely on the `bitfield_info`
+/// attribute, which provides detailed metadata required for access,
+/// such as the size and offset of the bitfield, the type and size of
+/// the underlying storage, and whether the value is signed.
+/// The CIRGenRecordLayout also has a bitFields map which encodes which
+/// byte-sequence this bitfield falls within. Let's assume the following C
+/// struct:
+///
+/// struct:
+///
+/// struct S {
+/// char a, b, c;
+/// unsigned bits : 3;
+/// unsigned more_bits : 4;
+/// unsigned still_more_bits : 7;
+/// };
+///
+/// This will end up as the following cir.record. The first array is the
+/// bitfield, and the second is the padding out to a 4-byte alignment.
+///
+/// !rec_S = !cir.record<struct "S" padded {!s8i, !s8i, !s8i, !u8i, !u8i,
+/// !cir.array<!u8i x 3>}>
+///
+/// When generating code to access more_bits, we'll generate something
+/// essentially like this:
+///
+/// #bfi_more_bits = #cir.bitfield_info<name = "more_bits", storage_type =
+/// !u16i, size = 4, offset = 3, is_signed = false>
+///
+/// cir.func @store_field() {
+/// %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s"] {alignment = 4 : i64}
+/// %1 = cir.const #cir.int<2> : !s32i
+/// %2 = cir.cast(integral, %1 : !s32i), !u32i
+/// %3 = cir.get_member %0[4] {name = "more_bits"} : !cir.ptr<!rec_S> ->
+/// !cir.ptr<!u16i>
+/// %4 = cir.set_bitfield(#bfi_more_bits, %3 :
+/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i cir.return
+/// }
+////
+struct CIRGenBitFieldInfo {
+ /// The offset within a contiguous run of bitfields that are represented as
+ /// a single "field" within the cir.record type. This offset is in bits.
+ unsigned offset : 16;
+
+ /// The total size of the bit-field, in bits.
+ unsigned size : 15;
+
+ /// Whether the bit-field is signed.
+ unsigned isSigned : 1;
+
+ /// The storage size in bits which should be used when accessing this
+ /// bitfield.
+ unsigned storageSize;
+
+ /// The offset of the bitfield storage from the start of the record.
+ clang::CharUnits storageOffset;
+
+ /// The offset within a contiguous run of bitfields that are represented as a
+ /// single "field" within the cir.record type, taking into account the AAPCS
+ /// rules for volatile bitfields. This offset is in bits.
+ unsigned volatileOffset : 16;
+
+ /// The storage size in bits which should be used when accessing this
+ /// bitfield.
+ unsigned volatileStorageSize;
+
+ /// The offset of the bitfield storage from the start of the record.
+ clang::CharUnits volatileStorageOffset;
+
+ /// The name of a bitfield
+ llvm::StringRef name;
+
+  /// The actual storage type for the bitfield.
+ mlir::Type storageType;
+
+ CIRGenBitFieldInfo()
+ : offset(), size(), isSigned(), storageSize(), volatileOffset(),
+ volatileStorageSize() {}
+
+ CIRGenBitFieldInfo(unsigned offset, unsigned size, bool isSigned,
+ unsigned storageSize, clang::CharUnits storageOffset)
+ : offset(offset), size(size), isSigned(isSigned),
+ storageSize(storageSize), storageOffset(storageOffset) {}
+
+ void print(llvm::raw_ostream &os) const;
+ void dump() const;
+};
+
/// This class handles record and union layout info while lowering AST types
/// to CIR types.
///
@@ -41,6 +141,10 @@ class CIRGenRecordLayout {
// for both virtual and non-virtual bases.
llvm::DenseMap<const clang::CXXRecordDecl *, unsigned> nonVirtualBases;
+  /// Map from (bit-field) record field to the corresponding CIRGenBitFieldInfo
+  /// describing how to access it. This info is populated by the record builder.
+ llvm::DenseMap<const clang::FieldDecl *, CIRGenBitFieldInfo> bitFields;
+
/// False if any direct or indirect subobject of this class, when considered
/// as a complete object, requires a non-zero bitpattern when
/// zero-initialized.
@@ -83,6 +187,16 @@ class CIRGenRecordLayout {
/// Check whether this struct can be C++ zero-initialized
/// with a zeroinitializer when considered as a base subobject.
bool isZeroInitializableAsBase() const { return zeroInitializableAsBase; }
+
+ /// Return the BitFieldInfo that corresponds to the field FD.
+ const CIRGenBitFieldInfo &getBitFieldInfo(const clang::FieldDecl *fd) const {
+ fd = fd->getCanonicalDecl();
+ assert(fd->isBitField() && "Invalid call for non-bit-field decl!");
+ llvm::DenseMap<const clang::FieldDecl *, CIRGenBitFieldInfo>::const_iterator
+ it = bitFields.find(fd);
+ assert(it != bitFields.end() && "Unable to find bitfield info");
+ return it->second;
+ }
};
} // namespace clang::CIRGen
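To make the bitfield_info metadata above concrete, here is a small, self-contained C++ sketch (illustrative only; not the CIR lowering or any API from this patch) showing how offset, size, isSigned, and the storage unit are enough to read a bit-field value on a little-endian target, which is the only case the patch handles (see the isBigEndian() assertion later in the patch):

  #include <cassert>
  #include <cstdint>

  // Mirrors the relevant CIRGenBitFieldInfo members; names are illustrative.
  struct BitFieldInfoSketch {
    unsigned offset;      // bit offset of the field within its storage unit
    unsigned size;        // width of the bit-field in bits
    bool isSigned;        // whether the loaded value should be sign-extended
    unsigned storageSize; // width of the storage unit in bits (<= 64 here)
  };

  // Conceptually what a consumer of cir.get_bitfield metadata does:
  // load the storage unit, shift, mask, and optionally sign-extend.
  int64_t loadBitField(uint64_t storage, const BitFieldInfoSketch &info) {
    uint64_t mask =
        info.size >= 64 ? ~uint64_t{0} : (uint64_t{1} << info.size) - 1;
    uint64_t value = (storage >> info.offset) & mask;
    if (info.isSigned && info.size < 64 && (value >> (info.size - 1)))
      value |= ~mask; // set the high bits to sign-extend
    return static_cast<int64_t>(value);
  }

  int main() {
    // The header comment's more_bits: size = 4, offset = 3, unsigned.
    BitFieldInfoSketch moreBits{/*offset=*/3, /*size=*/4, /*isSigned=*/false,
                                /*storageSize=*/16};
    uint64_t storage = 0x56; // bits 3..6 hold the value 10
    assert(loadBitField(storage, moreBits) == 10);
    return 0;
  }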
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
index 0aeef7fd89aef..f8e5fd2fc506d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
@@ -20,6 +20,7 @@
#include "clang/AST/RecordLayout.h"
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
#include "clang/CIR/Dialect/IR/CIRDataLayout.h"
+#include "clang/CIR/MissingFeatures.h"
#include "llvm/Support/Casting.h"
#include <memory>
@@ -66,6 +67,10 @@ struct CIRRecordLowering final {
return MemberInfo(offset, MemberInfo::InfoKind::Field, data);
}
+ // Layout routines.
+ void setBitFieldInfo(const FieldDecl *fd, CharUnits startOffset,
+ mlir::Type storageType);
+
void lower();
void lowerUnion();
@@ -77,6 +82,8 @@ struct CIRRecordLowering final {
void accumulateBases(const CXXRecordDecl *cxxRecordDecl);
void accumulateVPtrs();
void accumulateFields();
+ void accumulateBitFields(RecordDecl::field_iterator field,
+ RecordDecl::field_iterator fieldEnd);
CharUnits bitsToCharUnits(uint64_t bitOffset) {
return astContext.toCharUnitsFromBits(bitOffset);
@@ -87,6 +94,9 @@ struct CIRRecordLowering final {
CharUnits getSize(mlir::Type Ty) {
return CharUnits::fromQuantity(dataLayout.layout.getTypeSize(Ty));
}
+ CharUnits getSizeInBits(mlir::Type ty) {
+ return CharUnits::fromQuantity(dataLayout.layout.getTypeSizeInBits(ty));
+ }
CharUnits getAlignment(mlir::Type Ty) {
return CharUnits::fromQuantity(dataLayout.layout.getTypeABIAlignment(Ty));
}
@@ -124,6 +134,17 @@ struct CIRRecordLowering final {
mlir::Type getStorageType(const CXXRecordDecl *RD) {
return cirGenTypes.getCIRGenRecordLayout(RD).getBaseSubobjectCIRType();
}
+  // This differs from traditional LLVM codegen because CIRGen uses arrays of
+  // bytes instead of arbitrary-sized integers. This is important for packed
+  // structures support.
+ mlir::Type getBitfieldStorageType(unsigned numBits) {
+ unsigned alignedBits = llvm::alignTo(numBits, astContext.getCharWidth());
+ if (cir::isValidFundamentalIntWidth(alignedBits))
+ return builder.getUIntNTy(alignedBits);
+
+ mlir::Type type = getCharType();
+ return cir::ArrayType::get(type, alignedBits / astContext.getCharWidth());
+ }
mlir::Type getStorageType(const FieldDecl *fieldDecl) {
mlir::Type type = cirGenTypes.convertTypeForMem(fieldDecl->getType());
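A rough sketch of the rounding rule getBitfieldStorageType implements (assuming an 8-bit char and taking 8/16/32/64 as the valid fundamental integer widths; isValidFundamentalIntWidth below is an assumed stand-in, not the real CIR helper):

  #include <cassert>
  #include <string>

  // Assumed stand-in for cir::isValidFundamentalIntWidth.
  static bool isValidFundamentalIntWidth(unsigned bits) {
    return bits == 8 || bits == 16 || bits == 32 || bits == 64;
  }

  // Round the bitfield run up to a whole number of chars, then pick either a
  // single unsigned integer type or a byte array, as the member above does.
  static std::string bitfieldStorageTypeName(unsigned numBits) {
    const unsigned charWidth = 8;
    unsigned alignedBits = (numBits + charWidth - 1) / charWidth * charWidth;
    if (isValidFundamentalIntWidth(alignedBits))
      return "!u" + std::to_string(alignedBits) + "i";
    return "!cir.array<!u8i x " + std::to_string(alignedBits / charWidth) + ">";
  }

  int main() {
    assert(bitfieldStorageTypeName(14) == "!u16i");                // 14 -> 16 bits
    assert(bitfieldStorageTypeName(61) == "!u64i");                // 61 -> 64 bits
    assert(bitfieldStorageTypeName(72) == "!cir.array<!u8i x 9>"); // 9 bytes
    return 0;
  }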
@@ -157,6 +178,7 @@ struct CIRRecordLowering final {
std::vector<MemberInfo> members;
// Output fields, consumed by CIRGenTypes::computeRecordLayout
llvm::SmallVector<mlir::Type, 16> fieldTypes;
+ llvm::DenseMap<const FieldDecl *, CIRGenBitFieldInfo> bitFields;
llvm::DenseMap<const FieldDecl *, unsigned> fieldIdxMap;
llvm::DenseMap<const CXXRecordDecl *, unsigned> nonVirtualBases;
cir::CIRDataLayout dataLayout;
@@ -186,6 +208,32 @@ CIRRecordLowering::CIRRecordLowering(CIRGenTypes &cirGenTypes,
zeroInitializable(true), zeroInitializableAsBase(true), packed(packed),
padded(false) {}
+void CIRRecordLowering::setBitFieldInfo(const FieldDecl *fd,
+ CharUnits startOffset,
+ mlir::Type storageType) {
+ CIRGenBitFieldInfo &info = bitFields[fd->getCanonicalDecl()];
+ info.isSigned = fd->getType()->isSignedIntegerOrEnumerationType();
+ info.offset =
+ (unsigned)(getFieldBitOffset(fd) - astContext.toBits(startOffset));
+ info.size = fd->getBitWidthValue();
+ info.storageSize = getSizeInBits(storageType).getQuantity();
+ info.storageOffset = startOffset;
+ info.storageType = storageType;
+ info.name = fd->getName();
+
+ if (info.size > info.storageSize)
+ info.size = info.storageSize;
+ // Reverse the bit offsets for big endian machines. Because we represent
+ // a bitfield as a single large integer load, we can imagine the bits
+ // counting from the most-significant-bit instead the
+ // least-significant-bit.
+ assert(!cir::MissingFeatures::isBigEndian());
+
+ info.volatileStorageSize = 0;
+ info.volatileOffset = 0;
+ info.volatileStorageOffset = CharUnits::Zero();
+}
+
void CIRRecordLowering::lower() {
if (recordDecl->isUnion()) {
lowerUnion();
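A minimal worked example (with assumed but representative values) of the info.offset arithmetic in setBitFieldInfo, using 'more_bits' from the header comment: three leading chars plus bits:3 put more_bits at AST bit offset 27, while its storage unit starts at byte 3 of the record:

  #include <cassert>

  int main() {
    const unsigned charWidth = 8;   // astContext.getCharWidth() on common targets
    unsigned fieldBitOffset = 27;   // getFieldBitOffset(more_bits)
    unsigned startOffsetChars = 3;  // startOffset: storage unit begins at byte 3
    unsigned offset = fieldBitOffset - startOffsetChars * charWidth;
    assert(offset == 3);            // matches offset = 3 in #bfi_more_bits
    return 0;
  }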
@@ -233,6 +281,8 @@ void CIRRecordLowering::fillOutputFields() {
fieldTypes.size() - 1;
// A field without storage must be a bitfield.
assert(!cir::MissingFeatures::bitfields());
+ if (!member.data)
+ setBitFieldInfo(member.fieldDecl, member.offset, fieldTypes.back());
} else if (member.kind == MemberInfo::InfoKind::Base) {
nonVirtualBases[member.cxxRecordDecl] = fieldTypes.size() - 1;
}
@@ -240,16 +290,107 @@ void CIRRecordLowering::fillOutputFields() {
}
}
+void CIRRecordLowering::accumulateBitFields(
+ RecordDecl::field_iterator field, RecordDecl::field_iterator fieldEnd) {
+ // Run stores the first element of the current run of bitfields. FieldEnd is
+ // used as a special value to note that we don't have a current run. A
+ // bitfield run is a contiguous collection of bitfields that can be stored in
+ // the same storage block. Zero-sized bitfields and bitfields that would
+ // cross an alignment boundary break a run and start a new one.
+ RecordDecl::field_iterator run = fieldEnd;
+ // Tail is the offset of the first bit off the end of the current run. It's
+ // used to determine if the ASTRecordLayout is treating these two bitfields as
+ // contiguous. StartBitOffset is offset of the beginning of the Run.
+ uint64_t startBitOffset, tail = 0;
+ assert(!cir::MissingFeatures::isDiscreteBitFieldABI());
+
+ // Check if OffsetInRecord (the size in bits of the current run) is better
+ // as a single field run. When OffsetInRecord has legal integer width, and
+ // its bitfield offset is naturally aligned, it is better to make the
+ // bitfield a separate storage component so as it can be accessed directly
+ // with lower cost.
+ auto isBetterAsSingleFieldRun = [&](uint64_t offsetInRecord,
+ uint64_t startBitOffset,
+ uint64_t nextTail = 0) {
+ if (!cirGenTypes.getCGModule().getCodeGenOpts().FineGrainedBitfieldAccesses)
+ return false;
+ cirGenTypes.getCGModule().errorNYI(field->getSourceRange(),
+ "NYI FineGrainedBitfield");
+ return true;
+ };
+
+ // The start field is better as a single field run.
+ bool startFieldAsSingleRun = false;
+ for (;;) {
+ // Check to see if we need to start a new run.
+ if (run == fieldEnd) {
+ // If we're out of fields, return.
+ if (field == fieldEnd)
+ break;
+ // Any non-zero-length bitfield can start a new run.
+ if (!field->isZeroLengthBitField()) {
+ run = field;
+ startBitOffset = getFieldBitOffset(*field);
+ tail = startBitOffset + field->getBitWidthValue();
+ startFieldAsSingleRun =
+ isBetterAsSingleFieldRun(tail - startBitOffset, startBitOffset);
+ }
+ ++field;
+ continue;
+ }
+
+ // If the start field of a new run is better as a single run, or if current
+ // field (or consecutive fields) is better as a single run, or if current
+ // field has zero width bitfield and either UseZeroLengthBitfieldAlignment
+ // or UseBitFieldTypeAlignment is set to true, or if the offset of current
+ // field is inconsistent with the offset of previous field plus its offset,
+ // skip the block below and go ahead to emit the storage. Otherwise, try to
+ // add bitfields to the run.
+ uint64_t nextTail = tail;
+ if (field != fieldEnd)
+ nextTail += field->getBitWidthValue();
+
+ if (!startFieldAsSingleRun && field != fieldEnd &&
+ !isBetterAsSingleFieldRun(tail - startBitOffset, startBitOffset,
+ nextTail) &&
+ (!field->isZeroLengthBitField() ||
+ (!astContext.getTargetInfo().useZeroLengthBitfieldAlignment() &&
+ !astContext.getTargetInfo().useBitFieldTypeAlignment())) &&
+ tail == getFieldBitOffset(*field)) {
+ tail = nextTail;
+ ++field;
+ continue;
+ }
+
+ // We've hit a break-point in the run and need to emit a storage field.
+ mlir::Type type = getBitfieldStorageType(tail - startBitOffset);
+
+ // Add the storage member to the record and set the bitfield info for all of
+ // the bitfields in the run. Bitfields get the offset of their storage but
+ // come afterward and remain there after a stable sort.
+ members.push_back(makeStorageInfo(bitsToCharUnits(startBitOffset), type));
+ for (; run != field; ++run)
+ members.push_back(MemberInfo(bitsToCharUnits(startBitOffset),
+ MemberInfo::InfoKind::Field, nullptr, *run));
+ run = fieldEnd;
+ startFieldAsSingleRun = false;
+ }
+}
+
void CIRRecordLowering::accumulateFields() {
- for (const FieldDecl *field : recordDecl->fields()) {
+ for (RecordDecl::field_iterator field = recordDecl->field_begin(),
+ fieldEnd = recordDecl->field_end();
+ field != fieldEnd;) {
if (field->isBitField()) {
- cirGenTypes.getCGModule().errorNYI(recordDecl->getSourceRange(),
- "accumulate bitfields");
- ++field;
+ RecordDecl::field_iterator start = field;
+ // Iterate to gather the list of bitfields.
+ for (++field; field != fieldEnd && field->isBitField(); ++field)
+ ;
+ accumulateBitFields(start, field);
} else if (!field->isZeroSize(astContext)) {
- members.push_back(MemberInfo(bitsToCharUnits(getFieldBitOffset(field)),
+ members.push_back(MemberInfo(bitsToCharUnits(getFieldBitOffset(*field)),
MemberInfo::InfoKind::Field,
- getStorageType(field), field));
+ getStorageType(*field), *field));
++field;
} else {
// TODO(cir): do we want to do anything special about zero size members?
@@ -383,6 +524,8 @@ CIRGenTypes::computeRecordLayout(const RecordDecl *rd, cir::RecordType *ty) {
// Add all the field numbers.
rl->fieldIdxMap.swap(lowering.fieldIdxMap);
+ rl->bitFields.swap(lowering.bitFields);
+
// Dump the layout, if requested.
if (getASTContext().getLangOpts().DumpRecordLayouts) {
cgm.errorNYI(rd->getSourceRange(), "computeRecordLayout: dump layout");
diff --git a/clang/test/CIR/CodeGen/bitfields.cpp b/clang/test/CIR/CodeGen/bitfields.cpp
new file mode 100644
index 0000000000000..2eedbcd4377e6
--- /dev/null
+++ b/clang/test/CIR/CodeGen/bitfields.cpp
@@ -0,0 +1,79 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+struct A {
+ char a, b, c;
+ unsigned bits : 3;
+ unsigned more_bits : 4;
+ unsigned still_more_bits : 7;
+};
+
+typedef struct {
+ int a : 4;
+ int b : 5;
+ int c;
+} D;
+
+typedef struct {
+ int a : 4;
+ int b : 27;
+ int c : 17;
+ int d : 2;
+ int e : 15;
+ unsigned f; // type other than int above, not a bitfield
+} S;
+
+typedef struct {
+ int a : 3; // one bitfield with size < 8
+ unsigned b;
+} T;
+
+typedef struct {
+ char a;
+ char b;
+ char c;
+
+ // startOffset 24 bits, new storage from here
+ int d: 2;
+ int e: 2;
+ int f: 4;
+ int g: 25;
+ int h: 3;
+ int i: 4;
+ int j: 3;
+ int k: 8;
+
+ int l: 14; // need to be a part of the new storage
+ // because (tail - startOffset) is 65 after 'l' field
+} U;
+
+// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}>
+// CIR-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i}>
+// CIR-DAG: !rec_U = !cir.record<struct "U" {!s8i, !s8i, !s8i, !cir.array<!u8i x 9>}>
+// CIR-DAG: !rec_A = !cir.record<struct "A" padded {!s8i, !s8i, !s8i, !u8i, !u8i, !cir.array<!u8i x 3>}>
+// CIR-DAG: !rec_S = !cir.record<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}>
+
+
+// LLVM-DAG: %struct.D = type { i16, i32 }
+// LLVM-DAG: %struct.U = type { i8, i8, i8, [9 x i8] }
+// LLVM-DAG: %struct.A = type { i8, i8, i8, i8, i8, [3 x i8] }
+// LLVM-DAG: %struct.S = type { i32, [3 x i8], i16, i32 }
+// LLVM-DAG: %struct.T = type { i8, i32 }
+
+// OGCG-DAG: %struct.D = type { i16, i32 }
+// OGCG-DAG: %struct.U = type <{ i8, i8, i8, i8, i64 }>
+// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }>
+// OGCG-DAG: %struct.S = type { i64, i16, i32 }
+// OGCG-DAG: %struct.T = type { i8, i32 }
+
+void def() {
+ A a;
+ D d;
+ S s;
+ T t;
+ U u;
+}
>From 11a9717f6ad42bafb7698386525635b08659489d Mon Sep 17 00:00:00 2001
From: Andres Salamanca <andrealebarbaritos at gmail.com>
Date: Thu, 29 May 2025 16:31:14 -0500
Subject: [PATCH 2/6] Fix incorrect formatting
---
clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index f3c0e865ad537..6659706d2bcb2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -62,7 +62,8 @@ namespace clang::CIRGen {
/// %3 = cir.get_member %0[4] {name = "more_bits"} : !cir.ptr<!rec_S> ->
/// !cir.ptr<!u16i>
/// %4 = cir.set_bitfield(#bfi_more_bits, %3 :
-/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i cir.return
+/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i
+/// cir.return
/// }
////
struct CIRGenBitFieldInfo {
>From b0c5e4684d40f2474aa7d56d82f6dba7a6f2620d Mon Sep 17 00:00:00 2001
From: Andres Salamanca <andrealebarbaritos at gmail.com>
Date: Thu, 29 May 2025 16:33:58 -0500
Subject: [PATCH 3/6] Fix formatting
---
clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index 6659706d2bcb2..136772037ccd4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -34,8 +34,6 @@ namespace clang::CIRGen {
/// byte-sequence this bitfield falls within. Let's assume the following C
/// struct:
///
-/// struct:
-///
/// struct S {
/// char a, b, c;
/// unsigned bits : 3;
@@ -65,7 +63,7 @@ namespace clang::CIRGen {
/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i
/// cir.return
/// }
-////
+///
struct CIRGenBitFieldInfo {
/// The offset within a contiguous run of bitfields that are represented as
/// a single "field" within the cir.record type. This offset is in bits.
>From 865d544a26c9c88c4c7b37f15e992b91f03d28e1 Mon Sep 17 00:00:00 2001
From: Andres Salamanca <andrealebarbaritos at gmail.com>
Date: Tue, 3 Jun 2025 18:08:37 -0500
Subject: [PATCH 4/6] Applied code review changes
---
clang/include/clang/CIR/MissingFeatures.h | 1 +
clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 15 ++---
.../CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp | 60 +++++++++----------
clang/test/CIR/CodeGen/bitfields.cpp | 31 +++++-----
4 files changed, 53 insertions(+), 54 deletions(-)
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 7aac1f77dadd3..b8f7999198ff4 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -219,6 +219,7 @@ struct MissingFeatures {
static bool peepholeProtection() { return false; }
static bool instrumentation() { return false; }
static bool cleanupAfterErrorDiags() { return false; }
+ static bool nonFineGrainedBitfields() { return false; }
// Missing types
static bool dataMemberType() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index 136772037ccd4..a852a1edac139 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -41,8 +41,9 @@ namespace clang::CIRGen {
/// unsigned still_more_bits : 7;
/// };
///
-/// This will end up as the following cir.record. The first array is the
-/// bitfield, and the second is the padding out to a 4-byte alignment.
+/// This will end up as the following cir.record. The bitfield members are
+/// represented by two !u8i values, and the array provides padding to align the
+/// struct to a 4-byte alignment.
///
/// !rec_S = !cir.record<struct "S" padded {!s8i, !s8i, !s8i, !u8i, !u8i,
/// !cir.array<!u8i x 3>}>
@@ -51,16 +52,16 @@ namespace clang::CIRGen {
/// essentially like this:
///
/// #bfi_more_bits = #cir.bitfield_info<name = "more_bits", storage_type =
-/// !u16i, size = 4, offset = 3, is_signed = false>
+/// !u8i, size = 4, offset = 3, is_signed = false>
///
/// cir.func @store_field() {
/// %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s"] {alignment = 4 : i64}
/// %1 = cir.const #cir.int<2> : !s32i
/// %2 = cir.cast(integral, %1 : !s32i), !u32i
-/// %3 = cir.get_member %0[4] {name = "more_bits"} : !cir.ptr<!rec_S> ->
-/// !cir.ptr<!u16i>
+/// %3 = cir.get_member %0[3] {name = "more_bits"} : !cir.ptr<!rec_S> ->
+/// !cir.ptr<!u8i>
/// %4 = cir.set_bitfield(#bfi_more_bits, %3 :
-/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i
+/// !cir.ptr<!u8i>, %2 : !u32i) -> !u32i
/// cir.return
/// }
///
@@ -110,7 +111,7 @@ struct CIRGenBitFieldInfo {
storageSize(storageSize), storageOffset(storageOffset) {}
void print(llvm::raw_ostream &os) const;
- void dump() const;
+ LLVM_DUMP_METHOD void dump() const;
};
/// This class handles record and union layout info while lowering AST types
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
index f8e5fd2fc506d..7f12448e2e525 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
@@ -223,9 +223,9 @@ void CIRRecordLowering::setBitFieldInfo(const FieldDecl *fd,
if (info.size > info.storageSize)
info.size = info.storageSize;
- // Reverse the bit offsets for big endian machines. Because we represent
- // a bitfield as a single large integer load, we can imagine the bits
- // counting from the most-significant-bit instead the
+ // Reverse the bit offsets for big endian machines. Since bitfields are laid
+ // out as packed bits within an integer-sized unit, we can imagine the bits
+ // counting from the most-significant-bit instead of the
// least-significant-bit.
assert(!cir::MissingFeatures::isBigEndian());
@@ -292,35 +292,25 @@ void CIRRecordLowering::fillOutputFields() {
void CIRRecordLowering::accumulateBitFields(
RecordDecl::field_iterator field, RecordDecl::field_iterator fieldEnd) {
- // Run stores the first element of the current run of bitfields. FieldEnd is
- // used as a special value to note that we don't have a current run. A
+ // 'run' stores the first element of the current run of bitfields. 'fieldEnd'
+ // is used as a special value to note that we don't have a current run. A
// bitfield run is a contiguous collection of bitfields that can be stored in
// the same storage block. Zero-sized bitfields and bitfields that would
// cross an alignment boundary break a run and start a new one.
RecordDecl::field_iterator run = fieldEnd;
- // Tail is the offset of the first bit off the end of the current run. It's
+ // 'tail' is the offset of the first bit off the end of the current run. It's
// used to determine if the ASTRecordLayout is treating these two bitfields as
- // contiguous. StartBitOffset is offset of the beginning of the Run.
+ // contiguous. 'startBitOffset' is offset of the beginning of the run.
uint64_t startBitOffset, tail = 0;
assert(!cir::MissingFeatures::isDiscreteBitFieldABI());
- // Check if OffsetInRecord (the size in bits of the current run) is better
+ // Check if 'offsetInRecord' (the size in bits of the current run) is better
// as a single field run. When OffsetInRecord has legal integer width, and
// its bitfield offset is naturally aligned, it is better to make the
// bitfield a separate storage component so as it can be accessed directly
// with lower cost.
- auto isBetterAsSingleFieldRun = [&](uint64_t offsetInRecord,
- uint64_t startBitOffset,
- uint64_t nextTail = 0) {
- if (!cirGenTypes.getCGModule().getCodeGenOpts().FineGrainedBitfieldAccesses)
- return false;
- cirGenTypes.getCGModule().errorNYI(field->getSourceRange(),
- "NYI FineGrainedBitfield");
- return true;
- };
+ assert(!cir::MissingFeatures::nonFineGrainedBitfields());
- // The start field is better as a single field run.
- bool startFieldAsSingleRun = false;
for (;;) {
// Check to see if we need to start a new run.
if (run == fieldEnd) {
@@ -332,27 +322,34 @@ void CIRRecordLowering::accumulateBitFields(
run = field;
startBitOffset = getFieldBitOffset(*field);
tail = startBitOffset + field->getBitWidthValue();
- startFieldAsSingleRun =
- isBetterAsSingleFieldRun(tail - startBitOffset, startBitOffset);
+ assert(!cir::MissingFeatures::nonFineGrainedBitfields());
}
++field;
continue;
}
- // If the start field of a new run is better as a single run, or if current
- // field (or consecutive fields) is better as a single run, or if current
- // field has zero width bitfield and either UseZeroLengthBitfieldAlignment
- // or UseBitFieldTypeAlignment is set to true, or if the offset of current
- // field is inconsistent with the offset of previous field plus its offset,
- // skip the block below and go ahead to emit the storage. Otherwise, try to
- // add bitfields to the run.
+ // Decide whether to continue extending the current bitfield run.
+ //
+ // Skip the block below and go directly to emitting storage if any of the
+ // following is true:
+ // - 1. The first field in the run is better treated as its own run.
+ // - 2. We have reached the end of the fields.
+ // - 3. The current field (or set of fields) is better as its own run.
+ // - 4. The current field is a zero-width bitfield or:
+ // - Zero-length bitfield alignment is enabled, and
+ // - Bitfield type alignment is enabled.
+ // - 5. The current field's offset doesn't match the expected tail (i.e.,
+ // layout isn't contiguous).
+ //
+ // If none of the above conditions are met, add the current field to the
+ // current run.
uint64_t nextTail = tail;
if (field != fieldEnd)
nextTail += field->getBitWidthValue();
- if (!startFieldAsSingleRun && field != fieldEnd &&
- !isBetterAsSingleFieldRun(tail - startBitOffset, startBitOffset,
- nextTail) &&
+ // TODO: add condition 1 and 3
+ assert(!cir::MissingFeatures::nonFineGrainedBitfields());
+ if (field != fieldEnd &&
(!field->isZeroLengthBitField() ||
(!astContext.getTargetInfo().useZeroLengthBitfieldAlignment() &&
!astContext.getTargetInfo().useBitFieldTypeAlignment())) &&
@@ -373,7 +370,6 @@ void CIRRecordLowering::accumulateBitFields(
members.push_back(MemberInfo(bitsToCharUnits(startBitOffset),
MemberInfo::InfoKind::Field, nullptr, *run));
run = fieldEnd;
- startFieldAsSingleRun = false;
}
}
diff --git a/clang/test/CIR/CodeGen/bitfields.cpp b/clang/test/CIR/CodeGen/bitfields.cpp
index 2eedbcd4377e6..3bd48ac4e09e8 100644
--- a/clang/test/CIR/CodeGen/bitfields.cpp
+++ b/clang/test/CIR/CodeGen/bitfields.cpp
@@ -12,12 +12,20 @@ struct A {
unsigned still_more_bits : 7;
};
+// CIR-DAG: !rec_A = !cir.record<struct "A" padded {!s8i, !s8i, !s8i, !u8i, !u8i, !cir.array<!u8i x 3>}>
+// LLVM-DAG: %struct.A = type { i8, i8, i8, i8, i8, [3 x i8] }
+// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }>
+
typedef struct {
int a : 4;
int b : 5;
int c;
} D;
+// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}>
+// LLVM-DAG: %struct.D = type { i16, i32 }
+// OGCG-DAG: %struct.D = type { i16, i32 }
+
typedef struct {
int a : 4;
int b : 27;
@@ -27,11 +35,19 @@ typedef struct {
unsigned f; // type other than int above, not a bitfield
} S;
+// CIR-DAG: !rec_S = !cir.record<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}>
+// LLVM-DAG: %struct.S = type { i32, [3 x i8], i16, i32 }
+// OGCG-DAG: %struct.S = type { i64, i16, i32 }
+
typedef struct {
int a : 3; // one bitfield with size < 8
unsigned b;
} T;
+// CIR-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i}>
+// LLVM-DAG: %struct.T = type { i8, i32 }
+// OGCG-DAG: %struct.T = type { i8, i32 }
+
typedef struct {
char a;
char b;
@@ -51,24 +67,9 @@ typedef struct {
// because (tail - startOffset) is 65 after 'l' field
} U;
-// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}>
-// CIR-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i}>
// CIR-DAG: !rec_U = !cir.record<struct "U" {!s8i, !s8i, !s8i, !cir.array<!u8i x 9>}>
-// CIR-DAG: !rec_A = !cir.record<struct "A" padded {!s8i, !s8i, !s8i, !u8i, !u8i, !cir.array<!u8i x 3>}>
-// CIR-DAG: !rec_S = !cir.record<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}>
-
-
-// LLVM-DAG: %struct.D = type { i16, i32 }
// LLVM-DAG: %struct.U = type { i8, i8, i8, [9 x i8] }
-// LLVM-DAG: %struct.A = type { i8, i8, i8, i8, i8, [3 x i8] }
-// LLVM-DAG: %struct.S = type { i32, [3 x i8], i16, i32 }
-// LLVM-DAG: %struct.T = type { i8, i32 }
-
-// OGCG-DAG: %struct.D = type { i16, i32 }
// OGCG-DAG: %struct.U = type <{ i8, i8, i8, i8, i64 }>
-// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }>
-// OGCG-DAG: %struct.S = type { i64, i16, i32 }
-// OGCG-DAG: %struct.T = type { i8, i32 }
void def() {
A a;
>From 8f1264d158709cee78a84adc1a9d9c3be5221a00 Mon Sep 17 00:00:00 2001
From: Andres Salamanca <andrealebarbaritos at gmail.com>
Date: Wed, 18 Jun 2025 15:24:43 -0500
Subject: [PATCH 5/6] Update accumulated bit fields algorithm
---
clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 10 +-
.../CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp | 261 +++++++++++++-----
clang/lib/CIR/CodeGen/TargetInfo.cpp | 37 +++
clang/lib/CIR/CodeGen/TargetInfo.h | 10 +
clang/test/CIR/CodeGen/bitfields.c | 79 ++++++
clang/test/CIR/CodeGen/bitfields.cpp | 52 +---
6 files changed, 321 insertions(+), 128 deletions(-)
create mode 100644 clang/test/CIR/CodeGen/bitfields.c
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index a852a1edac139..3b51ab784d374 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -42,26 +42,26 @@ namespace clang::CIRGen {
/// };
///
/// This will end up as the following cir.record. The bitfield members are
-/// represented by two !u8i values, and the array provides padding to align the
+/// represented by one !u16i value, and the array provides padding to align the
/// struct to a 4-byte alignment.
///
-/// !rec_S = !cir.record<struct "S" padded {!s8i, !s8i, !s8i, !u8i, !u8i,
+/// !rec_S = !cir.record<struct "S" padded {!s8i, !s8i, !s8i, !u16i,
/// !cir.array<!u8i x 3>}>
///
/// When generating code to access more_bits, we'll generate something
/// essentially like this:
///
/// #bfi_more_bits = #cir.bitfield_info<name = "more_bits", storage_type =
-/// !u8i, size = 4, offset = 3, is_signed = false>
+/// !u16i, size = 4, offset = 3, is_signed = false>
///
/// cir.func @store_field() {
/// %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s"] {alignment = 4 : i64}
/// %1 = cir.const #cir.int<2> : !s32i
/// %2 = cir.cast(integral, %1 : !s32i), !u32i
/// %3 = cir.get_member %0[3] {name = "more_bits"} : !cir.ptr<!rec_S> ->
-/// !cir.ptr<!u8i>
+/// !cir.ptr<!u16i>
/// %4 = cir.set_bitfield(#bfi_more_bits, %3 :
-/// !cir.ptr<!u8i>, %2 : !u32i) -> !u32i
+/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i
/// cir.return
/// }
///
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
index 7f12448e2e525..8dbf1b36a93b2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
@@ -82,8 +82,9 @@ struct CIRRecordLowering final {
void accumulateBases(const CXXRecordDecl *cxxRecordDecl);
void accumulateVPtrs();
void accumulateFields();
- void accumulateBitFields(RecordDecl::field_iterator field,
- RecordDecl::field_iterator fieldEnd);
+ RecordDecl::field_iterator
+ accumulateBitFields(RecordDecl::field_iterator field,
+ RecordDecl::field_iterator fieldEnd);
CharUnits bitsToCharUnits(uint64_t bitOffset) {
return astContext.toCharUnitsFromBits(bitOffset);
@@ -290,87 +291,199 @@ void CIRRecordLowering::fillOutputFields() {
}
}
-void CIRRecordLowering::accumulateBitFields(
- RecordDecl::field_iterator field, RecordDecl::field_iterator fieldEnd) {
- // 'run' stores the first element of the current run of bitfields. 'fieldEnd'
- // is used as a special value to note that we don't have a current run. A
- // bitfield run is a contiguous collection of bitfields that can be stored in
- // the same storage block. Zero-sized bitfields and bitfields that would
- // cross an alignment boundary break a run and start a new one.
- RecordDecl::field_iterator run = fieldEnd;
- // 'tail' is the offset of the first bit off the end of the current run. It's
- // used to determine if the ASTRecordLayout is treating these two bitfields as
- // contiguous. 'startBitOffset' is offset of the beginning of the run.
- uint64_t startBitOffset, tail = 0;
+RecordDecl::field_iterator
+CIRRecordLowering::accumulateBitFields(RecordDecl::field_iterator field,
+ RecordDecl::field_iterator fieldEnd) {
assert(!cir::MissingFeatures::isDiscreteBitFieldABI());
- // Check if 'offsetInRecord' (the size in bits of the current run) is better
- // as a single field run. When OffsetInRecord has legal integer width, and
- // its bitfield offset is naturally aligned, it is better to make the
- // bitfield a separate storage component so as it can be accessed directly
- // with lower cost.
- assert(!cir::MissingFeatures::nonFineGrainedBitfields());
+ CharUnits regSize =
+ bitsToCharUnits(astContext.getTargetInfo().getRegisterWidth());
+ unsigned charBits = astContext.getCharWidth();
+
+ // Data about the start of the span we're accumulating to create an access
+ // unit from. 'Begin' is the first bitfield of the span. If 'begin' is
+ // 'fieldEnd', we've not got a current span. The span starts at the
+ // 'beginOffset' character boundary. 'bitSizeSinceBegin' is the size (in bits)
+ // of the span -- this might include padding when we've advanced to a
+ // subsequent bitfield run.
+ RecordDecl::field_iterator begin = fieldEnd;
+ CharUnits beginOffset;
+ uint64_t bitSizeSinceBegin;
+
+ // The (non-inclusive) end of the largest acceptable access unit we've found
+ // since 'begin'. If this is 'begin', we're gathering the initial set of
+ // bitfields of a new span. 'bestEndOffset' is the end of that acceptable
+ // access unit -- it might extend beyond the last character of the bitfield
+ // run, using available padding characters.
+ RecordDecl::field_iterator bestEnd = begin;
+ CharUnits bestEndOffset;
+ bool bestClipped; // Whether the representation must be in a byte array.
for (;;) {
- // Check to see if we need to start a new run.
- if (run == fieldEnd) {
- // If we're out of fields, return.
- if (field == fieldEnd)
+ // atAlignedBoundary is true if 'field' is the (potential) start of a new
+ // span (or the end of the bitfields). When true, limitOffset is the
+ // character offset of that span and barrier indicates whether the new
+ // span cannot be merged into the current one.
+ bool atAlignedBoundary = false;
+    bool barrier = false; // a barrier is a zero-width bit-field or a
+                          // non-bit-field member
+ if (field != fieldEnd && field->isBitField()) {
+ uint64_t bitOffset = getFieldBitOffset(*field);
+ if (begin == fieldEnd) {
+ // Beginning a new span.
+ begin = field;
+ bestEnd = begin;
+
+ assert((bitOffset % charBits) == 0 && "Not at start of char");
+ beginOffset = bitsToCharUnits(bitOffset);
+ bitSizeSinceBegin = 0;
+ } else if ((bitOffset % charBits) != 0) {
+        // Bitfield occupies the same character as the previous bitfield, so it
+        // must be part of the same span. This can include zero-length
+        // bitfields, should the target not align them to character boundaries.
+        // Such non-alignment is at variance with the standards, which require
+        // zero-length bitfields be a barrier between access units. But of
+        // course we can't achieve that in the middle of a character.
+ assert(bitOffset ==
+ astContext.toBits(beginOffset) + bitSizeSinceBegin &&
+ "Concatenating non-contiguous bitfields");
+ } else {
+ // Bitfield potentially begins a new span. This includes zero-length
+ // bitfields on non-aligning targets that lie at character boundaries
+ // (those are barriers to merging).
+ if (field->isZeroLengthBitField())
+ barrier = true;
+ atAlignedBoundary = true;
+ }
+ } else {
+ // We've reached the end of the bitfield run. Either we're done, or this
+ // is a barrier for the current span.
+ if (begin == fieldEnd)
break;
- // Any non-zero-length bitfield can start a new run.
- if (!field->isZeroLengthBitField()) {
- run = field;
- startBitOffset = getFieldBitOffset(*field);
- tail = startBitOffset + field->getBitWidthValue();
- assert(!cir::MissingFeatures::nonFineGrainedBitfields());
+
+ barrier = true;
+ atAlignedBoundary = true;
+ }
+
+ // 'installBest' indicates whether we should create an access unit for the
+ // current best span: fields ['begin', 'bestEnd') occupying characters
+ // ['beginOffset', 'bestEndOffset').
+ bool installBest = false;
+ if (atAlignedBoundary) {
+ // 'field' is the start of a new span or the end of the bitfields. The
+ // just-seen span now extends to 'bitSizeSinceBegin'.
+
+ // Determine if we can accumulate that just-seen span into the current
+ // accumulation.
+ CharUnits accessSize = bitsToCharUnits(bitSizeSinceBegin + charBits - 1);
+ if (bestEnd == begin) {
+ // This is the initial run at the start of a new span. By definition,
+ // this is the best seen so far.
+ bestEnd = field;
+ bestEndOffset = beginOffset + accessSize;
+ // Assume clipped until proven not below.
+ bestClipped = true;
+ if (!bitSizeSinceBegin)
+ // A zero-sized initial span -- this will install nothing and reset
+ // for another.
+ installBest = true;
+ } else if (accessSize > regSize) {
+ // Accumulating the just-seen span would create a multi-register access
+ // unit, which would increase register pressure.
+ installBest = true;
+ }
+
+ if (!installBest) {
+ // Determine if accumulating the just-seen span will create an expensive
+ // access unit or not.
+ mlir::Type type = getUIntNType(astContext.toBits(accessSize));
+ if (!astContext.getTargetInfo().hasCheapUnalignedBitFieldAccess())
+ cirGenTypes.getCGModule().errorNYI(
+ field->getSourceRange(), "NYI CheapUnalignedBitFieldAccess");
+
+ if (!installBest) {
+ // Find the next used storage offset to determine what the limit of
+ // the current span is. That's either the offset of the next field
+ // with storage (which might be field itself) or the end of the
+ // non-reusable tail padding.
+ CharUnits limitOffset;
+ for (auto probe = field; probe != fieldEnd; ++probe)
+ if (!isEmptyFieldForLayout(astContext, *probe)) {
+ // A member with storage sets the limit.
+ assert((getFieldBitOffset(*probe) % charBits) == 0 &&
+ "Next storage is not byte-aligned");
+ limitOffset = bitsToCharUnits(getFieldBitOffset(*probe));
+ goto FoundLimit;
+ }
+ assert(!cir::MissingFeatures::cxxSupport());
+ limitOffset = astRecordLayout.getDataSize();
+ FoundLimit:
+ CharUnits typeSize = getSize(type);
+ if (beginOffset + typeSize <= limitOffset) {
+ // There is space before limitOffset to create a naturally-sized
+ // access unit.
+ bestEndOffset = beginOffset + typeSize;
+ bestEnd = field;
+ bestClipped = false;
+ }
+ if (barrier) {
+ // The next field is a barrier that we cannot merge across.
+ installBest = true;
+ } else if (cirGenTypes.getCGModule()
+ .getCodeGenOpts()
+ .FineGrainedBitfieldAccesses) {
+ assert(!cir::MissingFeatures::nonFineGrainedBitfields());
+ cirGenTypes.getCGModule().errorNYI(field->getSourceRange(),
+ "NYI FineGrainedBitfield");
+ } else {
+ // Otherwise, we're not installing. Update the bit size
+ // of the current span to go all the way to limitOffset, which is
+ // the (aligned) offset of next bitfield to consider.
+            // the (aligned) offset of the next bitfield to consider.
+ }
+ }
}
- ++field;
- continue;
}
- // Decide whether to continue extending the current bitfield run.
- //
- // Skip the block below and go directly to emitting storage if any of the
- // following is true:
- // - 1. The first field in the run is better treated as its own run.
- // - 2. We have reached the end of the fields.
- // - 3. The current field (or set of fields) is better as its own run.
- // - 4. The current field is a zero-width bitfield or:
- // - Zero-length bitfield alignment is enabled, and
- // - Bitfield type alignment is enabled.
- // - 5. The current field's offset doesn't match the expected tail (i.e.,
- // layout isn't contiguous).
- //
- // If none of the above conditions are met, add the current field to the
- // current run.
- uint64_t nextTail = tail;
- if (field != fieldEnd)
- nextTail += field->getBitWidthValue();
-
- // TODO: add condition 1 and 3
- assert(!cir::MissingFeatures::nonFineGrainedBitfields());
- if (field != fieldEnd &&
- (!field->isZeroLengthBitField() ||
- (!astContext.getTargetInfo().useZeroLengthBitfieldAlignment() &&
- !astContext.getTargetInfo().useBitFieldTypeAlignment())) &&
- tail == getFieldBitOffset(*field)) {
- tail = nextTail;
+ if (installBest) {
+ assert((field == fieldEnd || !field->isBitField() ||
+ (getFieldBitOffset(*field) % charBits) == 0) &&
+ "Installing but not at an aligned bitfield or limit");
+ CharUnits accessSize = bestEndOffset - beginOffset;
+ if (!accessSize.isZero()) {
+ // Add the storage member for the access unit to the record. The
+ // bitfields get the offset of their storage but come afterward and
+ // remain there after a stable sort.
+ mlir::Type type;
+ if (bestClipped) {
+ assert(getSize(getUIntNType(astContext.toBits(accessSize))) >
+ accessSize &&
+ "Clipped access need not be clipped");
+ type = getByteArrayType(accessSize);
+ } else {
+ type = getUIntNType(astContext.toBits(accessSize));
+ assert(getSize(type) == accessSize &&
+ "Unclipped access must be clipped");
+ }
+ members.push_back(makeStorageInfo(beginOffset, type));
+ for (; begin != bestEnd; ++begin)
+ if (!begin->isZeroLengthBitField())
+ members.push_back(MemberInfo(
+ beginOffset, MemberInfo::InfoKind::Field, nullptr, *begin));
+ }
+ // Reset to start a new span.
+ field = bestEnd;
+ begin = fieldEnd;
+ } else {
+ assert(field != fieldEnd && field->isBitField() &&
+ "Accumulating past end of bitfields");
+ assert(!barrier && "Accumulating across barrier");
+ // Accumulate this bitfield into the current (potential) span.
+ bitSizeSinceBegin += field->getBitWidthValue();
++field;
- continue;
}
-
- // We've hit a break-point in the run and need to emit a storage field.
- mlir::Type type = getBitfieldStorageType(tail - startBitOffset);
-
- // Add the storage member to the record and set the bitfield info for all of
- // the bitfields in the run. Bitfields get the offset of their storage but
- // come afterward and remain there after a stable sort.
- members.push_back(makeStorageInfo(bitsToCharUnits(startBitOffset), type));
- for (; run != field; ++run)
- members.push_back(MemberInfo(bitsToCharUnits(startBitOffset),
- MemberInfo::InfoKind::Field, nullptr, *run));
- run = fieldEnd;
}
+
+ return field;
}
void CIRRecordLowering::accumulateFields() {
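One of the span-terminating checks above compares the rounded-up span size against the register width. A small sketch of that arithmetic under the assumptions used by the tests (8-bit chars, 64-bit registers on x86_64):

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint64_t charBits = 8;                 // astContext.getCharWidth()
    const uint64_t regSizeChars = 64 / charBits; // getRegisterWidth() on x86_64
    uint64_t bitSizeSinceBegin = 65;             // example: one bit past a register
    // bitsToCharUnits(bitSizeSinceBegin + charBits - 1): round up to whole chars.
    uint64_t accessSizeChars = (bitSizeSinceBegin + charBits - 1) / charBits;
    assert(accessSizeChars == 9);
    // accessSize > regSize: merging the just-seen span would need a
    // multi-register access unit, so the current best span is installed instead.
    bool installBest = accessSizeChars > regSizeChars;
    assert(installBest);
    return 0;
  }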
@@ -382,7 +495,9 @@ void CIRRecordLowering::accumulateFields() {
// Iterate to gather the list of bitfields.
for (++field; field != fieldEnd && field->isBitField(); ++field)
;
- accumulateBitFields(start, field);
+ field = accumulateBitFields(start, field);
+ assert((field == fieldEnd || !field->isBitField()) &&
+ "Failed to accumulate all the bitfields");
} else if (!field->isZeroSize(astContext)) {
members.push_back(MemberInfo(bitsToCharUnits(getFieldBitOffset(*field)),
MemberInfo::InfoKind::Field,
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp
index 551341ff20c00..d2d32bbd9403c 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp
@@ -3,6 +3,43 @@
using namespace clang;
using namespace clang::CIRGen;
+
+bool clang::CIRGen::isEmptyRecordForLayout(const ASTContext &context,
+ QualType t) {
+ const RecordType *rt = t->getAs<RecordType>();
+ if (!rt)
+ return false;
+
+ const RecordDecl *rd = rt->getDecl();
+
+ // If this is a C++ record, check the bases first.
+ if (const CXXRecordDecl *cxxrd = dyn_cast<CXXRecordDecl>(rd)) {
+ if (cxxrd->isDynamicClass())
+ return false;
+
+ for (const auto &I : cxxrd->bases())
+ if (!isEmptyRecordForLayout(context, I.getType()))
+ return false;
+ }
+
+ for (const auto *I : rd->fields())
+ if (!isEmptyFieldForLayout(context, I))
+ return false;
+
+ return true;
+}
+
+bool clang::CIRGen::isEmptyFieldForLayout(const ASTContext &context,
+ const FieldDecl *fd) {
+ if (fd->isZeroLengthBitField())
+ return true;
+
+ if (fd->isUnnamedBitField())
+ return false;
+
+ return isEmptyRecordForLayout(context, fd->getType());
+}
+
namespace {
class X8664ABIInfo : public ABIInfo {
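For intuition, a few records and how these helpers would classify them, assuming the CIR versions follow the same rules as the classic CodeGen helpers they mirror (the types below are examples, not from the patch):

  // isEmptyRecordForLayout(...) would return:
  struct Empty {};                        // true: no bases, no fields
  struct ZeroWidthOnly { int : 0; };      // true: only a zero-width bit-field
  struct WrapsEmpty : Empty {             // true: empty base plus a field that
    [[no_unique_address]] Empty e;        //       is itself empty for layout
  };
  struct HasStorage { int x; };           // false: 'x' needs storage
  struct Dynamic { virtual void f() {} }; // false: dynamic classes are never empty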
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h
index d31d1ee82d90a..a5c548aa2c7c4 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.h
+++ b/clang/lib/CIR/CodeGen/TargetInfo.h
@@ -22,6 +22,16 @@
namespace clang::CIRGen {
+/// isEmptyFieldForLayout - Return true if the field is "empty", that is,
+/// either a zero-width bit-field or an isEmptyRecordForLayout.
+bool isEmptyFieldForLayout(const ASTContext &context, const FieldDecl *fd);
+
+/// isEmptyRecordForLayout - Return true if a structure contains only empty
+/// base classes (per isEmptyRecordForLayout) and fields (per
+/// isEmptyFieldForLayout). Note, C++ record fields are considered empty
+/// if the [[no_unique_address]] attribute would have made them empty.
+bool isEmptyRecordForLayout(const ASTContext &context, QualType t);
+
class TargetCIRGenInfo {
std::unique_ptr<ABIInfo> info;
diff --git a/clang/test/CIR/CodeGen/bitfields.c b/clang/test/CIR/CodeGen/bitfields.c
new file mode 100644
index 0000000000000..a3827ed1e2fa1
--- /dev/null
+++ b/clang/test/CIR/CodeGen/bitfields.c
@@ -0,0 +1,79 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+typedef struct {
+ char a, b, c;
+ unsigned bits : 3;
+ unsigned more_bits : 4;
+ unsigned still_more_bits : 7;
+} A;
+
+// CIR-DAG: !rec_A = !cir.record<struct "A" packed padded {!s8i, !s8i, !s8i, !u16i, !cir.array<!u8i x 3>}>
+// LLVM-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }>
+// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }>
+
+typedef struct {
+ int a : 4;
+ int b : 5;
+ int c;
+} D;
+
+// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}>
+// LLVM-DAG: %struct.D = type { i16, i32 }
+// OGCG-DAG: %struct.D = type { i16, i32 }
+
+typedef struct {
+ int a : 4;
+ int b : 27;
+ int c : 17;
+ int d : 2;
+ int e : 15;
+ unsigned f; // type other than int above, not a bitfield
+} S;
+// CIR-DAG: !rec_S = !cir.record<struct "S" {!u64i, !u16i, !u32i}>
+// LLVM-DAG: %struct.S = type { i64, i16, i32 }
+// OGCG-DAG: %struct.S = type { i64, i16, i32 }
+
+typedef struct {
+ int a : 3; // one bitfield with size < 8
+ unsigned b;
+} T;
+
+// CIR-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i}>
+// LLVM-DAG: %struct.T = type { i8, i32 }
+// OGCG-DAG: %struct.T = type { i8, i32 }
+
+typedef struct {
+ char a;
+ char b;
+ char c;
+
+ // startOffset 24 bits, new storage from here
+ int d: 2;
+ int e: 2;
+ int f: 4;
+ int g: 25;
+ int h: 3;
+ int i: 4;
+ int j: 3;
+ int k: 8;
+
+ int l: 14; // need to be a part of the new storage
+ // because (tail - startOffset) is 65 after 'l' field
+} U;
+
+// CIR-DAG: !rec_U = !cir.record<struct "U" packed {!s8i, !s8i, !s8i, !u8i, !u64i}>
+// LLVM-DAG: %struct.U = type <{ i8, i8, i8, i8, i64 }>
+// OGCG-DAG: %struct.U = type <{ i8, i8, i8, i8, i64 }>
+
+void def() {
+ A a;
+ D d;
+ S s;
+ T t;
+ U u;
+}
diff --git a/clang/test/CIR/CodeGen/bitfields.cpp b/clang/test/CIR/CodeGen/bitfields.cpp
index 3bd48ac4e09e8..762d249884741 100644
--- a/clang/test/CIR/CodeGen/bitfields.cpp
+++ b/clang/test/CIR/CodeGen/bitfields.cpp
@@ -5,27 +5,6 @@
// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
-struct A {
- char a, b, c;
- unsigned bits : 3;
- unsigned more_bits : 4;
- unsigned still_more_bits : 7;
-};
-
-// CIR-DAG: !rec_A = !cir.record<struct "A" padded {!s8i, !s8i, !s8i, !u8i, !u8i, !cir.array<!u8i x 3>}>
-// LLVM-DAG: %struct.A = type { i8, i8, i8, i8, i8, [3 x i8] }
-// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }>
-
-typedef struct {
- int a : 4;
- int b : 5;
- int c;
-} D;
-
-// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}>
-// LLVM-DAG: %struct.D = type { i16, i32 }
-// OGCG-DAG: %struct.D = type { i16, i32 }
-
typedef struct {
int a : 4;
int b : 27;
@@ -34,9 +13,8 @@ typedef struct {
int e : 15;
unsigned f; // type other than int above, not a bitfield
} S;
-
-// CIR-DAG: !rec_S = !cir.record<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}>
-// LLVM-DAG: %struct.S = type { i32, [3 x i8], i16, i32 }
+// CIR-DAG: !rec_S = !cir.record<struct "S" {!u64i, !u16i, !u32i}>
+// LLVM-DAG: %struct.S = type { i64, i16, i32 }
// OGCG-DAG: %struct.S = type { i64, i16, i32 }
typedef struct {
@@ -48,33 +26,7 @@ typedef struct {
// LLVM-DAG: %struct.T = type { i8, i32 }
// OGCG-DAG: %struct.T = type { i8, i32 }
-typedef struct {
- char a;
- char b;
- char c;
-
- // startOffset 24 bits, new storage from here
- int d: 2;
- int e: 2;
- int f: 4;
- int g: 25;
- int h: 3;
- int i: 4;
- int j: 3;
- int k: 8;
-
- int l: 14; // need to be a part of the new storage
- // because (tail - startOffset) is 65 after 'l' field
-} U;
-
-// CIR-DAG: !rec_U = !cir.record<struct "U" {!s8i, !s8i, !s8i, !cir.array<!u8i x 9>}>
-// LLVM-DAG: %struct.U = type { i8, i8, i8, [9 x i8] }
-// OGCG-DAG: %struct.U = type <{ i8, i8, i8, i8, i64 }>
-
void def() {
- A a;
- D d;
S s;
T t;
- U u;
}
>From bca08b6da9dee370f47fe9a4014782ebdc8b48a8 Mon Sep 17 00:00:00 2001
From: Andres Salamanca <andrealebarbaritos at gmail.com>
Date: Wed, 18 Jun 2025 15:27:43 -0500
Subject: [PATCH 6/6] Remove old comment
---
clang/test/CIR/CodeGen/bitfields.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/test/CIR/CodeGen/bitfields.c b/clang/test/CIR/CodeGen/bitfields.c
index a3827ed1e2fa1..ff5c6bc1787b4 100644
--- a/clang/test/CIR/CodeGen/bitfields.c
+++ b/clang/test/CIR/CodeGen/bitfields.c
@@ -62,8 +62,7 @@ typedef struct {
int j: 3;
int k: 8;
- int l: 14; // need to be a part of the new storage
- // because (tail - startOffset) is 65 after 'l' field
+ int l: 14;
} U;
// CIR-DAG: !rec_U = !cir.record<struct "U" packed {!s8i, !s8i, !s8i, !u8i, !u64i}>