[clang] [CIR] Add initial support for bitfields in structs (PR #142041)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 3 16:08:59 PDT 2025
https://github.com/Andres-Salamanca updated https://github.com/llvm/llvm-project/pull/142041
>From 8fd6f99f59d1bbde98696559141bcfe15920d175 Mon Sep 17 00:00:00 2001
From: Andres Salamanca <andrealebarbaritos at gmail.com>
Date: Thu, 29 May 2025 16:17:09 -0500
Subject: [PATCH 1/4] Add initial support for bitfields in structs and add
tests
---
clang/include/clang/CIR/MissingFeatures.h | 2 +
clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 114 +++++++++++++
.../CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp | 156 +++++++++++++++++-
clang/test/CIR/CodeGen/bitfields.cpp | 79 +++++++++
4 files changed, 345 insertions(+), 6 deletions(-)
create mode 100644 clang/test/CIR/CodeGen/bitfields.cpp
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index fb205e9cd85d1..6d7e3998d2da8 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -134,6 +134,8 @@ struct MissingFeatures {
static bool cxxSupport() { return false; }
static bool recordZeroInit() { return false; }
static bool zeroSizeRecordMembers() { return false; }
+ static bool isDiscreteBitFieldABI() { return false; }
+ static bool isBigEndian() { return false; }
// Misc
static bool cxxABI() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index 2ece85b8aa0a3..023f6bbaa47bb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -14,6 +14,106 @@
namespace clang::CIRGen {
+/// Record with information about how a bitfield should be accessed. This is
+/// very similar to what LLVM codegen does, once CIR evolves it's possible we
+/// can use a more higher level representation.
+///
+/// Often we lay out a sequence of bitfields as a contiguous sequence of bits.
+/// When the AST record layout does this, we represent it in CIR as a
+/// `!cir.record` type, which directly reflects the structure's layout,
+/// including bitfield packing and padding, using CIR types such as
+/// `!cir.bool`, `!s8i`, `!u16i`.
+///
+/// To access a particular bitfield in CIR, we use the operations
+/// `cir.get_bitfield` (`GetBitfieldOp`) or `cir.set_bitfield`
+/// (`SetBitfieldOp`). These operations rely on the `bitfield_info`
+/// attribute, which provides detailed metadata required for access,
+/// such as the size and offset of the bitfield, the type and size of
+/// the underlying storage, and whether the value is signed.
+/// The CIRGenRecordLayout also has a bitFields map which encodes which
+/// byte-sequence this bitfield falls within. Let's assume the following C
+/// struct:
+///
+/// struct:
+///
+/// struct S {
+/// char a, b, c;
+/// unsigned bits : 3;
+/// unsigned more_bits : 4;
+/// unsigned still_more_bits : 7;
+/// };
+///
+/// This will end up as the following cir.record. The first array is the
+/// bitfield, and the second is the padding out to a 4-byte alignment.
+///
+/// !rec_S = !cir.record<struct "S" padded {!s8i, !s8i, !s8i, !u8i, !u8i,
+/// !cir.array<!u8i x 3>}>
+///
+/// When generating code to access more_bits, we'll generate something
+/// essentially like this:
+///
+/// #bfi_more_bits = #cir.bitfield_info<name = "more_bits", storage_type =
+/// !u16i, size = 4, offset = 3, is_signed = false>
+///
+/// cir.func @store_field() {
+/// %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s"] {alignment = 4 : i64}
+/// %1 = cir.const #cir.int<2> : !s32i
+/// %2 = cir.cast(integral, %1 : !s32i), !u32i
+/// %3 = cir.get_member %0[4] {name = "more_bits"} : !cir.ptr<!rec_S> ->
+/// !cir.ptr<!u16i>
+/// %4 = cir.set_bitfield(#bfi_more_bits, %3 :
+/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i cir.return
+/// }
+////
+struct CIRGenBitFieldInfo {
+ /// The offset within a contiguous run of bitfields that are represented as
+ /// a single "field" within the cir.record type. This offset is in bits.
+ unsigned offset : 16;
+
+ /// The total size of the bit-field, in bits.
+ unsigned size : 15;
+
+ /// Whether the bit-field is signed.
+ unsigned isSigned : 1;
+
+ /// The storage size in bits which should be used when accessing this
+ /// bitfield.
+ unsigned storageSize;
+
+ /// The offset of the bitfield storage from the start of the record.
+ clang::CharUnits storageOffset;
+
+ /// The offset within a contiguous run of bitfields that are represented as a
+ /// single "field" within the cir.record type, taking into account the AAPCS
+ /// rules for volatile bitfields. This offset is in bits.
+ unsigned volatileOffset : 16;
+
+ /// The storage size in bits which should be used when accessing this
+ /// bitfield.
+ unsigned volatileStorageSize;
+
+ /// The offset of the bitfield storage from the start of the record.
+ clang::CharUnits volatileStorageOffset;
+
+ /// The name of a bitfield
+ llvm::StringRef name;
+
+ // The actual storage type for the bitfield
+ mlir::Type storageType;
+
+ CIRGenBitFieldInfo()
+ : offset(), size(), isSigned(), storageSize(), volatileOffset(),
+ volatileStorageSize() {}
+
+ CIRGenBitFieldInfo(unsigned offset, unsigned size, bool isSigned,
+ unsigned storageSize, clang::CharUnits storageOffset)
+ : offset(offset), size(size), isSigned(isSigned),
+ storageSize(storageSize), storageOffset(storageOffset) {}
+
+ void print(llvm::raw_ostream &os) const;
+ void dump() const;
+};
+
/// This class handles record and union layout info while lowering AST types
/// to CIR types.
///
@@ -33,6 +133,10 @@ class CIRGenRecordLayout {
/// field no. This info is populated by the record builder.
llvm::DenseMap<const clang::FieldDecl *, unsigned> fieldIdxMap;
+ /// Map from (bit-field) record field to the corresponding CIR record type
+ /// field no. This info is populated by record builder.
+ llvm::DenseMap<const clang::FieldDecl *, CIRGenBitFieldInfo> bitFields;
+
/// False if any direct or indirect subobject of this class, when considered
/// as a complete object, requires a non-zero bitpattern when
/// zero-initialized.
@@ -69,6 +173,16 @@ class CIRGenRecordLayout {
/// Check whether this struct can be C++ zero-initialized
/// with a zeroinitializer when considered as a base subobject.
bool isZeroInitializableAsBase() const { return zeroInitializableAsBase; }
+
+ /// Return the BitFieldInfo that corresponds to the field FD.
+ const CIRGenBitFieldInfo &getBitFieldInfo(const clang::FieldDecl *fd) const {
+ fd = fd->getCanonicalDecl();
+ assert(fd->isBitField() && "Invalid call for non-bit-field decl!");
+ llvm::DenseMap<const clang::FieldDecl *, CIRGenBitFieldInfo>::const_iterator
+ it = bitFields.find(fd);
+ assert(it != bitFields.end() && "Unable to find bitfield info");
+ return it->second;
+ }
};
} // namespace clang::CIRGen
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
index 53aa0aee36fc3..76933952a9dee 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
@@ -20,6 +20,7 @@
#include "clang/AST/RecordLayout.h"
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
#include "clang/CIR/Dialect/IR/CIRDataLayout.h"
+#include "clang/CIR/MissingFeatures.h"
#include "llvm/Support/Casting.h"
#include <memory>
@@ -63,6 +64,10 @@ struct CIRRecordLowering final {
return MemberInfo(offset, MemberInfo::InfoKind::Field, data);
}
+ // Layout routines.
+ void setBitFieldInfo(const FieldDecl *fd, CharUnits startOffset,
+ mlir::Type storageType);
+
void lower();
void lowerUnion();
@@ -72,6 +77,8 @@ struct CIRRecordLowering final {
void insertPadding();
void accumulateFields();
+ void accumulateBitFields(RecordDecl::field_iterator field,
+ RecordDecl::field_iterator fieldEnd);
CharUnits bitsToCharUnits(uint64_t bitOffset) {
return astContext.toCharUnitsFromBits(bitOffset);
@@ -82,6 +89,9 @@ struct CIRRecordLowering final {
CharUnits getSize(mlir::Type Ty) {
return CharUnits::fromQuantity(dataLayout.layout.getTypeSize(Ty));
}
+ CharUnits getSizeInBits(mlir::Type ty) {
+ return CharUnits::fromQuantity(dataLayout.layout.getTypeSizeInBits(ty));
+ }
CharUnits getAlignment(mlir::Type Ty) {
return CharUnits::fromQuantity(dataLayout.layout.getTypeABIAlignment(Ty));
}
@@ -112,6 +122,18 @@ struct CIRRecordLowering final {
: cir::ArrayType::get(type, numberOfChars.getQuantity());
}
+ // This is different from LLVM traditional codegen because CIRGen uses arrays
+ // of bytes instead of arbitrary-sized integers. This is important for packed
+ // structures support.
+ mlir::Type getBitfieldStorageType(unsigned numBits) {
+ unsigned alignedBits = llvm::alignTo(numBits, astContext.getCharWidth());
+ if (cir::isValidFundamentalIntWidth(alignedBits))
+ return builder.getUIntNTy(alignedBits);
+
+ mlir::Type type = getCharType();
+ return cir::ArrayType::get(type, alignedBits / astContext.getCharWidth());
+ }
+
mlir::Type getStorageType(const FieldDecl *fieldDecl) {
mlir::Type type = cirGenTypes.convertTypeForMem(fieldDecl->getType());
if (fieldDecl->isBitField()) {
@@ -144,6 +166,7 @@ struct CIRRecordLowering final {
std::vector<MemberInfo> members;
// Output fields, consumed by CIRGenTypes::computeRecordLayout
llvm::SmallVector<mlir::Type, 16> fieldTypes;
+ llvm::DenseMap<const FieldDecl *, CIRGenBitFieldInfo> bitFields;
llvm::DenseMap<const FieldDecl *, unsigned> fieldIdxMap;
cir::CIRDataLayout dataLayout;
@@ -172,6 +195,32 @@ CIRRecordLowering::CIRRecordLowering(CIRGenTypes &cirGenTypes,
zeroInitializable(true), zeroInitializableAsBase(true), packed(packed),
padded(false) {}
+void CIRRecordLowering::setBitFieldInfo(const FieldDecl *fd,
+ CharUnits startOffset,
+ mlir::Type storageType) {
+ CIRGenBitFieldInfo &info = bitFields[fd->getCanonicalDecl()];
+ info.isSigned = fd->getType()->isSignedIntegerOrEnumerationType();
+ info.offset =
+ (unsigned)(getFieldBitOffset(fd) - astContext.toBits(startOffset));
+ info.size = fd->getBitWidthValue();
+ info.storageSize = getSizeInBits(storageType).getQuantity();
+ info.storageOffset = startOffset;
+ info.storageType = storageType;
+ info.name = fd->getName();
+
+ if (info.size > info.storageSize)
+ info.size = info.storageSize;
+ // Reverse the bit offsets for big endian machines. Because we represent
+ // a bitfield as a single large integer load, we can imagine the bits
+ // counting from the most-significant-bit instead of the
+ // least-significant-bit.
+ assert(!cir::MissingFeatures::isBigEndian());
+
+ info.volatileStorageSize = 0;
+ info.volatileOffset = 0;
+ info.volatileStorageOffset = CharUnits::Zero();
+}
+
void CIRRecordLowering::lower() {
if (recordDecl->isUnion()) {
lowerUnion();
@@ -223,21 +272,114 @@ void CIRRecordLowering::fillOutputFields() {
fieldTypes.size() - 1;
// A field without storage must be a bitfield.
assert(!cir::MissingFeatures::bitfields());
+ if (!member.data)
+ setBitFieldInfo(member.fieldDecl, member.offset, fieldTypes.back());
}
assert(!cir::MissingFeatures::cxxSupport());
}
}
+void CIRRecordLowering::accumulateBitFields(
+ RecordDecl::field_iterator field, RecordDecl::field_iterator fieldEnd) {
+ // Run stores the first element of the current run of bitfields. FieldEnd is
+ // used as a special value to note that we don't have a current run. A
+ // bitfield run is a contiguous collection of bitfields that can be stored in
+ // the same storage block. Zero-sized bitfields and bitfields that would
+ // cross an alignment boundary break a run and start a new one.
+ RecordDecl::field_iterator run = fieldEnd;
+ // Tail is the offset of the first bit off the end of the current run. It's
+ // used to determine if the ASTRecordLayout is treating these two bitfields as
+ // contiguous. StartBitOffset is offset of the beginning of the Run.
+ uint64_t startBitOffset, tail = 0;
+ assert(!cir::MissingFeatures::isDiscreteBitFieldABI());
+
+ // Check if OffsetInRecord (the size in bits of the current run) is better
+ // as a single field run. When OffsetInRecord has legal integer width, and
+ // its bitfield offset is naturally aligned, it is better to make the
+ // bitfield a separate storage component so as it can be accessed directly
+ // with lower cost.
+ auto isBetterAsSingleFieldRun = [&](uint64_t offsetInRecord,
+ uint64_t startBitOffset,
+ uint64_t nextTail = 0) {
+ if (!cirGenTypes.getCGModule().getCodeGenOpts().FineGrainedBitfieldAccesses)
+ return false;
+ cirGenTypes.getCGModule().errorNYI(field->getSourceRange(),
+ "NYI FineGrainedBitfield");
+ return true;
+ };
+
+ // The start field is better as a single field run.
+ bool startFieldAsSingleRun = false;
+ for (;;) {
+ // Check to see if we need to start a new run.
+ if (run == fieldEnd) {
+ // If we're out of fields, return.
+ if (field == fieldEnd)
+ break;
+ // Any non-zero-length bitfield can start a new run.
+ if (!field->isZeroLengthBitField()) {
+ run = field;
+ startBitOffset = getFieldBitOffset(*field);
+ tail = startBitOffset + field->getBitWidthValue();
+ startFieldAsSingleRun =
+ isBetterAsSingleFieldRun(tail - startBitOffset, startBitOffset);
+ }
+ ++field;
+ continue;
+ }
+
+ // If the start field of a new run is better as a single run, or if current
+ // field (or consecutive fields) is better as a single run, or if current
+ // field has zero width bitfield and either UseZeroLengthBitfieldAlignment
+ // or UseBitFieldTypeAlignment is set to true, or if the offset of current
+ // field is inconsistent with the offset of previous field plus its offset,
+ // skip the block below and go ahead to emit the storage. Otherwise, try to
+ // add bitfields to the run.
+ uint64_t nextTail = tail;
+ if (field != fieldEnd)
+ nextTail += field->getBitWidthValue();
+
+ if (!startFieldAsSingleRun && field != fieldEnd &&
+ !isBetterAsSingleFieldRun(tail - startBitOffset, startBitOffset,
+ nextTail) &&
+ (!field->isZeroLengthBitField() ||
+ (!astContext.getTargetInfo().useZeroLengthBitfieldAlignment() &&
+ !astContext.getTargetInfo().useBitFieldTypeAlignment())) &&
+ tail == getFieldBitOffset(*field)) {
+ tail = nextTail;
+ ++field;
+ continue;
+ }
+
+ // We've hit a break-point in the run and need to emit a storage field.
+ mlir::Type type = getBitfieldStorageType(tail - startBitOffset);
+
+ // Add the storage member to the record and set the bitfield info for all of
+ // the bitfields in the run. Bitfields get the offset of their storage but
+ // come afterward and remain there after a stable sort.
+ members.push_back(makeStorageInfo(bitsToCharUnits(startBitOffset), type));
+ for (; run != field; ++run)
+ members.push_back(MemberInfo(bitsToCharUnits(startBitOffset),
+ MemberInfo::InfoKind::Field, nullptr, *run));
+ run = fieldEnd;
+ startFieldAsSingleRun = false;
+ }
+}
+
void CIRRecordLowering::accumulateFields() {
- for (const FieldDecl *field : recordDecl->fields()) {
+ for (RecordDecl::field_iterator field = recordDecl->field_begin(),
+ fieldEnd = recordDecl->field_end();
+ field != fieldEnd;) {
if (field->isBitField()) {
- cirGenTypes.getCGModule().errorNYI(recordDecl->getSourceRange(),
- "accumulate bitfields");
- ++field;
+ RecordDecl::field_iterator start = field;
+ // Iterate to gather the list of bitfields.
+ for (++field; field != fieldEnd && field->isBitField(); ++field)
+ ;
+ accumulateBitFields(start, field);
} else if (!field->isZeroSize(astContext)) {
- members.push_back(MemberInfo(bitsToCharUnits(getFieldBitOffset(field)),
+ members.push_back(MemberInfo(bitsToCharUnits(getFieldBitOffset(*field)),
MemberInfo::InfoKind::Field,
- getStorageType(field), field));
+ getStorageType(*field), *field));
++field;
} else {
// TODO(cir): do we want to do anything special about zero size members?
@@ -342,6 +484,8 @@ CIRGenTypes::computeRecordLayout(const RecordDecl *rd, cir::RecordType *ty) {
// Add all the field numbers.
rl->fieldIdxMap.swap(lowering.fieldIdxMap);
+ rl->bitFields.swap(lowering.bitFields);
+
// Dump the layout, if requested.
if (getASTContext().getLangOpts().DumpRecordLayouts) {
cgm.errorNYI(rd->getSourceRange(), "computeRecordLayout: dump layout");
diff --git a/clang/test/CIR/CodeGen/bitfields.cpp b/clang/test/CIR/CodeGen/bitfields.cpp
new file mode 100644
index 0000000000000..2eedbcd4377e6
--- /dev/null
+++ b/clang/test/CIR/CodeGen/bitfields.cpp
@@ -0,0 +1,79 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+struct A {
+ char a, b, c;
+ unsigned bits : 3;
+ unsigned more_bits : 4;
+ unsigned still_more_bits : 7;
+};
+
+typedef struct {
+ int a : 4;
+ int b : 5;
+ int c;
+} D;
+
+typedef struct {
+ int a : 4;
+ int b : 27;
+ int c : 17;
+ int d : 2;
+ int e : 15;
+ unsigned f; // type other than int above, not a bitfield
+} S;
+
+typedef struct {
+ int a : 3; // one bitfield with size < 8
+ unsigned b;
+} T;
+
+typedef struct {
+ char a;
+ char b;
+ char c;
+
+ // startOffset 24 bits, new storage from here
+ int d: 2;
+ int e: 2;
+ int f: 4;
+ int g: 25;
+ int h: 3;
+ int i: 4;
+ int j: 3;
+ int k: 8;
+
+ int l: 14; // need to be a part of the new storage
+ // because (tail - startOffset) is 65 after 'l' field
+} U;
+
+// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}>
+// CIR-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i}>
+// CIR-DAG: !rec_U = !cir.record<struct "U" {!s8i, !s8i, !s8i, !cir.array<!u8i x 9>}>
+// CIR-DAG: !rec_A = !cir.record<struct "A" padded {!s8i, !s8i, !s8i, !u8i, !u8i, !cir.array<!u8i x 3>}>
+// CIR-DAG: !rec_S = !cir.record<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}>
+
+
+// LLVM-DAG: %struct.D = type { i16, i32 }
+// LLVM-DAG: %struct.U = type { i8, i8, i8, [9 x i8] }
+// LLVM-DAG: %struct.A = type { i8, i8, i8, i8, i8, [3 x i8] }
+// LLVM-DAG: %struct.S = type { i32, [3 x i8], i16, i32 }
+// LLVM-DAG: %struct.T = type { i8, i32 }
+
+// OGCG-DAG: %struct.D = type { i16, i32 }
+// OGCG-DAG: %struct.U = type <{ i8, i8, i8, i8, i64 }>
+// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }>
+// OGCG-DAG: %struct.S = type { i64, i16, i32 }
+// OGCG-DAG: %struct.T = type { i8, i32 }
+
+void def() {
+ A a;
+ D d;
+ S s;
+ T t;
+ U u;
+}
>From 357d77e36c1187c727953f502c18fc99416f77d4 Mon Sep 17 00:00:00 2001
From: Andres Salamanca <andrealebarbaritos at gmail.com>
Date: Thu, 29 May 2025 16:31:14 -0500
Subject: [PATCH 2/4] Fix incorrect formatting
---
clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index 023f6bbaa47bb..fc920103aeadd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -62,7 +62,8 @@ namespace clang::CIRGen {
/// %3 = cir.get_member %0[4] {name = "more_bits"} : !cir.ptr<!rec_S> ->
/// !cir.ptr<!u16i>
/// %4 = cir.set_bitfield(#bfi_more_bits, %3 :
-/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i cir.return
+/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i
+/// cir.return
/// }
////
struct CIRGenBitFieldInfo {
>From 0a215e1652bb283b7c6181ac50e64895ec8db5a1 Mon Sep 17 00:00:00 2001
From: Andres Salamanca <andrealebarbaritos at gmail.com>
Date: Thu, 29 May 2025 16:33:58 -0500
Subject: [PATCH 3/4] Fix formatting
---
clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index fc920103aeadd..8aee4ac78725b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -34,8 +34,6 @@ namespace clang::CIRGen {
/// byte-sequence this bitfield falls within. Let's assume the following C
/// struct:
///
-/// struct:
-///
/// struct S {
/// char a, b, c;
/// unsigned bits : 3;
@@ -65,7 +63,7 @@ namespace clang::CIRGen {
/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i
/// cir.return
/// }
-////
+///
struct CIRGenBitFieldInfo {
/// The offset within a contiguous run of bitfields that are represented as
/// a single "field" within the cir.record type. This offset is in bits.
>From abadac74a1592538c925c101538d1b2398160132 Mon Sep 17 00:00:00 2001
From: Andres Salamanca <andrealebarbaritos at gmail.com>
Date: Tue, 3 Jun 2025 18:08:37 -0500
Subject: [PATCH 4/4] Applied code review changes
---
clang/include/clang/CIR/MissingFeatures.h | 1 +
clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 15 ++---
.../CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp | 58 +++++++++----------
clang/test/CIR/CodeGen/bitfields.cpp | 31 +++++-----
4 files changed, 52 insertions(+), 53 deletions(-)
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 6d7e3998d2da8..6f7296fa40b33 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -203,6 +203,7 @@ struct MissingFeatures {
static bool writebacks() { return false; }
static bool cleanupsToDeactivate() { return false; }
static bool stackBase() { return false; }
+ static bool nonFineGrainedBitfields() { return false; }
// Missing types
static bool dataMemberType() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index 8aee4ac78725b..042b02eb4dfc6 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -41,8 +41,9 @@ namespace clang::CIRGen {
/// unsigned still_more_bits : 7;
/// };
///
-/// This will end up as the following cir.record. The first array is the
-/// bitfield, and the second is the padding out to a 4-byte alignment.
+/// This will end up as the following cir.record. The bitfield members are
+/// represented by two !u8i values, and the array provides padding to align the
+/// struct to a 4-byte alignment.
///
/// !rec_S = !cir.record<struct "S" padded {!s8i, !s8i, !s8i, !u8i, !u8i,
/// !cir.array<!u8i x 3>}>
@@ -51,16 +52,16 @@ namespace clang::CIRGen {
/// essentially like this:
///
/// #bfi_more_bits = #cir.bitfield_info<name = "more_bits", storage_type =
-/// !u16i, size = 4, offset = 3, is_signed = false>
+/// !u8i, size = 4, offset = 3, is_signed = false>
///
/// cir.func @store_field() {
/// %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s"] {alignment = 4 : i64}
/// %1 = cir.const #cir.int<2> : !s32i
/// %2 = cir.cast(integral, %1 : !s32i), !u32i
-/// %3 = cir.get_member %0[4] {name = "more_bits"} : !cir.ptr<!rec_S> ->
-/// !cir.ptr<!u16i>
+/// %3 = cir.get_member %0[3] {name = "more_bits"} : !cir.ptr<!rec_S> ->
+/// !cir.ptr<!u8i>
/// %4 = cir.set_bitfield(#bfi_more_bits, %3 :
-/// !cir.ptr<!u16i>, %2 : !u32i) -> !u32i
+/// !cir.ptr<!u8i>, %2 : !u32i) -> !u32i
/// cir.return
/// }
///
@@ -110,7 +111,7 @@ struct CIRGenBitFieldInfo {
storageSize(storageSize), storageOffset(storageOffset) {}
void print(llvm::raw_ostream &os) const;
- void dump() const;
+ LLVM_DUMP_METHOD void dump() const;
};
/// This class handles record and union layout info while lowering AST types
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
index 76933952a9dee..bfd492bdb1cd7 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
@@ -210,8 +210,8 @@ void CIRRecordLowering::setBitFieldInfo(const FieldDecl *fd,
if (info.size > info.storageSize)
info.size = info.storageSize;
- // Reverse the bit offsets for big endian machines. Because we represent
- // a bitfield as a single large integer load, we can imagine the bits
+ // Reverse the bit offsets for big endian machines. Since bitfields are laid
+ // out as packed bits within an integer-sized unit, we can imagine the bits
// counting from the most-significant-bit instead of the
// least-significant-bit.
assert(!cir::MissingFeatures::isBigEndian());
@@ -281,35 +281,25 @@ void CIRRecordLowering::fillOutputFields() {
void CIRRecordLowering::accumulateBitFields(
RecordDecl::field_iterator field, RecordDecl::field_iterator fieldEnd) {
- // Run stores the first element of the current run of bitfields. FieldEnd is
- // used as a special value to note that we don't have a current run. A
+ // 'run' stores the first element of the current run of bitfields. 'fieldEnd'
+ // is used as a special value to note that we don't have a current run. A
// bitfield run is a contiguous collection of bitfields that can be stored in
// the same storage block. Zero-sized bitfields and bitfields that would
// cross an alignment boundary break a run and start a new one.
RecordDecl::field_iterator run = fieldEnd;
- // Tail is the offset of the first bit off the end of the current run. It's
+ // 'tail' is the offset of the first bit off the end of the current run. It's
// used to determine if the ASTRecordLayout is treating these two bitfields as
- // contiguous. StartBitOffset is offset of the beginning of the Run.
+ // contiguous. 'startBitOffset' is offset of the beginning of the run.
uint64_t startBitOffset, tail = 0;
assert(!cir::MissingFeatures::isDiscreteBitFieldABI());
- // Check if OffsetInRecord (the size in bits of the current run) is better
+ // Check if 'offsetInRecord' (the size in bits of the current run) is better
// as a single field run. When OffsetInRecord has legal integer width, and
// its bitfield offset is naturally aligned, it is better to make the
// bitfield a separate storage component so as it can be accessed directly
// with lower cost.
- auto isBetterAsSingleFieldRun = [&](uint64_t offsetInRecord,
- uint64_t startBitOffset,
- uint64_t nextTail = 0) {
- if (!cirGenTypes.getCGModule().getCodeGenOpts().FineGrainedBitfieldAccesses)
- return false;
- cirGenTypes.getCGModule().errorNYI(field->getSourceRange(),
- "NYI FineGrainedBitfield");
- return true;
- };
+ assert(!cir::MissingFeatures::nonFineGrainedBitfields());
- // The start field is better as a single field run.
- bool startFieldAsSingleRun = false;
for (;;) {
// Check to see if we need to start a new run.
if (run == fieldEnd) {
@@ -321,27 +311,34 @@ void CIRRecordLowering::accumulateBitFields(
run = field;
startBitOffset = getFieldBitOffset(*field);
tail = startBitOffset + field->getBitWidthValue();
- startFieldAsSingleRun =
- isBetterAsSingleFieldRun(tail - startBitOffset, startBitOffset);
+ assert(!cir::MissingFeatures::nonFineGrainedBitfields());
}
++field;
continue;
}
- // If the start field of a new run is better as a single run, or if current
- // field (or consecutive fields) is better as a single run, or if current
- // field has zero width bitfield and either UseZeroLengthBitfieldAlignment
- // or UseBitFieldTypeAlignment is set to true, or if the offset of current
- // field is inconsistent with the offset of previous field plus its offset,
- // skip the block below and go ahead to emit the storage. Otherwise, try to
- // add bitfields to the run.
+ // Decide whether to continue extending the current bitfield run.
+ //
+ // Skip the block below and go directly to emitting storage if any of the
+ // following is true:
+ // - 1. The first field in the run is better treated as its own run.
+ // - 2. We have reached the end of the fields.
+ // - 3. The current field (or set of fields) is better as its own run.
+ // - 4. The current field is a zero-width bitfield or:
+ // - Zero-length bitfield alignment is enabled, and
+ // - Bitfield type alignment is enabled.
+ // - 5. The current field's offset doesn't match the expected tail (i.e.,
+ // layout isn't contiguous).
+ //
+ // If none of the above conditions are met, add the current field to the
+ // current run.
uint64_t nextTail = tail;
if (field != fieldEnd)
nextTail += field->getBitWidthValue();
- if (!startFieldAsSingleRun && field != fieldEnd &&
- !isBetterAsSingleFieldRun(tail - startBitOffset, startBitOffset,
- nextTail) &&
+ // TODO: add condition 1 and 3
+ assert(!cir::MissingFeatures::nonFineGrainedBitfields());
+ if (field != fieldEnd &&
(!field->isZeroLengthBitField() ||
(!astContext.getTargetInfo().useZeroLengthBitfieldAlignment() &&
!astContext.getTargetInfo().useBitFieldTypeAlignment())) &&
@@ -362,7 +359,6 @@ void CIRRecordLowering::accumulateBitFields(
members.push_back(MemberInfo(bitsToCharUnits(startBitOffset),
MemberInfo::InfoKind::Field, nullptr, *run));
run = fieldEnd;
- startFieldAsSingleRun = false;
}
}
diff --git a/clang/test/CIR/CodeGen/bitfields.cpp b/clang/test/CIR/CodeGen/bitfields.cpp
index 2eedbcd4377e6..3bd48ac4e09e8 100644
--- a/clang/test/CIR/CodeGen/bitfields.cpp
+++ b/clang/test/CIR/CodeGen/bitfields.cpp
@@ -12,12 +12,20 @@ struct A {
unsigned still_more_bits : 7;
};
+// CIR-DAG: !rec_A = !cir.record<struct "A" padded {!s8i, !s8i, !s8i, !u8i, !u8i, !cir.array<!u8i x 3>}>
+// LLVM-DAG: %struct.A = type { i8, i8, i8, i8, i8, [3 x i8] }
+// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }>
+
typedef struct {
int a : 4;
int b : 5;
int c;
} D;
+// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}>
+// LLVM-DAG: %struct.D = type { i16, i32 }
+// OGCG-DAG: %struct.D = type { i16, i32 }
+
typedef struct {
int a : 4;
int b : 27;
@@ -27,11 +35,19 @@ typedef struct {
unsigned f; // type other than int above, not a bitfield
} S;
+// CIR-DAG: !rec_S = !cir.record<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}>
+// LLVM-DAG: %struct.S = type { i32, [3 x i8], i16, i32 }
+// OGCG-DAG: %struct.S = type { i64, i16, i32 }
+
typedef struct {
int a : 3; // one bitfield with size < 8
unsigned b;
} T;
+// CIR-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i}>
+// LLVM-DAG: %struct.T = type { i8, i32 }
+// OGCG-DAG: %struct.T = type { i8, i32 }
+
typedef struct {
char a;
char b;
@@ -51,24 +67,9 @@ typedef struct {
// because (tail - startOffset) is 65 after 'l' field
} U;
-// CIR-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}>
-// CIR-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i}>
// CIR-DAG: !rec_U = !cir.record<struct "U" {!s8i, !s8i, !s8i, !cir.array<!u8i x 9>}>
-// CIR-DAG: !rec_A = !cir.record<struct "A" padded {!s8i, !s8i, !s8i, !u8i, !u8i, !cir.array<!u8i x 3>}>
-// CIR-DAG: !rec_S = !cir.record<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}>
-
-
-// LLVM-DAG: %struct.D = type { i16, i32 }
// LLVM-DAG: %struct.U = type { i8, i8, i8, [9 x i8] }
-// LLVM-DAG: %struct.A = type { i8, i8, i8, i8, i8, [3 x i8] }
-// LLVM-DAG: %struct.S = type { i32, [3 x i8], i16, i32 }
-// LLVM-DAG: %struct.T = type { i8, i32 }
-
-// OGCG-DAG: %struct.D = type { i16, i32 }
// OGCG-DAG: %struct.U = type <{ i8, i8, i8, i8, i64 }>
-// OGCG-DAG: %struct.A = type <{ i8, i8, i8, i16, [3 x i8] }>
-// OGCG-DAG: %struct.S = type { i64, i16, i32 }
-// OGCG-DAG: %struct.T = type { i8, i32 }
void def() {
A a;
More information about the cfe-commits
mailing list