[clang] [CIR] Handle non-empty null base class initialization (PR #168646)
Andy Kaylor via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 19 11:53:07 PST 2025
https://github.com/andykaylor updated https://github.com/llvm/llvm-project/pull/168646
>From aeff15d7b27da96c20b093cd899a96b945ba77e4 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <akaylor at nvidia.com>
Date: Wed, 5 Nov 2025 10:27:30 -0800
Subject: [PATCH 1/2] [CIR] Handle non-empty null base class initialization
This implements null base class initialization for non-empty bases.
---
.../include/clang/CIR/Dialect/IR/CIRTypes.td | 4 +
clang/lib/CIR/CodeGen/CIRGenBuilder.h | 10 ++
clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp | 42 +++++-
clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp | 120 ++++++++++++++++--
clang/lib/CIR/CodeGen/CIRGenModule.h | 6 +
clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 5 +
clang/test/CIR/CodeGen/ctor-null-init.cpp | 60 +++++++++
7 files changed, 237 insertions(+), 10 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
index 313184764f536..e3a71d9f9a0d3 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
@@ -657,6 +657,10 @@ def CIR_RecordType : CIR_Type<"Record", "record", [
}
llvm_unreachable("Invalid value for RecordType::getKind()");
}
+ mlir::Type getElementType(size_t idx) {
+ assert(idx < getMembers().size());
+ return getMembers()[idx];
+ }
std::string getPrefixedName() {
return getKindAsStr() + "." + getName().getValue().str();
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index 5ab1d0e05cf8a..653ce00b29d36 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -212,6 +212,16 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
&ignored);
return fv.bitwiseIsEqual(fpVal);
}
+ if (const auto recordVal = mlir::dyn_cast<cir::ConstRecordAttr>(attr)) {
+ for (const auto elt : recordVal.getMembers()) {
+ // FIXME(cir): the record's ID should not be considered a member.
+ if (mlir::isa<mlir::StringAttr>(elt))
+ continue;
+ if (!isNullValue(elt))
+ return false;
+ }
+ return true;
+ }
if (const auto arrayVal = mlir::dyn_cast<cir::ConstArrayAttr>(attr)) {
if (mlir::isa<mlir::StringAttr>(arrayVal.getElts()))
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
index 007d873ff5db6..c3580dcfff171 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
@@ -240,8 +240,46 @@ static void emitNullBaseClassInitialization(CIRGenFunction &cgf,
if (base->isEmpty())
return;
- cgf.cgm.errorNYI(base->getSourceRange(),
- "emitNullBaseClassInitialization: not empty");
+ const ASTRecordLayout &layout = cgf.getContext().getASTRecordLayout(base);
+ CharUnits nvSize = layout.getNonVirtualSize();
+
+ // We cannot simply zero-initialize the entire base sub-object if vbptrs are
+ // present, they are initialized by the most derived class before calling the
+ // constructor.
+ SmallVector<std::pair<CharUnits, CharUnits>, 1> stores;
+ stores.emplace_back(CharUnits::Zero(), nvSize);
+
+ // Each store is split by the existence of a vbptr.
+ // TODO(cir): This only needs handling for the MS CXXABI.
+ assert(!cir::MissingFeatures::msabi());
+
+ // If the type contains a pointer to data member we can't memset it to zero.
+ // Instead, create a null constant and copy it to the destination.
+ // TODO: there are other patterns besides zero that we can usefully memset,
+ // like -1, which happens to be the pattern used by member-pointers.
+ // TODO: isZeroInitializable can be over-conservative in the case where a
+ // virtual base contains a member pointer.
+ mlir::TypedAttr nullConstantForBase = cgf.cgm.emitNullConstantForBase(base);
+ if (!cgf.getBuilder().isNullValue(nullConstantForBase)) {
+ cgf.cgm.errorNYI(
+ base->getSourceRange(),
+ "emitNullBaseClassInitialization: base constant is not null");
+ } else {
+ // Otherwise, just memset the whole thing to zero. This is legal
+ // because in LLVM, all default initializers (other than the ones we just
+ // handled above) are guaranteed to have a bit pattern of all zeros.
+ // TODO(cir): When the MS CXXABI is supported, we will need to iterate over
+ // `stores` and create a separate memset for each one. For now, we know that
+ // there will only be one store and it will begin at offset zero, so that
+ // simplifies this code considerably.
+ assert(stores.size() == 1 && "Expected only one store");
+ assert(stores[0].first == CharUnits::Zero() &&
+ "Expected store to begin at offset zero");
+ CIRGenBuilderTy builder = cgf.getBuilder();
+ mlir::Location loc = cgf.getLoc(base->getBeginLoc());
+ builder.createStore(loc, builder.getConstant(loc, nullConstantForBase),
+ destPtr);
+ }
}
void CIRGenFunction::emitCXXConstructExpr(const CXXConstructExpr *e,
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
index 6af87a0159f0a..a5efe981920bb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
@@ -1521,6 +1521,101 @@ ConstantEmitter::~ConstantEmitter() {
"not finalized after being initialized for non-abstract emission");
}
+static mlir::TypedAttr emitNullConstantForBase(CIRGenModule &cgm,
+ mlir::Type baseType,
+ const CXXRecordDecl *baseDecl);
+
+static mlir::TypedAttr emitNullConstant(CIRGenModule &cgm, const RecordDecl *rd,
+ bool asCompleteObject) {
+ const CIRGenRecordLayout &layout = cgm.getTypes().getCIRGenRecordLayout(rd);
+ mlir::Type ty = (asCompleteObject ? layout.getCIRType()
+ : layout.getBaseSubobjectCIRType());
+ auto recordTy = cast<cir::RecordType>(ty);
+
+ unsigned numElements = recordTy.getNumElements();
+ SmallVector<mlir::Attribute> elements(numElements);
+
+ auto cxxrd = dyn_cast<CXXRecordDecl>(rd);
+ // Fill in all the bases.
+ if (cxxrd) {
+ for (const auto &base : cxxrd->bases()) {
+ if (base.isVirtual()) {
+ // Ignore virtual bases; if we're laying out for a complete
+ // object, we'll lay these out later.
+ continue;
+ }
+
+ const auto *baseDecl = base.getType()->castAsCXXRecordDecl();
+ // Ignore empty bases.
+ if (isEmptyRecordForLayout(cgm.getASTContext(), base.getType()) ||
+ cgm.getASTContext()
+ .getASTRecordLayout(baseDecl)
+ .getNonVirtualSize()
+ .isZero())
+ continue;
+
+ unsigned fieldIndex = layout.getNonVirtualBaseCIRFieldNo(baseDecl);
+ mlir::Type baseType = recordTy.getElementType(fieldIndex);
+ elements[fieldIndex] = emitNullConstantForBase(cgm, baseType, baseDecl);
+ }
+ }
+
+ // Fill in all the fields.
+ for (const auto *field : rd->fields()) {
+ // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
+ // will fill in later.)
+ if (!field->isBitField() &&
+ !isEmptyFieldForLayout(cgm.getASTContext(), field)) {
+ unsigned fieldIndex = layout.getCIRFieldNo(field);
+ elements[fieldIndex] = cgm.emitNullConstantAttr(field->getType());
+ }
+
+ // For unions, stop after the first named field.
+ if (rd->isUnion()) {
+ if (field->getIdentifier())
+ break;
+ if (const auto *fieldRD = field->getType()->getAsRecordDecl())
+ if (fieldRD->findFirstNamedDataMember())
+ break;
+ }
+ }
+
+ // Fill in the virtual bases, if we're working with the complete object.
+ if (cxxrd && asCompleteObject) {
+ for ([[maybe_unused]] const auto &vbase : cxxrd->vbases()) {
+ cgm.errorNYI(vbase.getSourceRange(), "emitNullConstant: virtual base");
+ return {};
+ }
+ }
+
+ // Now go through all other fields and zero them out.
+ for (unsigned i = 0; i != numElements; ++i) {
+ if (!elements[i]) {
+ cgm.errorNYI(rd->getSourceRange(), "emitNullConstant: field not zeroed");
+ return {};
+ }
+ }
+
+ mlir::MLIRContext *mlirContext = recordTy.getContext();
+ return cir::ConstRecordAttr::get(recordTy,
+ mlir::ArrayAttr::get(mlirContext, elements));
+}
+
+/// Emit the null constant for a base subobject.
+static mlir::TypedAttr emitNullConstantForBase(CIRGenModule &cgm,
+ mlir::Type baseType,
+ const CXXRecordDecl *baseDecl) {
+ const CIRGenRecordLayout &baseLayout =
+ cgm.getTypes().getCIRGenRecordLayout(baseDecl);
+
+ // Just zero out bases that don't have any pointer to data members.
+ if (baseLayout.isZeroInitializableAsBase())
+ return cgm.getBuilder().getZeroInitAttr(baseType);
+
+ // Otherwise, we can just use its null constant.
+ return emitNullConstant(cgm, baseDecl, /*asCompleteObject=*/false);
+}
+
mlir::Attribute ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &d) {
// Make a quick check if variable can be default NULL initialized
// and avoid going through rest of code which may do, for c++11,
@@ -1820,23 +1915,32 @@ mlir::Attribute ConstantEmitter::tryEmitPrivate(const APValue &value,
}
mlir::Value CIRGenModule::emitNullConstant(QualType t, mlir::Location loc) {
- if (t->getAs<PointerType>()) {
- return builder.getNullPtr(getTypes().convertTypeForMem(t), loc);
- }
+ return builder.getConstant(loc, emitNullConstantAttr(t));
+}
+
+mlir::TypedAttr CIRGenModule::emitNullConstantAttr(QualType t) {
+ if (t->getAs<PointerType>())
+ return builder.getConstNullPtrAttr(getTypes().convertTypeForMem(t));
if (getTypes().isZeroInitializable(t))
- return builder.getNullValue(getTypes().convertTypeForMem(t), loc);
+ return builder.getZeroInitAttr(getTypes().convertTypeForMem(t));
if (getASTContext().getAsConstantArrayType(t)) {
- errorNYI("CIRGenModule::emitNullConstant ConstantArrayType");
+ errorNYI("CIRGenModule::emitNullConstantAttr ConstantArrayType");
+ return {};
}
- if (t->isRecordType())
- errorNYI("CIRGenModule::emitNullConstant RecordType");
+ if (const RecordType *rt = t->getAs<RecordType>())
+ return ::emitNullConstant(*this, rt->getDecl(), /*asCompleteObject=*/true);
assert(t->isMemberDataPointerType() &&
"Should only see pointers to data members here!");
- errorNYI("CIRGenModule::emitNullConstant unsupported type");
+ errorNYI("CIRGenModule::emitNullConstantAttr unsupported type");
return {};
}
+
+mlir::TypedAttr
+CIRGenModule::emitNullConstantForBase(const CXXRecordDecl *record) {
+ return ::emitNullConstant(*this, record, false);
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h
index dc28d9e8e9d33..7601e39a798d9 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -461,6 +461,12 @@ class CIRGenModule : public CIRGenTypeCache {
/// expression of the given type.
mlir::Value emitNullConstant(QualType t, mlir::Location loc);
+ mlir::TypedAttr emitNullConstantAttr(QualType t);
+
+ /// Return a null constant appropriate for zero-initializing a base class with
+ /// the given type. This is usually, but not always, an LLVM null constant.
+ mlir::TypedAttr emitNullConstantForBase(const CXXRecordDecl *record);
+
llvm::StringRef getMangledName(clang::GlobalDecl gd);
void emitTentativeDefinition(const VarDecl *d);
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index bf0ddc5875059..c9364971e080c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -184,6 +184,11 @@ class CIRGenRecordLayout {
return fieldIdxMap.lookup(fd);
}
+ unsigned getNonVirtualBaseCIRFieldNo(const CXXRecordDecl *rd) const {
+ assert(nonVirtualBases.count(rd) && "Invalid non-virtual base!");
+ return nonVirtualBases.lookup(rd);
+ }
+
/// Check whether this struct can be C++ zero-initialized
/// with a zeroinitializer.
bool isZeroInitializable() const { return zeroInitializable; }
diff --git a/clang/test/CIR/CodeGen/ctor-null-init.cpp b/clang/test/CIR/CodeGen/ctor-null-init.cpp
index 4324b329c8b41..6f31a46305ae8 100644
--- a/clang/test/CIR/CodeGen/ctor-null-init.cpp
+++ b/clang/test/CIR/CodeGen/ctor-null-init.cpp
@@ -29,3 +29,63 @@ void test_empty_base_null_init() {
// OGCG-NEXT: entry:
// OGCG-NEXT: %[[B:.*]] = alloca %struct.B
// OGCG-NEXT: ret void
+
+
+struct C {
+ int c;
+ C() = default;
+ C(int); // This constructor triggers the null base class initialization.
+};
+
+struct D : C {
+};
+
+void test_non_empty_base_null_init() {
+ D{};
+}
+
+// CIR: cir.func {{.*}} @_Z29test_non_empty_base_null_initv()
+// CIR: %[[TMP:.*]] = cir.alloca !rec_D, !cir.ptr<!rec_D>, ["agg.tmp.ensured"]
+// CIR: %[[BASE:.*]] = cir.base_class_addr %[[TMP]] : !cir.ptr<!rec_D> nonnull [0] -> !cir.ptr<!rec_C>
+// CIR: %[[ZERO:.*]] = cir.const #cir.const_record<{#cir.int<0> : !s32i}> : !rec_C
+// CIR: cir.store{{.*}} %[[ZERO]], %[[BASE]]
+
+// LLVM: define{{.*}} void @_Z29test_non_empty_base_null_initv()
+// LLVM: %[[TMP:.*]] = alloca %struct.D
+// LLVM: store %struct.C zeroinitializer, ptr %[[TMP]]
+
+// OGCG: define {{.*}} void @_Z29test_non_empty_base_null_initv()
+// OGCG: %[[TMP:.*]] = alloca %struct.D
+// OGCG: %[[BASE:.*]] = getelementptr inbounds i8, ptr %[[TMP]], i64 0
+// OGCG: call void @llvm.memset.p0.i64(ptr{{.*}} %[[BASE]], i8 0, i64 4, i1 false)
+
+struct E {
+ int e;
+};
+
+struct F : E {
+ F() = default;
+ F(int);
+};
+
+struct G : F {
+};
+
+void test_base_chain_null_init() {
+ G{};
+}
+
+// CIR: cir.func {{.*}} @_Z25test_base_chain_null_initv()
+// CIR: %[[TMP:.*]] = cir.alloca !rec_G, !cir.ptr<!rec_G>, ["agg.tmp.ensured"]
+// CIR: %[[BASE:.*]] = cir.base_class_addr %[[TMP]] : !cir.ptr<!rec_G> nonnull [0] -> !cir.ptr<!rec_F>
+// CIR: %[[ZERO:.*]] = cir.const #cir.const_record<{#cir.zero : !rec_E}> : !rec_F
+// CIR: cir.store{{.*}} %[[ZERO]], %[[BASE]]
+
+// LLVM: define{{.*}} void @_Z25test_base_chain_null_initv()
+// LLVM: %[[TMP:.*]] = alloca %struct.G
+// LLVM: store %struct.F zeroinitializer, ptr %[[TMP]]
+
+// OGCG: define {{.*}} void @_Z25test_base_chain_null_initv()
+// OGCG: %[[TMP:.*]] = alloca %struct.G
+// OGCG: %[[BASE:.*]] = getelementptr inbounds i8, ptr %[[TMP]], i64 0
+// OGCG: call void @llvm.memset.p0.i64(ptr{{.*}} %[[BASE]], i8 0, i64 4, i1 false)
>From 865e244cd868ab0cc00eb40d5f8a98a00f3a987c Mon Sep 17 00:00:00 2001
From: Andy Kaylor <akaylor at nvidia.com>
Date: Wed, 19 Nov 2025 11:52:28 -0800
Subject: [PATCH 2/2] Address review comments
---
clang/include/clang/CIR/Dialect/IR/CIRTypes.td | 1 -
clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp | 10 +++++-----
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
index e3a71d9f9a0d3..3e062add6633a 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
@@ -658,7 +658,6 @@ def CIR_RecordType : CIR_Type<"Record", "record", [
llvm_unreachable("Invalid value for RecordType::getKind()");
}
mlir::Type getElementType(size_t idx) {
- assert(idx < getMembers().size());
return getMembers()[idx];
}
std::string getPrefixedName() {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
index a5efe981920bb..66f8ef9b05913 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
@@ -1530,15 +1530,15 @@ static mlir::TypedAttr emitNullConstant(CIRGenModule &cgm, const RecordDecl *rd,
const CIRGenRecordLayout &layout = cgm.getTypes().getCIRGenRecordLayout(rd);
mlir::Type ty = (asCompleteObject ? layout.getCIRType()
: layout.getBaseSubobjectCIRType());
- auto recordTy = cast<cir::RecordType>(ty);
+ auto recordTy = mlir::cast<cir::RecordType>(ty);
unsigned numElements = recordTy.getNumElements();
SmallVector<mlir::Attribute> elements(numElements);
- auto cxxrd = dyn_cast<CXXRecordDecl>(rd);
+ auto *cxxrd = dyn_cast<CXXRecordDecl>(rd);
// Fill in all the bases.
if (cxxrd) {
- for (const auto &base : cxxrd->bases()) {
+ for (const CXXBaseSpecifier &base : cxxrd->bases()) {
if (base.isVirtual()) {
// Ignore virtual bases; if we're laying out for a complete
// object, we'll lay these out later.
@@ -1561,7 +1561,7 @@ static mlir::TypedAttr emitNullConstant(CIRGenModule &cgm, const RecordDecl *rd,
}
// Fill in all the fields.
- for (const auto *field : rd->fields()) {
+ for (const FieldDecl *field : rd->fields()) {
// Fill in non-bitfields. (Bitfields always use a zero pattern, which we
// will fill in later.)
if (!field->isBitField() &&
@@ -1582,7 +1582,7 @@ static mlir::TypedAttr emitNullConstant(CIRGenModule &cgm, const RecordDecl *rd,
// Fill in the virtual bases, if we're working with the complete object.
if (cxxrd && asCompleteObject) {
- for ([[maybe_unused]] const auto &vbase : cxxrd->vbases()) {
+ for ([[maybe_unused]] const CXXBaseSpecifier &vbase : cxxrd->vbases()) {
cgm.errorNYI(vbase.getSourceRange(), "emitNullConstant: virtual base");
return {};
}
More information about the cfe-commits
mailing list