[clang] [CIR] Add builtin operator new/delete (PR #168578)
Hendrik Hübner via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 18 09:55:05 PST 2025
https://github.com/HendrikHuebner updated https://github.com/llvm/llvm-project/pull/168578
>From 185d4f496ffb5c9299089606213c52cb5b7a60bb Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Tue, 18 Nov 2025 18:50:47 +0100
Subject: [PATCH 1/3] [CIR] builtin operator new/delete
---
clang/include/clang/CIR/MissingFeatures.h | 1 +
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 9 ++++++++
clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 1 +
clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp | 26 +++++++++++++++++++++++
clang/lib/CIR/CodeGen/CIRGenFunction.h | 3 +++
5 files changed, 40 insertions(+)
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 567c79a27c07b..477d8046e18c0 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -200,6 +200,7 @@ struct MissingFeatures {
static bool aggValueSlotMayOverlap() { return false; }
static bool aggValueSlotVolatile() { return false; }
static bool alignCXXRecordDecl() { return false; }
+ static bool allocToken() { return false; }
static bool appleKext() { return false; }
static bool armComputeVolatileBitfields() { return false; }
static bool asmGoto() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 77f19343653db..c038f0be81137 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -18,6 +18,7 @@
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Value.h"
#include "mlir/Support/LLVM.h"
+#include "clang/AST/DeclBase.h"
#include "clang/AST/Expr.h"
#include "clang/AST/GlobalDecl.h"
#include "clang/Basic/Builtins.h"
@@ -520,6 +521,13 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
cir::PrefetchOp::create(builder, loc, address, locality, isWrite);
return RValue::get(nullptr);
}
+ case Builtin::BI__builtin_operator_new:
+ return emitNewOrDeleteBuiltinCall(
+ e->getCallee()->getType()->castAs<FunctionProtoType>(), e, false);
+ case Builtin::BI__builtin_operator_delete:
+ emitNewOrDeleteBuiltinCall(
+ e->getCallee()->getType()->castAs<FunctionProtoType>(), e, true);
+ return RValue::get(nullptr);
}
// If this is an alias for a lib function (e.g. __builtin_sin), emit
@@ -559,6 +567,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
std::string("unimplemented builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return getUndefRValue(e->getType());
+
}
static mlir::Value emitTargetArchBuiltinExpr(CIRGenFunction *cgf,
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 91a59d60fcb3e..57b49f4640c4c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -2006,6 +2006,7 @@ RValue CIRGenFunction::emitCallExpr(const clang::CallExpr *e,
return emitCall(e->getCallee()->getType(), callee, e, returnValue);
}
+
/// Emit code to compute the specified expression, ignoring the result.
void CIRGenFunction::emitIgnoredExpr(const Expr *e) {
if (e->isPRValue()) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
index 007d873ff5db6..345bb0e6bed9f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
@@ -610,6 +610,32 @@ static RValue emitNewDeleteCall(CIRGenFunction &cgf,
return rv;
}
+RValue CIRGenFunction::emitNewOrDeleteBuiltinCall(const FunctionProtoType *type,
+ const CallExpr *callExpr,
+ bool isDelete) {
+ CallArgList args;
+ emitCallArgs(args, type, callExpr->arguments());
+ // Find the allocation or deallocation function that we're calling.
+ ASTContext &astContext = getContext();
+ DeclarationName name = astContext.DeclarationNames.getCXXOperatorName(
+ isDelete ? OO_Delete : OO_New);
+
+ clang::DeclContextLookupResult lookupResult = astContext.getTranslationUnitDecl()->lookup(name);
+ for (const auto *decl : lookupResult) {
+ if (const auto *funcDecl = dyn_cast<FunctionDecl>(decl)) {
+ if (astContext.hasSameType(funcDecl->getType(), QualType(type, 0))) {
+ // -fsanitize=alloc-token metadata is not emitted here yet.
+ assert(!cir::MissingFeatures::allocToken());
+
+ // Emit the call to operator new/delete.
+ return emitNewDeleteCall(*this, funcDecl, type, args);
+ }
+ }
+ }
+
+ llvm_unreachable("predeclared global operator new/delete is missing");
+}
+
namespace {
/// Calls the given 'operator delete' on a single object.
struct CallObjectDelete final : EHScopeStack::Cleanup {
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 00f289bcd1bb2..060a31edea2cf 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1476,6 +1476,9 @@ class CIRGenFunction : public CIRGenTypeCache {
RValue emitCXXPseudoDestructorExpr(const CXXPseudoDestructorExpr *expr);
+ RValue emitNewOrDeleteBuiltinCall(const FunctionProtoType* type,
+ const CallExpr* call, bool isDelete);
+
void emitCXXTemporary(const CXXTemporary *temporary, QualType tempType,
Address ptr);
>From 948d5e7945bdd19ee4f7f920296aab117c2ccf21 Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Tue, 18 Nov 2025 18:54:20 +0100
Subject: [PATCH 2/3] formatting
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 1 -
clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 1 -
clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp | 7 ++++---
clang/lib/CIR/CodeGen/CIRGenFunction.h | 4 ++--
4 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index c038f0be81137..2fbad2ecce0d3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -567,7 +567,6 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
std::string("unimplemented builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return getUndefRValue(e->getType());
-
}
static mlir::Value emitTargetArchBuiltinExpr(CIRGenFunction *cgf,
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 57b49f4640c4c..91a59d60fcb3e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -2006,7 +2006,6 @@ RValue CIRGenFunction::emitCallExpr(const clang::CallExpr *e,
return emitCall(e->getCallee()->getType(), callee, e, returnValue);
}
-
/// Emit code to compute the specified expression, ignoring the result.
void CIRGenFunction::emitIgnoredExpr(const Expr *e) {
if (e->isPRValue()) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
index 345bb0e6bed9f..f28887df34212 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
@@ -611,8 +611,8 @@ static RValue emitNewDeleteCall(CIRGenFunction &cgf,
}
RValue CIRGenFunction::emitNewOrDeleteBuiltinCall(const FunctionProtoType *type,
- const CallExpr *callExpr,
- bool isDelete) {
+ const CallExpr *callExpr,
+ bool isDelete) {
CallArgList args;
emitCallArgs(args, type, callExpr->arguments());
// Find the allocation or deallocation function that we're calling.
@@ -620,7 +620,8 @@ RValue CIRGenFunction::emitNewOrDeleteBuiltinCall(const FunctionProtoType *type,
DeclarationName name = astContext.DeclarationNames.getCXXOperatorName(
isDelete ? OO_Delete : OO_New);
- clang::DeclContextLookupResult lookupResult = astContext.getTranslationUnitDecl()->lookup(name);
+ clang::DeclContextLookupResult lookupResult =
+ astContext.getTranslationUnitDecl()->lookup(name);
for (const auto *decl : lookupResult) {
if (const auto *funcDecl = dyn_cast<FunctionDecl>(decl)) {
if (astContext.hasSameType(funcDecl->getType(), QualType(type, 0))) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 060a31edea2cf..0cfcc2be0255e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1476,8 +1476,8 @@ class CIRGenFunction : public CIRGenTypeCache {
RValue emitCXXPseudoDestructorExpr(const CXXPseudoDestructorExpr *expr);
- RValue emitNewOrDeleteBuiltinCall(const FunctionProtoType* type,
- const CallExpr* call, bool isDelete);
+ RValue emitNewOrDeleteBuiltinCall(const FunctionProtoType *type,
+ const CallExpr *call, bool isDelete);
void emitCXXTemporary(const CXXTemporary *temporary, QualType tempType,
Address ptr);
>From 1c5877f4e984d880f9f66fa72e13244a53673cbf Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Tue, 18 Nov 2025 18:54:54 +0100
Subject: [PATCH 3/3] Add test
---
clang/lib/CIR/CodeGen/CIRGenTBAA.cpp | 485 ++++++++++++++++++
clang/lib/CIR/CodeGen/CIRGenTBAA.h | 194 +++++++
clang/test/CIR/CodeGen/builtin_new_delete.cpp | 44 ++
3 files changed, 723 insertions(+)
create mode 100644 clang/lib/CIR/CodeGen/CIRGenTBAA.cpp
create mode 100644 clang/lib/CIR/CodeGen/CIRGenTBAA.h
create mode 100644 clang/test/CIR/CodeGen/builtin_new_delete.cpp
diff --git a/clang/lib/CIR/CodeGen/CIRGenTBAA.cpp b/clang/lib/CIR/CodeGen/CIRGenTBAA.cpp
new file mode 100644
index 0000000000000..b0750a9c77c42
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/CIRGenTBAA.cpp
@@ -0,0 +1,485 @@
+#include "CIRGenTBAA.h"
+#include "CIRGenCXXABI.h"
+#include "CIRGenTypes.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/Interfaces/DataLayoutInterfaces.h"
+#include "mlir/Support/LLVM.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecordLayout.h"
+#include "clang/AST/Type.h"
+#include "clang/CIR/Dialect/IR/CIRAttrs.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
+#include "clang/CIR/MissingFeatures.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+namespace clang::CIRGen {
+
+cir::TBAAAttr tbaa_NYI(mlir::MLIRContext *mlirContext) {
+ return cir::TBAAAttr::get(mlirContext);
+}
+
+CIRGenTBAA::CIRGenTBAA(mlir::MLIRContext *mlirContext,
+ clang::ASTContext &astContext, CIRGenTypes &types,
+ mlir::ModuleOp moduleOp,
+ const clang::CodeGenOptions &codeGenOpts,
+ const clang::LangOptions &features)
+ : mlirContext(mlirContext), astContext(astContext), types(types),
+ moduleOp(moduleOp), codeGenOpts(codeGenOpts), features(features) {}
+
+cir::TBAAAttr CIRGenTBAA::getChar() {
+ return cir::TBAAOmnipotentCharAttr::get(mlirContext);
+}
+
+/// Returns true if \p qty names a tag type declared with may_alias, or is
+/// spelled through a typedef whose declaration carries the attribute.
+static bool typeHasMayAlias(clang::QualType qty) {
+  // A tag type is backed by a declaration, which may carry the attribute.
+  const auto *tagDecl = qty->getAsTagDecl();
+  if (tagDecl && tagDecl->hasAttr<MayAliasAttr>())
+    return true;
+
+  // Walk the chain of typedefs and check each typedef declaration in turn.
+  // FIXME: We should follow GCC and model may_alias as a type attribute
+  // rather than as a declaration attribute.
+  const auto *typedefTy = qty->getAs<TypedefType>();
+  while (typedefTy) {
+    if (typedefTy->getDecl()->hasAttr<MayAliasAttr>())
+      return true;
+    qty = typedefTy->desugar();
+    typedefTy = qty->getAs<TypedefType>();
+  }
+  return false;
+}
+
+/// Returns true if \p qty can serve as the base type of a TBAA access tag:
+/// a fully defined struct or class without a flexible array member.
+static bool isValidBaseType(clang::QualType qty) {
+  const auto *recordTy = qty->getAs<clang::RecordType>();
+  if (!recordTy)
+    return false;
+
+  // A record without a definition is incomplete and cannot be a base type.
+  const clang::RecordDecl *def = recordTy->getDecl()->getDefinition();
+  if (!def || def->hasFlexibleArrayMember())
+    return false;
+
+  // Only struct and class are handled for now; other record kinds are not.
+  return def->isStruct() || def->isClass();
+}
+
+/// Returns the scalar TBAA node for \p qty, which must be a builtin type.
+cir::TBAAScalarAttr CIRGenTBAA::getScalarTypeInfo(clang::QualType qty) {
+  const clang::Type *ty = astContext.getCanonicalType(qty).getTypePtr();
+  const auto *bty = mlir::cast<clang::BuiltinType>(ty); // asserts on mismatch
+  return cir::TBAAScalarAttr::get(mlirContext, bty->getName(features),
+                                  types.convertType(qty));
+}
+
+cir::TBAAAttr CIRGenTBAA::getTypeInfoHelper(clang::QualType qty) {
+ const clang::Type *ty = astContext.getCanonicalType(qty).getTypePtr();
+ // Handle builtin types.
+ if (const clang::BuiltinType *bty = mlir::dyn_cast<BuiltinType>(ty)) {
+ switch (bty->getKind()) {
+ // Character types are special and can alias anything.
+ // In C++, this technically only includes "char" and "unsigned char",
+ // and not "signed char". In C, it includes all three. For now,
+ // the risk of exploiting this detail in C++ seems likely to outweigh
+ // the benefit.
+ case BuiltinType::Char_U:
+ case BuiltinType::Char_S:
+ case BuiltinType::UChar:
+ case BuiltinType::SChar:
+ return getChar();
+
+ // Unsigned types can alias their corresponding signed types.
+ case BuiltinType::UShort:
+ return getScalarTypeInfo(astContext.ShortTy);
+ case BuiltinType::UInt:
+ return getScalarTypeInfo(astContext.IntTy);
+ case BuiltinType::ULong:
+ return getScalarTypeInfo(astContext.LongTy);
+ case BuiltinType::ULongLong:
+ return getScalarTypeInfo(astContext.LongLongTy);
+ case BuiltinType::UInt128:
+ return getScalarTypeInfo(astContext.Int128Ty);
+
+ case BuiltinType::UShortFract:
+ return getScalarTypeInfo(astContext.ShortFractTy);
+ case BuiltinType::UFract:
+ return getScalarTypeInfo(astContext.FractTy);
+ case BuiltinType::ULongFract:
+ return getScalarTypeInfo(astContext.LongFractTy);
+
+ case BuiltinType::SatUShortFract:
+ return getScalarTypeInfo(astContext.SatShortFractTy);
+ case BuiltinType::SatUFract:
+ return getScalarTypeInfo(astContext.SatFractTy);
+ case BuiltinType::SatULongFract:
+ return getScalarTypeInfo(astContext.SatLongFractTy);
+
+ case BuiltinType::UShortAccum:
+ return getScalarTypeInfo(astContext.ShortAccumTy);
+ case BuiltinType::UAccum:
+ return getScalarTypeInfo(astContext.AccumTy);
+ case BuiltinType::ULongAccum:
+ return getScalarTypeInfo(astContext.LongAccumTy);
+
+ case BuiltinType::SatUShortAccum:
+ return getScalarTypeInfo(astContext.SatShortAccumTy);
+ case BuiltinType::SatUAccum:
+ return getScalarTypeInfo(astContext.SatAccumTy);
+ case BuiltinType::SatULongAccum:
+ return getScalarTypeInfo(astContext.SatLongAccumTy);
+
+ // Treat all other builtin types as distinct types. This includes
+ // treating wchar_t, char16_t, and char32_t as distinct from their
+ // "underlying types".
+ default:
+ return getScalarTypeInfo(qty);
+ }
+ }
+ // C++1z [basic.lval]p10: "If a program attempts to access the stored value of
+ // an object through a glvalue of other than one of the following types the
+ // behavior is undefined: [...] a char, unsigned char, or std::byte type."
+ if (ty->isStdByteType())
+ return getChar();
+
+ // Handle pointers and references.
+ //
+ // C has a very strict rule for pointer aliasing. C23 6.7.6.1p2:
+ // For two pointer types to be compatible, both shall be identically
+ // qualified and both shall be pointers to compatible types.
+ //
+ // This rule is impractically strict; we want to at least ignore CVR
+ // qualifiers. Distinguishing by CVR qualifiers would make it UB to
+ // e.g. cast a `char **` to `const char * const *` and dereference it,
+ // which is too common and useful to invalidate. C++'s similar types
+ // rule permits qualifier differences in these nested positions; in fact,
+ // C++ even allows that cast as an implicit conversion.
+ //
+ // Other qualifiers could theoretically be distinguished, especially if
+ // they involve a significant representation difference. We don't
+ // currently do so, however.
+ if (ty->isPointerType() || ty->isReferenceType()) {
+ auto anyPtr = cir::TBAAScalarAttr::get(mlirContext, "any pointer",
+ types.convertType(qty));
+ if (!codeGenOpts.PointerTBAA)
+ return anyPtr;
+ // C++ [basic.lval]p11 permits objects to be accessed through an l-value of
+ // similar type. Two types are similar under C++ [conv.qual]p2 if the
+ // decomposition of the types into pointers, member pointers, and arrays has
+ // the same structure when ignoring cv-qualifiers at each level of the
+ // decomposition. Meanwhile, C makes T(*)[] and T(*)[N] compatible, which
+ // would really complicate any attempt to distinguish pointers to arrays by
+ // their bounds. It's simpler, and much easier to explain to users, to
+ // simply treat all pointers to arrays as pointers to their element type for
+ // aliasing purposes. So when creating a TBAA tag for a pointer type, we
+ // recursively ignore both qualifiers and array types when decomposing the
+ // pointee type. The only meaningful remaining structure is the number of
+ // pointer types we encountered along the way, so we just produce the tag
+ // "p<depth> <base type tag>". If we do find a member pointer type, for now
+ // we just conservatively bail out with AnyPtr (below) rather than trying to
+ // create a tag that honors the similar-type rules while still
+ // distinguishing different kinds of member pointer.
+ unsigned ptrDepth = 0;
+ do {
+ ptrDepth++;
+ ty = ty->getPointeeType()->getBaseElementTypeUnsafe();
+ } while (ty->isPointerType());
+ assert(!isa<VariableArrayType>(ty));
+ // When the underlying type is a builtin type, we compute the pointee type
+ // string recursively, which is implicitly more forgiving than the standards
+ // require. Effectively, we are turning the question "are these types
+ // compatible/similar" into "are accesses to these types allowed to alias".
+ // In both C and C++, the latter question has special carve-outs for
+ // signedness mismatches that only apply at the top level. As a result, we
+ // are allowing e.g. `int *` l-values to access `unsigned *` objects.
+ SmallString<256> tyName;
+
+ if (isa<BuiltinType>(ty)) {
+ auto scalarAttr = getScalarTypeInfo(ty->getCanonicalTypeInternal());
+ tyName = scalarAttr.getId();
+ } else {
+ // Be conservative if the type isn't a RecordType. We are specifically
+ // required to do this for member pointers until we implement the
+ // similar-types rule.
+ const auto *rt = ty->getAs<RecordType>();
+ if (!rt)
+ return anyPtr;
+
+ // For unnamed structs or unions C's compatible types rule applies. Two
+ // compatible types in different compilation units can have different
+ // mangled names, meaning the metadata emitted below would incorrectly
+ // mark them as no-alias. Use AnyPtr for such types in both C and C++, as
+ // C and C++ types may be visible when doing LTO.
+ //
+ // Note that using AnyPtr is overly conservative. We could summarize the
+ // members of the type, as per the C compatibility rule in the future.
+ // This also covers anonymous structs and unions, which have a different
+ // compatibility rule, but it doesn't matter because you can never have a
+ // pointer to an anonymous struct or union.
+ if (!rt->getDecl()->getDeclName())
+ return anyPtr;
+
+ // For non-builtin types use the mangled name of the canonical type.
+ llvm::raw_svector_ostream tyOut(tyName);
+ types.getCXXABI().getMangleContext().mangleCanonicalTypeName(
+ QualType(ty, 0), tyOut);
+ }
+
+ SmallString<256> outName("p");
+ outName += std::to_string(ptrDepth);
+ outName += " ";
+ outName += tyName;
+ return cir::TBAAScalarAttr::get(mlirContext, outName,
+ types.convertType(qty), anyPtr);
+ }
+ // Accesses to arrays are accesses to objects of their element types.
+ if (codeGenOpts.NewStructPathTBAA && ty->isArrayType()) {
+ assert(!cir::MissingFeatures::tbaaNewStructPath());
+ return tbaa_NYI(mlirContext);
+ }
+ // Enum types are distinct types. In C++ they have "underlying types",
+ // however they aren't related for TBAA.
+ if (const EnumType *ety = dyn_cast<EnumType>(ty)) {
+ if (!features.CPlusPlus)
+ return getTypeInfo(ety->getDecl()->getIntegerType());
+
+ // In C++ mode, types have linkage, so we can rely on the ODR and
+ // on their mangled names, if they're external.
+ // TODO: Is there a way to get a program-wide unique name for a
+ // decl with local linkage or no linkage?
+ if (!ety->getDecl()->isExternallyVisible())
+ return getChar();
+
+ SmallString<256> outName;
+ llvm::raw_svector_ostream out(outName);
+ types.getCXXABI().getMangleContext().mangleCanonicalTypeName(
+ QualType(ety, 0), out);
+ return cir::TBAAScalarAttr::get(mlirContext, outName,
+ types.convertType(qty));
+ }
+ if (const auto *eit = dyn_cast<BitIntType>(ty)) {
+ SmallString<256> outName;
+ llvm::raw_svector_ostream out(outName);
+ // Don't specify signed/unsigned since integer types can alias despite sign
+ // differences.
+ out << "_BitInt(" << eit->getNumBits() << ')';
+ return cir::TBAAScalarAttr::get(mlirContext, outName,
+ types.convertType(qty));
+ }
+ // For now, handle any other kind of type conservatively.
+ return getChar();
+}
+
+/// Returns the TBAA type node for \p qty, or nullptr when TBAA is not emitted
+/// (-O0 or relaxed aliasing). Nodes built by the helper are cached per type.
+cir::TBAAAttr CIRGenTBAA::getTypeInfo(clang::QualType qty) {
+  // At -O0 or relaxed aliasing, TBAA is not emitted for regular types.
+  if (codeGenOpts.OptimizationLevel == 0 || codeGenOpts.RelaxedAliasing)
+    return nullptr;
+
+  // If the type has the may_alias attribute (even on a typedef), it is
+  // effectively in the general char alias class.
+  if (typeHasMayAlias(qty)) {
+    assert(!cir::MissingFeatures::tbaaMayAlias());
+    return getChar();
+  }
+  // We need this function to not fall back to returning the "omnipotent char"
+  // type node for aggregate and union types. Otherwise, any dereference of an
+  // aggregate will result in the may-alias access descriptor, meaning all
+  // subsequent accesses to direct and indirect members of that aggregate will
+  // be considered may-alias too.
+  if (isValidBaseType(qty)) {
+    assert(!cir::MissingFeatures::tbaaTagForStruct());
+    return getValidBaseTypeInfo(qty);
+  }
+
+  // Single cache lookup; the iterator is dropped before the helper runs.
+  const clang::Type *ty = astContext.getCanonicalType(qty).getTypePtr();
+  if (auto cached = metadataCache.find(ty); cached != metadataCache.end())
+    return cached->second;
+
+  // Note that the following helper call is allowed to add new nodes to the
+  // cache, which invalidates all its previously obtained iterators. So we
+  // first generate the node for the type and then add that node to the
+  // cache.
+  cir::TBAAAttr typeNode = getTypeInfoHelper(qty);
+  return metadataCache[ty] = typeNode;
+}
+
+/// Builds the TBAA access descriptor for an access of type \p accessType.
+TBAAAccessInfo CIRGenTBAA::getAccessInfo(clang::QualType accessType) {
+  // Incomplete types may show up as pointees, but they are never
+  // dereferenced, so they get the dedicated "incomplete" descriptor.
+  if (accessType->isIncompleteType()) {
+    assert(!cir::MissingFeatures::tbaaIncompleteType());
+    return TBAAAccessInfo::getIncompleteInfo();
+  }
+
+  // may_alias types get the dedicated may-alias descriptor.
+  if (typeHasMayAlias(accessType)) {
+    assert(!cir::MissingFeatures::tbaaMayAlias());
+    return TBAAAccessInfo::getMayAliasInfo();
+  }
+
+  // Ordinary access: pair the type's TBAA node with its size in chars.
+  const uint64_t accessSize =
+      astContext.getTypeSizeInChars(accessType).getQuantity();
+  cir::TBAAAttr typeNode = getTypeInfo(accessType);
+  return TBAAAccessInfo(typeNode, accessSize);
+}
+
+TBAAAccessInfo CIRGenTBAA::getVTablePtrAccessInfo(mlir::Type vtablePtrType) {
+ const mlir::DataLayout dataLayout(moduleOp);
+ auto size = dataLayout.getTypeSize(vtablePtrType);
+ return TBAAAccessInfo(
+ cir::TBAAVTablePointerAttr::get(mlirContext, vtablePtrType), size);
+}
+
+mlir::ArrayAttr CIRGenTBAA::getTBAAStructInfo(clang::QualType qty) {
+ assert(!cir::MissingFeatures::tbaaStruct() && "tbaa.struct NYI");
+ return mlir::ArrayAttr();
+}
+
+/// Returns the base-type TBAA node for \p qty, or nullptr if \p qty is not a
+/// valid base type.
+cir::TBAAAttr CIRGenTBAA::getBaseTypeInfo(clang::QualType qty) {
+  if (!isValidBaseType(qty))
+    return nullptr;
+  return getValidBaseTypeInfo(qty);
+}
+
+/// Returns the cached (possibly null) base-type node for valid base \p qty.
+cir::TBAAAttr CIRGenTBAA::getValidBaseTypeInfo(clang::QualType qty) {
+  assert(isValidBaseType(qty) && "Must be a valid base type");
+  const clang::Type *ty = astContext.getCanonicalType(qty).getTypePtr();
+
+  // nullptr is a valid value in the cache, so use find rather than []
+  auto iter = baseTypeMetadataCache.find(ty);
+  if (iter != baseTypeMetadataCache.end())
+    return iter->second;
+
+  // First calculate the metadata, before recomputing the insertion point, as
+  // the helper can recursively call us.
+  cir::TBAAAttr typeNode = getBaseTypeInfoHelper(ty);
+  // `inserted` is only read by the assert below.
+  [[maybe_unused]] auto inserted =
+      baseTypeMetadataCache.insert({ty, typeNode});
+  assert(inserted.second && "BaseType metadata was already inserted");
+  return typeNode;
+}
+cir::TBAAAttr CIRGenTBAA::getBaseTypeInfoHelper(const clang::Type *ty) {
+ using namespace clang;
+ if (auto *tty = mlir::dyn_cast<clang::RecordType>(ty)) {
+ const clang::RecordDecl *rd = tty->getDecl()->getDefinition();
+ const ASTRecordLayout &layout = astContext.getASTRecordLayout(rd);
+ SmallVector<cir::TBAAMemberAttr, 4> fields;
+ if (const CXXRecordDecl *cxxrd = dyn_cast<CXXRecordDecl>(rd)) {
+ // Handle C++ base classes. Non-virtual bases can be treated as a kind
+ // of field. Virtual bases are more complex and omitted, but avoid an
+ // incomplete view for NewStructPathTBAA.
+ if (codeGenOpts.NewStructPathTBAA && cxxrd->getNumVBases() != 0)
+ return nullptr;
+ for (const CXXBaseSpecifier &cxxBaseSpecifier : cxxrd->bases()) {
+ if (cxxBaseSpecifier.isVirtual())
+ continue;
+ QualType baseQTy = cxxBaseSpecifier.getType();
+ const CXXRecordDecl *baseRD = baseQTy->getAsCXXRecordDecl();
+ if (baseRD->isEmpty())
+ continue;
+ auto typeNode = isValidBaseType(baseQTy) ? getValidBaseTypeInfo(baseQTy)
+ : getTypeInfo(baseQTy);
+ if (!typeNode)
+ return nullptr;
+ uint64_t offset = layout.getBaseClassOffset(baseRD).getQuantity();
+ [[maybe_unused]] uint64_t size =
+ astContext.getASTRecordLayout(baseRD).getDataSize().getQuantity();
+ fields.push_back(
+ cir::TBAAMemberAttr::get(mlirContext, typeNode, offset));
+ }
+ // The order in which base class subobjects are allocated is
+ // unspecified, so may differ from declaration order. In particular,
+ // Itanium ABI will allocate a primary base first. Since we exclude
+ // empty subobjects, the objects are not overlapping and their offsets
+ // are unique.
+ llvm::sort(fields, [](const cir::TBAAMemberAttr &lhs,
+ const cir::TBAAMemberAttr &rhs) {
+ return lhs.getOffset() < rhs.getOffset();
+ });
+ }
+ for (FieldDecl *field : rd->fields()) {
+ if (field->isZeroSize(astContext) || field->isUnnamedBitField())
+ continue;
+ QualType fieldQTy = field->getType();
+ auto typeNode = isValidBaseType(fieldQTy) ? getValidBaseTypeInfo(fieldQTy)
+ : getTypeInfo(fieldQTy);
+ if (!typeNode)
+ return nullptr;
+
+ uint64_t bitOffset = layout.getFieldOffset(field->getFieldIndex());
+ uint64_t offset = astContext.toCharUnitsFromBits(bitOffset).getQuantity();
+ [[maybe_unused]] uint64_t size =
+ astContext.getTypeSizeInChars(fieldQTy).getQuantity();
+ fields.push_back(cir::TBAAMemberAttr::get(mlirContext, typeNode, offset));
+ }
+
+ SmallString<256> outName;
+ if (features.CPlusPlus) {
+ // Don't use the mangler for C code.
+ llvm::raw_svector_ostream out(outName);
+ types.getCXXABI().getMangleContext().mangleCanonicalTypeName(
+ QualType(ty, 0), out);
+ } else {
+ outName = rd->getName();
+ }
+
+ if (codeGenOpts.NewStructPathTBAA) {
+ assert(!cir::MissingFeatures::tbaaNewStructPath());
+ return nullptr;
+ }
+ return cir::TBAAStructAttr::get(mlirContext, outName, fields);
+ }
+ return nullptr;
+}
+/// Converts \p tbaaInfo into the TBAA attribute describing the access.
+/// Returns nullptr when the descriptor carries no access type.
+cir::TBAAAttr CIRGenTBAA::getAccessTagInfo(TBAAAccessInfo tbaaInfo) {
+  assert(!tbaaInfo.isIncomplete() &&
+         "Access to an object of an incomplete type!");
+
+  // A may-alias access is treated as an access of the char type node.
+  if (tbaaInfo.isMayAlias()) {
+    assert(!cir::MissingFeatures::tbaaMayAlias());
+    tbaaInfo = TBAAAccessInfo(getChar(), tbaaInfo.size);
+  }
+
+  if (!tbaaInfo.accessType)
+    return nullptr;
+
+  // Without struct-path TBAA only the access type and size are kept.
+  if (!codeGenOpts.StructPathTBAA)
+    tbaaInfo = TBAAAccessInfo(tbaaInfo.accessType, tbaaInfo.size);
+
+  // An access with no base type is based on its own access type.
+  if (!tbaaInfo.baseType) {
+    tbaaInfo.baseType = tbaaInfo.accessType;
+    assert(!tbaaInfo.offset &&
+           "Nonzero offset for an access with no base type!");
+  }
+
+  if (codeGenOpts.NewStructPathTBAA) {
+    assert(!cir::MissingFeatures::tbaaNewStructPath());
+    return tbaa_NYI(mlirContext);
+  }
+
+  // When base and access type coincide, the type node itself is returned.
+  if (tbaaInfo.baseType == tbaaInfo.accessType)
+    return tbaaInfo.accessType;
+
+  return cir::TBAATagAttr::get(mlirContext, tbaaInfo.baseType,
+                               tbaaInfo.accessType, tbaaInfo.offset);
+}
+
+TBAAAccessInfo CIRGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo sourceInfo,
+ TBAAAccessInfo targetInfo) {
+ assert(!cir::MissingFeatures::tbaaMergeTBAAInfo());
+ return TBAAAccessInfo();
+}
+
+TBAAAccessInfo
+CIRGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo infoA,
+ TBAAAccessInfo infoB) {
+ assert(!cir::MissingFeatures::tbaaMergeTBAAInfo());
+ return TBAAAccessInfo();
+}
+
+TBAAAccessInfo
+CIRGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo destInfo,
+ TBAAAccessInfo srcInfo) {
+ assert(!cir::MissingFeatures::tbaaMergeTBAAInfo());
+ return TBAAAccessInfo();
+}
+
+} // namespace clang::CIRGen
diff --git a/clang/lib/CIR/CodeGen/CIRGenTBAA.h b/clang/lib/CIR/CodeGen/CIRGenTBAA.h
new file mode 100644
index 0000000000000..3272c1630916d
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/CIRGenTBAA.h
@@ -0,0 +1,194 @@
+//===--- CIRGenTBAA.h - TBAA information for LLVM CIRGen --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the code that manages TBAA information and defines the TBAA policy
+// for the optimizer to use.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENTBAA_H
+#define LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENTBAA_H
+#include "mlir/IR/Attributes.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/MLIRContext.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/CodeGenOptions.h"
+#include "clang/CIR/Dialect/IR/CIRAttrs.h"
+namespace clang::CIRGen {
+class CIRGenTypes;
+enum class TBAAAccessKind : unsigned {
+ Ordinary, // Kind set by the TBAAAccessInfo constructors that take no kind.
+ MayAlias, // Kind produced by TBAAAccessInfo::getMayAliasInfo().
+ Incomplete, // Kind produced by TBAAAccessInfo::getIncompleteInfo().
+};
+// Describes a memory access in terms of TBAA.
+struct TBAAAccessInfo {
+  /// Builds a descriptor from explicit values for all five fields.
+  TBAAAccessInfo(TBAAAccessKind kind, cir::TBAAAttr baseType,
+                 cir::TBAAAttr accessType, uint64_t offset, uint64_t size)
+      : kind(kind), baseType(baseType), accessType(accessType), offset(offset),
+        size(size) {}
+
+  /// Builds an Ordinary descriptor from the other four fields.
+  TBAAAccessInfo(cir::TBAAAttr baseType, cir::TBAAAttr accessType,
+                 uint64_t offset, uint64_t size)
+      : kind(TBAAAccessKind::Ordinary), baseType(baseType),
+        accessType(accessType), offset(offset), size(size) {}
+
+  /// Builds an Ordinary descriptor with no base type and a zero offset.
+  explicit TBAAAccessInfo(cir::TBAAAttr accessType, uint64_t size)
+      : TBAAAccessInfo(TBAAAccessKind::Ordinary, /*baseType=*/{}, accessType,
+                       /*offset=*/0, size) {}
+
+  /// Builds the default descriptor: Ordinary, no types, zero offset and size.
+  TBAAAccessInfo() : TBAAAccessInfo(/*accessType=*/nullptr, /*size=*/0) {}
+
+  /// Returns a MayAlias descriptor with no types and zero offset and size.
+  static TBAAAccessInfo getMayAliasInfo() {
+    return TBAAAccessInfo(TBAAAccessKind::MayAlias, /*baseType=*/{},
+                          /*accessType=*/nullptr, /*offset=*/0, /*size=*/0);
+  }
+  bool isMayAlias() const { return kind == TBAAAccessKind::MayAlias; }
+
+  /// Returns an Incomplete descriptor with no types and zero offset and size.
+  static TBAAAccessInfo getIncompleteInfo() {
+    return TBAAAccessInfo(TBAAAccessKind::Incomplete, /*baseType=*/{},
+                          /*accessType=*/{}, /*offset=*/0, /*size=*/0);
+  }
+  bool isIncomplete() const { return kind == TBAAAccessKind::Incomplete; }
+
+  bool operator==(const TBAAAccessInfo &other) const {
+    return kind == other.kind && baseType == other.baseType &&
+           accessType == other.accessType && offset == other.offset &&
+           size == other.size;
+  }
+  bool operator!=(const TBAAAccessInfo &other) const {
+    return !(*this == other);
+  }
+
+  /// True unless this equals the default-constructed descriptor.
+  explicit operator bool() const { return *this != TBAAAccessInfo(); }
+
+  /// The kind of the access descriptor.
+  TBAAAccessKind kind;
+
+  /// The base/leading access type. May be null when the access is not
+  /// considered to be an access to an aggregate or union member.
+  cir::TBAAAttr baseType;
+
+  /// The final access type. May be null if there is no TBAA
+  /// information available about this access.
+  cir::TBAAAttr accessType;
+
+  /// The byte offset of the final access within the base one. Must be
+  /// zero if the base access type is not specified.
+  uint64_t offset;
+
+  /// The size of access, in bytes.
+  uint64_t size;
+};
+
+/// Provides the TBAA attributes, access descriptors and access tags that
+/// describe memory accesses to objects of a given clang type.
+class CIRGenTBAA {
+ mlir::MLIRContext *mlirContext;
+ [[maybe_unused]] clang::ASTContext &astContext;
+ [[maybe_unused]] CIRGenTypes &types;
+ mlir::ModuleOp moduleOp;
+ [[maybe_unused]] const clang::CodeGenOptions &codeGenOpts;
+ [[maybe_unused]] const clang::LangOptions &features;
+
+ llvm::DenseMap<const Type *, cir::TBAAAttr> metadataCache;
+ llvm::DenseMap<const Type *, cir::TBAAAttr> baseTypeMetadataCache;
+
+ cir::TBAAAttr getChar();
+
+ // An internal helper function to generate metadata used
+ // to describe accesses to objects of the given type.
+ cir::TBAAAttr getTypeInfoHelper(clang::QualType qty);
+ cir::TBAAScalarAttr getScalarTypeInfo(clang::QualType qty);
+
+ cir::TBAAAttr getValidBaseTypeInfo(clang::QualType qty);
+ cir::TBAAAttr getBaseTypeInfoHelper(const clang::Type *ty);
+
+public:
+ CIRGenTBAA(mlir::MLIRContext *mlirContext, clang::ASTContext &astContext,
+ CIRGenTypes &types, mlir::ModuleOp moduleOp,
+ const clang::CodeGenOptions &codeGenOpts,
+ const clang::LangOptions &features);
+
+ /// Get attribute used to describe accesses to objects of the given type.
+ cir::TBAAAttr getTypeInfo(clang::QualType qty);
+
+ /// Get TBAA information that describes an access to an object of the given
+ /// type.
+ TBAAAccessInfo getAccessInfo(clang::QualType accessType);
+
+ /// Get the TBAA information that describes an access to a virtual table
+ /// pointer.
+ TBAAAccessInfo getVTablePtrAccessInfo(mlir::Type vtablePtrType);
+
+ /// Get the TBAAStruct attributes to be used for a memcpy of the given type.
+ mlir::ArrayAttr getTBAAStructInfo(clang::QualType qty);
+
+ /// Get attribute that describes the given base access type. Return null if
+ /// the type is not suitable for use in TBAA access tags.
+ cir::TBAAAttr getBaseTypeInfo(clang::QualType qty);
+
+ /// Get TBAA tag for a given memory access.
+ cir::TBAAAttr getAccessTagInfo(TBAAAccessInfo tbaaInfo);
+
+ /// Get merged TBAA information for the purpose of type casts.
+ TBAAAccessInfo mergeTBAAInfoForCast(TBAAAccessInfo sourceInfo,
+ TBAAAccessInfo targetInfo);
+
+ /// Get merged TBAA information for the purpose of conditional operator.
+ TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo infoA,
+ TBAAAccessInfo infoB);
+
+ /// Get merged TBAA information for the purpose of memory transfer calls.
+ TBAAAccessInfo mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo destInfo,
+ TBAAAccessInfo srcInfo);
+};
+} // namespace clang::CIRGen
+namespace llvm {
+template <> struct DenseMapInfo<clang::CIRGen::TBAAAccessInfo> {
+  static clang::CIRGen::TBAAAccessInfo getEmptyKey() {
+    using AttrInfo = DenseMapInfo<cir::TBAAAttr>; // base/access type traits
+    using IntInfo = DenseMapInfo<uint64_t>;       // offset/size traits
+    const auto kind = static_cast<clang::CIRGen::TBAAAccessKind>(
+        DenseMapInfo<unsigned>::getEmptyKey());
+    return clang::CIRGen::TBAAAccessInfo(
+        kind, AttrInfo::getEmptyKey(), AttrInfo::getEmptyKey(),
+        IntInfo::getEmptyKey(), IntInfo::getEmptyKey());
+  }
+  static clang::CIRGen::TBAAAccessInfo getTombstoneKey() {
+    using AttrInfo = DenseMapInfo<cir::TBAAAttr>; // base/access type traits
+    using IntInfo = DenseMapInfo<uint64_t>;       // offset/size traits
+    const auto kind = static_cast<clang::CIRGen::TBAAAccessKind>(
+        DenseMapInfo<unsigned>::getTombstoneKey());
+    return clang::CIRGen::TBAAAccessInfo(
+        kind, AttrInfo::getTombstoneKey(), AttrInfo::getTombstoneKey(),
+        IntInfo::getTombstoneKey(), IntInfo::getTombstoneKey());
+  }
+  static unsigned getHashValue(const clang::CIRGen::TBAAAccessInfo &val) {
+    unsigned hash = // XOR of the five per-field hashes.
+        DenseMapInfo<unsigned>::getHashValue(static_cast<unsigned>(val.kind));
+    hash ^= DenseMapInfo<cir::TBAAAttr>::getHashValue(val.baseType);
+    hash ^= DenseMapInfo<cir::TBAAAttr>::getHashValue(val.accessType);
+    hash ^= DenseMapInfo<uint64_t>::getHashValue(val.offset);
+    return hash ^ DenseMapInfo<uint64_t>::getHashValue(val.size);
+  }
+  static bool isEqual(const clang::CIRGen::TBAAAccessInfo &lhs,
+                      const clang::CIRGen::TBAAAccessInfo &rhs) {
+    return lhs == rhs;
+  }
+};
+} // namespace llvm
+#endif
diff --git a/clang/test/CIR/CodeGen/builtin_new_delete.cpp b/clang/test/CIR/CodeGen/builtin_new_delete.cpp
new file mode 100644
index 0000000000000..d540bfcf8a36d
--- /dev/null
+++ b/clang/test/CIR/CodeGen/builtin_new_delete.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+
+void test_builtins_basic() {
+ __builtin_operator_delete(__builtin_operator_new(4)); // unsized delete form
+ // CIR-LABEL: test_builtins_basic
+ // CIR: [[P:%.*]] = cir.call @_Znwm({{%.*}}) : (!u64i) -> !cir.ptr<!void>
+ // CIR: cir.call @_ZdlPv([[P]]) {{.*}}: (!cir.ptr<!void>) -> ()
+ // CIR: cir.return
+
+ // LLVM-LABEL: test_builtins_basic
+ // LLVM: [[P:%.*]] = call ptr @_Znwm(i64 4)
+ // LLVM: call void @_ZdlPv(ptr [[P]])
+ // LLVM: ret void
+
+ // OGCG-LABEL: test_builtins_basic
+ // OGCG: [[P:%.*]] = call {{.*}} ptr @_Znwm(i64 {{.*}} 4)
+ // OGCG: call void @_ZdlPv(ptr {{.*}} [[P]])
+ // OGCG: ret void
+}
+
+void test_sized_delete() {
+ __builtin_operator_delete(__builtin_operator_new(4), 4); // sized delete form
+
+ // CIR-LABEL: test_sized_delete
+ // CIR: [[P:%.*]] = cir.call @_Znwm({{%.*}}) : (!u64i) -> !cir.ptr<!void>
+ // CIR: cir.call @_ZdlPvm([[P]], {{%.*}}) {{.*}}: (!cir.ptr<!void>, !u64i) -> ()
+ // CIR: cir.return
+
+ // LLVM-LABEL: test_sized_delete
+ // LLVM: [[P:%.*]] = call ptr @_Znwm(i64 4)
+ // LLVM: call void @_ZdlPvm(ptr [[P]], i64 4)
+ // LLVM: ret void
+
+ // OGCG-LABEL: test_sized_delete
+ // OGCG: [[P:%.*]] = call {{.*}} ptr @_Znwm(i64 {{.*}} 4)
+ // OGCG: call void @_ZdlPvm(ptr {{.*}} [[P]], i64 {{.*}} 4)
+ // OGCG: ret void
+}
More information about the cfe-commits
mailing list