[clang] [CIR] Add limited support for array new (PR #161095)

Mon Sep 29 12:43:54 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-clangir

Author: None (jiang1997)

<details>
<summary>Changes</summary>

This change adds initial support for array new expressions where the array size is constant and the element does not require a cookie.

Ported from ClangIR incubator PR [#1286](https://github.com/llvm/clangir/pull/1286).
This is the first PR in a series intended to close #160383.


---
Full diff: https://github.com/llvm/llvm-project/pull/161095.diff


5 Files Affected:

- (modified) clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp (+18) 
- (modified) clang/lib/CIR/CodeGen/CIRGenCXXABI.h (+15) 
- (modified) clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp (+128-4) 
- (modified) clang/lib/CIR/CodeGen/CIRGenFunction.h (+5) 
- (modified) clang/test/CIR/CodeGen/new.cpp (+53) 


``````````diff

diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp
index 5f1faabde22a5..a6ec2f2981c51 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp
@@ -15,6 +15,7 @@
 #include "CIRGenFunction.h"
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/GlobalDecl.h"
 
 using namespace clang;
@@ -75,3 +76,20 @@ void CIRGenCXXABI::setCXXABIThisValue(CIRGenFunction &cgf,
   assert(getThisDecl(cgf) && "no 'this' variable for function");
   cgf.cxxabiThisValue = thisPtr;
 }
+
+CharUnits CIRGenCXXABI::getArrayCookieSize(const CXXNewExpr *E) {
+  if (!requiresArrayCookie(E))
+    return CharUnits::Zero();
+
+  cgm.errorNYI(E->getSourceRange(), "CIRGenCXXABI::getArrayCookieSize");
+  return CharUnits::Zero();
+}
+
+bool CIRGenCXXABI::requiresArrayCookie(const CXXNewExpr *E) {
+  // If the class's usual deallocation function takes two arguments,
+  // it needs a cookie.
+  if (E->doesUsualArrayDeleteWantSize())
+    return true;
+
+  return E->getAllocatedType().isDestructedType();
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
index ae922599809b8..185db2840c237 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
@@ -28,6 +28,8 @@ class CIRGenCXXABI {
   CIRGenModule &cgm;
   std::unique_ptr<clang::MangleContext> mangleContext;
 
+  virtual bool requiresArrayCookie(const CXXNewExpr *E);
+
 public:
   // TODO(cir): make this protected when target-specific CIRGenCXXABIs are
   // implemented.
@@ -241,6 +243,19 @@ class CIRGenCXXABI {
   void setStructorImplicitParamValue(CIRGenFunction &cgf, mlir::Value val) {
     cgf.cxxStructorImplicitParamValue = val;
   }
+
+  /**************************** Array cookies ******************************/
+
+  /// Returns the extra size required in order to store the array
+  /// cookie for the given new-expression.  May return 0 to indicate that no
+  /// array cookie is required.
+  ///
+  /// Several cases are filtered out before this method is called:
+  ///   - non-array allocations never need a cookie
+  ///   - calls to \::operator new(size_t, void*) never need a cookie
+  ///
+  /// \param E - the new-expression being allocated.
+  virtual CharUnits getArrayCookieSize(const CXXNewExpr *E);
 };
 
 /// Creates and Itanium-family ABI
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
index 83208bf226882..30d4e03aba523 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CIRGenCXXABI.h"
+#include "CIRGenConstantEmitter.h"
 #include "CIRGenFunction.h"
 
 #include "clang/AST/DeclCXX.h"
@@ -264,6 +265,19 @@ static UsualDeleteParams getUsualDeleteParams(const FunctionDecl *fd) {
   return params;
 }
 
+static CharUnits calculateCookiePadding(CIRGenFunction &cgf,
+                                        const CXXNewExpr *e) {
+  if (!e->isArray())
+    return CharUnits::Zero();
+
+  // No cookie is required if the operator new[] being used is the
+  // reserved placement operator new[].
+  if (e->getOperatorNew()->isReservedGlobalPlacementOperator())
+    return CharUnits::Zero();
+
+  return cgf.cgm.getCXXABI().getArrayCookieSize(e);
+}
+
 static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
                                        unsigned minElements,
                                        mlir::Value &numElements,
@@ -278,8 +292,98 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
     return sizeWithoutCookie;
   }
 
-  cgf.cgm.errorNYI(e->getSourceRange(), "emitCXXNewAllocSize: array");
-  return {};
+  // The width of size_t.
+  unsigned sizeWidth = cgf.cgm.getDataLayout().getTypeSizeInBits(cgf.SizeTy);
+
+  // The number of elements can have an arbitrary integer type;
+  // essentially, we need to multiply it by a constant factor, add a
+  // cookie size, and verify that the result is representable as a
+  // size_t.  That's just a gloss, though, and it's wrong in one
+  // important way: if the count is negative, it's an error even if
+  // the cookie size would bring the total size >= 0.
+  //
+  // If the array size is constant, Sema will have prevented negative
+  // values and size overflow.
+
+  // Compute the constant factor.
+  llvm::APInt arraySizeMultiplier(sizeWidth, 1);
+  while (const ConstantArrayType *cat =
+             cgf.getContext().getAsConstantArrayType(type)) {
+    type = cat->getElementType();
+    arraySizeMultiplier *= cat->getSize();
+  }
+
+  CharUnits typeSize = cgf.getContext().getTypeSizeInChars(type);
+  llvm::APInt typeSizeMultiplier(sizeWidth, typeSize.getQuantity());
+  typeSizeMultiplier *= arraySizeMultiplier;
+
+  // Figure out the cookie size.
+  llvm::APInt cookieSize(sizeWidth,
+                         calculateCookiePadding(cgf, e).getQuantity());
+
+  // This will be a size_t.
+  mlir::Value size;
+
+  // Emit the array size expression.
+  // We multiply the size of all dimensions for NumElements.
+  // e.g. for 'int[2][3]', ElemType is 'int' and NumElements is 6.
+  const Expr *arraySize = *e->getArraySize();
+  mlir::Attribute constNumElements =
+      ConstantEmitter(cgf.cgm, &cgf)
+          .emitAbstract(arraySize, arraySize->getType());
+  if (constNumElements) {
+    // Get an APInt from the constant
+    const llvm::APInt &count =
+        mlir::cast<cir::IntAttr>(constNumElements).getValue();
+
+    unsigned numElementsWidth = count.getBitWidth();
+
+    // The equivalent code in CodeGen/CGExprCXX.cpp handles these cases as
+    // overflow, but they should never happen. The size argument is implicitly
+    // cast to a size_t, so it can never be negative and numElementsWidth will
+    // always equal sizeWidth.
+    assert(!count.isNegative() && "Expected non-negative array size");
+    assert(numElementsWidth == sizeWidth &&
+           "Expected a size_t array size constant");
+
+    // Okay, compute a count at the right width.
+    llvm::APInt adjustedCount = count.zextOrTrunc(sizeWidth);
+
+    // Scale numElements by that.  This might overflow, but we don't
+    // care because it only overflows if allocationSize does, too, and
+    // if that overflows then we shouldn't use this.
+    // This emits a constant that may not be used, but we can't tell here
+    // whether it will be needed or not.
+    numElements =
+        cgf.getBuilder().getConstInt(loc, adjustedCount * arraySizeMultiplier);
+
+    // Compute the size before cookie, and track whether it overflowed.
+    bool overflow;
+    llvm::APInt allocationSize =
+        adjustedCount.umul_ov(typeSizeMultiplier, overflow);
+
+    // Sema prevents us from hitting this case
+    assert(!overflow && "Overflow in array allocation size");
+
+    // Add in the cookie, and check whether it's overflowed.
+    if (cookieSize != 0) {
+      cgf.cgm.errorNYI(e->getSourceRange(),
+                       "emitCXXNewAllocSize: array cookie");
+    }
+
+    size = cgf.getBuilder().getConstInt(loc, allocationSize);
+  } else {
+    // TODO: Handle the variable size case
+    cgf.cgm.errorNYI(e->getSourceRange(),
+                     "emitCXXNewAllocSize: variable array size");
+  }
+
+  if (cookieSize == 0)
+    sizeWithoutCookie = size;
+  else
+    assert(sizeWithoutCookie && "didn't set sizeWithoutCookie?");
+
+  return size;
 }
 
 static void storeAnyExprIntoOneUnit(CIRGenFunction &cgf, const Expr *init,
@@ -308,13 +412,26 @@ static void storeAnyExprIntoOneUnit(CIRGenFunction &cgf, const Expr *init,
   llvm_unreachable("bad evaluation kind");
 }
 
+void CIRGenFunction::emitNewArrayInitializer(
+    const CXXNewExpr *e, QualType elementType, mlir::Type elementTy,
+    Address beginPtr, mlir::Value numElements,
+    mlir::Value allocSizeWithoutCookie) {
+  // If we have a type with trivial initialization and no initializer,
+  // there's nothing to do.
+  if (!e->hasInitializer())
+    return;
+
+  cgm.errorNYI(e->getSourceRange(), "emitNewArrayInitializer");
+}
+
 static void emitNewInitializer(CIRGenFunction &cgf, const CXXNewExpr *e,
                                QualType elementType, mlir::Type elementTy,
                                Address newPtr, mlir::Value numElements,
                                mlir::Value allocSizeWithoutCookie) {
   assert(!cir::MissingFeatures::generateDebugInfo());
   if (e->isArray()) {
-    cgf.cgm.errorNYI(e->getSourceRange(), "emitNewInitializer: array");
+    cgf.emitNewArrayInitializer(e, elementType, elementTy, newPtr, numElements,
+                                allocSizeWithoutCookie);
   } else if (const Expr *init = e->getInitializer()) {
     storeAnyExprIntoOneUnit(cgf, init, e->getAllocatedType(), newPtr,
                             AggValueSlot::DoesNotOverlap);
@@ -590,7 +707,14 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
   if (allocSize != allocSizeWithoutCookie)
     cgm.errorNYI(e->getSourceRange(), "emitCXXNewExpr: array with cookies");
 
-  mlir::Type elementTy = convertTypeForMem(allocType);
+  mlir::Type elementTy;
+  if (e->isArray()) {
+    // For array new, use the allocated type to handle multidimensional arrays
+    // correctly
+    elementTy = convertTypeForMem(e->getAllocatedType());
+  } else {
+    elementTy = convertTypeForMem(allocType);
+  }
   Address result = builder.createElementBitCast(getLoc(e->getSourceRange()),
                                                 allocation, elementTy);
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index ef07db3d48ffc..6f13a8cb8fb61 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1228,6 +1228,11 @@ class CIRGenFunction : public CIRGenTypeCache {
 
   mlir::Value emitCXXNewExpr(const CXXNewExpr *e);
 
+  void emitNewArrayInitializer(const CXXNewExpr *E, QualType ElementType,
+                               mlir::Type ElementTy, Address BeginPtr,
+                               mlir::Value NumElements,
+                               mlir::Value AllocSizeWithoutCookie);
+
   RValue emitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *e,
                                        const CXXMethodDecl *md,
                                        ReturnValueSlot returnValue);
diff --git a/clang/test/CIR/CodeGen/new.cpp b/clang/test/CIR/CodeGen/new.cpp
index b14bf077cd154..7a8dacc6bc887 100644
--- a/clang/test/CIR/CodeGen/new.cpp
+++ b/clang/test/CIR/CodeGen/new.cpp
@@ -180,3 +180,56 @@ void test_new_with_complex_type() {
 // OGCG:   store float 1.000000e+00, ptr %[[COMPLEX_REAL_PTR]], align 8
 // OGCG:   store float 2.000000e+00, ptr %[[COMPLEX_IMAG_PTR]], align 4
 // OGCG:   store ptr %[[NEW_COMPLEX]], ptr %[[A_ADDR]], align 8
+
+void t_new_constant_size() {
+  auto p = new double[16];
+}
+
+// In this test, NUM_ELEMENTS isn't used because no cookie is needed and there
+//   are no constructor calls needed.
+
+// CHECK:   cir.func{{.*}} @_Z19t_new_constant_sizev()
+// CHECK:    %0 = cir.alloca !cir.ptr<!cir.double>, !cir.ptr<!cir.ptr<!cir.double>>, ["p", init] {alignment = 8 : i64}
+// CHECK:    %[[#NUM_ELEMENTS:]] = cir.const #cir.int<16> : !u64i
+// CHECK:    %[[#ALLOCATION_SIZE:]] = cir.const #cir.int<128> : !u64i
+// CHECK:    %3 = cir.call @_Znam(%[[#ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:    %4 = cir.cast(bitcast, %3 : !cir.ptr<!void>), !cir.ptr<!cir.double>
+// CHECK:    cir.store align(8) %4, %0 : !cir.ptr<!cir.double>, !cir.ptr<!cir.ptr<!cir.double>>
+// CHECK:    cir.return
+// CHECK:  }
+
+// LLVM: define{{.*}} void @_Z19t_new_constant_sizev
+// LLVM:   %[[P_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[CALL:.*]] = call ptr @_Znam(i64 128)
+// LLVM:   store ptr %[[CALL]], ptr %[[P_ADDR]], align 8
+
+// OGCG: define{{.*}} void @_Z19t_new_constant_sizev
+// OGCG:   %[[P_ADDR:.*]] = alloca ptr, align 8
+// OGCG:   %[[CALL:.*]] = call noalias noundef nonnull ptr @_Znam(i64 noundef 128)
+// OGCG:   store ptr %[[CALL]], ptr %[[P_ADDR]], align 8
+
+
+void t_new_multidim_constant_size() {
+  auto p = new double[2][3][4];
+}
+
+// As above, NUM_ELEMENTS isn't used.
+
+// CHECK:   cir.func{{.*}} @_Z28t_new_multidim_constant_sizev()
+// CHECK:    %0 = cir.alloca !cir.ptr<!cir.array<!cir.array<!cir.double x 4> x 3>>, !cir.ptr<!cir.ptr<!cir.array<!cir.array<!cir.double x 4> x 3>>>, ["p", init] {alignment = 8 : i64}
+// CHECK:    %[[#NUM_ELEMENTS:]] = cir.const #cir.int<24> : !u64i
+// CHECK:    %[[#ALLOCATION_SIZE:]] = cir.const #cir.int<192> : !u64i
+// CHECK:    %3 = cir.call @_Znam(%[[#ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:    %4 = cir.cast(bitcast, %3 : !cir.ptr<!void>), !cir.ptr<!cir.array<!cir.array<!cir.double x 4> x 3>>
+// CHECK:    cir.store align(8) %4, %0 : !cir.ptr<!cir.array<!cir.array<!cir.double x 4> x 3>>, !cir.ptr<!cir.ptr<!cir.array<!cir.array<!cir.double x 4> x 3>>>
+// CHECK:  }
+
+// LLVM: define{{.*}} void @_Z28t_new_multidim_constant_sizev
+// LLVM:   %[[P_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[CALL:.*]] = call ptr @_Znam(i64 192)
+// LLVM:   store ptr %[[CALL]], ptr %[[P_ADDR]], align 8
+
+// OGCG: define{{.*}} void @_Z28t_new_multidim_constant_sizev
+// OGCG:   %[[P_ADDR:.*]] = alloca ptr, align 8
+// OGCG:   %[[CALL:.*]] = call noalias noundef nonnull ptr @_Znam(i64 noundef 192)
+// OGCG:   store ptr %[[CALL]], ptr %[[P_ADDR]], align 8

``````````

</details>


https://github.com/llvm/llvm-project/pull/161095