[clang] 729480c - [CIR] Generalize cxx alloc new size handling (#187790)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 23 11:47:25 PDT 2026
Author: Andy Kaylor
Date: 2026-03-23T11:47:16-07:00
New Revision: 729480c4aec6ea8a18149fd3c73de57aaa677d2a
URL: https://github.com/llvm/llvm-project/commit/729480c4aec6ea8a18149fd3c73de57aaa677d2a
DIFF: https://github.com/llvm/llvm-project/commit/729480c4aec6ea8a18149fd3c73de57aaa677d2a.diff
LOG: [CIR] Generalize cxx alloc new size handling (#187790)
The non-constant size handling in `emitCXXNewAllocSize` was making the
incorrect assumption that the size value would always be implicitly
cast to size_t by Sema, and that this was the only behavior we'd see.
This is actually only true with C++14 and later. To properly handle
earlier standards, we need the more robust checking that classic
codegen does in the equivalent function. This change adds that handling.
Assisted-by: Cursor / claude-4.6-opus-high
Added:
clang/test/CIR/CodeGen/cleanup-scope-return-in-loop.cpp
clang/test/CIR/CodeGen/new-array-size-conv.cpp
Modified:
clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
clang/lib/CIR/CodeGen/CIRGenFunction.cpp
clang/lib/CIR/CodeGen/CIRGenFunction.h
Removed:
################################################################################
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
index 03298faf79ce3..30a833564ec2f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
@@ -534,39 +534,97 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
// Create a value for the variable number of elements
numElements = cgf.emitScalarExpr(*e->getArraySize());
auto numElementsType = mlir::cast<cir::IntType>(numElements.getType());
- [[maybe_unused]] unsigned numElementsWidth = numElementsType.getWidth();
-
- // We might need check for overflow.
-
- mlir::Value hasOverflow;
- // Classic codegen checks for the size variable being signed, having a
- // smaller width than size_t, and having a larger width than size_t.
- // However, the AST implicitly casts the size variable to size_t so none of
- // these conditions will ever be met.
- assert(
- !(*e->getArraySize())->getType()->isSignedIntegerOrEnumerationType() &&
- (numElementsWidth == sizeWidth) &&
- (numElements.getType() == cgf.sizeTy) &&
- "Expected array size to be implicitly cast to size_t!");
-
- // There are up to three conditions we need to test for:
- // 1) if minElements > 0, we need to check whether numElements is smaller
+ unsigned numElementsWidth = numElementsType.getWidth();
+
+ // The number of elements can have an arbitrary integer type;
+ // essentially, we need to multiply it by a constant factor, add a
+ // cookie size, and verify that the result is representable as a
+ // size_t. That's just a gloss, though, and it's wrong in one
+ // important way: if the count is negative, it's an error even if
+ // the cookie size would bring the total size >= 0.
+ bool isSigned =
+ (*e->getArraySize())->getType()->isSignedIntegerOrEnumerationType();
+
+ // There are up to five conditions we need to test for:
+ // 1) if isSigned, we need to check whether numElements is negative;
+ // 2) if numElementsWidth > sizeWidth, we need to check whether
+ // numElements is larger than something representable in size_t;
+ // 3) if minElements > 0, we need to check whether numElements is smaller
// than that.
- // 2) we need to compute
+ // 4) we need to compute
// sizeWithoutCookie := numElements * typeSizeMultiplier
// and check whether it overflows; and
- // 3) if we need a cookie, we need to compute
+ // 5) if we need a cookie, we need to compute
// size := sizeWithoutCookie + cookieSize
// and check whether it overflows.
+ mlir::Value hasOverflow;
+
+ // If numElementsWidth > sizeWidth, then one way or another, we're
+ // going to have to do a comparison for (2), and this happens to
+ // take care of (1), too.
+ if (numElementsWidth > sizeWidth) {
+ llvm::APInt threshold =
+ llvm::APInt::getOneBitSet(numElementsWidth, sizeWidth);
+
+ // Use an unsigned comparison regardless of the sign of numElements.
+ mlir::Value unsignedNumElements = numElements;
+ if (isSigned)
+ unsignedNumElements = cgf.getBuilder().createIntCast(
+ numElements, cgf.getBuilder().getUIntNTy(numElementsWidth));
+
+ mlir::Value thresholdV =
+ cgf.getBuilder().getConstInt(loc, threshold, /*isUnsigned=*/true);
+ hasOverflow = cgf.getBuilder().createCompare(
+ loc, cir::CmpOpKind::ge, unsignedNumElements, thresholdV);
+ numElements = cgf.getBuilder().createIntCast(
+ unsignedNumElements, mlir::cast<cir::IntType>(cgf.sizeTy));
+
+ // Otherwise, if we're signed, we want to sext up to size_t.
+ } else if (isSigned) {
+ if (numElementsWidth < sizeWidth)
+ numElements = cgf.getBuilder().createIntCast(
+ numElements, cgf.getBuilder().getSIntNTy(sizeWidth));
+
+ // If there's a non-1 type size multiplier, then we can do the
+ // signedness check at the same time as we do the multiply
+ // because a negative number times anything will cause an
+ // unsigned overflow. Otherwise, we have to do it here. But at
+ // least in this case, we can subsume the >= minElements check.
+ if (typeSizeMultiplier == 1)
+ hasOverflow = cgf.getBuilder().createCompare(
+ loc, cir::CmpOpKind::lt, numElements,
+ cgf.getBuilder().getConstInt(loc, numElements.getType(),
+ minElements));
+
+ numElements = cgf.getBuilder().createIntCast(
+ numElements, mlir::cast<cir::IntType>(cgf.sizeTy));
+
+ // Otherwise, zext up to size_t if necessary.
+ } else if (numElementsWidth < sizeWidth) {
+ numElements = cgf.getBuilder().createIntCast(
+ numElements, mlir::cast<cir::IntType>(cgf.sizeTy));
+ }
+
+ assert(numElements.getType() == cgf.sizeTy);
+
if (minElements) {
// Don't allow allocation of fewer elements than we have initializers.
if (!hasOverflow) {
- // FIXME: Avoid creating this twice. It may happen above.
mlir::Value minElementsV = cgf.getBuilder().getConstInt(
loc, llvm::APInt(sizeWidth, minElements));
hasOverflow = cgf.getBuilder().createCompare(loc, cir::CmpOpKind::lt,
numElements, minElementsV);
+ } else if (numElementsWidth > sizeWidth) {
+ // The other existing overflow subsumes this check.
+ // We do an unsigned comparison, since any signed value < -1 is
+ // taken care of either above or below.
+ mlir::Value minElementsV = cgf.getBuilder().getConstInt(
+ loc, llvm::APInt(sizeWidth, minElements));
+ hasOverflow = cgf.getBuilder().createOr(
+ loc, hasOverflow,
+ cgf.getBuilder().createCompare(loc, cir::CmpOpKind::lt, numElements,
+ minElementsV));
}
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 4862dacfdeb1e..58fe699eeff1f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -344,13 +344,23 @@ void CIRGenFunction::LexicalScope::cleanup() {
return;
// Get rid of any empty block at the end of the scope.
- bool entryBlock = builder.getInsertionBlock()->isEntryBlock();
- if (!entryBlock && curBlock->empty()) {
+ bool isEntryBlock = builder.getInsertionBlock()->isEntryBlock();
+ if (!isEntryBlock && curBlock->empty()) {
curBlock->erase();
for (mlir::Block *retBlock : retBlocks) {
if (retBlock->getUses().empty())
retBlock->erase();
}
+ // The empty block was created by a terminator (return/break/continue)
+ // and is now erased. If there are pending cleanup scopes (from variables
+ // with destructors), we need to pop them and ensure the containing scope
+ // block gets a proper terminator (e.g. cir.yield). Without this, the
+ // cleanup-scope-op popping that would otherwise happen in
+ // ~RunCleanupsScope leaves the scope block without a terminator.
+ if (hasPendingCleanups()) {
+ builder.setInsertionPointToEnd(entryBlock);
+ insertCleanupAndLeave(entryBlock);
+ }
return;
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index a3eb000871013..153bc19ab2f31 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1037,6 +1037,12 @@ class CIRGenFunction : public CIRGenTypeCache {
performCleanup = false;
cgf.currentCleanupStackDepth = oldCleanupStackDepth;
}
+
+ /// Whether there are any pending cleanups that have been pushed since
+ /// this scope was entered.
+ bool hasPendingCleanups() const {
+ return cgf.ehStack.stable_begin() != cleanupStackDepth;
+ }
};
// Cleanup stack depth of the RunCleanupsScope that was pushed most recently.
diff --git a/clang/test/CIR/CodeGen/cleanup-scope-return-in-loop.cpp b/clang/test/CIR/CodeGen/cleanup-scope-return-in-loop.cpp
new file mode 100644
index 0000000000000..38e0f314ee52f
--- /dev/null
+++ b/clang/test/CIR/CodeGen/cleanup-scope-return-in-loop.cpp
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+struct Struk {
+ ~Struk();
+ bool check();
+};
+
+// Test that a for-loop body containing a local variable with a destructor
+// followed by 'continue' and 'return' produces a properly terminated
+// cir.scope. This exercises the interaction between LexicalScope cleanup,
+// CleanupScopeOp popping, and empty-block erasure.
+
+int test_cleanup_return_in_loop(int n) {
+ for (int i = 0; i < n; i++) {
+ Struk s;
+ if (s.check())
+ continue;
+ return i;
+ }
+ return -1;
+}
+
+// CIR: cir.func {{.*}} @_Z27test_cleanup_return_in_loopi
+// CIR: cir.scope {
+// CIR: cir.for : cond {
+// CIR: } body {
+// CIR: cir.scope {
+// CIR: cir.cleanup.scope {
+// CIR: cir.continue
+// CIR: cir.return
+// CIR: } cleanup normal {
+// CIR: cir.call @_ZN5StrukD1Ev
+// CIR: cir.yield
+// CIR: cir.yield
+// CIR: } step {
+// CIR: cir.return
+
+// LLVM: define dso_local noundef i32 @_Z27test_cleanup_return_in_loopi(i32 noundef %{{.*}})
+// LLVM: [[LOOP_COND:.*]]:
+// LLVM: %[[ICMP:.*]] = icmp slt i32
+// LLVM-NEXT: br i1 %[[ICMP]]
+// LLVM: [[LOOP_BODY:.*]]:
+// LLVM: %[[CALL:.*]] = call noundef {{.*}} @_ZN5Struk5checkEv(
+// LLVM-NEXT: br i1 %[[CALL]]
+// LLVM: [[CLEANUP:.*]]:
+// LLVM: call void @_ZN5StrukD1Ev(
+// LLVM: switch i32 %{{.*}}, label %{{.*}} [
+// LLVM: ]
+// LLVM: [[LOOP_STEP:.*]]:
+// LLVM: add nsw i32 %{{.*}}, 1
+// LLVM: [[POST_LOOP:.*]]:
+// LLVM: store i32 -1, ptr %{{.*}}, align 4
+// LLVM: ret i32
diff --git a/clang/test/CIR/CodeGen/new-array-size-conv.cpp b/clang/test/CIR/CodeGen/new-array-size-conv.cpp
new file mode 100644
index 0000000000000..e109b99b91af2
--- /dev/null
+++ b/clang/test/CIR/CodeGen/new-array-size-conv.cpp
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// Tests for array new expressions where the array size is not implicitly
+// converted to size_t by Sema (pre-C++14 behavior). The CIR codegen must
+// handle the signed/width conversion itself.
+
+typedef __typeof__(sizeof(int)) size_t;
+
+// Sized non-allocating (placement) new.
+void *operator new[](size_t, void *p) noexcept { return p; }
+
+struct S {
+ int x;
+ S();
+};
+
+// Signed int array size with multi-byte element (typeSizeMultiplier != 1).
+// The sign extension is done and the multiply overflow catches negative values.
+void t_new_signed_size(int n) {
+ auto p = new double[n];
+}
+
+// CIR-LABEL: cir.func {{.*}} @_Z17t_new_signed_sizei
+// CIR: %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
+// CIR: %[[N_SEXT:.*]] = cir.cast integral %[[N]] : !s32i -> !s64i
+// CIR: %[[N_SIZE_T:.*]] = cir.cast integral %[[N_SEXT]] : !s64i -> !u64i
+// CIR: %[[ELEMENT_SIZE:.*]] = cir.const #cir.int<8> : !u64i
+// CIR: %[[RESULT:.*]], %[[OVERFLOW:.*]] = cir.mul.overflow %[[N_SIZE_T]], %[[ELEMENT_SIZE]] : !u64i -> !u64i
+// CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
+// CIR: %[[ALLOC_SIZE:.*]] = cir.select if %[[OVERFLOW]] then %[[ALL_ONES]] else %[[RESULT]]
+
+// LLVM-LABEL: define{{.*}} void @_Z17t_new_signed_sizei
+// LLVM: %[[N:.*]] = load i32, ptr %{{.+}}
+// LLVM: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
+// LLVM: %[[MUL_OVERFLOW:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N_SIZE_T]], i64 8)
+
+// OGCG-LABEL: define{{.*}} void @_Z17t_new_signed_sizei
+// OGCG: %[[N:.*]] = load i32, ptr %{{.+}}
+// OGCG: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
+// OGCG: %[[MUL_OVERFLOW:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N_SIZE_T]], i64 8)
+
+// Signed int array size with single-byte element (typeSizeMultiplier == 1).
+// A signed comparison catches negative values directly.
+void t_new_signed_size_char(int n) {
+ auto p = new char[n];
+}
+
+// CIR-LABEL: cir.func {{.*}} @_Z22t_new_signed_size_chari
+// CIR: %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
+// CIR: %[[N_SEXT:.*]] = cir.cast integral %[[N]] : !s32i -> !s64i
+// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
+// CIR: %[[IS_NEG:.*]] = cir.cmp lt %[[N_SEXT]], %[[ZERO]] : !s64i
+// CIR: %[[N_SIZE_T:.*]] = cir.cast integral %[[N_SEXT]] : !s64i -> !u64i
+// CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
+// CIR: %[[ALLOC_SIZE:.*]] = cir.select if %[[IS_NEG]] then %[[ALL_ONES]] else %[[N_SIZE_T]]
+
+// LLVM-LABEL: define{{.*}} void @_Z22t_new_signed_size_chari
+// LLVM: %[[N:.*]] = load i32, ptr %{{.+}}
+// LLVM: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
+// LLVM: %[[IS_NEG:.*]] = icmp slt i64 %[[N_SIZE_T]], 0
+
+// OGCG-LABEL: define{{.*}} void @_Z22t_new_signed_size_chari
+// OGCG: %[[N:.*]] = load i32, ptr %{{.+}}
+// OGCG: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
+// OGCG: %[[IS_NEG:.*]] = icmp slt i64 %[[N_SIZE_T]], 0
More information about the cfe-commits
mailing list