[clang] MSan: poison the default-init allocation before calling constructors (PR #188001)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 24 06:25:34 PDT 2026
https://github.com/dingxiangfei2009 updated https://github.com/llvm/llvm-project/pull/188001
>From cca93e30b6fa358c6923363637ac9c6d7b9405fa Mon Sep 17 00:00:00 2001
From: Xiangfei Ding <dingxiangfei2009 at protonmail.ch>
Date: Thu, 19 Mar 2026 16:04:41 +0000
Subject: [PATCH] MSan: poison the default-init allocation before calling
constructors
This change aligns code generation with the standard's rules for the reserved
global placement forms of `new` when the new-expression uses
default-initialisation: the storage is poisoned before any constructor runs.
Signed-off-by: Xiangfei Ding <dingxiangfei2009 at protonmail.ch>
---
clang/lib/CodeGen/CGExprCXX.cpp | 93 +++++++++++++++++++
clang/test/CXX/drs/cwg1748.cpp | 20 ++--
clang/test/CodeGenCXX/new.cpp | 10 +-
.../test/CodeGenCXX/sanitize-default-init.cpp | 41 ++++++++
4 files changed, 157 insertions(+), 7 deletions(-)
create mode 100644 clang/test/CodeGenCXX/sanitize-default-init.cpp
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 82300c3ede183..1b22915583689 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -17,8 +17,11 @@
#include "CodeGenFunction.h"
#include "ConstantEmitter.h"
#include "TargetInfo.h"
+#include "clang/AST/ExprCXX.h"
#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/Sanitizers.h"
#include "clang/CodeGen/CGFunctionInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Intrinsics.h"
using namespace clang;
@@ -1489,6 +1492,7 @@ class CallDeleteDuringNew final : public EHScopeStack::Cleanup {
EmitNewDeleteCall(CGF, OperatorDelete, FPT, DeleteArgs);
}
};
+
} // namespace
/// Enter a cleanup to call 'operator delete' if the initializer in a
@@ -1553,6 +1557,131 @@ static void EnterNewDeleteCleanup(CodeGenFunction &CGF, const CXXNewExpr *E,
CGF.initFullExprCleanup();
}
+/// Fill the storage of one object of type \p Ty at \p Dest with poison.
+///
+/// Emits a single memset spanning the size of \p Ty whose fill value is an
+/// `i8 poison`; the memset is volatile when \p Ty is volatile-qualified.
+static void PoisonTrivialField(CodeGenFunction &CGF, QualType const &Ty,
+                               Address Dest) {
+  CharUnits Size = CGF.getContext().getTypeSizeInChars(Ty);
+  llvm::Value *SizeVal = CGF.CGM.getSize(Size);
+  llvm::Value *PoisonByte = llvm::PoisonValue::get(CGF.Builder.getInt8Ty());
+  CGF.Builder.CreateMemSet(Dest, PoisonByte, SizeVal, Ty.isVolatileQualified());
+}
+
+// Defined further down; mutually recursive with PoisonLValueRecursive.
+static void PoisonArrayLValue(CodeGenFunction &CGF,
+                              QualType const &ElementQualTy,
+                              llvm::Type *ElementTy, LValue const &Dest,
+                              llvm::Value *NumElements);
+
+/// Poison the object of type \p Ty that \p Dest designates.
+///
+/// Trivially copyable types and references are filled in one go.  Any other
+/// type is expected to be a C++ class; each of its fields is then poisoned in
+/// turn, recursing into class-type fields and looping over array fields.
+static void PoisonLValueRecursive(CodeGenFunction &CGF, QualType const &Ty,
+                                  LValue const &Dest) {
+  if (Ty.isTriviallyCopyableType(CGF.getContext()) || Ty->isReferenceType()) {
+    PoisonTrivialField(CGF, Ty, Dest.getAddress());
+    return;
+  }
+
+  auto *RD = Ty->getAsCXXRecordDecl();
+  assert(RD &&
+         "type is not trivially copyable but it is not a record type either");
+  // NOTE(review): only the fields of RD itself are visited; base-class
+  // subobjects and vptrs do not appear to be poisoned -- confirm intended.
+  for (auto *FD : RD->fields()) {
+    // There is no need to poison unnamed fields.
+    if (FD->isUnnamedBitField())
+      continue;
+    QualType FieldTy = FD->getType();
+    // Address the field's own storage.  EmitLValueForField would load through
+    // a reference member and hand back the (not yet bound) referent instead.
+    LValue FieldLV = CGF.EmitLValueForFieldInitialization(Dest, FD);
+    if (FieldTy->isRecordType()) {
+      // Carry on with the next field afterwards; returning here would leave
+      // everything after the first class-type field unpoisoned.
+      PoisonLValueRecursive(CGF, FieldTy, FieldLV);
+      continue;
+    }
+    if (FieldTy->isArrayType()) {
+      // Flatten the array to its base element type so that the per-element
+      // recursion never receives an array type, which it cannot handle when
+      // the element is not trivially copyable.  Arrays without a constant
+      // bound contribute no elements and are skipped.
+      ASTContext &Ctx = CGF.getContext();
+      if (const auto *CAT = Ctx.getAsConstantArrayType(FieldTy)) {
+        QualType BaseElementTy = Ctx.getBaseElementType(FieldTy);
+        if (uint64_t NumArrayElements = Ctx.getConstantArrayElementCount(CAT)) {
+          PoisonArrayLValue(
+              CGF, BaseElementTy, CGF.ConvertTypeForMem(BaseElementTy),
+              FieldLV, llvm::ConstantInt::get(CGF.SizeTy, NumArrayElements));
+        }
+      }
+      // Likewise, an array field must not end the walk over later fields.
+      continue;
+    }
+    // Every other case is trivial to poison.
+    PoisonTrivialField(CGF, FieldTy, FieldLV.getAddress());
+  }
+}
+
+/// Poison \p NumElements consecutive objects of type \p ElementQualTy, the
+/// first of which lives at \p Dest.
+///
+/// \p ElementTy is the in-memory LLVM type of one element.  \p NumElements may
+/// be a constant or a value only known at run time, and may be zero.  The
+/// elements are poisoned one per iteration of an emitted loop.
+static void PoisonArrayLValue(CodeGenFunction &CGF,
+                              QualType const &ElementQualTy,
+                              llvm::Type *ElementTy, LValue const &Dest,
+                              llvm::Value *NumElements) {
+  auto &Builder = CGF.Builder;
+
+  // The loop tests its exit condition only after the body has run once, so
+  // an empty array has to be filtered out first.  A constant zero needs no
+  // code at all; a count that is not constant gets a run-time check below.
+  auto *ConstantCount = dyn_cast<llvm::ConstantInt>(NumElements);
+  if (ConstantCount && ConstantCount->isZero())
+    return;
+
+  CharUnits ElementAlign = Dest.getAlignment().alignmentOfArrayElement(
+      CGF.getContext().getTypeSizeInChars(ElementQualTy));
+  llvm::Value *BeginPtr = Dest.emitRawPointer(CGF);
+  llvm::Value *EndPtr = Builder.CreateInBoundsGEP(
+      ElementTy, BeginPtr, NumElements, "arraypoison.end");
+  llvm::Value *One = llvm::ConstantInt::get(CGF.SizeTy, 1);
+
+  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
+  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("arraypoison.body");
+  llvm::BasicBlock *EndBB = CGF.createBasicBlock("arraypoison.end");
+  if (!ConstantCount) {
+    llvm::Value *IsEmpty =
+        Builder.CreateIsNull(NumElements, "arraypoison.isempty");
+    Builder.CreateCondBr(IsEmpty, EndBB, BodyBB);
+  }
+
+  // The loop head.
+  CGF.EmitBlock(BodyBB);
+  llvm::PHINode *CurElementPtr =
+      Builder.CreatePHI(BeginPtr->getType(), 2, "arraypoison.cur");
+  CurElementPtr->addIncoming(BeginPtr, EntryBB);
+  LValue ElementDest = CGF.MakeAddrLValue(
+      Address(CurElementPtr, ElementTy, ElementAlign), ElementQualTy);
+  PoisonLValueRecursive(CGF, ElementQualTy, ElementDest);
+  llvm::Value *NextElementPtr = Builder.CreateInBoundsGEP(
+      ElementTy, CurElementPtr, One, "arraypoison.next");
+  llvm::Value *Done =
+      Builder.CreateICmpEQ(NextElementPtr, EndPtr, "arraypoison.done");
+  Builder.CreateCondBr(Done, EndBB, BodyBB);
+  // Poisoning an element may itself open new blocks, so take the back edge's
+  // predecessor from the current insertion point rather than assuming BodyBB.
+  CurElementPtr->addIncoming(NextElementPtr, Builder.GetInsertBlock());
+  CGF.EmitBlock(EndBB);
+}
+
llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
// The element type being allocated.
QualType allocType = getContext().getBaseElementType(E->getAllocatedType());
@@ -1611,6 +1695,15 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
allocatorArgs.add(RValue::get(allocation, *this), arg->getType());
}
+ if (E->getInitializationStyle() == CXXNewInitializationStyle::None) {
+ auto *ElemTy = ConvertTypeForMem(allocType);
+ auto Dest = MakeAddrLValue(allocation.withElementType(ElemTy), allocType);
+ if (numElements) {
+ PoisonArrayLValue(*this, allocType, ElemTy, Dest, numElements);
+ } else {
+ PoisonLValueRecursive(*this, allocType, Dest);
+ }
+ }
} else {
const FunctionProtoType *allocatorType =
allocator->getType()->castAs<FunctionProtoType>();
diff --git a/clang/test/CXX/drs/cwg1748.cpp b/clang/test/CXX/drs/cwg1748.cpp
index a0fe737539392..aeee0bb6b3558 100644
--- a/clang/test/CXX/drs/cwg1748.cpp
+++ b/clang/test/CXX/drs/cwg1748.cpp
@@ -21,6 +21,7 @@ struct X { X(); };
// perform a null check.
// CHECK-LABEL: define {{.*}} @_Z1fPv(
+// CHECK: call void @llvm.memset{{.*}}(ptr {{.*}}, {{.*}} poison, {{.*}})
// CHECK-NOT: call
// CHECK-NOT: icmp{{.*}} null
// CHECK-NOT: br i1
@@ -29,10 +30,17 @@ struct X { X(); };
X *f(void *p) { return new (p) X; }
// CHECK-LABEL: define {{.*}} @_Z1gPv(
-// CHECK-NOT: call
-// CHECK-NOT: icmp{{.*}} null
-// CHECK-NOT: br i1
-// CHECK: call void @_ZN1XC1Ev(
-// CHECK: br i1
-// CHECK: }
+// CHECK: [[ARRAY_POISON_END:%arraypoison\.end.*]] = getelementptr inbounds [[A:.+]], ptr [[P:%.+]], i{{.+}} 5
+// CHECK: [[LARRAY_POISON_BODY:arraypoison\.body.*]]:
+// CHECK-NEXT: [[ARRAY_POISON_CUR:%arraypoison\.cur.*]] = phi ptr [ [[P]], %entry ], [ [[ARRAY_POISON_NEXT:%arraypoison\.next.*]], %[[LARRAY_POISON_BODY]] ]
+// CHECK: call void @llvm.memset.p0.i64(ptr align 1 [[ARRAY_POISON_CUR]], i8 poison, i64 1, i1 false)
+// CHECK: br i1 {{.+}}, label %[[LARRAY_POISON_END:arraypoison\.end.*]], label %[[LARRAY_POISON_BODY]]
+// CHECK: [[LARRAY_POISON_END]]:
+// CHECK: arrayctor.loop{{.*}}:
+// CHECK-NOT: call
+// CHECK-NOT: icmp{{.*}} null
+// CHECK-NOT: br i1
+// CHECK: call void @_ZN1XC1Ev(
+// CHECK: br i1
+// CHECK: }
X *g(void *p) { return new (p) X[5]; }
diff --git a/clang/test/CodeGenCXX/new.cpp b/clang/test/CodeGenCXX/new.cpp
index af225529c494e..bfa9eb4bfdea4 100644
--- a/clang/test/CodeGenCXX/new.cpp
+++ b/clang/test/CodeGenCXX/new.cpp
@@ -223,7 +223,11 @@ namespace test15 {
// CHECK: [[P:%.*]] = load ptr, ptr
// CHECK-NOT: icmp eq ptr [[P]], null
// CHECK-NOT: br i1
- // CHECK-NEXT: [[END:%.*]] = getelementptr inbounds [[A:.*]], ptr [[P]], i64 5
+ // CHECK: [[ARRAY_POISON_END:%arraypoison\.end.*]] = getelementptr inbounds [[A:.*]], ptr [[P]], i64 5
+ // CHECK: [[LARRAY_POISON_BODY:arraypoison\.body.*]]:
+ // CHECK-NEXT: [[ARRAY_POISON_CUR:%arraypoison\.cur.*]] = phi ptr [ [[P]], %entry ], [ [[ARRAY_POISON_NEXT:%arraypoison\.next.*]], %[[LARRAY_POISON_BODY]] ]
+ // CHECK: arraypoison.end{{.*}}:
+ // CHECK-NEXT: [[END:%.*]] = getelementptr inbounds [[A]], ptr [[P]], i64 5
// CHECK-NEXT: br label
// CHECK: [[CUR:%.*]] = phi ptr [ [[P]], {{%.*}} ], [ [[NEXT:%.*]], {{%.*}} ]
// CHECK-NEXT: call void @_ZN6test151AC1Ev(ptr {{[^,]*}} [[CUR]])
@@ -257,6 +261,10 @@ namespace test15 {
// CHECK: [[N:%.*]] = load i32, ptr
// CHECK-NEXT: [[T0:%.*]] = sext i32 [[N]] to i64
// CHECK-NEXT: [[P:%.*]] = load ptr, ptr
+ // CHECK: [[ARRAY_POISON_END:%arraypoison\.end.*]] = getelementptr inbounds [[A]], ptr [[P]], i64 [[T0]]
+ // CHECK: [[LARRAY_POISON_BODY:arraypoison\.body.*]]:
+ // CHECK-NEXT: [[ARRAY_POISON_CUR:%arraypoison\.cur.*]] = phi ptr [ [[P]], %entry ], [ [[ARRAY_POISON_NEXT:%arraypoison\.next.*]], %[[LARRAY_POISON_BODY]] ]
+ // CHECK: arraypoison.end{{.*}}:
// CHECK-NEXT: [[ISEMPTY:%.*]] = icmp eq i64 [[T0]], 0
// CHECK-NEXT: br i1 [[ISEMPTY]],
// CHECK: [[END:%.*]] = getelementptr inbounds [[A]], ptr [[P]], i64 [[T0]]
diff --git a/clang/test/CodeGenCXX/sanitize-default-init.cpp b/clang/test/CodeGenCXX/sanitize-default-init.cpp
new file mode 100644
index 0000000000000..fc3c2fd658fc6
--- /dev/null
+++ b/clang/test/CodeGenCXX/sanitize-default-init.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -x c++ -fsanitize=memory -std=c++11 -triple=x86_64-pc-linux -emit-llvm -o - %s | FileCheck %s
+
+/// Sanitise the placement new with default initialisation style.
+
+namespace std {
+ using size_t = decltype(sizeof(0)); // Stand-in for <cstddef>; this test includes no headers.
+} // namespace std
+
+void *operator new(std::size_t, void *p) noexcept { return p; } // Placement form: hands back p unchanged.
+
+struct Simple { // Declares no constructor, so `new (&s) Simple` has none to call.
+ int x;
+};
+
+struct WithCtor { // Has a user-provided default constructor that reads a member it never sets.
+ int x;
+ int y[4];
+ WithCtor() {
+ bool flag = x > 0; /// UB: x has not been given a value when this read happens.
+ }
+};
+
+// CHECK-LABEL: define {{.*}} i32 @main()
+int main() {
+ {
+ Simple s;
+ // CHECK: [[S:%.+]] = alloca %struct.Simple, align 4
+ // CHECK: [[W:%.+]] = alloca %struct.WithCtor, align 4
+ s.x = 42;
+ // CHECK: {{%.+}} = call ptr @__msan_memset(ptr [[S]], i32 poison, i64 4)
+ new (&s) Simple;
+ bool flag = s.x == 42; /// UB: the placement new above default-initialised s, so s.x is indeterminate again.
+ }
+ {
+ WithCtor w;
+ w.x = 42;
+ // CHECK: {{%.+}} = call ptr @__msan_memset(ptr [[W]], i32 poison, i64 20)
+ auto *ptr = new (&w) WithCtor; /// UB: WithCtor() reads x, which this new-expression leaves indeterminate.
+ // CHECK: call void @_ZN8WithCtorC1Ev
+ }
+}
More information about the cfe-commits
mailing list