[clang] Only set the trailing bytes to zero when filling a partially initialized array (PR #79502)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Jan 25 13:20:34 PST 2024
https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/79502
>From e9151018b7a086b167db394caadb74e20dc27711 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
partially initialized array
Fix #79500
---
clang/lib/CodeGen/CGDecl.cpp | 60 ++++++++++++++++++-
.../test/CodeGenCXX/trivial-auto-var-init.cpp | 6 +-
.../test/CodeGenOpenCL/partial_initializer.cl | 3 +-
3 files changed, 64 insertions(+), 5 deletions(-)
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244f..9e7f2f15c6e0982 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,47 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
}
+/// Compute how many leading bytes of \p Init are completely overwritten by the
+/// non-zero stores emitted after the bzero, so that the zeroing memset can
+/// start past them instead of covering the whole object.
+///
+/// \param DL   Data layout used to query type sizes and struct field offsets.
+/// \param Init Constant initializer to inspect.
+/// \returns The size in bytes of the leading, gap-free, fully non-null prefix.
+///          Returning 0 (i.e. "memset everything") is always safe; returning
+///          too much would leave zero bytes uninitialized.
+static size_t CountLeadingNonNullBytes(const llvm::DataLayout &DL,
+                                       llvm::Constant *Init) {
+  // Zero and Undef never require any extra stores.
+  if (Init->isNullValue() || isa<llvm::ConstantAggregateZero>(Init) ||
+      isa<llvm::ConstantPointerNull>(Init) || isa<llvm::UndefValue>(Init))
+    return 0u;
+
+  // These are counted as a whole: the full allocation size is non-null.
+  if (isa<llvm::ConstantInt>(Init) || isa<llvm::ConstantFP>(Init) ||
+      isa<llvm::ConstantVector>(Init) || isa<llvm::BlockAddress>(Init) ||
+      isa<llvm::ConstantExpr>(Init))
+    return DL.getTypeAllocSize(Init->getType());
+
+  size_t LeadingNonNullBytes = 0;
+
+  // Extend the prefix by the leading non-null bytes of Elt. Returns false when
+  // Elt is not covered up to its full allocation size: the prefix has to end
+  // inside Elt in that case, even if later elements are non-null, otherwise
+  // the zero bytes inside Elt would be skipped by the memset.
+  auto Extend = [&](llvm::Constant *Elt) {
+    const size_t EltBytes = CountLeadingNonNullBytes(DL, Elt);
+    const size_t EltSize = DL.getTypeAllocSize(Elt->getType());
+    LeadingNonNullBytes += EltBytes;
+    return EltBytes == EltSize;
+  };
+
+  if (auto *CDS = dyn_cast<llvm::ConstantDataSequential>(Init)) {
+    for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
+      if (!Extend(CDS->getElementAsConstant(i)))
+        break;
+    return LeadingNonNullBytes;
+  }
+
+  if (isa<llvm::ConstantArray>(Init) || isa<llvm::ConstantStruct>(Init)) {
+    // Struct fields may be preceded by padding; array elements never are.
+    auto *STy = dyn_cast<llvm::StructType>(Init->getType());
+    const llvm::StructLayout *SL = STy ? DL.getStructLayout(STy) : nullptr;
+    for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
+      if (SL) {
+        // Padding in front of this field must still be zeroed by the memset,
+        // so the prefix cannot extend past it.
+        const uint64_t FieldOffset = SL->getElementOffset(i);
+        if (FieldOffset != LeadingNonNullBytes)
+          break;
+      }
+      if (!Extend(cast<llvm::Constant>(Init->getOperand(i))))
+        break;
+    }
+    return LeadingNonNullBytes;
+  }
+
+  // Anything else is hard and scary.
+  return 0;
+}
+
/// Decide whether we can emit the non-zero parts of the specified initializer
/// with equal or fewer than NumStores scalar stores.
static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -1209,8 +1250,23 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
// If the initializer is all or mostly the same, codegen with bzero / memset
// then do a few stores afterward.
if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
- auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
- SizeVal, isVolatile);
+ size_t LeadingNonNullBytes =
+ CountLeadingNonNullBytes(CGM.getDataLayout(), constant);
+ // llvm::errs() << LeadingNonNullBytes << " out of " << ConstantSize <<
+ // "\n";
+
+ llvm::Constant *Z8 = llvm::ConstantInt::get(CGM.Int8Ty, 0);
+ Address AdjustedLoc =
+ LeadingNonNullBytes ? Builder.CreateConstInBoundsByteGEP(
+ Loc.withElementType(CGM.Int8Ty),
+ CharUnits::fromQuantity(LeadingNonNullBytes))
+ : Loc;
+ auto *I = Builder.CreateMemSet(
+ AdjustedLoc, Z8,
+ llvm::ConstantInt::get(CGM.IntPtrTy,
+ ConstantSize - LeadingNonNullBytes),
+ isVolatile);
+
if (IsAutoInit)
I->addAnnotationMetadata("auto-init");
diff --git a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
index eed9868cad07f84..b0deb8149ed936f 100644
--- a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
@@ -288,14 +288,16 @@ void test_huge_uninit() {
// UNINIT-LABEL: test_huge_small_init(
// ZERO-LABEL: test_huge_small_init(
-// ZERO: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// ZERO: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// ZERO: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
// ZERO-NOT: !annotation
// ZERO: store i8 97,
// ZERO: store i8 98,
// ZERO: store i8 99,
// ZERO: store i8 100,
// PATTERN-LABEL: test_huge_small_init(
-// PATTERN: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// PATTERN: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// PATTERN: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
// PATTERN-NOT: !annotation
// PATTERN: store i8 97,
// PATTERN: store i8 98,
diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl b/clang/test/CodeGenOpenCL/partial_initializer.cl
index 5cc4e2b246003a1..7c01c750d1afef2 100644
--- a/clang/test/CodeGenOpenCL/partial_initializer.cl
+++ b/clang/test/CodeGenOpenCL/partial_initializer.cl
@@ -35,7 +35,8 @@ void f(void) {
// CHECK: %[[compoundliteral1:.*]] = alloca <2 x i32>, align 8
// CHECK: %[[V2:.*]] = alloca <4 x i32>, align 16
- // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %A, i8 0, i32 144, i1 false)
+ // CHECK: %[[v0:.*]] = getelementptr inbounds i8, ptr %A, i32 8
+ // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %[[v0]], i8 0, i32 136, i1 false)
// CHECK: %[[v2:.*]] = getelementptr inbounds [6 x [6 x float]], ptr %A, i32 0, i32 0
// CHECK: %[[v3:.*]] = getelementptr inbounds [6 x float], ptr %[[v2]], i32 0, i32 0
// CHECK: store float 1.000000e+00, ptr %[[v3]], align 4
More information about the cfe-commits
mailing list