[clang] Only set the trailing bytes to zero when filling a partially initialized array (PR #79502)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Jan 26 03:05:10 PST 2024
https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/79502
>From 1ca9d2d7697528b1a126cc95a2fb7a9e5bb8669a Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
partially initialized array
Fix #79500
---
clang/lib/CodeGen/CGDecl.cpp | 109 +++++++++++++++++-
clang/test/CodeGen/array-init.c | 50 ++++++++
.../test/CodeGenCXX/trivial-auto-var-init.cpp | 6 +-
.../test/CodeGenOpenCL/partial_initializer.cl | 3 +-
4 files changed, 163 insertions(+), 5 deletions(-)
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244f..724dcf6464aef10 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,98 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
}
+/// Return true if \p C is a null value or undef.
+///
+/// Constant::isNullValue() already reports true for ConstantAggregateZero and
+/// ConstantPointerNull, so no separate isa<> checks are needed for them.
+static bool isNullOrUndef(llvm::Constant *C) {
+  if (isa<llvm::UndefValue>(C))
+    return true;
+  return C->isNullValue();
+}
+
+/// Count the leading bytes of \p Init that the zeroing memset may skip.
+///
+/// A byte may only be skipped when one of the stores emitted for the non-zero
+/// parts of the initializer is going to overwrite it. Zero values nested
+/// inside an otherwise non-null element (e.g. the second field of {1, 0}) are
+/// not part of the non-zero parts, so they have to stay within the zeroed
+/// range. The skipped prefix therefore ends at the first element that is
+/// null, undef, or covered for less than its full allocation size.
+///
+/// NOTE: padding that sits between two fully covered struct fields ends up
+/// inside the skipped prefix as well.
+///
+/// \param DL   Data layout providing type sizes and struct field offsets.
+/// \param Init Constant initializer to inspect.
+/// \returns The byte offset at which zero-filling has to start, or 0 when no
+///          leading byte can be skipped.
+static size_t CountLeadingNonNullBytes(const llvm::DataLayout &DL,
+                                       llvm::Constant *Init) {
+  // Zero and undef never require any extra stores.
+  if (isNullOrUndef(Init))
+    return 0u;
+
+  // Scalar-like constants count for their whole allocation size.
+  if (isa<llvm::ConstantInt>(Init) || isa<llvm::ConstantFP>(Init) ||
+      isa<llvm::ConstantVector>(Init) || isa<llvm::BlockAddress>(Init) ||
+      isa<llvm::ConstantExpr>(Init))
+    return DL.getTypeAllocSize(Init->getType());
+
+  // Arrays and records: walk the elements in layout order and stop at the
+  // first one that is not completely covered.
+  if (isa<llvm::ConstantArray>(Init) || isa<llvm::ConstantStruct>(Init)) {
+    // Struct fields are placed by the struct layout; array elements are laid
+    // out back to back, one allocation size apart.
+    const llvm::StructLayout *SL = nullptr;
+    if (auto *CS = dyn_cast<llvm::ConstantStruct>(Init))
+      SL = DL.getStructLayout(CS->getType());
+
+    uint64_t CoveredEnd = 0;
+    for (unsigned Idx = 0, NumElts = Init->getNumOperands(); Idx != NumElts;
+         ++Idx) {
+      auto *Elt = cast<llvm::Constant>(Init->getOperand(Idx));
+      uint64_t EltCovered = CountLeadingNonNullBytes(DL, Elt);
+      // Nothing at the start of this element is stored to: the prefix ends
+      // where the previous element's coverage ended.
+      if (EltCovered == 0)
+        break;
+
+      uint64_t EltSize = DL.getTypeAllocSize(Elt->getType());
+      uint64_t EltOffset = Idx * EltSize;
+      if (SL)
+        EltOffset = SL->getElementOffset(Idx);
+      CoveredEnd = EltOffset + EltCovered;
+
+      // A partially covered element still contains zeros that only the memset
+      // initializes, so later elements cannot extend the prefix.
+      if (EltCovered != EltSize)
+        break;
+    }
+    return CoveredEnd;
+  }
+
+  // Sequential data: count the leading non-null elements.
+  if (auto *CDS = dyn_cast<llvm::ConstantDataSequential>(Init)) {
+    uint64_t ElementByteCount = DL.getTypeAllocSize(CDS->getElementType());
+    unsigned NumElements = CDS->getNumElements();
+    unsigned LeadingNonNullElementCount = 0;
+    while (LeadingNonNullElementCount != NumElements &&
+           !isNullOrUndef(
+               CDS->getElementAsConstant(LeadingNonNullElementCount)))
+      ++LeadingNonNullElementCount;
+    return LeadingNonNullElementCount * ElementByteCount;
+  }
+
+  // Anything else is hard and scary.
+  return 0;
+}
+
/// Decide whether we can emit the non-zero parts of the specified initializer
/// with equal or fewer than NumStores scalar stores.
static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -1209,8 +1301,21 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
// If the initializer is all or mostly the same, codegen with bzero / memset
// then do a few stores afterward.
if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
- auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
- SizeVal, isVolatile);
+ size_t LeadingNonNullBytes =
+ CountLeadingNonNullBytes(CGM.getDataLayout(), constant);
+
+ llvm::Constant *Z8 = llvm::ConstantInt::get(CGM.Int8Ty, 0);
+ Address AdjustedLoc =
+ LeadingNonNullBytes ? Builder.CreateConstInBoundsByteGEP(
+ Loc.withElementType(CGM.Int8Ty),
+ CharUnits::fromQuantity(LeadingNonNullBytes))
+ : Loc;
+ auto *I = Builder.CreateMemSet(
+ AdjustedLoc, Z8,
+ llvm::ConstantInt::get(CGM.IntPtrTy,
+ ConstantSize - LeadingNonNullBytes),
+ isVolatile);
+
if (IsAutoInit)
I->addAnnotationMetadata("auto-init");
diff --git a/clang/test/CodeGen/array-init.c b/clang/test/CodeGen/array-init.c
index 62e87edc2974197..d5c01e016660275 100644
--- a/clang/test/CodeGen/array-init.c
+++ b/clang/test/CodeGen/array-init.c
@@ -1,3 +1,4 @@
+// RUN: %clang_cc1 %s -O0 -triple x86_64-unknown-linux-gnu -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -O0 -triple x86_64-unknown-linux-gnu -emit-llvm -o - | FileCheck -check-prefix=CHECK-NO-MERGE-CONSTANTS %s
// RUN: %clang_cc1 %s -O0 -triple x86_64-unknown-linux-gnu -fmerge-all-constants -emit-llvm -o - | FileCheck -check-prefix=CHECK-MERGE-CONSTANTS %s
@@ -13,3 +14,52 @@ void testConstArrayInits(void)
const int a2[5] = {0,0,0};
const int a3[5] = {0};
}
+
+
+// CHECK-LABEL: @testConstLongArrayInits()
+// CHECK: entry:
+// CHECK-NEXT: %a1 = alloca [20 x i32], align 16
+// CHECK-NEXT: %a2 = alloca [20 x %struct.anon], align 16
+// CHECK-NEXT: %a3 = alloca [20 x %struct.anon.0], align 16
+// CHECK-NEXT: %a4 = alloca [20 x %struct.anon.1], align 16
+//
+// CHECK-NEXT: %0 = getelementptr inbounds i8, ptr %a1, i64 8
+// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 %0, i8 0, i64 72, i1 false)
+// CHECK-NEXT: %1 = getelementptr inbounds <{ i32, i32, [18 x i32] }>, ptr %a1, i32 0, i32 0
+// CHECK-NEXT: store i32 1, ptr %1, align 16
+// CHECK-NEXT: %2 = getelementptr inbounds <{ i32, i32, [18 x i32] }>, ptr %a1, i32 0, i32 1
+// CHECK-NEXT: store i32 2, ptr %2, align 4
+//
+// CHECK-NEXT: %3 = getelementptr inbounds i8, ptr %a2, i64 8
+// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 %3, i8 0, i64 152, i1 false)
+// CHECK-NEXT: %4 = getelementptr inbounds <{ %struct.anon, [19 x %struct.anon] }>, ptr %a2, i32 0, i32 0
+// CHECK-NEXT: %5 = getelementptr inbounds %struct.anon, ptr %4, i32 0, i32 0
+// CHECK-NEXT: store i8 1, ptr %5, align 16
+// CHECK-NEXT: %6 = getelementptr inbounds %struct.anon, ptr %4, i32 0, i32 1
+// CHECK-NEXT: store i32 2, ptr %6, align 4
+//
+// CHECK-NEXT: %7 = getelementptr inbounds i8, ptr %a3, i64 1
+// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 %7, i8 0, i64 159, i1 false)
+// CHECK-NEXT: %8 = getelementptr inbounds <{ %struct.anon.0, [19 x %struct.anon.0] }>, ptr %a3, i32 0, i32 0
+// CHECK-NEXT: %9 = getelementptr inbounds %struct.anon.0, ptr %8, i32 0, i32 0
+// CHECK-NEXT: store i8 1, ptr %9, align 16
+//
+// CHECK-NEXT: %10 = getelementptr inbounds i8, ptr %a4, i64 8
+// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 %10, i8 0, i64 392, i1 false)
+// CHECK-NEXT: %11 = getelementptr inbounds <{ %struct.anon.1, [19 x %struct.anon.1] }>, ptr %a4, i32 0, i32 0
+// CHECK-NEXT: %12 = getelementptr inbounds %struct.anon.1, ptr %11, i32 0, i32 0
+// CHECK-NEXT: store i8 1, ptr %12, align 16
+// CHECK-NEXT: %13 = getelementptr inbounds %struct.anon.1, ptr %11, i32 0, i32 1
+// CHECK-NEXT: %14 = getelementptr inbounds [4 x i32], ptr %13, i32 0, i32 0
+// CHECK-NEXT: store i32 2, ptr %14, align 4
+//
+// CHECK-NEXT: ret void
+// }
+
+void testConstLongArrayInits(void)
+{
+ const int a1[20] = {1,2}; // only the first two ints are given explicitly
+ const struct {char c; int i;} a2[20] = {{1,2}}; // both fields of element 0 given
+ const struct {char c; int i;} a3[20] = {{1}}; // only field c of element 0 given
+ const struct {char c; int i[4];} a4[20] = {{1,{2}}}; // c and i[0] of element 0 given
+}
diff --git a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
index eed9868cad07f84..b0deb8149ed936f 100644
--- a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
@@ -288,14 +288,16 @@ void test_huge_uninit() {
// UNINIT-LABEL: test_huge_small_init(
// ZERO-LABEL: test_huge_small_init(
-// ZERO: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// ZERO: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// ZERO: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
// ZERO-NOT: !annotation
// ZERO: store i8 97,
// ZERO: store i8 98,
// ZERO: store i8 99,
// ZERO: store i8 100,
// PATTERN-LABEL: test_huge_small_init(
-// PATTERN: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// PATTERN: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// PATTERN: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
// PATTERN-NOT: !annotation
// PATTERN: store i8 97,
// PATTERN: store i8 98,
diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl b/clang/test/CodeGenOpenCL/partial_initializer.cl
index 5cc4e2b246003a1..7c01c750d1afef2 100644
--- a/clang/test/CodeGenOpenCL/partial_initializer.cl
+++ b/clang/test/CodeGenOpenCL/partial_initializer.cl
@@ -35,7 +35,8 @@ void f(void) {
// CHECK: %[[compoundliteral1:.*]] = alloca <2 x i32>, align 8
// CHECK: %[[V2:.*]] = alloca <4 x i32>, align 16
- // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %A, i8 0, i32 144, i1 false)
+ // CHECK: %[[v0:.*]] = getelementptr inbounds i8, ptr %A, i32 8
+ // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %[[v0]], i8 0, i32 136, i1 false)
// CHECK: %[[v2:.*]] = getelementptr inbounds [6 x [6 x float]], ptr %A, i32 0, i32 0
// CHECK: %[[v3:.*]] = getelementptr inbounds [6 x float], ptr %[[v2]], i32 0, i32 0
// CHECK: store float 1.000000e+00, ptr %[[v3]], align 4
More information about the cfe-commits
mailing list