[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

via cfe-commits cfe-commits at lists.llvm.org
Thu Feb 1 08:24:12 PST 2024


https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/79502

>From 707050175b73e61e3be8da7c2b4683be8afa6db7 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
 partially initialized array

Fix #79500
---
 clang/lib/CodeGen/CGDecl.cpp                  | 101 ++++++++++++++----
 clang/test/CodeGen/array-init.c               |  50 +++++++++
 .../test/CodeGenCXX/trivial-auto-var-init.cpp |   6 +-
 .../test/CodeGenOpenCL/partial_initializer.cl |   3 +-
 4 files changed, 139 insertions(+), 21 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c1724..a9ae06bd77c32 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,11 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+static bool isNullOrUndef(llvm::Constant *C) {
+  return C->isNullValue() || isa<llvm::ConstantAggregateZero>(C) ||
+         isa<llvm::ConstantPointerNull>(C) || isa<llvm::UndefValue>(C);
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -945,48 +950,90 @@ static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
 
 /// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit
 /// the scalar stores that would be required.
-static void emitStoresForInitAfterBZero(CodeGenModule &CGM,
-                                        llvm::Constant *Init, Address Loc,
-                                        bool isVolatile, CGBuilderTy &Builder,
-                                        bool IsAutoInit) {
+static uint64_t emitStoresForInitAfterBZero(CodeGenModule &CGM,
+                                            llvm::Constant *Init, Address Loc,
+                                            bool isVolatile,
+                                            CGBuilderTy &Builder,
+                                            bool IsAutoInit) {
   assert(!Init->isNullValue() && !isa<llvm::UndefValue>(Init) &&
          "called emitStoresForInitAfterBZero for zero or undef value.");
 
+  auto const &DL = CGM.getDataLayout();
+
   if (isa<llvm::ConstantInt>(Init) || isa<llvm::ConstantFP>(Init) ||
       isa<llvm::ConstantVector>(Init) || isa<llvm::BlockAddress>(Init) ||
       isa<llvm::ConstantExpr>(Init)) {
     auto *I = Builder.CreateStore(Init, Loc, isVolatile);
     if (IsAutoInit)
       I->addAnnotationMetadata("auto-init");
-    return;
+    return DL.getTypeAllocSize(Init->getType());
   }
 
   if (llvm::ConstantDataSequential *CDS =
           dyn_cast<llvm::ConstantDataSequential>(Init)) {
+    bool CountNonNullBytes = true;
+    uint64_t LeadingNonNullElementsCount = 0;
     for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
       llvm::Constant *Elt = CDS->getElementAsConstant(i);
 
       // If necessary, get a pointer to the element and emit it.
-      if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt))
+      if (!isNullOrUndef(Elt)) {
         emitStoresForInitAfterBZero(
             CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), isVolatile,
             Builder, IsAutoInit);
+        if (CountNonNullBytes)
+          ++LeadingNonNullElementsCount;
+      } else if (CountNonNullBytes)
+        CountNonNullBytes = false;
     }
-    return;
+    uint64_t ElementByteCount = DL.getTypeAllocSize(CDS->getElementType());
+    return LeadingNonNullElementsCount * ElementByteCount;
   }
 
   assert((isa<llvm::ConstantStruct>(Init) || isa<llvm::ConstantArray>(Init)) &&
          "Unknown value type!");
 
+  bool CountNonNullBytes = true;
+  uint64_t Offset = DL.getTypeAllocSize(Init->getType());
+
   for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
-    llvm::Constant *Elt = cast<llvm::Constant>(Init->getOperand(i));
+    llvm::Constant *Operand = cast<llvm::Constant>(Init->getOperand(i));
+    uint64_t OperandByteCount = DL.getTypeAllocSize(Operand->getType());
 
-    // If necessary, get a pointer to the element and emit it.
-    if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt))
-      emitStoresForInitAfterBZero(CGM, Elt,
-                                  Builder.CreateConstInBoundsGEP2_32(Loc, 0, i),
-                                  isVolatile, Builder, IsAutoInit);
+    uint64_t OperandOffset;
+    if (isNullOrUndef(Operand)) {
+      OperandOffset = 0;
+    } else {
+      // If necessary, get a pointer to the element and emit it.
+      OperandOffset = emitStoresForInitAfterBZero(
+          CGM, Operand, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i),
+          isVolatile, Builder, IsAutoInit);
+    }
+
+    if (CountNonNullBytes) {
+      if (OperandOffset != OperandByteCount) {
+        CountNonNullBytes = false;
+
+        // When not at the beginning of the constant, add the offset of the
+        // previous elements.
+        if (i) {
+          if (auto *CS = dyn_cast<llvm::ConstantStruct>(Init)) {
+            llvm::StructType *CST = CS->getType();
+            const llvm::StructLayout *SL = DL.getStructLayout(CST);
+            OperandOffset += SL->getElementOffset(i - 1);
+          } else if (auto *CA = dyn_cast<llvm::ConstantArray>(Init)) {
+            llvm::ArrayType *CAT = CA->getType();
+            uint64_t ElementByteCount =
+                DL.getTypeAllocSize(CAT->getElementType());
+            OperandOffset += ElementByteCount * (i - 1);
+          }
+        }
+
+        Offset = OperandOffset;
+      }
+    }
   }
+  return Offset;
 }
 
 /// Decide whether we should use bzero plus some stores to initialize a local
@@ -1205,12 +1252,13 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
   }
 
   auto *SizeVal = llvm::ConstantInt::get(CGM.IntPtrTy, ConstantSize);
-
   // If the initializer is all or mostly the same, codegen with bzero / memset
   // then do a few stores afterward.
   if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
-    auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
-                                   SizeVal, isVolatile);
+    auto *I = Builder.CreateMemSet(
+        Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
+        llvm::ConstantInt::get(CGM.IntPtrTy, ConstantSize), isVolatile);
+
     if (IsAutoInit)
       I->addAnnotationMetadata("auto-init");
 
@@ -1218,8 +1266,25 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
         constant->isNullValue() || isa<llvm::UndefValue>(constant);
     if (!valueAlreadyCorrect) {
       Loc = Loc.withElementType(Ty);
-      emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder,
-                                  IsAutoInit);
+      uint64_t Offset = emitStoresForInitAfterBZero(
+          CGM, constant, Loc, isVolatile, Builder, IsAutoInit);
+
+      // Prevent optimizing to odd offsets.
+      Offset -= Offset % Loc.getAlignment().getQuantity();
+      if (Offset && isa<llvm::ConstantArray>(constant)) {
+        if (Offset == ConstantSize) {
+          I->eraseFromParent();
+        } else {
+          Loc = Loc.withElementType(CGM.Int8Ty);
+
+          llvm::Value *AdjustedLoc = llvm::GetElementPtrInst::CreateInBounds(
+              CGM.Int8Ty, Loc.getPointer(),
+              {llvm::ConstantInt::get(CGM.IntPtrTy, Offset)}, "", I);
+          I->setOperand(0, AdjustedLoc);
+          I->setOperand(
+              2, llvm::ConstantInt::get(CGM.IntPtrTy, ConstantSize - Offset));
+        }
+      }
     }
     return;
   }
diff --git a/clang/test/CodeGen/array-init.c b/clang/test/CodeGen/array-init.c
index 62e87edc29741..a8d272f1ea1c4 100644
--- a/clang/test/CodeGen/array-init.c
+++ b/clang/test/CodeGen/array-init.c
@@ -1,3 +1,4 @@
+// RUN: %clang_cc1 %s -O0 -triple x86_64-unknown-linux-gnu -emit-llvm -o - | FileCheck  %s
 // RUN: %clang_cc1 %s -O0 -triple x86_64-unknown-linux-gnu -emit-llvm -o - | FileCheck -check-prefix=CHECK-NO-MERGE-CONSTANTS %s
 // RUN: %clang_cc1 %s -O0 -triple x86_64-unknown-linux-gnu -fmerge-all-constants -emit-llvm -o - | FileCheck -check-prefix=CHECK-MERGE-CONSTANTS %s
 
@@ -13,3 +14,52 @@ void testConstArrayInits(void)
   const int a2[5] = {0,0,0};
   const int a3[5] = {0};
 }
+
+
+// CHECK-LABEL: @testConstLongArrayInits()
+// CHECK: entry:
+// CHECK-NEXT:  %a1 = alloca [20 x i32], align 16
+// CHECK-NEXT:  %a2 = alloca [20 x %struct.anon], align 16
+// CHECK-NEXT:  %a3 = alloca [20 x %struct.anon.0], align 16
+// CHECK-NEXT:  %a4 = alloca [20 x %struct.anon.1], align 16
+//
+// CHECK-NEXT:  %0 = getelementptr inbounds i8, ptr %a1, i64 8
+// CHECK-NEXT:  call void @llvm.memset.p0.i64(ptr align 8 %0, i8 0, i64 72, i1 false)
+// CHECK-NEXT:  %1 = getelementptr inbounds <{ i32, i32, [18 x i32] }>, ptr %a1, i32 0, i32 0
+// CHECK-NEXT:  store i32 1, ptr %1, align 16
+// CHECK-NEXT:  %2 = getelementptr inbounds <{ i32, i32, [18 x i32] }>, ptr %a1, i32 0, i32 1
+// CHECK-NEXT:  store i32 2, ptr %2, align 4
+//
+// CHECK-NEXT:  %3 = getelementptr inbounds i8, ptr %a2, i64 8
+// CHECK-NEXT:  call void @llvm.memset.p0.i64(ptr align 8 %3, i8 0, i64 152, i1 false)
+// CHECK-NEXT:  %4 = getelementptr inbounds <{ %struct.anon, [19 x %struct.anon] }>, ptr %a2, i32 0, i32 0
+// CHECK-NEXT:  %5 = getelementptr inbounds %struct.anon, ptr %4, i32 0, i32 0
+// CHECK-NEXT:  store i8 1, ptr %5, align 16
+// CHECK-NEXT:  %6 = getelementptr inbounds %struct.anon, ptr %4, i32 0, i32 1
+// CHECK-NEXT:  store i32 2, ptr %6, align 4
+//
+// CHECK-NEXT:  %7 = getelementptr inbounds i8, ptr %a3, i64 1
+// CHECK-NEXT:  call void @llvm.memset.p0.i64(ptr align 1 %7, i8 0, i64 159, i1 false)
+// CHECK-NEXT:  %8 = getelementptr inbounds <{ %struct.anon.0, [19 x %struct.anon.0] }>, ptr %a3, i32 0, i32 0
+// CHECK-NEXT:  %9 = getelementptr inbounds %struct.anon.0, ptr %8, i32 0, i32 0
+// CHECK-NEXT:  store i8 1, ptr %9, align 16
+//
+// CHECK-NEXT:  %10 = getelementptr inbounds i8, ptr %a4, i64 20
+// CHECK-NEXT:  call void @llvm.memset.p0.i64(ptr align 4 %10, i8 0, i64 380, i1 false)
+// CHECK-NEXT:  %11 = getelementptr inbounds <{ %struct.anon.1, [19 x %struct.anon.1] }>, ptr %a4, i32 0, i32 0
+// CHECK-NEXT:  %12 = getelementptr inbounds %struct.anon.1, ptr %11, i32 0, i32 0
+// CHECK-NEXT:  store i8 1, ptr %12, align 16
+// CHECK-NEXT:  %13 = getelementptr inbounds %struct.anon.1, ptr %11, i32 0, i32 1
+// CHECK-NEXT:  %14 = getelementptr inbounds [4 x i32], ptr %13, i32 0, i32 0
+// CHECK-NEXT:  store i32 2, ptr %14, align 4
+//
+// CHECK-NEXT:  ret void
+// }
+
+void testConstLongArrayInits(void)
+{
+   const int a1[20] = {1,2};
+   const struct {char c; int i;} a2[20] = {{1,2}};
+   const struct {char c; int i;} a3[20] = {{1}};
+   const struct {char c; int i[4];} a4[20] = {{1,{2}}};
+}
diff --git a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
index eed9868cad07f..b0deb8149ed93 100644
--- a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
@@ -288,14 +288,16 @@ void test_huge_uninit() {
 
 // UNINIT-LABEL:  test_huge_small_init(
 // ZERO-LABEL:    test_huge_small_init(
-// ZERO: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// ZERO: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// ZERO: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
 // ZERO-NOT: !annotation
 // ZERO: store i8 97,
 // ZERO: store i8 98,
 // ZERO: store i8 99,
 // ZERO: store i8 100,
 // PATTERN-LABEL: test_huge_small_init(
-// PATTERN: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// PATTERN: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// PATTERN: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
 // PATTERN-NOT: !annotation
 // PATTERN: store i8 97,
 // PATTERN: store i8 98,
diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl b/clang/test/CodeGenOpenCL/partial_initializer.cl
index 5cc4e2b246003..7c01c750d1afe 100644
--- a/clang/test/CodeGenOpenCL/partial_initializer.cl
+++ b/clang/test/CodeGenOpenCL/partial_initializer.cl
@@ -35,7 +35,8 @@ void f(void) {
   // CHECK: %[[compoundliteral1:.*]] = alloca <2 x i32>, align 8
   // CHECK: %[[V2:.*]] = alloca <4 x i32>, align 16
 
-  // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %A, i8 0, i32 144, i1 false)
+  // CHECK: %[[v0:.*]] = getelementptr inbounds i8, ptr %A, i32 8
+  // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %[[v0]], i8 0, i32 136, i1 false)
   // CHECK: %[[v2:.*]] = getelementptr inbounds [6 x [6 x float]], ptr %A, i32 0, i32 0
   // CHECK: %[[v3:.*]] = getelementptr inbounds [6 x float], ptr %[[v2]], i32 0, i32 0
   // CHECK: store float 1.000000e+00, ptr %[[v3]], align 4



More information about the cfe-commits mailing list