[clang] Only set the trailing bytes to zero when filling a partially initialized array (PR #79502)

via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 25 13:15:38 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-codegen

Author: None (serge-sans-paille)

<details>
<summary>Changes</summary>

…initialized array

Fix #79500

---
Full diff: https://github.com/llvm/llvm-project/pull/79502.diff


3 Files Affected:

- (modified) clang/lib/CodeGen/CGDecl.cpp (+58-2) 
- (modified) clang/test/CodeGenCXX/trivial-auto-var-init.cpp (+4-2) 
- (modified) clang/test/CodeGenOpenCL/partial_initializer.cl (+2-1) 


``````````diff
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index a5da0aa2965a00..101fefc511a0c4 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,47 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+__attribute__((optnone)) static size_t // NOTE(review): debug leftover? confirm
+CountLeadingNonNullBytes(const llvm::DataLayout &DL, llvm::Constant *Init) {
+  // Null and undef constants contribute no leading non-null bytes.
+  if (Init->isNullValue() || isa<llvm::ConstantAggregateZero>(Init) ||
+      isa<llvm::ConstantPointerNull>(Init) || isa<llvm::UndefValue>(Init))
+    return 0u;
+  if (isa<llvm::ConstantInt>(Init) || isa<llvm::ConstantFP>(Init) ||
+      isa<llvm::ConstantVector>(Init) || isa<llvm::BlockAddress>(Init) ||
+      isa<llvm::ConstantExpr>(Init))
+    return DL.getTypeAllocSize(Init->getType()); // whole alloc size counts
+
+  // Sum per-element counts, stopping at the first element that yields 0.
+  if (isa<llvm::ConstantArray>(Init) || isa<llvm::ConstantStruct>(Init)) {
+    size_t LeadingNonNullBytes = 0;
+    for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
+      llvm::Constant *Elt = cast<llvm::Constant>(Init->getOperand(i));
+      size_t ExtraBytes = CountLeadingNonNullBytes(DL, Elt);
+      if (!ExtraBytes) // NOTE(review): a partly-null Elt still passes; confirm
+        return LeadingNonNullBytes;
+      LeadingNonNullBytes += ExtraBytes;
+    }
+    return LeadingNonNullBytes;
+  }
+
+  if (llvm::ConstantDataSequential *CDS =
+          dyn_cast<llvm::ConstantDataSequential>(Init)) {
+    size_t LeadingNonNullBytes = 0; // same scan as above, per data element
+    for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+      llvm::Constant *Elt = CDS->getElementAsConstant(i);
+      size_t ExtraBytes = CountLeadingNonNullBytes(DL, Elt);
+      if (!ExtraBytes)
+        return LeadingNonNullBytes;
+      LeadingNonNullBytes += ExtraBytes;
+    }
+    return LeadingNonNullBytes;
+  }
+
+  // Any other constant kind: conservatively report no leading bytes.
+  return 0;
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -1209,8 +1250,23 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
   // If the initializer is all or mostly the same, codegen with bzero / memset
   // then do a few stores afterward.
   if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
-    auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
-                                   SizeVal, isVolatile);
+    size_t LeadingNonNullBytes =
+        CountLeadingNonNullBytes(CGM.getDataLayout(), constant);
+    // llvm::errs() << LeadingNonNullBytes << " out of " << ConstantSize <<
+    // "\n";
+
+    llvm::Constant *Z8 = llvm::ConstantInt::get(CGM.Int8Ty, 0);
+    Address AdjustedLoc =
+        LeadingNonNullBytes ? Builder.CreateConstInBoundsByteGEP(
+                                  Loc.withElementType(CGM.Int8Ty),
+                                  CharUnits::fromQuantity(LeadingNonNullBytes))
+                            : Loc;
+    auto *I = Builder.CreateMemSet(
+        AdjustedLoc, Z8,
+        llvm::ConstantInt::get(CGM.IntPtrTy,
+                               ConstantSize - LeadingNonNullBytes),
+        isVolatile);
+
     if (IsAutoInit)
       I->addAnnotationMetadata("auto-init");
 
diff --git a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
index eed9868cad07f8..b0deb8149ed936 100644
--- a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
@@ -288,14 +288,16 @@ void test_huge_uninit() {
 
 // UNINIT-LABEL:  test_huge_small_init(
 // ZERO-LABEL:    test_huge_small_init(
-// ZERO: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// ZERO: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// ZERO: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
 // ZERO-NOT: !annotation
 // ZERO: store i8 97,
 // ZERO: store i8 98,
 // ZERO: store i8 99,
 // ZERO: store i8 100,
 // PATTERN-LABEL: test_huge_small_init(
-// PATTERN: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// PATTERN: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// PATTERN: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
 // PATTERN-NOT: !annotation
 // PATTERN: store i8 97,
 // PATTERN: store i8 98,
diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl b/clang/test/CodeGenOpenCL/partial_initializer.cl
index 5cc4e2b246003a..7c01c750d1afef 100644
--- a/clang/test/CodeGenOpenCL/partial_initializer.cl
+++ b/clang/test/CodeGenOpenCL/partial_initializer.cl
@@ -35,7 +35,8 @@ void f(void) {
   // CHECK: %[[compoundliteral1:.*]] = alloca <2 x i32>, align 8
   // CHECK: %[[V2:.*]] = alloca <4 x i32>, align 16
 
-  // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %A, i8 0, i32 144, i1 false)
+  // CHECK: %[[v0:.*]] = getelementptr inbounds i8, ptr %A, i32 8
+  // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %[[v0]], i8 0, i32 136, i1 false)
   // CHECK: %[[v2:.*]] = getelementptr inbounds [6 x [6 x float]], ptr %A, i32 0, i32 0
   // CHECK: %[[v3:.*]] = getelementptr inbounds [6 x float], ptr %[[v2]], i32 0, i32 0
   // CHECK: store float 1.000000e+00, ptr %[[v3]], align 4

``````````

</details>


https://github.com/llvm/llvm-project/pull/79502


More information about the cfe-commits mailing list