[llvm] f65c88c - [GlobalOpt] Fix memset handling in global ctor evaluation (PR55859)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 27 07:50:56 PDT 2022


Author: Nikita Popov
Date: 2022-06-27T16:50:49+02:00
New Revision: f65c88c42fdd0e46d16fe31737e6627c56de77c3

URL: https://github.com/llvm/llvm-project/commit/f65c88c42fdd0e46d16fe31737e6627c56de77c3
DIFF: https://github.com/llvm/llvm-project/commit/f65c88c42fdd0e46d16fe31737e6627c56de77c3.diff

LOG: [GlobalOpt] Fix memset handling in global ctor evaluation (PR55859)

The global ctor evaluator currently handles by checking whether the
memset memory is already zero, and skips it in that case. However,
it only actually checks the first byte of the memory being set.

This patch extends the code to check all bytes being set. This is
done byte-by-byte to avoid converting undef values to zeros in
larger reads. However, the handling is still not completely correct,
because there might still be padding bytes (though probably this
doesn't matter much in practice, as I'd expect global variable
padding to be zero-initialized in practice).

Mostly fixes https://github.com/llvm/llvm-project/issues/55859.

Differential Revision: https://reviews.llvm.org/D128532

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/Evaluator.h
    llvm/lib/Transforms/Utils/Evaluator.cpp
    llvm/test/Transforms/GlobalOpt/ctor-memset.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/Evaluator.h b/llvm/include/llvm/Transforms/Utils/Evaluator.h
index 6a34ceba6f05..2b8384897c6b 100644
--- a/llvm/include/llvm/Transforms/Utils/Evaluator.h
+++ b/llvm/include/llvm/Transforms/Utils/Evaluator.h
@@ -138,6 +138,8 @@ class Evaluator {
                        SmallVectorImpl<Constant *> &Formals);
 
   Constant *ComputeLoadResult(Constant *P, Type *Ty);
+  Constant *ComputeLoadResult(GlobalVariable *GV, Type *Ty,
+                              const APInt &Offset);
 
   /// As we compute SSA register values, we store their contents here. The back
   /// of the deque contains the current function and the stack contains the

diff  --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp
index 70b5e0e54c66..18abe3851d56 100644
--- a/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -217,10 +217,13 @@ Constant *Evaluator::ComputeLoadResult(Constant *P, Type *Ty) {
   P = cast<Constant>(P->stripAndAccumulateConstantOffsets(
       DL, Offset, /* AllowNonInbounds */ true));
   Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(P->getType()));
-  auto *GV = dyn_cast<GlobalVariable>(P);
-  if (!GV)
-    return nullptr;
+  if (auto *GV = dyn_cast<GlobalVariable>(P))
+    return ComputeLoadResult(GV, Ty, Offset);
+  return nullptr;
+}
 
+Constant *Evaluator::ComputeLoadResult(GlobalVariable *GV, Type *Ty,
+                                       const APInt &Offset) {
   auto It = MutatedMemory.find(GV);
   if (It != MutatedMemory.end())
     return It->second.read(Ty, Offset, DL);
@@ -436,16 +439,39 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
                               << "intrinsic.\n");
             return false;
           }
+
+          auto *LenC = dyn_cast<ConstantInt>(getVal(MSI->getLength()));
+          if (!LenC) {
+            LLVM_DEBUG(dbgs() << "Memset with unknown length.\n");
+            return false;
+          }
+
           Constant *Ptr = getVal(MSI->getDest());
+          APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+          Ptr = cast<Constant>(Ptr->stripAndAccumulateConstantOffsets(
+              DL, Offset, /* AllowNonInbounds */ true));
+          auto *GV = dyn_cast<GlobalVariable>(Ptr);
+          if (!GV) {
+            LLVM_DEBUG(dbgs() << "Memset with unknown base.\n");
+            return false;
+          }
+
           Constant *Val = getVal(MSI->getValue());
-          Constant *DestVal =
-              ComputeLoadResult(getVal(Ptr), MSI->getValue()->getType());
-          if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
-            // This memset is a no-op.
-            LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n");
-            ++CurInst;
-            continue;
+          APInt Len = LenC->getValue();
+          while (Len != 0) {
+            Constant *DestVal = ComputeLoadResult(GV, Val->getType(), Offset);
+            if (DestVal != Val) {
+              LLVM_DEBUG(dbgs() << "Memset is not a no-op at offset "
+                                << Offset << " of " << *GV << ".\n");
+              return false;
+            }
+            ++Offset;
+            --Len;
           }
+
+          LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n");
+          ++CurInst;
+          continue;
         }
 
         if (II->isLifetimeStartOrEnd()) {

diff  --git a/llvm/test/Transforms/GlobalOpt/ctor-memset.ll b/llvm/test/Transforms/GlobalOpt/ctor-memset.ll
index 79ba25fc56cf..694b6fe65b68 100644
--- a/llvm/test/Transforms/GlobalOpt/ctor-memset.ll
+++ b/llvm/test/Transforms/GlobalOpt/ctor-memset.ll
@@ -1,7 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -globalopt < %s | FileCheck %s
 
- at llvm.global_ctors = appending global [8 x { i32, ptr, ptr }] [
+target datalayout = "p1:32:32"
+
+ at llvm.global_ctors = appending global [9 x { i32, ptr, ptr }] [
   { i32, ptr, ptr } { i32 65535, ptr @ctor0, ptr null },
   { i32, ptr, ptr } { i32 65535, ptr @ctor1, ptr null },
   { i32, ptr, ptr } { i32 65535, ptr @ctor2, ptr null },
@@ -9,11 +11,12 @@
   { i32, ptr, ptr } { i32 65535, ptr @ctor4, ptr null },
   { i32, ptr, ptr } { i32 65535, ptr @ctor5, ptr null },
   { i32, ptr, ptr } { i32 65535, ptr @ctor6, ptr null },
-  { i32, ptr, ptr } { i32 65535, ptr @ctor7, ptr null }
+  { i32, ptr, ptr } { i32 65535, ptr @ctor7, ptr null },
+  { i32, ptr, ptr } { i32 65535, ptr @ctor8, ptr null }
 ]
 
 ;.
-; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @ctor6, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor7, ptr null }]
+; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @ctor3, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor4, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor7, ptr null }]
 ; CHECK: @[[G0:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32 } zeroinitializer
 ; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32, i32 } { i32 0, i32 0, i32 1 }
 ; CHECK: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32, i32 } { i32 1, i32 0, i32 0 }
@@ -22,11 +25,11 @@
 ; CHECK: @[[G5:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i16, i32 } { i16 0, i32 1 }
 ; CHECK: @[[G6:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32 } { i32 -1, i32 -1 }
 ; CHECK: @[[G7:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32 } { i32 -1, i32 1 }
+; CHECK: @[[G8:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr addrspace(1) global { i32, i32 } zeroinitializer
 ;.
 
 ; memset of all-zero global
 @g0 = global { i32, i32 } zeroinitializer
-
 define internal void @ctor0() {
   call void @llvm.memset.p0.i64(ptr @g0, i8 0, i64 8, i1 false)
   ret void
@@ -52,6 +55,10 @@ define internal void @ctor2() {
 @g3 = global { i32, i32 } { i32 0, i32 1 }
 
 define internal void @ctor3() {
+; CHECK-LABEL: @ctor3(
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr @g3, i8 0, i64 8, i1 false)
+; CHECK-NEXT:    ret void
+;
   call void @llvm.memset.p0.i64(ptr @g3, i8 0, i64 8, i1 false)
   ret void
 }
@@ -60,11 +67,17 @@ define internal void @ctor3() {
 @g4 = global { i32, i32 } { i32 0, i32 undef }
 
 define internal void @ctor4() {
+; CHECK-LABEL: @ctor4(
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr @g4, i8 0, i64 8, i1 false)
+; CHECK-NEXT:    ret void
+;
   call void @llvm.memset.p0.i64(ptr @g4, i8 0, i64 8, i1 false)
   ret void
 }
 
 ; memset including padding bytes
+; FIXME: We still incorrectly optimize the memset away here, even though code
+; might access the padding.
 @g5 = global { i16, i32 } { i16 0, i32 1 }
 
 define internal void @ctor5() {
@@ -76,10 +89,6 @@ define internal void @ctor5() {
 @g6 = global { i32, i32 } { i32 -1, i32 -1 }
 
 define internal void @ctor6() {
-; CHECK-LABEL: @ctor6(
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr @g6, i8 -1, i64 8, i1 false)
-; CHECK-NEXT:    ret void
-;
   call void @llvm.memset.p0.i64(ptr @g6, i8 -1, i64 8, i1 false)
   ret void
 }
@@ -96,6 +105,14 @@ define internal void @ctor7() {
   ret void
 }
 
+; memset of zero value in 
diff erently-sized address space
+ at g8 = addrspace(1) global { i32, i32 } zeroinitializer
+
+define internal void @ctor8() {
+  call void @llvm.memset.p0.i64(ptr addrspacecast (ptr addrspace(1) @g8 to ptr), i8 0, i64 8, i1 false)
+  ret void
+}
+
 declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly }


        


More information about the llvm-commits mailing list