[llvm] e422c0d - [GlobalOpt] Perform store->dominated load forwarding for stored once globals

Arthur Eubanks via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 24 09:09:39 PDT 2022


Author: Arthur Eubanks
Date: 2022-06-24T09:09:26-07:00
New Revision: e422c0d3b26eaf3e5646960351d4fe6ff7c0b573

URL: https://github.com/llvm/llvm-project/commit/e422c0d3b26eaf3e5646960351d4fe6ff7c0b573
DIFF: https://github.com/llvm/llvm-project/commit/e422c0d3b26eaf3e5646960351d4fe6ff7c0b573.diff

LOG: [GlobalOpt] Perform store->dominated load forwarding for stored once globals

The initial land incorrectly optimized forwarding non-Constants in non-nosync/norecurse functions. Bail on non-Constants since norecurse should cause global -> alloca promotion anyway.

The initial land also incorrectly assumed that StoredOnceStore was the only store to the global, but it actually means that only one value other than the global initializer is stored. Add a check that there's only one store.

Compile time tracker:
https://llvm-compile-time-tracker.com/compare.php?from=c80b88ee29f34078d2149de94e27600093e6c7c0&to=ef2c2b7772424b6861a75e794f3c31b45167304a&stat=instructions

Reviewed By: nikic, asbirlea, jdoerfert

Differential Revision: https://reviews.llvm.org/D128128

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/GlobalStatus.h
    llvm/lib/Transforms/IPO/GlobalOpt.cpp
    llvm/lib/Transforms/Utils/GlobalStatus.cpp
    llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll
    llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll
    llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll
    llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
index 775dd23d8f237..60c91fc30174d 100644
--- a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
+++ b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
@@ -35,6 +35,9 @@ struct GlobalStatus {
   /// can be deleted.
   bool IsLoaded = false;
 
+  /// Number of stores to the global.
+  unsigned NumStores = 0;
+
   /// Keep track of what stores to the global look like.
   enum StoredType {
     /// There is no store to this global.  It can thus be marked constant.

diff  --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index fc268a062392b..1a1bde4f0668f 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1444,6 +1444,42 @@ static void makeAllConstantUsesInstructions(Constant *C) {
   }
 }
 
+// For a global variable with one store, if the store dominates any loads,
+// those loads will always load the stored value (as opposed to the
+// initializer), even in the presence of recursion.
+static bool forwardStoredOnceStore(
+    GlobalVariable *GV, const StoreInst *StoredOnceStore,
+    function_ref<DominatorTree &(Function &)> LookupDomTree) {
+  const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
+  // We can do this optimization for non-constants in nosync + norecurse
+  // functions, but globals used in exactly one norecurse functions are already
+  // promoted to an alloca.
+  if (!isa<Constant>(StoredOnceValue))
+    return false;
+  const Function *F = StoredOnceStore->getFunction();
+  SmallVector<LoadInst *> Loads;
+  for (User *U : GV->users()) {
+    if (auto *LI = dyn_cast<LoadInst>(U)) {
+      if (LI->getFunction() == F &&
+          LI->getType() == StoredOnceValue->getType() && LI->isSimple())
+        Loads.push_back(LI);
+    }
+  }
+  // Only compute DT if we have any loads to examine.
+  bool MadeChange = false;
+  if (!Loads.empty()) {
+    auto &DT = LookupDomTree(*const_cast<Function *>(F));
+    for (auto *LI : Loads) {
+      if (DT.dominates(StoredOnceStore, LI)) {
+        LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue));
+        LI->eraseFromParent();
+        MadeChange = true;
+      }
+    }
+  }
+  return MadeChange;
+}
+
 /// Analyze the specified global variable and optimize
 /// it if possible.  If we make a change, return true.
 static bool
@@ -1603,6 +1639,12 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
     if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI))
       return true;
 
+    // Try to forward the store to any loads. If we have more than one store, we
+    // may have a store of the initializer between StoredOnceStore and a load.
+    if (GS.NumStores == 1)
+      if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree))
+        return true;
+
     // Otherwise, if the global was not a boolean, we can shrink it to be a
     // boolean. Skip this optimization for AS that doesn't allow an initializer.
     if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&

diff  --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index 3ba920cd5878a..c5aded3c45f4c 100644
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -104,6 +104,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
         if (SI->isVolatile())
           return true;
 
+        ++GS.NumStores;
+
         GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
 
         // If this is a direct store to the global (i.e., the global is a scalar

diff  --git a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll
index da396363547fd..1c917413c6f00 100644
--- a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll
+++ b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll
@@ -2,21 +2,24 @@
 
 @foo = internal global i32 0, align 4
 
-define dso_local i32 @bar() {
+define void @store() {
 entry:
   store i32 5, i32* @foo, align 4
+  ret void
+}
+
+define i32 @bar() {
+entry:
   %0 = load i32, i32* @foo, align 4
   ret i32 %0
 }
 
 ;CHECK:      @bar
 ;CHECK-NEXT: entry:
-;CHECK-NEXT:   store i1 true, i1* @foo, align 1, !dbg ![[DbgLocStore:[0-9]+]]
 ;CHECK-NEXT:   %.b = load i1, i1* @foo, align 1, !dbg ![[DbgLocLoadSel:[0-9]+]]
 ;CHECK-NEXT:   %0 = select i1 %.b, i32 5, i32 0, !dbg ![[DbgLocLoadSel]]
 ;CHECK-NEXT:   call void @llvm.dbg.value({{.*}}), !dbg ![[DbgLocLoadSel]]
 ;CHECK-NEXT:   ret i32 %0, !dbg ![[DbgLocRet:[0-9]+]]
 
-;CHECK: ![[DbgLocStore]] = !DILocation(line: 1,
-;CHECK: ![[DbgLocLoadSel]] = !DILocation(line: 2,
-;CHECK: ![[DbgLocRet]] = !DILocation(line: 3,
+;CHECK: ![[DbgLocLoadSel]] = !DILocation(line: 3,
+;CHECK: ![[DbgLocRet]] = !DILocation(line: 4,

diff  --git a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll
index 74c35466992bf..bdd515bf2e557 100644
--- a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll
+++ b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll
@@ -10,11 +10,13 @@
 ; Negative test for AS(3). Skip shrink global to bool optimization.
 ; CHECK: @lvar = internal unnamed_addr addrspace(3) global i32 undef
 
-define void @test_global_var() {
+define void @test_global_var(i1 %i) {
 ; CHECK-LABEL: @test_global_var(
 ; CHECK:    store volatile i32 10, i32* undef, align 4
 ;
 entry:
+  br i1 %i, label %bb1, label %exit
+bb1:
   store i32 10, i32* @gvar
   br label %exit
 exit:
@@ -23,13 +25,15 @@ exit:
   ret void
 }
 
-define void @test_lds_var() {
+define void @test_lds_var(i1 %i) {
 ; CHECK-LABEL: @test_lds_var(
 ; CHECK:    store i32 10, i32 addrspace(3)* @lvar, align 4
 ; CHECK:    [[LD:%.*]] = load i32, i32 addrspace(3)* @lvar, align 4
 ; CHECK:    store volatile i32 [[LD]], i32* undef, align 4
 ;
 entry:
+  br i1 %i, label %bb1, label %exit
+bb1:
   store i32 10, i32 addrspace(3)* @lvar
   br label %exit
 exit:

diff  --git a/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll b/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll
index ed396337553fe..7b845070bbd0b 100644
--- a/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll
+++ b/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll
@@ -15,10 +15,8 @@ declare void @b()
 
 define i1 @dom_const() {
 ; CHECK-LABEL: @dom_const(
-; CHECK-NEXT:    store i1 true, ptr @g1, align 1
 ; CHECK-NEXT:    call void @b()
-; CHECK-NEXT:    [[R:%.*]] = load i1, ptr @g1, align 1
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 true
 ;
   store i1 true, ptr @g1
   call void @b()
@@ -90,8 +88,7 @@ define i1 @dom_multiple_function_loads() {
 ; CHECK-LABEL: @dom_multiple_function_loads(
 ; CHECK-NEXT:    store i1 true, ptr @g6, align 1
 ; CHECK-NEXT:    call void @b()
-; CHECK-NEXT:    [[R:%.*]] = load i1, ptr @g6, align 1
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 true
 ;
   store i1 true, ptr @g6
   call void @b()

diff  --git a/llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll b/llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll
index 3dbddbdb96faa..f4290c64a773a 100644
--- a/llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll
+++ b/llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll
@@ -9,7 +9,6 @@
 define i32 @main() {
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i1 true, i1* @a, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** @e, align 8
 ; CHECK-NEXT:    store i32 0, i32* [[TMP0]], align 4
 ; CHECK-NEXT:    store i32* null, i32** @e, align 8


        


More information about the llvm-commits mailing list