[llvm] b988898 - [GVN LoadPRE] Extend the scope of optimization by using context to prove safety of speculation

Serguei Katkov via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 5 19:26:57 PDT 2020


Author: Serguei Katkov
Date: 2020-10-06T09:25:16+07:00
New Revision: b9888980132e5511e85d4172a46e02475957298b

URL: https://github.com/llvm/llvm-project/commit/b9888980132e5511e85d4172a46e02475957298b
DIFF: https://github.com/llvm/llvm-project/commit/b9888980132e5511e85d4172a46e02475957298b.diff

LOG: [GVN LoadPRE] Extend the scope of optimization by using context to prove safety of speculation

Use context to prove that the load can be safely executed at the point to which it is being hoisted.

Postpone the decision about the safety of speculative load execution until we know
where the load will be hoisted, and check safety in that context.

Reviewers: nikic, fhahn, mkazantsev, lebedev.ri, efriedma, reames
Reviewed By: reames, mkazantsev
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D88725

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/GVN.cpp
    llvm/test/Transforms/GVN/loadpre-context.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index c25fdd44dcf9..4cb95425678c 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1133,7 +1133,6 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
   // backwards through predecessors if needed.
   BasicBlock *LoadBB = LI->getParent();
   BasicBlock *TmpBB = LoadBB;
-  bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI);
 
   // Check that there is no implicit control flow instructions above our load in
   // its block. If there is an instruction that doesn't always pass the
@@ -1150,8 +1149,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
   // because if the index is out of bounds we should deoptimize rather than
   // access the array.
   // Check that there is no guard in this block above our instruction.
-  if (!IsSafeToSpeculativelyExecute && ICF->isDominatedByICFIFromSameBlock(LI))
-    return false;
+  bool MustEnsureSafetyOfSpeculativeExecution =
+      ICF->isDominatedByICFIFromSameBlock(LI);
+
   while (TmpBB->getSinglePredecessor()) {
     TmpBB = TmpBB->getSinglePredecessor();
     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1168,8 +1168,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
       return false;
 
     // Check that there is no implicit control flow in a block above.
-    if (!IsSafeToSpeculativelyExecute && ICF->hasICF(TmpBB))
-      return false;
+    MustEnsureSafetyOfSpeculativeExecution =
+        MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
   }
 
   assert(TmpBB);
@@ -1241,6 +1241,17 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
   if (NumUnavailablePreds != 1)
       return false;
 
+  // Now we know where we will insert the load. We must ensure that it is
+  // safe to speculatively execute the load at that point.
+  if (MustEnsureSafetyOfSpeculativeExecution) {
+    if (CriticalEdgePred.size())
+      if (!isSafeToSpeculativelyExecute(LI, LoadBB->getFirstNonPHI(), DT))
+        return false;
+    for (auto &PL : PredLoads)
+      if (!isSafeToSpeculativelyExecute(LI, PL.first->getTerminator(), DT))
+        return false;
+  }
+
   // Split critical edges, and update the unavailable predecessors accordingly.
   for (BasicBlock *OrigPred : CriticalEdgePred) {
     BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);

diff  --git a/llvm/test/Transforms/GVN/loadpre-context.ll b/llvm/test/Transforms/GVN/loadpre-context.ll
index 8c9c21212842..50a43b0e91f8 100644
--- a/llvm/test/Transforms/GVN/loadpre-context.ll
+++ b/llvm/test/Transforms/GVN/loadpre-context.ll
@@ -1,18 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -gvn --basic-aa -S | FileCheck %s
 
-; load may be speculated, adress is not null using context search.
+; load may be speculated, address is not null using context search.
 ; There is a critical edge.
 define i32 @loadpre_critical_edge(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) {
 ; CHECK-LABEL: @loadpre_critical_edge(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
-; CHECK-NEXT:    br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[HEADER:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[ENTRY_HEADER_CRIT_EDGE:%.*]]
+; CHECK:       entry.header_crit_edge:
+; CHECK-NEXT:    [[V_PRE:%.*]] = load i32, i32* [[ARG]], align 4
+; CHECK-NEXT:    br label [[HEADER:%.*]]
 ; CHECK:       header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
-; CHECK-NEXT:    [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]])
-; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[ARG]], align 4
-; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[NEW_V]], [[V]]
+; CHECK-NEXT:    [[V:%.*]] = phi i32 [ [[V_PRE]], [[ENTRY_HEADER_CRIT_EDGE]] ], [ [[SUM:%.*]], [[HEADER]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_HEADER_CRIT_EDGE]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
+; CHECK-NEXT:    [[NEW_V:%.*]] = call i32 @ro_foo(i32 [[IV]])
+; CHECK-NEXT:    [[SUM]] = add i32 [[NEW_V]], [[V]]
 ; CHECK-NEXT:    store i32 [[SUM]], i32* [[ARG]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
@@ -28,7 +31,8 @@ entry:
 
 header:
   %iv = phi i32 [0, %entry], [%iv.next, %header]
-  %new_v = call i32 @foo(i32 %iv)
+; The call prevents hoisting the load over it because the call is not guaranteed to return.
+  %new_v = call i32 @ro_foo(i32 %iv) readnone
   %v = load i32, i32* %arg
   %sum = add i32 %new_v, %v
   store i32 %sum, i32* %arg
@@ -43,19 +47,20 @@ null_exit:
   ret i32 0
 }
 
-; load may be speculated, adress is not null using context search.
+; load may be speculated, address is not null using context search.
 define i32 @loadpre_basic(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) {
 ; CHECK-LABEL: @loadpre_basic(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
 ; CHECK-NEXT:    br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[PREHEADER:%.*]]
 ; CHECK:       preheader:
+; CHECK-NEXT:    [[V_PRE:%.*]] = load i32, i32* [[ARG]], align 4
 ; CHECK-NEXT:    br label [[HEADER:%.*]]
 ; CHECK:       header:
+; CHECK-NEXT:    [[V:%.*]] = phi i32 [ [[V_PRE]], [[PREHEADER]] ], [ [[SUM:%.*]], [[HEADER]] ]
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
-; CHECK-NEXT:    [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]])
-; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[ARG]], align 4
-; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[NEW_V]], [[V]]
+; CHECK-NEXT:    [[NEW_V:%.*]] = call i32 @ro_foo(i32 [[IV]])
+; CHECK-NEXT:    [[SUM]] = add i32 [[NEW_V]], [[V]]
 ; CHECK-NEXT:    store i32 [[SUM]], i32* [[ARG]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
@@ -74,7 +79,8 @@ preheader:
 
 header:
   %iv = phi i32 [0, %preheader], [%iv.next, %header]
-  %new_v = call i32 @foo(i32 %iv)
+; The call prevents hoisting the load over it because the call is not guaranteed to return.
+  %new_v = call i32 @ro_foo(i32 %iv) readnone
   %v = load i32, i32* %arg
   %sum = add i32 %new_v, %v
   store i32 %sum, i32* %arg
@@ -89,7 +95,7 @@ null_exit:
   ret i32 0
 }
 
-; load cannot be speculated, adress is not null check does not dominate the loop.
+; load cannot be speculated, check "address is not null" does not dominate the loop.
 define i32 @loadpre_maybe_null(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N, i1 %c) {
 ; CHECK-LABEL: @loadpre_maybe_null(
 ; CHECK-NEXT:  entry:
@@ -101,7 +107,7 @@ define i32 @loadpre_maybe_null(i32* align 8 dereferenceable_or_null(48) %arg, i3
 ; CHECK-NEXT:    br label [[HEADER:%.*]]
 ; CHECK:       header:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ]
-; CHECK-NEXT:    [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]])
+; CHECK-NEXT:    [[NEW_V:%.*]] = call i32 @ro_foo(i32 [[IV]])
 ; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[ARG]], align 4
 ; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[NEW_V]], [[V]]
 ; CHECK-NEXT:    store i32 [[SUM]], i32* [[ARG]], align 4
@@ -125,7 +131,8 @@ preheader:
 
 header:
   %iv = phi i32 [0, %preheader], [%iv.next, %header]
-  %new_v = call i32 @foo(i32 %iv)
+; The call prevents hoisting the load over it because the call is not guaranteed to return.
+  %new_v = call i32 @ro_foo(i32 %iv) readnone
   %v = load i32, i32* %arg
   %sum = add i32 %new_v, %v
   store i32 %sum, i32* %arg
@@ -141,4 +148,4 @@ null_exit:
 }
 
 ; Does not guarantee that returns.
-declare i32 @foo(i32) readnone
+declare i32 @ro_foo(i32) readnone


        


More information about the llvm-commits mailing list