[llvm] f3eb5de - [LoopPred] Generalize profitability check to handle unswitch output

Tue Nov 19 14:08:39 PST 2019

Author: Philip Reames
Date: 2019-11-19T14:06:36-08:00
New Revision: f3eb5dee5756876be0524c59c21478659eba8c4d

URL: https://github.com/llvm/llvm-project/commit/f3eb5dee5756876be0524c59c21478659eba8c4d
DIFF: https://github.com/llvm/llvm-project/commit/f3eb5dee5756876be0524c59c21478659eba8c4d.diff

LOG: [LoopPred] Generalize profitability check to handle unswitch output

Unswitch (and other loop transforms) like to generate loop exit blocks with unconditional successors, and phi nodes (LCSSA, or simple multiple exiting blocks sharing an exit).  Generalize the "likely very rare exit" check slightly to handle this form.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopPredication.cpp
    llvm/test/Transforms/LoopPredication/predicate-exits.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 9abf6deebb60..9d67046d7436 100644

--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -1019,6 +1019,17 @@ static const SCEV *getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE,
   return SE.getUMinFromMismatchedTypes(ExitCounts);
 }
 
+/// Return true if we can be fairly sure that executing block BB will probably
+/// lead to executing an __llvm_deoptimize.  This is a profitability heuristic,
+/// not a legality constraint.
+static bool isVeryLikelyToDeopt(BasicBlock *BB) {
+  while (BB->getUniqueSuccessor())
+    // Will skip side effects, that's okay
+    BB = BB->getUniqueSuccessor();
+
+  return BB->getTerminatingDeoptimizeCall();
+}
+
 /// This implements an analogous, but entirely distinct transform from the main
 /// loop predication transform.  This one is phrased in terms of using a
 /// widenable branch *outside* the loop to allow us to simplify loop exits in a
@@ -1109,7 +1120,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
 
     const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
     BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1);
-    if (!ExitBB->getTerminatingDeoptimizeCall())
+    if (!isVeryLikelyToDeopt(ExitBB))
       // Profitability: indicator of rarely/never taken exit
       continue;
 

diff  --git a/llvm/test/Transforms/LoopPredication/predicate-exits.ll b/llvm/test/Transforms/LoopPredication/predicate-exits.ll
index b71ac8c3c7e9..340f509fe036 100644
--- a/llvm/test/Transforms/LoopPredication/predicate-exits.ll
+++ b/llvm/test/Transforms/LoopPredication/predicate-exits.ll
@@ -675,11 +675,92 @@ exit:
 }
 
 
+;; Unswitch likes to produce some ugly exit blocks without simplifications
+;; being applied.  Make sure we can handle that form.
+define i32 @unswitch_exit_form(i32* %array, i32 %length, i32 %n, i1 %cond_0) {
+; CHECK-LABEL: @unswitch_exit_form(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1
+; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[UMAX]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]]
+; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]]
+; CHECK-NEXT:    [[TMP4:%.*]] = freeze i1 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and i1 [[TMP4]], [[COND_0:%.*]]
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP5]], [[WIDENABLE_COND]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0
+; CHECK:       deopt.loopexit:
+; CHECK-NEXT:    br label [[DEOPT]]
+; CHECK:       deopt:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[DEOPT_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    br label [[ACTUAL_DEOPT:%.*]]
+; CHECK:       actual_deopt:
+; CHECK-NEXT:    [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[PHI]]) ]
+; CHECK-NEXT:    ret i32 [[DEOPTRET]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]]
+; CHECK-NEXT:    br i1 true, label [[GUARDED]], label [[DEOPT_LOOPEXIT]], !prof !0
+; CHECK:       guarded:
+; CHECK-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
+; CHECK-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]]
+; CHECK-NEXT:    [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[ARRAY_I_PTR]]
+; CHECK-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+entry:
+  %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+  %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+  br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0
 
+deopt:
+  ;; This is written to look like an unsimplified loop exit after unswitch
+  ;; (i.e. phis, merge, and branch to actual block)
+  %phi = phi i32 [0, %entry], [1, %loop]
+  call void @unknown() ;; it's okay to skip possible throws
+  br label %actual_deopt
+
+actual_deopt:
+  %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %phi) ]
+  ret i32 %deoptret
 
+loop.preheader:
+  br label %loop
 
+loop:
+  %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ]
+  %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ]
+  call void @unknown()
+  %within.bounds = icmp ult i32 %i, %length
+  br i1 %within.bounds, label %guarded, label %deopt, !prof !0
 
+guarded:
+  %i.i64 = zext i32 %i to i64
+  %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+  %array.i = load i32, i32* %array.i.ptr, align 4
+  store i32 0, i32* %array.i.ptr
+  %loop.acc.next = add i32 %loop.acc, %array.i
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ult i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
 
+exit:
+  %result = phi i32 [ %loop.acc.next, %guarded ]
+  ret i32 %result
+}
 
 ; TODO: Non-latch exits can still be predicated
 ; This is currently prevented by an overly restrictive profitability check.