[llvm] 1b87882 - [LoopPredication] Rework assumes of widened conditions

Fri Aug 18 05:36:54 PDT 2023

Author: Aleksandr Popov
Date: 2023-08-18T14:35:46+02:00
New Revision: 1b87882228b0371212753781a363d4addd1bafa7

URL: https://github.com/llvm/llvm-project/commit/1b87882228b0371212753781a363d4addd1bafa7
DIFF: https://github.com/llvm/llvm-project/commit/1b87882228b0371212753781a363d4addd1bafa7.diff

LOG: [LoopPredication] Rework assumes of widened conditions

Currently after widening br(WC && (c1 && c2)) we insert assume of
(c1 && c2) which is joined to WC by And operation.
But we are going to support more flexible form of widenable branches
where WC could be placed arbitrary in the expression tree, e.g:
br(c1 && (c2 && WC)).
In that case we won't have (c1 && c2) in the IR. So we need to add
explicit (c1 && c2) and then create an assumption of it.

Reviewed By: anna

Differential Revision: https://reviews.llvm.org/D157502

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopPredication.cpp
    llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 79a3da9020e2e2..3491362dcd9502 100644

--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -307,8 +307,9 @@ class LoopPredication {
   widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck, LoopICmp RangeCheck,
                                       SCEVExpander &Expander,
                                       Instruction *Guard);
-  unsigned widenChecks(SmallVectorImpl<Value *> &Checks, SCEVExpander &Expander,
-                       Instruction *Guard);
+  void widenChecks(SmallVectorImpl<Value *> &Checks,
+                   SmallVectorImpl<Value *> &WidenedChecks,
+                   SCEVExpander &Expander, Instruction *Guard);
   bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
   bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander);
   // If the loop always exits through another block in the loop, we should not
@@ -754,17 +755,15 @@ LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
   }
 }
 
-unsigned LoopPredication::widenChecks(SmallVectorImpl<Value *> &Checks,
-                                      SCEVExpander &Expander,
-                                      Instruction *Guard) {
-  unsigned NumWidened = 0;
+void LoopPredication::widenChecks(SmallVectorImpl<Value *> &Checks,
+                                  SmallVectorImpl<Value *> &WidenedChecks,
+                                  SCEVExpander &Expander, Instruction *Guard) {
   for (auto &Check : Checks)
     if (ICmpInst *ICI = dyn_cast<ICmpInst>(Check))
       if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Guard)) {
-        NumWidened++;
+        WidenedChecks.push_back(Check);
         Check = *NewRangeCheck;
       }
-  return NumWidened;
 }
 
 bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
@@ -774,12 +773,13 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
 
   TotalConsidered++;
   SmallVector<Value *, 4> Checks;
+  SmallVector<Value *> WidenedChecks;
   parseWidenableGuard(Guard, Checks);
-  unsigned NumWidened = widenChecks(Checks, Expander, Guard);
-  if (NumWidened == 0)
+  widenChecks(Checks, WidenedChecks, Expander, Guard);
+  if (WidenedChecks.empty())
     return false;
 
-  TotalWidened += NumWidened;
+  TotalWidened += WidenedChecks.size();
 
   // Emit the new guard condition
   IRBuilder<> Builder(findInsertPt(Guard, Checks));
@@ -792,7 +792,7 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
   }
   RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
 
-  LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+  LLVM_DEBUG(dbgs() << "Widened checks = " << WidenedChecks.size() << "\n");
   return true;
 }
 
@@ -810,15 +810,16 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
 
   TotalConsidered++;
   SmallVector<Value *, 4> Checks;
+  SmallVector<Value *> WidenedChecks;
   parseWidenableGuard(BI, Checks);
   // At the moment, our matching logic for wideable conditions implicitly
   // assumes we preserve the form: (br (and Cond, WC())).  FIXME
   Checks.push_back(WC);
-  unsigned NumWidened = widenChecks(Checks, Expander, BI);
-  if (NumWidened == 0)
+  widenChecks(Checks, WidenedChecks, Expander, BI);
+  if (WidenedChecks.empty())
     return false;
 
-  TotalWidened += NumWidened;
+  TotalWidened += WidenedChecks.size();
 
   // Emit the new guard condition
   IRBuilder<> Builder(findInsertPt(BI, Checks));
@@ -830,13 +831,13 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
     // If this block has other predecessors, we might not be able to use Cond.
     // In this case, create a Phi where every other input is `true` and input
     // from guard block is Cond.
-    Value *AssumeCond = Cond;
+    Value *AssumeCond = Builder.CreateAnd(WidenedChecks);
     if (!IfTrueBB->getUniquePredecessor()) {
       auto *GuardBB = BI->getParent();
-      auto *PN = Builder.CreatePHI(Cond->getType(), pred_size(IfTrueBB),
+      auto *PN = Builder.CreatePHI(AssumeCond->getType(), pred_size(IfTrueBB),
                                    "assume.cond");
       for (auto *Pred : predecessors(IfTrueBB))
-        PN->addIncoming(Pred == GuardBB ? Cond : Builder.getTrue(), Pred);
+        PN->addIncoming(Pred == GuardBB ? AssumeCond : Builder.getTrue(), Pred);
       AssumeCond = PN;
     }
     Builder.CreateAssumption(AssumeCond);
@@ -845,7 +846,7 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
   assert(isGuardAsWidenableBranch(BI) &&
          "Stopped being a guard after transform?");
 
-  LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+  LLVM_DEBUG(dbgs() << "Widened checks = " << WidenedChecks.size() << "\n");
   return true;
 }
 

diff  --git a/llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll b/llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll
index b54e218b0cbd55..cad4fcec5c81ab 100644
--- a/llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll
+++ b/llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll
@@ -1224,7 +1224,6 @@ define i32 @two_range_checks(ptr %array.1, i32 %length.1, ptr %array.2, i32 %len
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
 ; CHECK-NEXT:    [[WITHIN_BOUNDS_1:%.*]] = icmp ult i32 [[I]], [[LENGTH_1]]
 ; CHECK-NEXT:    [[WITHIN_BOUNDS_2:%.*]] = icmp ult i32 [[I]], [[LENGTH_2]]
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = and i1 [[WITHIN_BOUNDS_1]], [[WITHIN_BOUNDS_2]]
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
 ; CHECK-NEXT:    [[TMP8:%.*]] = and i1 [[TMP3]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = and i1 [[TMP8]], [[WIDENABLE_COND]]
@@ -1233,7 +1232,8 @@ define i32 @two_range_checks(ptr %array.1, i32 %length.1, ptr %array.2, i32 %len
 ; CHECK-NEXT:    [[DEOPTCALL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    ret i32 [[DEOPTCALL]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    call void @llvm.assume(i1 [[WITHIN_BOUNDS]])
+; CHECK-NEXT:    [[TMP10:%.*]] = and i1 [[WITHIN_BOUNDS_2]], [[WITHIN_BOUNDS_1]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP10]])
 ; CHECK-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
 ; CHECK-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
@@ -1314,8 +1314,6 @@ define i32 @three_range_checks(ptr %array.1, i32 %length.1, ptr %array.2, i32 %l
 ; CHECK-NEXT:    [[WITHIN_BOUNDS_1:%.*]] = icmp ult i32 [[I]], [[LENGTH_1]]
 ; CHECK-NEXT:    [[WITHIN_BOUNDS_2:%.*]] = icmp ult i32 [[I]], [[LENGTH_2]]
 ; CHECK-NEXT:    [[WITHIN_BOUNDS_3:%.*]] = icmp ult i32 [[I]], [[LENGTH_3]]
-; CHECK-NEXT:    [[WITHIN_BOUNDS_1_AND_2:%.*]] = and i1 [[WITHIN_BOUNDS_1]], [[WITHIN_BOUNDS_2]]
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = and i1 [[WITHIN_BOUNDS_1_AND_2]], [[WITHIN_BOUNDS_3]]
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
 ; CHECK-NEXT:    [[TMP12:%.*]] = and i1 [[TMP3]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = and i1 [[TMP12]], [[TMP11]]
@@ -1325,7 +1323,9 @@ define i32 @three_range_checks(ptr %array.1, i32 %length.1, ptr %array.2, i32 %l
 ; CHECK-NEXT:    [[DEOPTCALL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    ret i32 [[DEOPTCALL]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    call void @llvm.assume(i1 [[WITHIN_BOUNDS]])
+; CHECK-NEXT:    [[TMP15:%.*]] = and i1 [[WITHIN_BOUNDS_3]], [[WITHIN_BOUNDS_2]]
+; CHECK-NEXT:    [[TMP16:%.*]] = and i1 [[TMP15]], [[WITHIN_BOUNDS_1]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP16]])
 ; CHECK-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
 ; CHECK-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
@@ -1533,7 +1533,6 @@ define i32 @unsigned_loop_0_to_n_unrelated_condition(ptr %array, i32 %length, i3
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
 ; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]]
 ; CHECK-NEXT:    [[UNRELATED_COND:%.*]] = icmp ult i32 [[X:%.*]], [[LENGTH]]
-; CHECK-NEXT:    [[GUARD_COND:%.*]] = and i1 [[WITHIN_BOUNDS]], [[UNRELATED_COND]]
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
 ; CHECK-NEXT:    [[TMP4:%.*]] = and i1 [[UNRELATED_COND]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i1 [[TMP4]], [[WIDENABLE_COND]]
@@ -1542,7 +1541,7 @@ define i32 @unsigned_loop_0_to_n_unrelated_condition(ptr %array, i32 %length, i3
 ; CHECK-NEXT:    [[DEOPTCALL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    ret i32 [[DEOPTCALL]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    call void @llvm.assume(i1 [[GUARD_COND]])
+; CHECK-NEXT:    call void @llvm.assume(i1 [[WITHIN_BOUNDS]])
 ; CHECK-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I_I64]]
 ; CHECK-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4