[llvm] 686f449 - [WC] Fix a subtle bug in our definition of widenable branch

Wed Nov 6 14:16:40 PST 2019

Author: Philip Reames
Date: 2019-11-06T14:16:34-08:00
New Revision: 686f449e3d4ecad6413427aef35557f5adac100c

URL: https://github.com/llvm/llvm-project/commit/686f449e3d4ecad6413427aef35557f5adac100c
DIFF: https://github.com/llvm/llvm-project/commit/686f449e3d4ecad6413427aef35557f5adac100c.diff

LOG: [WC] Fix a subtle bug in our definition of widenable branch

We had a subtle, but nasty bug in our definition of a widenable branch, and thus in the transforms which used that utility. Specifically, we returned true for any branch which included a widenable condition within it's condition, regardless of whether that widenable condition also had other uses.

The problem is that the result of the WC() call is defined to be one particular value. As such, all users must agree as to what that value is. If we widen a branch without also updating *all other users* of the WC in the same way, we have broken the required semantics.

Most of the textual diff is updating existing transforms not to leave dead uses hanging around. They're largely NFC as the dead instructions would be immediately deleted by other passes. The reason to make these changes is so that the transforms preserve the widenable branch form.

In practice, we don't get bitten by this only because it isn't profitable to CSE WC() calls and the lowering pass from guards uses distinct WC calls per branch.

Differential Revision: https://reviews.llvm.org/D69916

Added: 
    

Modified: 
    llvm/lib/Analysis/GuardUtils.cpp
    llvm/lib/Transforms/Scalar/GuardWidening.cpp
    llvm/lib/Transforms/Scalar/LoopPredication.cpp
    llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll
    llvm/test/Transforms/GuardWidening/mixed_guards.ll
    llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/GuardUtils.cpp b/llvm/lib/Analysis/GuardUtils.cpp
index cad92f6e56bb..863443cea355 100644

--- a/llvm/lib/Analysis/GuardUtils.cpp
+++ b/llvm/lib/Analysis/GuardUtils.cpp
@@ -42,6 +42,11 @@ bool llvm::parseWidenableBranch(const User *U, Value *&Condition,
   if (!match(U, m_Br(m_And(m_Value(Condition), m_Value(WidenableCondition)),
                      IfTrueBB, IfFalseBB)))
     return false;
+  // For the branch to be (easily) widenable, it must not correlate with other
+  // branches.  Thus, the widenable condition must have a single use.
+  if (!WidenableCondition->hasOneUse() ||
+      !cast<BranchInst>(U)->getCondition()->hasOneUse())
+    return false;
   // TODO: At the moment, we only recognize the branch if the WC call in this
   // specific position.  We should generalize!
   return match(WidenableCondition,

diff  --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 2697d7809568..269e641b8f19 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -271,16 +271,17 @@ class GuardWideningImpl {
   void widenGuard(Instruction *ToWiden, Value *NewCondition,
                   bool InvertCondition) {
     Value *Result;
+    
     widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
                     InvertCondition);
-    Value *WidenableCondition = nullptr;
     if (isGuardAsWidenableBranch(ToWiden)) {
-      auto *Cond = cast<BranchInst>(ToWiden)->getCondition();
-      WidenableCondition = cast<BinaryOperator>(Cond)->getOperand(1);
+      auto *BI = cast<BranchInst>(ToWiden);
+      auto *And = cast<Instruction>(BI->getCondition());
+      And->setOperand(0, Result);
+      And->moveBefore(ToWiden);
+      assert(isGuardAsWidenableBranch(ToWiden) && "still widenable?");
+      return;
     }
-    if (WidenableCondition)
-      Result = BinaryOperator::CreateAnd(Result, WidenableCondition,
-                                         "guard.chk", ToWiden);
     setCondition(ToWiden, Result);
   }
 

diff  --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 885c0e8f4b8b..495f0bc83d69 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -823,9 +823,9 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
   Value *AllChecks = Builder.CreateAnd(Checks);
   auto *OldCond = BI->getCondition();
   BI->setCondition(AllChecks);
+  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
   assert(isGuardAsWidenableBranch(BI) &&
          "Stopped being a guard after transform?");
-  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
 
   LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
   return true;

diff  --git a/llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll b/llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll
index 605178a7868b..a71e7c30f003 100644
--- a/llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll
+++ b/llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll
@@ -8,8 +8,7 @@ define void @f_0(i1 %cond_0, i1 %cond_1) {
 ; CHECK-LABEL: @f_0(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
 ; CHECK:       deopt:
@@ -52,8 +51,7 @@ define void @f_1(i1 %cond_0, i1 %cond_1) {
 ; CHECK-LABEL: @f_1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
 ; CHECK:       deopt:
@@ -113,7 +111,6 @@ define void @f_2(i32 %a, i32 %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[COND_0:%.*]] = icmp ult i32 [[A:%.*]], 10
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    [[COND_1:%.*]] = icmp ult i32 [[B:%.*]], 10
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
@@ -235,7 +232,6 @@ define void @f_4(i32 %a, i32 %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[COND_0:%.*]] = icmp ult i32 [[A:%.*]], 10
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    [[COND_1:%.*]] = icmp ult i32 [[B:%.*]], 10
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
@@ -294,7 +290,6 @@ define void @f_5(i32 %a) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[COND_0:%.*]] = icmp ugt i32 [[A:%.*]], 7
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = icmp uge i32 [[A]], 11
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
@@ -403,7 +398,6 @@ define void @f_7(i32 %a, i1* %cond_buf) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[COND_1:%.*]] = load volatile i1, i1* [[COND_BUF:%.*]]
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_1]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    [[COND_3:%.*]] = icmp ult i32 [[A:%.*]], 7
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_1]], [[COND_3]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
@@ -491,9 +485,8 @@ define void @f_8(i32 %a, i1 %cond_1, i1 %cond_2) {
 ; CHECK-NEXT:    br i1 undef, label [[LOOP]], label [[LEAVE:%.*]]
 ; CHECK:       leave:
 ; CHECK-NEXT:    [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[COND_2:%.*]], [[WIDENABLE_COND3]]
 ; CHECK-NEXT:    [[COND_3:%.*]] = icmp ult i32 [[A:%.*]], 7
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_2]], [[COND_3]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_2:%.*]], [[COND_3]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND3]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof !0
 ; CHECK:       deopt2:
@@ -671,8 +664,7 @@ define void @f_11(i32 %a, i1 %cond_0, i1 %cond_1) {
 ; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
 ; CHECK:       outer_header:
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
 ; CHECK:       deopt:
@@ -734,7 +726,6 @@ define void @f_12(i32 %a0) {
 ; CHECK-LABEL: @f_12(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 true, [[WIDENABLE_COND]]
 ; CHECK-NEXT:    [[A1:%.*]] = mul i32 [[A0:%.*]], [[A0]]
 ; CHECK-NEXT:    [[A2:%.*]] = mul i32 [[A1]], [[A1]]
 ; CHECK-NEXT:    [[A3:%.*]] = mul i32 [[A2]], [[A2]]
@@ -840,7 +831,6 @@ define void @f_13(i32 %a) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[COND_0:%.*]] = icmp ult i32 [[A:%.*]], 14
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = icmp ult i32 [[A]], 10
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0

diff  --git a/llvm/test/Transforms/GuardWidening/mixed_guards.ll b/llvm/test/Transforms/GuardWidening/mixed_guards.ll
index 58908c0ff806..f136f98da186 100644
--- a/llvm/test/Transforms/GuardWidening/mixed_guards.ll
+++ b/llvm/test/Transforms/GuardWidening/mixed_guards.ll
@@ -45,8 +45,7 @@ define void @test_02(i1 %cond_0, i1 %cond_1) {
 ; CHECK-LABEL: @test_02(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
-; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
 ; CHECK-NEXT:    [[GUARD_CHK:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]]
 ; CHECK-NEXT:    br i1 [[GUARD_CHK]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
 ; CHECK:       deopt:

diff  --git a/llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll b/llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll
index d6be9cd53a18..79bbefd12666 100644
--- a/llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll
+++ b/llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll
@@ -305,6 +305,66 @@ deopt2:
   ret i32 %deoptret2
 }
 
+; This one is subtle - We can't widen only one branch use of the
+; widenable condition as two branches are correlated.  We'd have to
+; widen them *both*.
+define i32 @neg_correlated(i1 %cond_0, i1 %cond_1, i32* %p) {
+; CHECK-LABEL: @neg_correlated(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
+; CHECK:       deopt:
+; CHECK-NEXT:    [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT:    ret i32 [[DEOPTRET]]
+; CHECK:       guarded:
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND2:%.*]] = and i1 [[COND_1:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND2]], label [[GUARDED2:%.*]], label [[DEOPT2:%.*]], !prof !0
+; CHECK:       deopt2:
+; CHECK-NEXT:    [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT:    ret i32 [[DEOPTRET2]]
+; CHECK:       guarded2:
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]]
+; CHECK-NEXT:    [[COND_2:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    br i1 [[COND_2]], label [[RETURN:%.*]], label [[DEOPT3:%.*]], !prof !0
+; CHECK:       deopt3:
+; CHECK-NEXT:    [[DEOPTRET3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT:    ret i32 [[DEOPTRET3]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+  %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+  br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0
+
+deopt:
+  %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+  ret i32 %deoptret
+
+guarded:
+  %exiplicit_guard_cond2 = and i1 %cond_1, %widenable_cond
+  br i1 %exiplicit_guard_cond2, label %guarded2, label %deopt2, !prof !0
+
+deopt2:
+  %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+  ret i32 %deoptret2
+
+guarded2:
+  %v = load i32, i32* %p
+  %cond_2 = icmp eq i32 %v, 0
+  br i1 %cond_2, label %return, label %deopt3, !prof !0
+
+deopt3:
+  %deoptret3 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+  ret i32 %deoptret3
+
+return:
+  ret i32 0
+}
+
+
+
 
 declare void @unknown()
 declare i32 @unknown_i32()