[llvm] r231879 - Infer known bits from dominating conditions

Tue Mar 10 15:43:20 PDT 2015

Author: reames
Date: Tue Mar 10 17:43:20 2015
New Revision: 231879

URL: http://llvm.org/viewvc/llvm-project?rev=231879&view=rev
Log:
Infer known bits from dominating conditions

This patch adds limited support in ValueTracking for inferring known bits of a value from conditional expressions which must be true to reach the instruction we're trying to optimize. At this time, the feature is off by default. Once landed, I'm hoping for feedback from others on both profitability and compile time impact.

Forms of conditional value propagation have been tried in LLVM before and have failed due to compile time problems.  In an attempt to side step that, this patch only considers conditions where the edge leaving the branch dominates the context instruction. It does not attempt full dataflow.  Even with that restriction, it handles many interesting cases:
 * Early exits from functions
 * Early exits from loops (for context instructions in the loop and after the check)
 * Conditions which control entry into loops, including multi-version loops (such as those produced during vectorization, IRCE, loop unswitch, etc..)

Possible applications include optimizing using information provided by constructs such as: preconditions, assumptions, null checks, & range checks.

This patch implements two approaches to the problem that need further benchmarking.  Approach 1 is to directly walk the dominator tree looking for interesting conditions.  Approach 2 is to inspect other uses of the value being queried for interesting comparisons.  From initial benchmarking, it appears that Approach 2 is faster than Approach 1, but this needs to be further validated.  

Differential Revision: http://reviews.llvm.org/D7708


Added:
    llvm/trunk/test/Transforms/InstCombine/dom-conditions.ll
Modified:
    llvm/trunk/lib/Analysis/ValueTracking.cpp

Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=231879&r1=231878&r2=231879&view=diff
==============================================================================

--- llvm/trunk/lib/Analysis/ValueTracking.cpp (original)
+++ llvm/trunk/lib/Analysis/ValueTracking.cpp Tue Mar 10 17:43:20 2015
@@ -39,6 +39,34 @@ using namespace llvm::PatternMatch;
 
 const unsigned MaxDepth = 6;
 
+/// Enable an experimental feature to leverage information about dominating
+/// conditions to compute known bits.  The individual options below control how
+/// hard we search.  The defaults are choosen to be fairly aggressive.  If you
+/// run into compile time problems when testing, scale them back and report
+/// your findings.
+static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
+                                         cl::Hidden, cl::init(false));
+
+// This is expensive, so we only do it for the top level query value.
+// (TODO: evaluate cost vs profit, consider higher thresholds)
+static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
+                                               cl::Hidden, cl::init(1));
+
+/// How many dominating blocks should be scanned looking for dominating
+/// conditions?
+static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
+                                                   cl::Hidden,
+                                                   cl::init(20000));
+
+// Controls the number of uses of the value searched for possible
+// dominating comparisons.
+static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
+                                              cl::Hidden, cl::init(2000));
+
+// If true, don't consider only compares whose only use is a branch.
+static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
+                                               cl::Hidden, cl::init(false));
+
 /// Returns the bitwidth of the given scalar or pointer type (if unknown returns
 /// 0). For vector types, returns the element type's bitwidth.
 static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@@ -470,6 +498,179 @@ m_c_Xor(const LHS &L, const RHS &R) {
   return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
 }
 
+/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is
+/// true (at the context instruction.)  This is mostly a utility function for
+/// the prototype dominating conditions reasoning below.
+static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp,
+                                              APInt &KnownZero,
+                                              APInt &KnownOne,
+                                              const DataLayout &DL,
+                                              unsigned Depth, const Query &Q) {
+  Value *LHS = Cmp->getOperand(0);
+  Value *RHS = Cmp->getOperand(1);
+  // TODO: We could potentially be more aggressive here.  This would be worth
+  // evaluating.  If we can, explore commoning this code with the assume
+  // handling logic.
+  if (LHS != V && RHS != V)
+    return;
+
+  const unsigned BitWidth = KnownZero.getBitWidth();
+
+  switch (Cmp->getPredicate()) {
+  default:
+    // We know nothing from this condition
+    break;
+  // TODO: implement unsigned bound from below (known one bits)
+  // TODO: common condition check implementations with assumes
+  // TODO: implement other patterns from assume (e.g. V & B == A)
+  case ICmpInst::ICMP_SGT:
+    if (LHS == V) {
+      APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+      computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+      if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) {
+        // We know that the sign bit is zero.
+        KnownZero |= APInt::getSignBit(BitWidth);
+      }
+    }
+    break;
+  case ICmpInst::ICMP_EQ:
+    if (LHS == V)
+      computeKnownBits(RHS, KnownZero, KnownOne, DL, Depth + 1, Q);
+    else if (RHS == V)
+      computeKnownBits(LHS, KnownZero, KnownOne, DL, Depth + 1, Q);
+    else
+      llvm_unreachable("missing use?");
+    break;
+  case ICmpInst::ICMP_ULE:
+    if (LHS == V) {
+      APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+      computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+      // The known zero bits carry over
+      unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+      KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+    }
+    break;
+  case ICmpInst::ICMP_ULT:
+    if (LHS == V) {
+      APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+      computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+      // Whatever high bits in rhs are zero are known to be zero (if rhs is a
+      // power of 2, then one more).
+      unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+      if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp), DL))
+        SignBits++;
+      KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+    }
+    break;
+  };
+}
+
+/// Compute known bits in 'V' from conditions which are known to be true along
+/// all paths leading to the context instruction.  In particular, look for
+/// cases where one branch of an interesting condition dominates the context
+/// instruction.  This does not do general dataflow.
+/// NOTE: This code is EXPERIMENTAL and currently off by default.
+static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
+                                                    APInt &KnownOne,
+                                                    const DataLayout &DL,
+                                                    unsigned Depth,
+                                                    const Query &Q) {
+  // Need both the dominator tree and the query location to do anything useful
+  if (!Q.DT || !Q.CxtI)
+    return;
+  Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+
+  // Avoid useless work
+  if (auto VI = dyn_cast<Instruction>(V))
+    if (VI->getParent() == Cxt->getParent())
+      return;
+
+  // Note: We currently implement two options.  It's not clear which of these
+  // will survive long term, we need data for that.
+  // Option 1 - Try walking the dominator tree looking for conditions which
+  // might apply.  This works well for local conditions (loop guards, etc..),
+  // but not as well for things far from the context instruction (presuming a
+  // low max blocks explored).  If we can set an high enough limit, this would
+  // be all we need.
+  // Option 2 - We restrict out search to those conditions which are uses of
+  // the value we're interested in.  This is independent of dom structure,
+  // but is slightly less powerful without looking through lots of use chains.
+  // It does handle conditions far from the context instruction (e.g. early
+  // function exits on entry) really well though.
+
+  // Option 1 - Search the dom tree
+  unsigned NumBlocksExplored = 0;
+  BasicBlock *Current = Cxt->getParent();
+  while (true) {
+    // Stop searching if we've gone too far up the chain
+    if (NumBlocksExplored >= DomConditionsMaxDomBlocks)
+      break;
+    NumBlocksExplored++;
+
+    if (!Q.DT->getNode(Current)->getIDom())
+      break;
+    Current = Q.DT->getNode(Current)->getIDom()->getBlock();
+    if (!Current)
+      // found function entry
+      break;
+
+    BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator());
+    if (!BI || BI->isUnconditional())
+      continue;
+    ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
+    if (!Cmp)
+      continue;
+
+    // We're looking for conditions that are guaranteed to hold at the context
+    // instruction.  Finding a condition where one path dominates the context
+    // isn't enough because both the true and false cases could merge before
+    // the context instruction we're actually interested in.  Instead, we need
+    // to ensure that the taken *edge* dominates the context instruction.
+    BasicBlock *BB0 = BI->getSuccessor(0);
+    BasicBlockEdge Edge(BI->getParent(), BB0);
+    if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+      continue;
+
+    computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+                                      Q);
+  }
+
+  // Option 2 - Search the other uses of V
+  unsigned NumUsesExplored = 0;
+  for (auto U : V->users()) {
+    // Avoid massive lists
+    if (NumUsesExplored >= DomConditionsMaxUses)
+      break;
+    NumUsesExplored++;
+    // Consider only compare instructions uniquely controlling a branch
+    ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
+    if (!Cmp)
+      continue;
+
+    if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
+      continue;
+
+    for (auto *CmpU : Cmp->users()) {
+      BranchInst *BI = dyn_cast<BranchInst>(CmpU);
+      if (!BI || BI->isUnconditional())
+        continue;
+      // We're looking for conditions that are guaranteed to hold at the
+      // context instruction.  Finding a condition where one path dominates
+      // the context isn't enough because both the true and false cases could
+      // merge before the context instruction we're actually interested in.
+      // Instead, we need to ensure that the taken *edge* dominates the context
+      // instruction. 
+      BasicBlock *BB0 = BI->getSuccessor(0);
+      BasicBlockEdge Edge(BI->getParent(), BB0);
+      if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+        continue;
+
+      computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+                                        Q);
+    }
+  }
+}
+
 static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
                                        APInt &KnownOne, const DataLayout &DL,
                                        unsigned Depth, const Query &Q) {
@@ -824,6 +1025,11 @@ void computeKnownBits(Value *V, APInt &K
     // Don't give up yet... there might be an assumption that provides more
     // information...
     computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+
+    // Or a dominating condition for that matter
+    if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+      computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL,
+                                              Depth, Q);
     return;
   }
 
@@ -846,6 +1052,12 @@ void computeKnownBits(Value *V, APInt &K
   // Check whether a nearby assume intrinsic can determine some known bits.
   computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
 
+  // Check whether there's a dominating condition which implies something about
+  // this value at the given context.
+  if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+    computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth,
+                                            Q);
+
   Operator *I = dyn_cast<Operator>(V);
   if (!I) return;
 

Added: llvm/trunk/test/Transforms/InstCombine/dom-conditions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/dom-conditions.ll?rev=231879&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/dom-conditions.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/dom-conditions.ll Tue Mar 10 17:43:20 2015
@@ -0,0 +1,152 @@
+; RUN: opt -instcombine -value-tracking-dom-conditions=1 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define i1 @test_cmp_ult(i64 %A) {
+; CHECK-LABEL: @test_cmp_ult
+entry:
+  %cmp = icmp ult i64 %A, 64
+  br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: ret i1 false
+  %cmp2 = icmp ugt i64 %A, 64
+  ret i1 %cmp2
+untaken:
+  ret i1 true
+}
+
+define i1 @test_cmp_ule(i64 %A) {
+; CHECK-LABEL: @test_cmp_ule
+entry:
+  %cmp = icmp ule i64 %A, 64
+  br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: ret i1 false
+  %cmp2 = icmp ugt i64 %A, 128
+  ret i1 %cmp2
+untaken:
+  ret i1 true
+}
+
+define i1 @test_cmp_sgt(i32 %A) {
+; CHECK-LABEL: @test_cmp_sgt
+entry:
+  %cmp = icmp sgt i32 %A, 10
+  br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: ret i1 true
+  %cmp2 = icmp sgt i32 %A, -1
+  ret i1 %cmp2
+untaken:
+  ret i1 true
+}
+
+define i64 @test_add_zero_bits(i64 %A) {
+; CHECK-LABEL: @test_add_zero_bits
+entry:
+  %cmp = icmp eq i64 %A, 2
+  br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: ret i64 3
+  %add = add i64 %A, 1
+  ret i64 %add
+untaken:
+  ret i64 %A
+}
+
+define i64 @test_add_nsw(i64 %A) {
+; CHECK-LABEL: @test_add_nsw
+entry:
+  %cmp = icmp ult i64 %A, 20
+  br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: %add = add nuw nsw i64 %A, 1
+; CHECK-NEXT: ret i64 %add
+  %add = add i64 %A, 1
+  ret i64 %add
+untaken:
+  ret i64 %A
+}
+
+; After sinking the instructions into the if block, check that we 
+; can simplify some of them using dominating conditions.
+define i32 @test_add_zero_bits_sink(i32 %x) nounwind ssp {
+; CHECK-LABEL: @test_add_zero_bits_sink(
+; CHECK-NOT: sdiv i32
+entry:
+  %a = add nsw i32 %x, 16
+  %b = sdiv i32 %a, %x
+  %cmp = icmp ult i32 %x, 7
+  br i1 %cmp, label %bb1, label %bb2
+
+bb1:
+; CHECK-LABEL: bb1:
+; CHECK-NEXT: or i32 %x, 16
+; CHECK-NEXT: udiv i32
+  ret i32 %b
+
+bb2:
+  ret i32 %x
+}
+
+; A condition in the same block gives no information
+define i32 @test_neg1(i32 %x) nounwind ssp {
+; CHECK-LABEL: @test_neg1
+; CHECK: add
+; CHECK: sdiv
+; CHECK: icmp
+; CHECK: select
+entry:
+  %a = add nsw i32 %x, 16
+  %b = sdiv i32 %a, %x
+  %cmp = icmp ult i32 %x, 7
+  %ret = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %ret
+}
+
+; A non-dominating edge gives no information 
+define i32 @test_neg2(i32 %x) {
+; CHECK-LABEL: @test_neg2
+entry:
+  %cmp = icmp ult i32 %x, 7
+  br i1 %cmp, label %bb1, label %merge
+
+bb1:
+  br label %merge
+
+merge:
+; CHECK-LABEL: merge:
+; CHECK: icmp
+; CHECK: select
+  %cmp2 = icmp ult i32 %x, 7
+  %ret = select i1 %cmp2, i32 %x, i32 0
+  ret i32 %ret
+}
+
+; A unconditional branch expressed as a condition one gives no
+; information (and shouldn't trip any asserts.)
+define i32 @test_neg3(i32 %x) {
+; CHECK-LABEL: @test_neg3
+entry:
+  %cmp = icmp ult i32 %x, 7
+  br i1 %cmp, label %merge, label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK: icmp
+; CHECK: select
+  %cmp2 = icmp ult i32 %x, 7
+  %ret = select i1 %cmp2, i32 %x, i32 0
+  ret i32 %ret
+}
+
+declare i32 @bar()