[llvm] fca0218 - [indvars] Canonicalize exit conditions to unsigned using range info

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 19 11:49:18 PDT 2021


Author: Philip Reames
Date: 2021-10-19T11:49:12-07:00
New Revision: fca0218875f5117110d38b9cd7503bc2789693d3

URL: https://github.com/llvm/llvm-project/commit/fca0218875f5117110d38b9cd7503bc2789693d3
DIFF: https://github.com/llvm/llvm-project/commit/fca0218875f5117110d38b9cd7503bc2789693d3.diff

LOG: [indvars] Canonicalize exit conditions to unsigned using range info

This patch duplicates a bit of logic we apply to comparisons encountered during the IV users walk to conditions which feed exit conditions. Why? simplifyAndExtend has a very limited list of users it walks. In particular, in the examples is stops at the zext and never visits the icmp. (Because we can't fold the zext to an addrec yet in SCEV.) Being willing to visit when we haven't simplified regresses multiple tests (seemingly because of less optimal results when computing trip counts).

Note that this can be trivially extended to multiple exiting blocks. I'm leaving that to a future patch (solely to cut down on the number of versions of the same code in review at once.)

Differential Revision: https://reviews.llvm.org/D111896

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
    llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 7fb024ea7fc77..bf776b1cfd7f6 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -89,6 +89,7 @@
 #include <utility>
 
 using namespace llvm;
+using namespace PatternMatch;
 
 #define DEBUG_TYPE "indvars"
 
@@ -155,6 +156,9 @@ class IndVarSimplify {
   bool rewriteNonIntegerIVs(Loop *L);
 
   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
+  /// See if we can convert an exit condition from signed to unsigned.
+  /// (See inline comment about why this is duplicated from simplifyAndExtend)
+  bool canonicalizeExitCondition(Loop *L);
   /// Try to eliminate loop exits based on analyzeable exit counts
   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
   /// Try to form loop invariant tests for loop exits by changing how many
@@ -1346,7 +1350,6 @@ static bool optimizeLoopExitWithUnknownExitCount(
     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
   ICmpInst::Predicate Pred;
   Value *LHS, *RHS;
-  using namespace PatternMatch;
   BasicBlock *TrueSucc, *FalseSucc;
   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
@@ -1407,6 +1410,55 @@ static bool optimizeLoopExitWithUnknownExitCount(
   return true;
 }
 
+bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
+  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
+  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
+  // never reaches the icmp since the zext doesn't fold to an AddRec unless
+  // it already has flags.  The alternative to this would be to extending the
+  // set of "interesting" IV users to include the icmp, but doing that
+  // regresses results in practice by querying SCEVs before trip counts which
+  // rely on them which results in SCEV caching sub-optimal answers.  The
+  // concern about caching sub-optimal results is why we only query SCEVs of
+  // the loop invariant RHS here.
+
+  auto *ExitingBB = L->getExitingBlock();
+  if (!ExitingBB)
+    return false;
+  auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+  if (!BI)
+    return false;
+  assert(BI->isConditional() && "exit branch must be conditional");
+
+  auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!ICmp)
+    return false;
+
+  auto *LHS = ICmp->getOperand(0);
+  auto *RHS = ICmp->getOperand(1);
+  // Avoid computing SCEVs in the loop to avoid poisoning cache with
+  // sub-optimal results.
+  if (!L->isLoopInvariant(RHS))
+    return false;
+
+  // Match (icmp signed-cond zext, RHS)
+  Value *LHSOp = nullptr;
+  if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
+    return false;
+
+  const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
+  const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
+  const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
+  auto FullCR = ConstantRange::getFull(InnerBitWidth);
+  FullCR = FullCR.zeroExtend(OuterBitWidth);
+  if (!FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS))))
+    return false;
+
+  // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
+  // replace the signed condition with the unsigned version.
+  ICmp->setPredicate(ICmp->getUnsignedPredicate());
+  return true;
+}
+
 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
   SmallVector<BasicBlock*, 16> ExitingBlocks;
   L->getExitingBlocks(ExitingBlocks);
@@ -1788,6 +1840,11 @@ bool IndVarSimplify::run(Loop *L) {
   // Eliminate redundant IV cycles.
   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
 
+  if (canonicalizeExitCondition(L))
+    // We've changed the predicate, but have not changed exit counts, or the
+    // values which can flow through any SCEV.  i.e, no invalidation needed.
+    Changed = true;
+
   // Try to eliminate loop exits based on analyzeable exit counts
   if (optimizeLoopExits(L, Rewriter))  {
     Changed = true;

diff  --git a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
index 45980be519fd2..c803a54963a91 100644
--- a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
+++ b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
@@ -15,7 +15,7 @@ define void @slt_constant_rhs(i16 %n.raw, i8 %start) mustprogress {
 ; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], 1
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i16 [[ZEXT]], 254
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i16 [[ZEXT]], 254
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
@@ -71,7 +71,7 @@ define void @slt_non_constant_rhs_no_mustprogress(i16 %n.raw) {
 ; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], 1
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i16 [[ZEXT]], [[N]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i16 [[ZEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
@@ -301,7 +301,7 @@ define void @sgt_constant_rhs(i16 %n.raw, i8 %start) mustprogress {
 ; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], 1
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i16 [[ZEXT]], 254
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[ZEXT]], 254
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
@@ -355,7 +355,7 @@ define void @sle_constant_rhs(i16 %n.raw, i8 %start) mustprogress {
 ; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], 1
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i16 [[ZEXT]], 254
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ule i16 [[ZEXT]], 254
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
@@ -409,7 +409,7 @@ define void @sge_constant_rhs(i16 %n.raw, i8 %start) mustprogress {
 ; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], 1
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i16 [[ZEXT]], 254
+; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i16 [[ZEXT]], 254
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void


        


More information about the llvm-commits mailing list