[llvm] 3b2011f - [LSR] Fold terminating condition to other IV when possible

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 20 01:38:56 PDT 2022


Author: eopXD
Date: 2022-09-20T01:38:47-07:00
New Revision: 3b2011fd4f393c46ed1110840e0c00f52a582241

URL: https://github.com/llvm/llvm-project/commit/3b2011fd4f393c46ed1110840e0c00f52a582241
DIFF: https://github.com/llvm/llvm-project/commit/3b2011fd4f393c46ed1110840e0c00f52a582241.diff

LOG: [LSR] Fold terminating condition to other IV when possible

When the IV is only used by the terminating condition (say IV-A) and the loop
has a predictable back-edge count and we have another IV (say IV-B) that is an
affine add recursion, we will be able to calculate the terminating value of
IV-B in the loop pre-header. This patch adds attempts to replace IV-B as the
new terminating condition and remove IV-A. It is safe to do so since IV-A is
only used as the terminating condition.

This transformation is suitable to be appended after LSR as it may optimize the
loop into the situation mentioned above. The transformation can reduce number of
IV-s in the loop by one.

A cli option `lsr-term-fold` is added and default disabled.

Reviewed By: mcberg2021, craig.topper

Differential Revision: https://reviews.llvm.org/D132443

Added: 
    llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll

Modified: 
    llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 8e9b2c0b57dd0..71740b0d0e3df 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -64,6 +64,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/IVUsers.h"
@@ -186,6 +187,13 @@ static cl::opt<unsigned> SetupCostDepthLimit(
     "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
     cl::desc("The limit on recursion depth for LSRs setup cost"));
 
+static cl::opt<bool> AllowTerminatingConditionFoldingAfterLSR(
+    "lsr-term-fold", cl::Hidden, cl::init(false),
+    cl::desc("Attempt to replace primary IV with other IV."));
+
+STATISTIC(NumTermFold,
+          "Number of terminating condition fold recognized and performed");
+
 #ifndef NDEBUG
 // Stress test IV chain generation.
 static cl::opt<bool> StressIVChain(
@@ -6570,6 +6578,141 @@ static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
   return nullptr;
 }
 
+static Optional<std::pair<PHINode *, PHINode *>>
+canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
+                      const LoopInfo &LI) {
+  if (!L->isInnermost()) {
+    LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n");
+    return None;
+  }
+  // Only inspect on simple loop structure
+  if (!L->isLoopSimplifyForm()) {
+    LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n");
+    return None;
+  }
+
+  if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
+    LLVM_DEBUG(dbgs() << "Cannot fold on backedge that is loop variant\n");
+    return None;
+  }
+
+  BasicBlock *LoopPreheader = L->getLoopPreheader();
+  BasicBlock *LoopLatch = L->getLoopLatch();
+
+  // TODO: Can we do something for greater than and less than?
+  // Terminating condition is foldable when it is an eq/ne icmp
+  BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
+  if (BI->isUnconditional())
+    return None;
+  Value *TermCond = BI->getCondition();
+  if (!isa<ICmpInst>(TermCond) || !cast<ICmpInst>(TermCond)->isEquality()) {
+    LLVM_DEBUG(dbgs() << "Cannot fold on branching condition that is not an "
+                         "ICmpInst::eq / ICmpInst::ne\n");
+    return None;
+  }
+  if (!TermCond->hasOneUse()) {
+    LLVM_DEBUG(
+        dbgs()
+        << "Cannot replace terminating condition with more than one use\n");
+    return None;
+  }
+
+  // For `IsToFold`, a primary IV can be replaced by other affine AddRec when it
+  // is only used by the terminating condition. To check for this, we may need
+  // to traverse through a chain of use-def until we can examine the final
+  // usage.
+  //         *----------------------*
+  //   *---->|  LoopHeader:         |
+  //   |     |  PrimaryIV = phi ... |
+  //   |     *----------------------*
+  //   |              |
+  //   |              |
+  //   |           chain of
+  //   |          single use
+  // used by          |
+  //  phi             |
+  //   |            Value
+  //   |          /       \
+  //   |     chain of     chain of
+  //   |    single use     single use
+  //   |      /               \
+  //   |     /                 \
+  //   *- Value                Value --> used by terminating condition
+  auto IsToFold = [&](PHINode &PN) -> bool {
+    Value *V = &PN;
+
+    while (V->getNumUses() == 1)
+      V = *V->user_begin();
+
+    if (V->getNumUses() != 2)
+      return false;
+
+    Value *VToPN = nullptr;
+    Value *VToTermCond = nullptr;
+    for (User *U : V->users()) {
+      while (U->getNumUses() == 1) {
+        if (isa<PHINode>(U))
+          VToPN = U;
+        if (U == TermCond)
+          VToTermCond = U;
+        U = *U->user_begin();
+      }
+    }
+    return VToPN && VToTermCond;
+  };
+
+  // For `IsToHelpFold`, other IV that is an affine AddRec will be sufficient to
+  // replace the terminating condition
+  auto IsToHelpFold = [&](PHINode &PN) -> bool {
+    // TODO: Right now we limit the phi node to help the folding be of a start
+    // value of getelementptr. We can extend to any kinds of IV as long as it is
+    // an affine AddRec. Add a switch to cover more types of instructions here
+    // and down in the actual transformation.
+    return isa<GetElementPtrInst>(PN.getIncomingValueForBlock(LoopPreheader));
+  };
+
+  PHINode *ToFold = nullptr;
+  PHINode *ToHelpFold = nullptr;
+
+  for (PHINode &PN : L->getHeader()->phis()) {
+    if (!SE.isSCEVable(PN.getType())) {
+      LLVM_DEBUG(dbgs() << "IV of phi '" << PN
+                        << "' is not SCEV-able, not qualified for the "
+                           "terminating condition folding.\n");
+      continue;
+    }
+    const SCEV *S = SE.getSCEV(&PN);
+    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S);
+    // Only speculate on affine AddRec
+    if (!AddRec || !AddRec->isAffine()) {
+      LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN
+                        << "' is not an affine add recursion, not qualified "
+                           "for the terminating condition folding.\n");
+      continue;
+    }
+
+    if (IsToFold(PN))
+      ToFold = &PN;
+    else if (IsToHelpFold(PN))
+      ToHelpFold = &PN;
+  }
+
+  LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
+                 << "Cannot find other AddRec IV to help folding\n";);
+
+  LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs()
+             << "\nFound loop that can fold terminating condition\n"
+             << "  BECount (SCEV): " << *SE.getBackedgeTakenCount(L) << "\n"
+             << "  TermCond: " << *TermCond << "\n"
+             << "  BrandInst: " << *BI << "\n"
+             << "  ToFold: " << *ToFold << "\n"
+             << "  ToHelpFold: " << *ToHelpFold << "\n");
+
+  if (!ToFold || !ToHelpFold)
+    return None;
+  return {{ToFold, ToHelpFold}};
+}
+
 static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                                DominatorTree &DT, LoopInfo &LI,
                                const TargetTransformInfo &TTI,
@@ -6628,6 +6771,87 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
     }
   }
 
+  if (AllowTerminatingConditionFoldingAfterLSR) {
+    auto CanFoldTerminatingCondition = canFoldTermCondOfLoop(L, SE, DT, LI);
+    if (CanFoldTerminatingCondition) {
+      BasicBlock *LoopPreheader = L->getLoopPreheader();
+      BasicBlock *LoopLatch = L->getLoopLatch();
+
+      PHINode *ToFold = CanFoldTerminatingCondition->first;
+      PHINode *ToHelpFold = CanFoldTerminatingCondition->second;
+
+      LLVM_DEBUG(dbgs() << "To fold phi-node:\n"
+                        << *ToFold << "\n"
+                        << "New term-cond phi-node:\n"
+                        << *ToHelpFold << "\n");
+
+      Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
+      Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);
+
+      // SCEVExpander for both use in preheader and latch
+      const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+      SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
+      SCEVExpanderCleaner ExpCleaner(Expander);
+
+      // Create new terminating value at loop header
+      GetElementPtrInst *StartValueGEP = cast<GetElementPtrInst>(StartValue);
+      Type *PtrTy = StartValueGEP->getPointerOperand()->getType();
+
+      const SCEV *BECount = SE.getBackedgeTakenCount(L);
+      const SCEVAddRecExpr *AddRec =
+          cast<SCEVAddRecExpr>(SE.getSCEV(ToHelpFold));
+
+      // TermValue = Start + Stride * (BackedgeCount + 1)
+      const SCEV *TermValueS = SE.getAddExpr(
+          AddRec->getOperand(0),
+          SE.getTruncateOrZeroExtend(
+              SE.getMulExpr(
+                  AddRec->getOperand(1),
+                  SE.getTruncateOrZeroExtend(
+                      SE.getAddExpr(BECount, SE.getOne(BECount->getType())),
+                      AddRec->getOperand(1)->getType())),
+              AddRec->getOperand(0)->getType()));
+
+      // NOTE: If this is triggered, we should add this into predicate
+      if (!Expander.isSafeToExpand(TermValueS)) {
+        LLVMContext &Ctx = L->getHeader()->getContext();
+        Ctx.emitError(
+            "Terminating value is not safe to expand, need to add it to "
+            "predicate");
+      } else { // Now we replace the condition with ToHelpFold and remove ToFold
+        Changed = true;
+        NumTermFold++;
+
+        Value *TermValue = Expander.expandCodeFor(
+            TermValueS, PtrTy, LoopPreheader->getTerminator());
+
+        LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"
+                          << *StartValue << "\n"
+                          << "Terminating value of new term-cond phi-node:\n"
+                          << *TermValue << "\n");
+
+        // Create new terminating condition at loop latch
+        BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
+        ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition());
+        IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
+        Value *NewTermCond = LatchBuilder.CreateICmp(
+            OldTermCond->getPredicate(), LoopValue, TermValue,
+            "lsr_fold_term_cond.replaced_term_cond");
+
+        LLVM_DEBUG(dbgs() << "Old term-cond:\n"
+                          << *OldTermCond << "\n"
+                          << "New term-cond:\b" << *NewTermCond << "\n");
+
+        BI->setCondition(NewTermCond);
+
+        OldTermCond->eraseFromParent();
+        DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
+      }
+
+      ExpCleaner.markResultUsed();
+    }
+  }
+
   if (SalvageableDVIRecords.empty())
     return Changed;
 

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
new file mode 100644
index 0000000000000..ba41ddaeb028e
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
@@ -0,0 +1,160 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes="loop-reduce" -S -debug -lsr-term-fold 2>&1 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-n32"
+
+define i32 @loop_variant(ptr %ar, i32 %n, i32 %m) {
+; CHECK: Cannot fold on backedge that is loop variant
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %n.addr.0 = phi i32 [ %n, %entry ], [ %mul, %for.cond ]
+  %cmp = icmp slt i32 %n.addr.0, %m
+  %mul = shl nsw i32 %n.addr.0, 1
+  br i1 %cmp, label %for.cond, label %for.end
+
+for.end:                                          ; preds = %for.cond
+  ret i32 %n.addr.0
+}
+
+define i32 @nested_loop(ptr %ar, i32 %n, i32 %m, i32 %o) {
+; CHECK: Cannot fold on backedge that is loop variant
+; CHECK: Cannot fold on non-innermost loop
+entry:
+  %cmp15 = icmp sgt i32 %o, 0
+  br i1 %cmp15, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup3, %entry
+  %cnt.0.lcssa = phi i32 [ 0, %entry ], [ %cnt.1.lcssa, %for.cond.cleanup3 ]
+  ret i32 %cnt.0.lcssa
+
+for.body:                                         ; preds = %entry, %for.cond.cleanup3
+  %i.017 = phi i32 [ %inc6, %for.cond.cleanup3 ], [ 0, %entry ]
+  %cnt.016 = phi i32 [ %cnt.1.lcssa, %for.cond.cleanup3 ], [ 0, %entry ]
+  %sub = sub nsw i32 %n, %i.017
+  %cmp212 = icmp slt i32 %sub, %m
+  br i1 %cmp212, label %for.body4, label %for.cond.cleanup3
+
+for.cond.cleanup3:                                ; preds = %for.body4, %for.body
+  %cnt.1.lcssa = phi i32 [ %cnt.016, %for.body ], [ %inc, %for.body4 ]
+  %inc6 = add nuw nsw i32 %i.017, 1
+  %cmp = icmp slt i32 %inc6, %o
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body4:                                        ; preds = %for.body, %for.body4
+  %j.014 = phi i32 [ %mul, %for.body4 ], [ %sub, %for.body ]
+  %cnt.113 = phi i32 [ %inc, %for.body4 ], [ %cnt.016, %for.body ]
+  %inc = add nsw i32 %cnt.113, 1
+  %mul = shl nsw i32 %j.014, 1
+  %cmp2 = icmp slt i32 %mul, %m
+  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
+}
+
+; The terminating condition folding transformation cannot find the ptr IV
+; because it checks if the value comes from the LoopPreheader. %mark is from
+; the function argument, so it is not qualified for the transformation.
+define void @no_iv_to_help(ptr %mark, i32 signext %length) {
+; CHECK: Cannot find other AddRec IV to help folding
+entry:
+  %tobool.not3 = icmp eq i32 %length, 0
+  br i1 %tobool.not3, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %dec, %for.body ], [ %length, %entry ]
+  %dst.04 = phi ptr [ %incdec.ptr, %for.body ], [ %mark, %entry ]
+  %0 = load ptr, ptr %dst.04, align 8
+  call ptr @foo(ptr %0)
+  %incdec.ptr = getelementptr inbounds ptr, ptr %dst.04, i64 1
+  %dec = add nsw i32 %i.05, -1
+  %tobool.not = icmp eq i32 %dec, 0
+  br i1 %tobool.not, label %for.cond.cleanup, label %for.body
+}
+
+declare void @foo(ptr)
+
+define void @NonAddRecIV(ptr %a) {
+; CHECK: SCEV of phi '  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 1, %entry ]'
+; CHECK: is not an affine add recursion, not qualified for the terminating condition folding.
+entry:
+  %uglygep = getelementptr i8, ptr %a, i32 84
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 1, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = mul nsw i32 %lsr.iv, 2
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4
+  %exitcond.not = icmp eq i32 %lsr.iv.next, 65536
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+ at fp_inc = common global float 0.000000e+00, align 4
+
+define void @NonSCEVableIV(float %init, float* %A, i32 %N) {
+; CHECK: IV of phi '  %x.05 = phi float [ %init, %entry ], [ %add, %for.body ]'
+; CHECK: is not SCEV-able, not qualified for the terminating condition folding.
+entry:
+  %0 = load float, float* @fp_inc, align 4
+  br label %for.body
+
+for.body:                                         ; preds = %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %x.05 = phi float [ %init, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+  store float %x.05, float* %arrayidx, align 4
+  %add = fsub float %x.05, %0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.end
+  ret void
+}
+
+define void @NonIcmpEqNe(ptr %a) {
+; CHECK: Cannot fold on branching condition that is not an ICmpInst::eq / ICmpInst::ne
+entry:
+  %uglygep = getelementptr i8, ptr %a, i64 84
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i64 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4
+  %exitcond.not = icmp sle i64 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+define void @TermCondMoreThanOneUse(ptr %a) {
+;CHECK: Cannot replace terminating condition with more than one use
+entry:
+  %uglygep = getelementptr i8, ptr %a, i64 84
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i64 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4
+  %exitcond.not = icmp eq i64 %lsr.iv.next, 0
+  %dummy = select i1 %exitcond.not, i8 0, i8 1
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
index aea8b6e8a68de..30fbed966dfa3 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes="loop-reduce" -S | FileCheck %s
+; RUN: opt < %s -passes="loop-reduce" -S -lsr-term-fold | FileCheck %s
 
 ; There are 3 test cases here regarding the replacing the primary iv with
 ; other affine AddRec IV.
@@ -20,15 +20,14 @@ define void @const_tripcount(ptr %a) {
 ; CHECK-LABEL: @const_tripcount(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i32 1600
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 379, [[ENTRY]] ]
 ; CHECK-NEXT:    store i32 1, ptr [[LSR_IV1]], align 4
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
 ; CHECK-NEXT:    [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]]
+; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;
@@ -59,15 +58,16 @@ define void @runtime_tripcount(ptr %a, i32 %N) {
 ; CHECK-LABEL: @runtime_tripcount(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[N:%.*]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 84
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP1]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N:%.*]], [[ENTRY]] ]
 ; CHECK-NEXT:    store i32 1, ptr [[LSR_IV1]], align 4
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], -1
 ; CHECK-NEXT:    [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]]
+; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;
@@ -100,17 +100,17 @@ for.end:                                          ; preds = %for.body
 define void @ptr_of_ptr_addrec(ptr %ptrptr, i32 %length) {
 ; CHECK-LABEL: @ptr_of_ptr_addrec(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[START_PTRPTR:%.*]] = getelementptr inbounds ptr, ptr [[PTRPTR:%.*]]
+; CHECK-NEXT:    [[START_PTRPTR:%.*]] = getelementptr ptr, ptr [[PTRPTR:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[LENGTH:%.*]], 2
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i32 [[TMP0]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_BODY]] ], [ [[LENGTH:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[IT_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[START_PTRPTR]], [[ENTRY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[IT_04]], align 8
-; CHECK-NEXT:    tail call void @foo(ptr [[TMP0]])
+; CHECK-NEXT:    [[IT_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[START_PTRPTR]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[IT_04]], align 8
+; CHECK-NEXT:    tail call void @foo(ptr [[TMP1]])
 ; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[IT_04]], i64 1
-; CHECK-NEXT:    [[DEC]] = add i32 [[I_05]], -1
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[UGLYGEP]]
+; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;


        


More information about the llvm-commits mailing list