[llvm] r345877 - [LoopInterchange] Remove support for inner-only reductions.

Thu Nov 1 12:25:00 PDT 2018

Author: fhahn
Date: Thu Nov  1 12:25:00 2018
New Revision: 345877

URL: http://llvm.org/viewvc/llvm-project?rev=345877&view=rev
Log:
[LoopInterchange] Remove support for inner-only reductions.

Inner-loop only reductions require additional checks to make sure they
form a load-phi-store cycle across inner and outer loop. Otherwise the
reduction value is not properly preserved. This patch disables
interchanging such loops for now, as it causes miscompiles in some
cases and it seems to apply to only a tiny number of loops. Across the
test-suite, SPEC2000 and SPEC2006, 61 instead of 62 loops are
interchanged with inner loop reduction support disabled. With
-loop-interchange-threshold=-1000, 3256 instead of 3267 loops are interchanged.

See the discussion and history of D53027 for an outline of what such legality
checks could look like.

Reviewers: efriedma, mcrosier, davide

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D53027

Added:
    llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll
Removed:
    llvm/trunk/test/Transforms/LoopInterchange/reductions.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp
    llvm/trunk/test/Transforms/LoopInterchange/lcssa.ll
    llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp?rev=345877&r1=345876&r2=345877&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp Thu Nov  1 12:25:00 2018
@@ -339,16 +339,10 @@ public:
 
   bool currentLimitations();
 
-  bool hasInnerLoopReduction() { return InnerLoopHasReduction; }
-
 private:
   bool tightlyNested(Loop *Outer, Loop *Inner);
-  bool containsUnsafeInstructionsInHeader(BasicBlock *BB);
-  bool areAllUsesReductions(Instruction *Ins, Loop *L);
-  bool containsUnsafeInstructionsInLatch(BasicBlock *BB);
-  bool findInductionAndReductions(Loop *L,
-                                  SmallVector<PHINode *, 8> &Inductions,
-                                  SmallVector<PHINode *, 8> &Reductions);
+  bool containsUnsafeInstructions(BasicBlock *BB);
+  bool findInductions(Loop *L, SmallVector<PHINode *, 8> &Inductions);
 
   Loop *OuterLoop;
   Loop *InnerLoop;
@@ -358,7 +352,6 @@ private:
   /// Interface to emit optimization remarks.
   OptimizationRemarkEmitter *ORE;
 
-  bool InnerLoopHasReduction = false;
 };
 
 /// LoopInterchangeProfitability checks if it is profitable to interchange the
@@ -391,11 +384,9 @@ class LoopInterchangeTransform {
 public:
   LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
                            LoopInfo *LI, DominatorTree *DT,
-                           BasicBlock *LoopNestExit,
-                           bool InnerLoopContainsReductions)
+                           BasicBlock *LoopNestExit)
       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
-        LoopExit(LoopNestExit),
-        InnerLoopHasReduction(InnerLoopContainsReductions) {}
+        LoopExit(LoopNestExit) {}
 
   /// Interchange OuterLoop and InnerLoop.
   bool transform();
@@ -420,7 +411,6 @@ private:
   LoopInfo *LI;
   DominatorTree *DT;
   BasicBlock *LoopExit;
-  bool InnerLoopHasReduction;
 };
 
 // Main LoopInterchange Pass.
@@ -571,7 +561,7 @@ struct LoopInterchange : public LoopPass
     });
 
     LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
-                                 LoopNestExit, LIL.hasInnerLoopReduction());
+                                 LoopNestExit);
     LIT.transform();
     LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
     LoopsInterchanged++;
@@ -581,42 +571,12 @@ struct LoopInterchange : public LoopPass
 
 } // end anonymous namespace
 
-bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) {
-  return llvm::none_of(Ins->users(), [=](User *U) -> bool {
-    auto *UserIns = dyn_cast<PHINode>(U);
-    RecurrenceDescriptor RD;
-    return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD);
+bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
+  return any_of(*BB, [](const Instruction &I) {
+    return I.mayHaveSideEffects() || I.mayReadFromMemory();
   });
 }
 
-bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader(
-    BasicBlock *BB) {
-  for (Instruction &I : *BB) {
-    // Load corresponding to reduction PHI's are safe while concluding if
-    // tightly nested.
-    if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
-      if (!areAllUsesReductions(L, InnerLoop))
-        return true;
-    } else if (I.mayHaveSideEffects() || I.mayReadFromMemory())
-      return true;
-  }
-  return false;
-}
-
-bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch(
-    BasicBlock *BB) {
-  for (Instruction &I : *BB) {
-    // Stores corresponding to reductions are safe while concluding if tightly
-    // nested.
-    if (StoreInst *L = dyn_cast<StoreInst>(&I)) {
-      if (!isa<PHINode>(L->getOperand(0)))
-        return true;
-    } else if (I.mayHaveSideEffects() || I.mayReadFromMemory())
-      return true;
-  }
-  return false;
-}
-
 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
   BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
   BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
@@ -640,8 +600,8 @@ bool LoopInterchangeLegality::tightlyNes
   LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
   // We do not have any basic block in between now make sure the outer header
   // and outer loop latch doesn't contain any unsafe instructions.
-  if (containsUnsafeInstructionsInHeader(OuterLoopHeader) ||
-      containsUnsafeInstructionsInLatch(OuterLoopLatch))
+  if (containsUnsafeInstructions(OuterLoopHeader) ||
+      containsUnsafeInstructions(OuterLoopLatch))
     return false;
 
   LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
@@ -673,9 +633,8 @@ bool LoopInterchangeLegality::isLoopStru
   return true;
 }
 
-bool LoopInterchangeLegality::findInductionAndReductions(
-    Loop *L, SmallVector<PHINode *, 8> &Inductions,
-    SmallVector<PHINode *, 8> &Reductions) {
+bool LoopInterchangeLegality::findInductions(
+    Loop *L, SmallVector<PHINode *, 8> &Inductions) {
   if (!L->getLoopLatch() || !L->getLoopPredecessor())
     return false;
   for (PHINode &PHI : L->getHeader()->phis()) {
@@ -683,11 +642,8 @@ bool LoopInterchangeLegality::findInduct
     InductionDescriptor ID;
     if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
       Inductions.push_back(&PHI);
-    else if (RecurrenceDescriptor::isReductionPHI(&PHI, L, RD))
-      Reductions.push_back(&PHI);
     else {
-      LLVM_DEBUG(
-          dbgs() << "Failed to recognize PHI as an induction or reduction.\n");
+      LLVM_DEBUG(dbgs() << "Failed to recognize PHI as an induction.\n");
       return false;
     }
   }
@@ -737,8 +693,7 @@ bool LoopInterchangeLegality::currentLim
 
   PHINode *InnerInductionVar;
   SmallVector<PHINode *, 8> Inductions;
-  SmallVector<PHINode *, 8> Reductions;
-  if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) {
+  if (!findInductions(InnerLoop, Inductions)) {
     LLVM_DEBUG(
         dbgs() << "Only inner loops with induction or reduction PHI nodes "
                << "are supported currently.\n");
@@ -766,12 +721,9 @@ bool LoopInterchangeLegality::currentLim
     });
     return true;
   }
-  if (Reductions.size() > 0)
-    InnerLoopHasReduction = true;
 
   InnerInductionVar = Inductions.pop_back_val();
-  Reductions.clear();
-  if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) {
+  if (!findInductions(OuterLoop, Inductions)) {
     LLVM_DEBUG(
         dbgs() << "Only outer loops with induction or reduction PHI nodes "
                << "are supported currently.\n");
@@ -785,20 +737,6 @@ bool LoopInterchangeLegality::currentLim
     return true;
   }
 
-  // Outer loop cannot have reduction because then loops will not be tightly
-  // nested.
-  if (!Reductions.empty()) {
-    LLVM_DEBUG(dbgs() << "Outer loops with reductions are not supported "
-                      << "currently.\n");
-    ORE->emit([&]() {
-      return OptimizationRemarkMissed(DEBUG_TYPE, "ReductionsOuter",
-                                      OuterLoop->getStartLoc(),
-                                      OuterLoop->getHeader())
-             << "Outer loops with reductions cannot be interchangeed "
-                "currently.";
-    });
-    return true;
-  }
   // TODO: Currently we handle only loops with 1 induction variable.
   if (Inductions.size() != 1) {
     LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
@@ -1449,34 +1387,11 @@ bool LoopInterchangeTransform::adjustLoo
   // replaced by Inners'.
   updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
 
-  // Now update the reduction PHIs in the inner and outer loop headers.
-  SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
-  for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1))
-    InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
-  for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1))
-    OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
-
-  for (PHINode *PHI : OuterLoopPHIs)
-    PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
-
-  // Move the PHI nodes from the inner loop header to the outer loop header.
-  // We have to deal with one kind of PHI nodes:
-  //  1) PHI nodes that are part of inner loop-only reductions.
-  // We only have to move the PHI node and update the incoming blocks.
-  for (PHINode *PHI : InnerLoopPHIs) {
-    PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
-    for (BasicBlock *InBB : PHI->blocks()) {
-      if (InnerLoop->contains(InBB))
-        continue;
-
-      assert(!isa<PHINode>(PHI->getIncomingValueForBlock(InBB)) &&
-             "Unexpected incoming PHI node, reductions in outer loop are not "
-             "supported yet");
-      PHI->replaceAllUsesWith(PHI->getIncomingValueForBlock(InBB));
-      PHI->eraseFromParent();
-      break;
-    }
-  }
+  // Make sure we have no other PHIs.
+  auto InnerPhis = drop_begin(InnerLoopHeader->phis(), 1);
+  auto OuterPhis = drop_begin(OuterLoopHeader->phis(), 1);
+  assert(begin(InnerPhis) == end(InnerPhis) && "Unexpected PHIs in inner loop");
+  assert(begin(OuterPhis) == end(OuterPhis) && "Unexpected PHis in outer loop");
 
   // Update the incoming blocks for moved PHI nodes.
   updateIncomingBlock(OuterLoopHeader, InnerLoopPreHeader, OuterLoopPreHeader);

Added: llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll?rev=345877&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll (added)
+++ llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll Thu Nov  1 12:25:00 2018
@@ -0,0 +1,124 @@
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN:     -verify-dom-info -verify-loop-info 2>&1 | FileCheck -check-prefix=IR %s
+; RUN: FileCheck --input-file=%t %s
+
+; Inner loop only reductions are not supported currently. See discussion at
+; D53027 for more information on the required checks.
+
+ at A = common global [500 x [500 x i32]] zeroinitializer
+ at X = common global i32 0
+ at B = common global [500 x [500 x i32]] zeroinitializer
+ at Y = common global i32 0
+
+;; global X
+
+;;  for( int i=1;i<N;i++)
+;;    for( int j=1;j<N;j++)
+;;      X+=A[j][i];
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            UnsupportedPHI
+; CHECK-NEXT: Function:        reduction_01
+
+; IR-LABEL: @reduction_01(
+; IR-NOT: split
+
+define void @reduction_01(i32 %N) {
+entry:
+  %cmp16 = icmp sgt i32 %N, 1
+  br i1 %cmp16, label %for.body3.lr.ph, label %for.end8
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.for.inc6_crit_edge, %entry
+  %indvars.iv18 = phi i64 [ %indvars.iv.next19, %for.cond1.for.inc6_crit_edge ], [ 1, %entry ]
+  %X.promoted = load i32, i32* @X
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+  %add15 = phi i32 [ %X.promoted, %for.body3.lr.ph ], [ %add, %for.body3 ]
+  %arrayidx5 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv18
+  %0 = load i32, i32* %arrayidx5
+  %add = add nsw i32 %add15, %0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.cond1.for.inc6_crit_edge, label %for.body3
+
+for.cond1.for.inc6_crit_edge:                     ; preds = %for.body3
+  %add.lcssa = phi i32 [ %add, %for.body3 ]
+  store i32 %add.lcssa, i32* @X
+  %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
+  %lftr.wideiv20 = trunc i64 %indvars.iv.next19 to i32
+  %exitcond21 = icmp eq i32 %lftr.wideiv20, %N
+  br i1 %exitcond21, label %for.end8, label %for.body3.lr.ph
+
+for.end8:                                         ; preds = %for.cond1.for.inc6_crit_edge, %entry
+  ret void
+}
+
+;; Not tightly nested. Do not interchange.
+;;  for( int i=1;i<N;i++)
+;;    for( int j=1;j<N;j++) {
+;;      for( int k=1;k<N;k++) {
+;;        X+=A[k][j];
+;;      }
+;;      Y+=B[j][i];
+;;    }
+
+;; Not tightly nested. Do not interchange.
+;; Not interchanged hence the phi's in the inner loop will not be split.
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            UnsupportedPHIOuter
+; CHECK-NEXT: Function:        reduction_03
+
+; IR-LABEL: @reduction_03(
+; IR-NOT: split
+
+define void @reduction_03(i32 %N) {
+entry:
+  %cmp35 = icmp sgt i32 %N, 1
+  br i1 %cmp35, label %for.cond4.preheader.lr.ph, label %for.end19
+
+for.cond4.preheader.lr.ph:                        ; preds = %for.cond1.for.inc17_crit_edge, %entry
+  %indvars.iv41 = phi i64 [ %indvars.iv.next42, %for.cond1.for.inc17_crit_edge ], [ 1, %entry ]
+  %Y.promoted = load i32, i32* @Y
+  br label %for.body6.lr.ph
+
+for.body6.lr.ph:                                  ; preds = %for.cond4.for.end_crit_edge, %for.cond4.preheader.lr.ph
+  %indvars.iv37 = phi i64 [ 1, %for.cond4.preheader.lr.ph ], [ %indvars.iv.next38, %for.cond4.for.end_crit_edge ]
+  %add1334 = phi i32 [ %Y.promoted, %for.cond4.preheader.lr.ph ], [ %add13, %for.cond4.for.end_crit_edge ]
+  %X.promoted = load i32, i32* @X
+  br label %for.body6
+
+for.body6:                                        ; preds = %for.body6, %for.body6.lr.ph
+  %indvars.iv = phi i64 [ 1, %for.body6.lr.ph ], [ %indvars.iv.next, %for.body6 ]
+  %arrayidx8 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv37
+  %0 = load i32, i32* %arrayidx8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.cond4.for.end_crit_edge, label %for.body6
+
+for.cond4.for.end_crit_edge:                      ; preds = %for.body6
+  %arrayidx12 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @B, i64 0, i64 %indvars.iv37, i64 %indvars.iv41
+  %1 = load i32, i32* %arrayidx12
+  %add13 = add nsw i32 %add1334, %1
+  %indvars.iv.next38 = add nuw nsw i64 %indvars.iv37, 1
+  %lftr.wideiv39 = trunc i64 %indvars.iv.next38 to i32
+  %exitcond40 = icmp eq i32 %lftr.wideiv39, %N
+  br i1 %exitcond40, label %for.cond1.for.inc17_crit_edge, label %for.body6.lr.ph
+
+for.cond1.for.inc17_crit_edge:                    ; preds = %for.cond4.for.end_crit_edge
+  %add13.lcssa = phi i32 [ %add13, %for.cond4.for.end_crit_edge ]
+  store i32 %add13.lcssa, i32* @Y
+  %indvars.iv.next42 = add nuw nsw i64 %indvars.iv41, 1
+  %lftr.wideiv43 = trunc i64 %indvars.iv.next42 to i32
+  %exitcond44 = icmp eq i32 %lftr.wideiv43, %N
+  br i1 %exitcond44, label %for.end19, label %for.cond4.preheader.lr.ph
+
+for.end19:                                        ; preds = %for.cond1.for.inc17_crit_edge, %entry
+  ret void
+}

Modified: llvm/trunk/test/Transforms/LoopInterchange/lcssa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/lcssa.ll?rev=345877&r1=345876&r2=345877&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/lcssa.ll (original)
+++ llvm/trunk/test/Transforms/LoopInterchange/lcssa.ll Thu Nov  1 12:25:00 2018
@@ -246,7 +246,6 @@ for.body3:
 
 outer.inc:                                        ; preds = %for.body3, %outer.header
   %sv = phi i64 [ 0, %outer.header ], [ 1, %for.body3 ]
-  store i64 %sv, i64* %ptr
   %iv.outer.next = add nsw i64 %iv.outer, 1
   %cmp = icmp eq i64 %iv.outer.next, 100
   br i1 %cmp, label %outer.header, label %for.exit

Modified: llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll?rev=345877&r1=345876&r2=345877&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll (original)
+++ llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll Thu Nov  1 12:25:00 2018
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -loop-interchange-threshold=-1000 -S 2>&1 | FileCheck %s
 ;; Checks the order of the inner phi nodes does not cause havoc.
 ;; The inner loop has a reduction into c. The IV is not the first phi.
 
@@ -23,8 +23,6 @@ define void @test(i32 %T, [90 x i32]* no
 ; CHECK-NEXT:    br label [[FOR2_HEADER:%.*]]
 ; CHECK:       for2.header:
 ; CHECK-NEXT:    [[J:%.*]] = phi i32 [ [[INC17:%.*]], [[FOR2_INC16:%.*]] ], [ 0, [[FOR2_HEADER_PREHEADER]] ]
-; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [90 x i32], [90 x i32]* [[C:%.*]], i32 [[I]], i32 [[J]]
-; CHECK-NEXT:    [[ARRAYIDX14_PROMOTED:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4
 ; CHECK-NEXT:    br label [[FOR3_SPLIT1:%.*]]
 ; CHECK:       for3.preheader:
 ; CHECK-NEXT:    br label [[FOR3:%.*]]
@@ -35,15 +33,14 @@ define void @test(i32 %T, [90 x i32]* no
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[K]], [[MUL]]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i32 [[ADD]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
-; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
-; CHECK-NEXT:    [[ADD15:%.*]] = add nsw i32 [[CONV]], [[ARRAYIDX14_PROMOTED]]
+; CHECK-NEXT:    [[ADD15:%.*]] = add nsw i16 [[TMP0]], 1
+; CHECK-NEXT:    store i16 [[ADD15]], i16* [[ARRAYIDX]]
 ; CHECK-NEXT:    br label [[FOR2_INC16]]
 ; CHECK:       for3.split:
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[K]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR3]]
 ; CHECK:       for2.inc16:
-; CHECK-NEXT:    store i32 [[ADD15]], i32* [[ARRAYIDX14]], align 4
 ; CHECK-NEXT:    [[INC17]] = add nuw nsw i32 [[J]], 1
 ; CHECK-NEXT:    [[EXITCOND47:%.*]] = icmp eq i32 [[INC17]], 90
 ; CHECK-NEXT:    br i1 [[EXITCOND47]], label [[FOR1_INC19]], label [[FOR2_HEADER]]
@@ -66,25 +63,20 @@ for1.header:
 
 for2.header:                                  ; preds = %for2.inc16, %for1.header
   %j = phi i32 [ 0, %for1.header ], [ %inc17, %for2.inc16 ]
-  %arrayidx14 = getelementptr inbounds [90 x i32], [90 x i32]* %C, i32 %i, i32 %j
-  %arrayidx14.promoted = load i32, i32* %arrayidx14, align 4
   br label %for3
 
 for3:                                        ; preds = %for3, %for2.header
-  %add1541 = phi i32 [ %arrayidx14.promoted, %for2.header ], [ %add15, %for3 ]
   %k = phi i32 [ 1, %for2.header ], [ %inc, %for3 ]
   %add = add nsw i32 %k, %mul
   %arrayidx = getelementptr inbounds i16, i16* %A, i32 %add
   %0 = load i16, i16* %arrayidx, align 2
-  %conv = sext i16 %0 to i32
-  %add15 = add nsw i32 %conv, %add1541
+  %add15 = add nsw i16 %0, 1
+  store i16 %add15, i16* %arrayidx
   %inc = add nuw nsw i32 %k, 1
   %exitcond = icmp eq i32 %inc, 90
   br i1 %exitcond, label %for2.inc16, label %for3
 
 for2.inc16:                                        ; preds = %for.body6
-  %add15.lcssa = phi i32 [ %add15, %for3 ]
-  store i32 %add15.lcssa, i32* %arrayidx14, align 4
   %inc17 = add nuw nsw i32 %j, 1
   %exitcond47 = icmp eq i32 %inc17, 90
   br i1 %exitcond47, label %for1.inc19, label %for2.header

Removed: llvm/trunk/test/Transforms/LoopInterchange/reductions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/reductions.ll?rev=345876&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/reductions.ll (original)
+++ llvm/trunk/test/Transforms/LoopInterchange/reductions.ll (removed)
@@ -1,272 +0,0 @@
-; REQUIRES: asserts
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info -verify-loop-lcssa -S -debug 2>&1 | FileCheck %s
-
- at A = common global [500 x [500 x i32]] zeroinitializer
- at X = common global i32 0
- at B = common global [500 x [500 x i32]] zeroinitializer
- at Y = common global i32 0
-
-;;  for( int i=1;i<N;i++)
-;;    for( int j=1;j<N;j++)
-;;      X+=A[j][i];
-
-;; Loop is interchanged check that the phi nodes are split and the promoted value is used instead of the reduction phi.
-; CHECK: Loops interchanged.
-
-define void @reduction_01(i32 %N) {
-entry:
-  %cmp16 = icmp sgt i32 %N, 1
-  br i1 %cmp16, label %for.body3.lr.ph, label %for.end8
-
-for.body3.lr.ph:                                  ; preds = %for.cond1.for.inc6_crit_edge, %entry
-  %indvars.iv18 = phi i64 [ %indvars.iv.next19, %for.cond1.for.inc6_crit_edge ], [ 1, %entry ]
-  %X.promoted = load i32, i32* @X
-  br label %for.body3
-
-for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
-  %indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
-  %add15 = phi i32 [ %X.promoted, %for.body3.lr.ph ], [ %add, %for.body3 ]
-  %arrayidx5 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv18
-  %0 = load i32, i32* %arrayidx5
-  %add = add nsw i32 %add15, %0
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %N
-  br i1 %exitcond, label %for.cond1.for.inc6_crit_edge, label %for.body3
-
-for.cond1.for.inc6_crit_edge:                     ; preds = %for.body3
-  %add.lcssa = phi i32 [ %add, %for.body3 ]
-  store i32 %add.lcssa, i32* @X
-  %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
-  %lftr.wideiv20 = trunc i64 %indvars.iv.next19 to i32
-  %exitcond21 = icmp eq i32 %lftr.wideiv20, %N
-  br i1 %exitcond21, label %for.end8, label %for.body3.lr.ph
-
-for.end8:                                         ; preds = %for.cond1.for.inc6_crit_edge, %entry
-  ret void
-}
-
-;; Test for more than 1 reductions inside a loop.
-;;  for( int i=1;i<N;i++)
-;;    for( int j=1;j<N;j++)
-;;      for( int k=1;k<N;k++) {
-;;        X+=A[k][j];
-;;        Y+=B[k][i];
-;;      }
-
-;; Loop is interchanged check that the phi nodes are split and the promoted value is used instead of the reduction phi.
-; CHECK: Loops interchanged.
-
-define void @reduction_02(i32 %N) {
-entry:
-  %cmp34 = icmp sgt i32 %N, 1
-  br i1 %cmp34, label %for.cond4.preheader.preheader, label %for.end19
-
-for.cond4.preheader.preheader:                    ; preds = %for.inc17, %entry
-  %indvars.iv40 = phi i64 [ %indvars.iv.next41, %for.inc17 ], [ 1, %entry ]
-  br label %for.body6.lr.ph
-
-for.body6.lr.ph:                                  ; preds = %for.cond4.for.inc14_crit_edge, %for.cond4.preheader.preheader
-  %indvars.iv36 = phi i64 [ %indvars.iv.next37, %for.cond4.for.inc14_crit_edge ], [ 1, %for.cond4.preheader.preheader ]
-  %X.promoted = load i32, i32* @X
-  %Y.promoted = load i32, i32* @Y
-  br label %for.body6
-
-for.body6:                                        ; preds = %for.body6, %for.body6.lr.ph
-  %indvars.iv = phi i64 [ 1, %for.body6.lr.ph ], [ %indvars.iv.next, %for.body6 ]
-  %add1331 = phi i32 [ %Y.promoted, %for.body6.lr.ph ], [ %add13, %for.body6 ]
-  %add30 = phi i32 [ %X.promoted, %for.body6.lr.ph ], [ %add, %for.body6 ]
-  %arrayidx8 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv36
-  %0 = load i32, i32* %arrayidx8
-  %add = add nsw i32 %add30, %0
-  %arrayidx12 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv40
-  %1 = load i32, i32* %arrayidx12
-  %add13 = add nsw i32 %add1331, %1
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %N
-  br i1 %exitcond, label %for.cond4.for.inc14_crit_edge, label %for.body6
-
-for.cond4.for.inc14_crit_edge:                    ; preds = %for.body6
-  %add.lcssa = phi i32 [ %add, %for.body6 ]
-  %add13.lcssa = phi i32 [ %add13, %for.body6 ]
-  store i32 %add.lcssa, i32* @X
-  store i32 %add13.lcssa, i32* @Y
-  %indvars.iv.next37 = add nuw nsw i64 %indvars.iv36, 1
-  %lftr.wideiv38 = trunc i64 %indvars.iv.next37 to i32
-  %exitcond39 = icmp eq i32 %lftr.wideiv38, %N
-  br i1 %exitcond39, label %for.inc17, label %for.body6.lr.ph
-
-for.inc17:                                        ; preds = %for.cond4.for.inc14_crit_edge
-  %add.lcssa.lcssa = phi i32 [ %add.lcssa, %for.cond4.for.inc14_crit_edge ]
-  %indvars.iv.next41 = add nuw nsw i64 %indvars.iv40, 1
-  %lftr.wideiv42 = trunc i64 %indvars.iv.next41 to i32
-  %exitcond43 = icmp eq i32 %lftr.wideiv42, %N
-  br i1 %exitcond43, label %for.end19, label %for.cond4.preheader.preheader
-
-for.end19:                                        ; preds = %for.inc17, %entry
-  %res1 = phi i32 [ 0, %entry ], [ %add.lcssa.lcssa, %for.inc17 ]
-  store i32 %res1, i32* @X
-  ret void
-}
-
-;; Not tightly nested. Do not interchange.
-;;  for( int i=1;i<N;i++)
-;;    for( int j=1;j<N;j++) {
-;;      for( int k=1;k<N;k++) {
-;;        X+=A[k][j];
-;;      }
-;;      Y+=B[j][i];
-;;    }
-
-;; Not tightly nested. Do not interchange.
-;; Not interchanged hence the phi's in the inner loop will not be split.
-; CHECK: Outer loops with reductions are not supported currently.
-
-define void @reduction_03(i32 %N) {
-entry:
-  %cmp35 = icmp sgt i32 %N, 1
-  br i1 %cmp35, label %for.cond4.preheader.lr.ph, label %for.end19
-
-for.cond4.preheader.lr.ph:                        ; preds = %for.cond1.for.inc17_crit_edge, %entry
-  %indvars.iv41 = phi i64 [ %indvars.iv.next42, %for.cond1.for.inc17_crit_edge ], [ 1, %entry ]
-  %Y.promoted = load i32, i32* @Y
-  br label %for.body6.lr.ph
-
-for.body6.lr.ph:                                  ; preds = %for.cond4.for.end_crit_edge, %for.cond4.preheader.lr.ph
-  %indvars.iv37 = phi i64 [ 1, %for.cond4.preheader.lr.ph ], [ %indvars.iv.next38, %for.cond4.for.end_crit_edge ]
-  %add1334 = phi i32 [ %Y.promoted, %for.cond4.preheader.lr.ph ], [ %add13, %for.cond4.for.end_crit_edge ]
-  %X.promoted = load i32, i32* @X
-  br label %for.body6
-
-for.body6:                                        ; preds = %for.body6, %for.body6.lr.ph
-  %indvars.iv = phi i64 [ 1, %for.body6.lr.ph ], [ %indvars.iv.next, %for.body6 ]
-  %add31 = phi i32 [ %X.promoted, %for.body6.lr.ph ], [ %add, %for.body6 ]
-  %arrayidx8 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv37
-  %0 = load i32, i32* %arrayidx8
-  %add = add nsw i32 %add31, %0
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %N
-  br i1 %exitcond, label %for.cond4.for.end_crit_edge, label %for.body6
-
-for.cond4.for.end_crit_edge:                      ; preds = %for.body6
-  %add.lcssa = phi i32 [ %add, %for.body6 ]
-  store i32 %add.lcssa, i32* @X
-  %arrayidx12 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @B, i64 0, i64 %indvars.iv37, i64 %indvars.iv41
-  %1 = load i32, i32* %arrayidx12
-  %add13 = add nsw i32 %add1334, %1
-  %indvars.iv.next38 = add nuw nsw i64 %indvars.iv37, 1
-  %lftr.wideiv39 = trunc i64 %indvars.iv.next38 to i32
-  %exitcond40 = icmp eq i32 %lftr.wideiv39, %N
-  br i1 %exitcond40, label %for.cond1.for.inc17_crit_edge, label %for.body6.lr.ph
-
-for.cond1.for.inc17_crit_edge:                    ; preds = %for.cond4.for.end_crit_edge
-  %add13.lcssa = phi i32 [ %add13, %for.cond4.for.end_crit_edge ]
-  store i32 %add13.lcssa, i32* @Y
-  %indvars.iv.next42 = add nuw nsw i64 %indvars.iv41, 1
-  %lftr.wideiv43 = trunc i64 %indvars.iv.next42 to i32
-  %exitcond44 = icmp eq i32 %lftr.wideiv43, %N
-  br i1 %exitcond44, label %for.end19, label %for.cond4.preheader.lr.ph
-
-for.end19:                                        ; preds = %for.cond1.for.inc17_crit_edge, %entry
-  ret void
-}
-
-;; Multiple use of reduction not safe. Do not interchange.
-;;  for( int i=1;i<N;i++)
-;;    for( int j=1;j<N;j++)
-;;      for( int k=1;k<N;k++) {
-;;        X+=A[k][j];
-;;        Y+=X;
-;;      }
-
-;; Not interchanged hence the phi's in the inner loop will not be split.
-; CHECK: Only inner loops with induction or reduction PHI nodes are supported currently.
-
-define void @reduction_04(i32 %N) {
-entry:
-  %cmp28 = icmp sgt i32 %N, 1
-  br i1 %cmp28, label %for.cond4.preheader.preheader, label %for.end15
-
-for.cond4.preheader.preheader:                    ; preds = %for.inc13, %entry
-  %i.029 = phi i32 [ %inc14, %for.inc13 ], [ 1, %entry ]
-  br label %for.body6.lr.ph
-
-for.body6.lr.ph:                                  ; preds = %for.cond4.for.inc10_crit_edge, %for.cond4.preheader.preheader
-  %indvars.iv30 = phi i64 [ %indvars.iv.next31, %for.cond4.for.inc10_crit_edge ], [ 1, %for.cond4.preheader.preheader ]
-  %X.promoted = load i32, i32* @X
-  %Y.promoted = load i32, i32* @Y
-  br label %for.body6
-
-for.body6:                                        ; preds = %for.body6, %for.body6.lr.ph
-  %indvars.iv = phi i64 [ 1, %for.body6.lr.ph ], [ %indvars.iv.next, %for.body6 ]
-  %add925 = phi i32 [ %Y.promoted, %for.body6.lr.ph ], [ %add9, %for.body6 ]
-  %add24 = phi i32 [ %X.promoted, %for.body6.lr.ph ], [ %add, %for.body6 ]
-  %arrayidx8 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv30
-  %0 = load i32, i32* %arrayidx8
-  %add = add nsw i32 %add24, %0
-  %add9 = add nsw i32 %add925, %add
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %N
-  br i1 %exitcond, label %for.cond4.for.inc10_crit_edge, label %for.body6
-
-for.cond4.for.inc10_crit_edge:                    ; preds = %for.body6
-  %add.lcssa = phi i32 [ %add, %for.body6 ]
-  %add9.lcssa = phi i32 [ %add9, %for.body6 ]
-  store i32 %add.lcssa, i32* @X
-  store i32 %add9.lcssa, i32* @Y
-  %indvars.iv.next31 = add nuw nsw i64 %indvars.iv30, 1
-  %lftr.wideiv32 = trunc i64 %indvars.iv.next31 to i32
-  %exitcond33 = icmp eq i32 %lftr.wideiv32, %N
-  br i1 %exitcond33, label %for.inc13, label %for.body6.lr.ph
-
-for.inc13:                                        ; preds = %for.cond4.for.inc10_crit_edge
-  %inc14 = add nuw nsw i32 %i.029, 1
-  %exitcond34 = icmp eq i32 %inc14, %N
-  br i1 %exitcond34, label %for.end15, label %for.cond4.preheader.preheader
-
-for.end15:                                        ; preds = %for.inc13, %entry
-  ret void
-}
-
-;;  for( int i=1;i<N;i++)
-;;    for( int j=1;j<N;j++)
-;;      X+=A[j][i];
-;;  Y = X
-; CHECK: Loops interchanged.
-define void @reduction_05(i32 %N) {
-entry:
-  %cmp16 = icmp sgt i32 %N, 1
-  br i1 %cmp16, label %for.body7.lr.ph, label %for.end8
-
-for.body7.lr.ph:                                  ; preds = %for.cond1.for.inc6_crit_edge, %entry
-  %indvars.iv18 = phi i64 [ %indvars.iv.next19, %for.cond1.for.inc6_crit_edge ], [ 1, %entry ]
-  %X.promoted = load i32, i32* @X
-  br label %for.body7
-
-for.body7:                                        ; preds = %for.body7, %for.body7.lr.ph
-  %indvars.iv = phi i64 [ 1, %for.body7.lr.ph ], [ %indvars.iv.next, %for.body7 ]
-  %add15 = phi i32 [ %X.promoted, %for.body7.lr.ph ], [ %add, %for.body7 ]
-  %arrayidx5 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv18
-  %0 = load i32, i32* %arrayidx5
-  %add = add nsw i32 %add15, %0
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %N
-  br i1 %exitcond, label %for.cond1.for.inc6_crit_edge, label %for.body7
-
-for.cond1.for.inc6_crit_edge:                     ; preds = %for.body7
-  %add.lcssa = phi i32 [ %add, %for.body7 ]
-  store i32 %add.lcssa, i32* @X
-  %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
-  %lftr.wideiv20 = trunc i64 %indvars.iv.next19 to i32
-  %exitcond21 = icmp eq i32 %lftr.wideiv20, %N
-  br i1 %exitcond21, label %for.end8, label %for.body7.lr.ph
-
-for.end8:                                         ; preds = %for.cond1.for.inc6_crit_edge, %entry
-  %add.res = phi i32 [ %add.lcssa, %for.cond1.for.inc6_crit_edge ], [ 0, %entry ]
-  store i32 %add.res, i32* @Y
-  ret void
-}